mirror of https://github.com/illiumst/marl-factory-grid.git (synced 2025-10-31 04:37:25 +01:00)
	new observation properties for testing of technical limitations
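For orientation, a minimal usage sketch of the configuration style this commit introduces. Class and parameter names are taken from the diff below; the concrete values are illustrative assumptions, not part of the commit:

from environments.factory.factory_dirt import DirtFactory, DirtProperties
from environments.utility_classes import MovementProperties, ObservationProperties
from environments.utility_classes import AgentRenderOptions

# Observation behaviour now lives in one ObservationProperties object
# instead of loose BaseFactory kwargs (pomdp_r, cast_shadows, frames_to_stack, ...).
obs_props = ObservationProperties(render_agents=AgentRenderOptions.COMBINED,
                                  omit_agent_self=True, pomdp_r=2)
move_props = MovementProperties(allow_square_movement=True,
                                allow_diagonal_movement=False, allow_no_op=False)

# Plain dicts are accepted too and coerced to the NamedTuple types.
factory = DirtFactory(n_agents=2, level_name='rooms', max_steps=400,
                      obs_prop=obs_props, mv_prop=move_props,
                      dirt_prop=DirtProperties(), parse_doors=True)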
		| @@ -16,7 +16,8 @@ from environments.helpers import Constants as c, Constants | ||||
| from environments import helpers as h | ||||
| from environments.factory.base.objects import Agent, Tile, Action | ||||
| from environments.factory.base.registers import Actions, Entities, Agents, Doors, FloorTiles, WallTiles, PlaceHolders | ||||
| from environments.utility_classes import MovementProperties | ||||
| from environments.utility_classes import MovementProperties, ObservationProperties | ||||
| from environments.utility_classes import AgentRenderOptions as a_obs | ||||
|  | ||||
| import simplejson | ||||
|  | ||||
| @@ -33,7 +34,7 @@ class BaseFactory(gym.Env): | ||||
|  | ||||
|     @property | ||||
|     def observation_space(self): | ||||
|         if r := self.pomdp_r: | ||||
|         if r := self._pomdp_r: | ||||
|             z = self._obs_cube.shape[0] | ||||
|             xy = r*2 + 1 | ||||
|             level_shape = (z, xy, xy) | ||||
| @@ -44,24 +45,32 @@ class BaseFactory(gym.Env): | ||||
|  | ||||
|     @property | ||||
|     def pomdp_diameter(self): | ||||
|         return self.pomdp_r * 2 + 1 | ||||
|         return self._pomdp_r * 2 + 1 | ||||
|  | ||||
|     @property | ||||
|     def movement_actions(self): | ||||
|         return self._actions.movement_actions | ||||
|  | ||||
|     def __enter__(self): | ||||
|         return self if self.frames_to_stack == 0 else FrameStack(self, self.frames_to_stack) | ||||
|         return self if self.obs_prop.frames_to_stack == 0 else \ | ||||
|             FrameStack(self, self.obs_prop.frames_to_stack) | ||||
|  | ||||
|     def __exit__(self, exc_type, exc_val, exc_tb): | ||||
|         self.close() | ||||
|  | ||||
|     def __init__(self, level_name='simple', n_agents=1, max_steps=int(5e2), pomdp_r: Union[None, int] = 0, | ||||
|                  movement_properties: MovementProperties = MovementProperties(), parse_doors=False, | ||||
|                  combin_agent_obs: bool = False, frames_to_stack=0, record_episodes=False, | ||||
|                  omit_agent_in_obs=False, done_at_collision=False, cast_shadows=True, additional_agent_placeholder=None, | ||||
|     def __init__(self, level_name='simple', n_agents=1, max_steps=int(5e2), | ||||
|                  mv_prop: MovementProperties = MovementProperties(), | ||||
|                  obs_prop: ObservationProperties = ObservationProperties(), | ||||
|                  parse_doors=False, record_episodes=False, done_at_collision=False, | ||||
|                  verbose=False, doors_have_area=True, env_seed=time.time_ns(), **kwargs): | ||||
|         assert frames_to_stack != 1 and frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1." | ||||
|  | ||||
|         if isinstance(mv_prop, dict): | ||||
|             mv_prop = MovementProperties(**mv_prop) | ||||
|         if isinstance(obs_prop, dict): | ||||
|             obs_prop = ObservationProperties(**obs_prop) | ||||
|  | ||||
|         assert obs_prop.frames_to_stack != 1 and \ | ||||
|                obs_prop.frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1." | ||||
|         if kwargs: | ||||
|             print(f'Following kwargs were passed, but ignored: {kwargs}') | ||||
|  | ||||
| @@ -69,24 +78,18 @@ class BaseFactory(gym.Env): | ||||
|         self.env_seed = env_seed | ||||
|         self.seed(env_seed) | ||||
|         self._base_rng = np.random.default_rng(self.env_seed) | ||||
|         if isinstance(movement_properties, dict): | ||||
|             movement_properties = MovementProperties(**movement_properties) | ||||
|         self.movement_properties = movement_properties | ||||
|         self.mv_prop = mv_prop | ||||
|         self.obs_prop = obs_prop | ||||
|         self.level_name = level_name | ||||
|         self._level_shape = None | ||||
|         self.verbose = verbose | ||||
|         self.additional_agent_placeholder = additional_agent_placeholder | ||||
|         self._renderer = None  # expensive - don't use it when not required! | ||||
|         self._entities = Entities() | ||||
|  | ||||
|         self.n_agents = n_agents | ||||
|  | ||||
|         self.max_steps = max_steps | ||||
|         self.pomdp_r = pomdp_r | ||||
|         self.combin_agent_obs = combin_agent_obs | ||||
|         self.omit_agent_in_obs = omit_agent_in_obs | ||||
|         self.cast_shadows = cast_shadows | ||||
|         self.frames_to_stack = frames_to_stack | ||||
|         self._pomdp_r = self.obs_prop.pomdp_r | ||||
|  | ||||
|         self.done_at_collision = done_at_collision | ||||
|         self.record_episodes = record_episodes | ||||
| @@ -130,24 +133,32 @@ class BaseFactory(gym.Env): | ||||
|             parsed_doors = h.one_hot_level(parsed_level, c.DOOR) | ||||
|             if np.any(parsed_doors): | ||||
|                 door_tiles = [floor.by_pos(pos) for pos in np.argwhere(parsed_doors == c.OCCUPIED_CELL.value)] | ||||
|                 doors = Doors.from_tiles(door_tiles, self._level_shape, context=floor) | ||||
|                 doors = Doors.from_tiles(door_tiles, self._level_shape, | ||||
|                                          entity_kwargs=dict(context=floor) | ||||
|                                          ) | ||||
|                 entities.update({c.DOORS: doors}) | ||||
|  | ||||
|         # Actions | ||||
|         self._actions = Actions(self.movement_properties, can_use_doors=self.parse_doors) | ||||
|         self._actions = Actions(self.mv_prop, can_use_doors=self.parse_doors) | ||||
|         if additional_actions := self.additional_actions: | ||||
|             self._actions.register_additional_items(additional_actions) | ||||
|  | ||||
|         # Agents | ||||
|         agents = Agents.from_tiles(floor.empty_tiles[:self.n_agents], self._level_shape, | ||||
|                                    individual_slices=not self.combin_agent_obs) | ||||
|                                    individual_slices=self.obs_prop.render_agents == a_obs.SEPERATE, | ||||
|                                    hide_from_obs_builder=self.obs_prop.render_agents == a_obs.LEVEL, | ||||
|                                    is_observable=self.obs_prop.render_agents != a_obs.NOT | ||||
|                                    ) | ||||
|         entities.update({c.AGENT: agents}) | ||||
|  | ||||
|         if self.additional_agent_placeholder is not None: | ||||
|         if self.obs_prop.additional_agent_placeholder is not None: | ||||
|             # TODO: Make this accept Lists for multiple placeholders | ||||
|  | ||||
|             # Empty observations, filled with one of [0, 1, N(0, 1)] | ||||
|             placeholder = PlaceHolders.from_tiles([self._NO_POS_TILE], self._level_shape, | ||||
|                                                   fill_value=self.additional_agent_placeholder) | ||||
|                                                   entity_kwargs=dict( | ||||
|                                                       fill_value=self.obs_prop.additional_agent_placeholder) | ||||
|                                                   ) | ||||
|  | ||||
|             entities.update({c.AGENT_PLACEHOLDER: placeholder}) | ||||
|  | ||||
| @@ -163,24 +174,11 @@ class BaseFactory(gym.Env): | ||||
|         return self._entities | ||||
|  | ||||
|     def _init_obs_cube(self): | ||||
|         arrays = self._entities.observable_arrays | ||||
|         arrays = self._entities.obs_arrays | ||||
|  | ||||
|         # FIXME: Move logic to Register | ||||
|         if self.omit_agent_in_obs and self.n_agents == 1: | ||||
|             del arrays[c.AGENT] | ||||
|         # This does not seem to be necessary, because this case is already handled by the Agent Register Class | ||||
|         # elif self.omit_agent_in_obs: | ||||
|         #    arrays[c.AGENT] = np.delete(arrays[c.AGENT], 0, axis=0) | ||||
|         obs_cube_z = sum([a.shape[0] if not self[key].is_per_agent else 1 for key, a in arrays.items()]) | ||||
|         self._obs_cube = np.zeros((obs_cube_z, *self._level_shape), dtype=np.float32) | ||||
|  | ||||
|         # Optionally Pad this obs cube for pomdp cases | ||||
|         if r := self.pomdp_r: | ||||
|             x, y = self._level_shape | ||||
|             # was c.SHADOW | ||||
|             self._padded_obs_cube = np.full((obs_cube_z, x + r*2, y + r*2), c.SHADOWED_CELL.value, dtype=np.float32) | ||||
|             self._padded_obs_cube[:, r:r+x, r:r+y] = self._obs_cube | ||||
|  | ||||
|     def reset(self) -> (np.ndarray, int, bool, dict): | ||||
|         _ = self._base_init_env() | ||||
|         self._init_obs_cube() | ||||
| @@ -198,7 +196,6 @@ class BaseFactory(gym.Env): | ||||
|  | ||||
|         assert isinstance(actions, Iterable), f'"actions" has to be in [{int, list}]' | ||||
|         self._steps += 1 | ||||
|         done = False | ||||
|  | ||||
|         # Pre step Hook for later use | ||||
|         self.hook_pre_step() | ||||
| @@ -285,17 +282,22 @@ class BaseFactory(gym.Env): | ||||
|     def _build_per_agent_obs(self, agent: Agent, state_array_dict) -> np.ndarray: | ||||
|         agent_pos_is_omitted = False | ||||
|         agent_omit_idx = None | ||||
|         if self.omit_agent_in_obs and self.n_agents == 1: | ||||
|  | ||||
|         if self.obs_prop.omit_agent_self and self.n_agents == 1: | ||||
|             # There is only a single agent and we want to omit the agent obs, so just remove the array. | ||||
|             del state_array_dict[c.AGENT] | ||||
|         elif self.omit_agent_in_obs and self.combin_agent_obs and self.n_agents > 1: | ||||
|             # del state_array_dict[c.AGENT] | ||||
|             # Not needed any more. | ||||
|             pass | ||||
|         elif self.obs_prop.omit_agent_self and self.obs_prop.render_agents in [a_obs.COMBINED, ] and self.n_agents > 1: | ||||
|             state_array_dict[c.AGENT][0, agent.x, agent.y] -= agent.encoding | ||||
|             agent_pos_is_omitted = True | ||||
|         elif self.omit_agent_in_obs and not self.combin_agent_obs and self.n_agents > 1: | ||||
|         elif self.obs_prop.omit_agent_self and self.obs_prop.render_agents == a_obs.SEPERATE and self.n_agents > 1: | ||||
|             agent_omit_idx = next((i for i, a in enumerate(self[c.AGENT]) if a == agent)) | ||||
|  | ||||
|         running_idx, shadowing_idxs, can_be_shadowed_idxs = 0, [], [] | ||||
|         self._obs_cube[:] = 0 | ||||
|  | ||||
|         # FIXME: Refactor this! Make a globally built observation, then add individual per-agent obs | ||||
|         for key, array in state_array_dict.items(): | ||||
|             # Flush state array object representation to obs cube | ||||
|             if not self[key].hide_from_obs_builder: | ||||
| @@ -309,12 +311,15 @@ class BaseFactory(gym.Env): | ||||
|                         for array_idx in range(array.shape[0]): | ||||
|                             self._obs_cube[running_idx: running_idx+z] = array[[x for x in range(array.shape[0]) | ||||
|                                                                                 if x != agent_omit_idx]] | ||||
|                     elif key == c.AGENT and self.omit_agent_in_obs and self.combin_agent_obs: | ||||
|                     # Agent OBS are combined | ||||
|                     elif key == c.AGENT and self.obs_prop.omit_agent_self \ | ||||
|                             and self.obs_prop.render_agents == a_obs.COMBINED: | ||||
|                         z = 1 | ||||
|                         self._obs_cube[running_idx: running_idx + z] = array | ||||
|                     # Each Agent is rendered on a separate array slice | ||||
|                     else: | ||||
|                         z = array.shape[0] | ||||
|                         self._obs_cube[running_idx: running_idx+z] = array | ||||
|                         self._obs_cube[running_idx: running_idx + z] = array | ||||
|                 # Define which OBS slices cast a shadow | ||||
|                 if self[key].is_blocking_light: | ||||
|                     for i in range(z): | ||||
| @@ -328,19 +333,14 @@ class BaseFactory(gym.Env): | ||||
|         if agent_pos_is_omitted: | ||||
|             state_array_dict[c.AGENT][0, agent.x, agent.y] += agent.encoding | ||||
|  | ||||
|         if r := self.pomdp_r: | ||||
|             self._padded_obs_cube[:] = c.SHADOWED_CELL.value   # Was c.SHADOW | ||||
|             # self._padded_obs_cube[0] = c.OCCUPIED_CELL.value | ||||
|             x, y = self._level_shape | ||||
|             self._padded_obs_cube[:, r:r + x, r:r + y] = self._obs_cube | ||||
|             global_x, global_y = map(sum, zip(agent.pos, (r, r))) | ||||
|             x0, x1 = max(0, global_x - self.pomdp_r), global_x + self.pomdp_r + 1 | ||||
|             y0, y1 = max(0, global_y - self.pomdp_r), global_y + self.pomdp_r + 1 | ||||
|             obs = self._padded_obs_cube[:, x0:x1, y0:y1] | ||||
|         if self._pomdp_r: | ||||
|             obs = self._do_pomdp_obs_cutout(agent, self._obs_cube) | ||||
|         else: | ||||
|             obs = self._obs_cube | ||||
|  | ||||
|         if self.cast_shadows: | ||||
|         obs = obs.copy() | ||||
|  | ||||
|         if self.obs_prop.cast_shadows: | ||||
|             obs_block_light = [obs[idx] != c.OCCUPIED_CELL.value for idx in shadowing_idxs] | ||||
|             door_shadowing = False | ||||
|             if self.parse_doors: | ||||
| @@ -350,8 +350,8 @@ class BaseFactory(gym.Env): | ||||
|                             for group in door.connectivity_subgroups: | ||||
|                                 if agent.last_pos not in group: | ||||
|                                     door_shadowing = True | ||||
|                                     if self.pomdp_r: | ||||
|                                         blocking = [tuple(np.subtract(x, agent.pos) + (self.pomdp_r, self.pomdp_r)) | ||||
|                                     if self._pomdp_r: | ||||
|                                         blocking = [tuple(np.subtract(x, agent.pos) + (self._pomdp_r, self._pomdp_r)) | ||||
|                                                     for x in group] | ||||
|                                         xs, ys = zip(*blocking) | ||||
|                                     else: | ||||
| @@ -361,8 +361,8 @@ class BaseFactory(gym.Env): | ||||
|                                     obs_block_light[0][xs, ys] = False | ||||
|  | ||||
|             light_block_map = Map((np.prod(obs_block_light, axis=0) != True).astype(int)) | ||||
|             if self.pomdp_r: | ||||
|                 light_block_map = light_block_map.do_fov(self.pomdp_r, self.pomdp_r, max(self._level_shape)) | ||||
|             if self._pomdp_r: | ||||
|                 light_block_map = light_block_map.do_fov(self._pomdp_r, self._pomdp_r, max(self._level_shape)) | ||||
|             else: | ||||
|                 light_block_map = light_block_map.do_fov(*agent.pos, max(self._level_shape)) | ||||
|             if door_shadowing: | ||||
| @@ -374,6 +374,20 @@ class BaseFactory(gym.Env): | ||||
|         else: | ||||
|             pass | ||||
|  | ||||
|         # Agents observe other agents as walls | ||||
|         if self.obs_prop.render_agents == a_obs.LEVEL and self.n_agents > 1: | ||||
|             other_agent_obs = self[c.AGENT].as_array() | ||||
|             if self.obs_prop.omit_agent_self: | ||||
|                 other_agent_obs[:, agent.x, agent.y] -= agent.encoding | ||||
|  | ||||
|             if self.obs_prop.pomdp_r: | ||||
|                 oobs = self._do_pomdp_obs_cutout(agent, other_agent_obs)[0] | ||||
|                 mask = (oobs != c.SHADOWED_CELL.value).astype(int) | ||||
|                 obs[0] += oobs * mask | ||||
|  | ||||
|             else: | ||||
|                 obs[0] += other_agent_obs | ||||
|  | ||||
|         # Additional Observation: | ||||
|         for additional_obs in self.additional_obs_build(): | ||||
|             obs[running_idx:running_idx+additional_obs.shape[0]] = additional_obs | ||||
| @@ -384,6 +398,37 @@ class BaseFactory(gym.Env): | ||||
|  | ||||
|         return obs | ||||
|  | ||||
|     def _do_pomdp_obs_cutout(self, agent, obs_to_be_padded): | ||||
|         assert obs_to_be_padded.ndim == 3 | ||||
|         r, d = self._pomdp_r, self.pomdp_diameter | ||||
|         x0, x1 = max(0, agent.x - r), min(agent.x + r + 1, self._level_shape[0]) | ||||
|         y0, y1 = max(0, agent.y - r), min(agent.y + r + 1, self._level_shape[1]) | ||||
|         # Other Agent Obs = oobs | ||||
|         oobs = obs_to_be_padded[:, x0:x1, y0:y1] | ||||
|         if oobs.shape[1:] != (d,) * 2: | ||||
|             if xd := oobs.shape[1] % d: | ||||
|                 if agent.x > r: | ||||
|                     x0_pad = 0 | ||||
|                     x1_pad = (d - xd) | ||||
|                 else: | ||||
|                     x0_pad = r - agent.x | ||||
|                     x1_pad = 0 | ||||
|             else: | ||||
|                 x0_pad, x1_pad = 0, 0 | ||||
|  | ||||
|             if yd := oobs.shape[2] % d: | ||||
|                 if agent.y > r: | ||||
|                     y0_pad = 0 | ||||
|                     y1_pad = (d - yd) | ||||
|                 else: | ||||
|                     y0_pad = r - agent.y | ||||
|                     y1_pad = 0 | ||||
|             else: | ||||
|                 y0_pad, y1_pad = 0, 0 | ||||
|  | ||||
|             oobs = np.pad(oobs, ((0, 0), (x0_pad, x1_pad), (y0_pad, y1_pad)), 'constant') | ||||
|         return oobs | ||||
|  | ||||
|     def get_all_tiles_with_collisions(self) -> List[Tile]: | ||||
|         tiles_with_collisions = list() | ||||
|         for tile in self[c.FLOOR]: | ||||
| @@ -449,7 +494,7 @@ class BaseFactory(gym.Env): | ||||
|             if self._actions.is_moving_action(agent.temp_action): | ||||
|                 if agent.temp_valid: | ||||
|                     # info_dict.update(movement=1) | ||||
|                     # reward += 0.00 | ||||
|                     reward -= 0.001 | ||||
|                     pass | ||||
|                 else: | ||||
|                     reward -= 0.01 | ||||
| @@ -501,7 +546,7 @@ class BaseFactory(gym.Env): | ||||
|     def render(self, mode='human'): | ||||
|         if not self._renderer:  # lazy init | ||||
|             height, width = self._obs_cube.shape[1:] | ||||
|             self._renderer = Renderer(width, height, view_radius=self.pomdp_r, fps=5) | ||||
|             self._renderer = Renderer(width, height, view_radius=self._pomdp_r, fps=5) | ||||
|  | ||||
|         walls = [RenderEntity('wall', wall.pos) for wall in self[c.WALLS]] | ||||
|  | ||||
|   | ||||
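The new _do_pomdp_obs_cutout above replaces the padded-obs-cube bookkeeping: it clips an egocentric window of diameter 2r+1 around the agent and pads whichever sides fall outside the level. A standalone sketch of that idea, with simplified padding arithmetic and zero fill assumed (np.pad's default):

import numpy as np

def pomdp_cutout(obs: np.ndarray, x: int, y: int, r: int) -> np.ndarray:
    # obs has shape (z, X, Y); the result always has shape (z, 2r+1, 2r+1).
    x0, x1 = max(0, x - r), min(x + r + 1, obs.shape[1])
    y0, y1 = max(0, y - r), min(y + r + 1, obs.shape[2])
    window = obs[:, x0:x1, y0:y1]
    pad = ((0, 0),
           (max(0, r - x), max(0, x + r + 1 - obs.shape[1])),  # rows clipped at the level border
           (max(0, r - y), max(0, y + r + 1 - obs.shape[2])))  # columns clipped at the level border
    return np.pad(window, pad, 'constant')

obs = np.arange(2 * 5 * 5, dtype=float).reshape(2, 5, 5)
assert pomdp_cutout(obs, 0, 0, 2).shape == (2, 5, 5)  # agent in the corner, view zero-padded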
| @@ -1,3 +1,4 @@ | ||||
| import numbers | ||||
| import random | ||||
| from abc import ABC | ||||
| from typing import List, Union, Dict | ||||
| @@ -91,21 +92,18 @@ class EntityObjectRegister(ObjectRegister, ABC): | ||||
|         raise NotImplementedError | ||||
|  | ||||
|     @classmethod | ||||
|     def from_tiles(cls, tiles, *args, **kwargs): | ||||
|     def from_tiles(cls, tiles, *args, entity_kwargs=None, **kwargs): | ||||
|         # objects_name = cls._accepted_objects.__name__ | ||||
|         register_obj = cls(*args, **kwargs) | ||||
|         try: | ||||
|             del kwargs['individual_slices'] | ||||
|         except KeyError: | ||||
|             pass | ||||
|         entities = [cls._accepted_objects(tile, str_ident=i, **kwargs) | ||||
|         entities = [cls._accepted_objects(tile, str_ident=i, **entity_kwargs if entity_kwargs is not None else {}) | ||||
|                     for i, tile in enumerate(tiles)] | ||||
|         register_obj.register_additional_items(entities) | ||||
|         return register_obj | ||||
|  | ||||
|     @classmethod | ||||
|     def from_argwhere_coordinates(cls, positions: [(int, int)], tiles, *args, **kwargs): | ||||
|         return cls.from_tiles([tiles.by_pos(position) for position in positions], *args, **kwargs) | ||||
|     def from_argwhere_coordinates(cls, positions: [(int, int)], tiles, *args, entity_kwargs=None, **kwargs, ): | ||||
|         return cls.from_tiles([tiles.by_pos(position) for position in positions], *args, entity_kwargs=entity_kwargs, | ||||
|                               **kwargs) | ||||
|  | ||||
|     @property | ||||
|     def positions(self): | ||||
| @@ -166,10 +164,15 @@ class PlaceHolders(MovingEntityObjectRegister): | ||||
|  | ||||
|     # noinspection DuplicatedCode | ||||
|     def as_array(self): | ||||
|         if isinstance(self.fill_value, int): | ||||
|         if isinstance(self.fill_value, numbers.Number): | ||||
|             self._array[:] = self.fill_value | ||||
|         elif self.fill_value == "normal": | ||||
|             self._array = np.random.normal(size=self._array.shape) | ||||
|         elif isinstance(self.fill_value, str): | ||||
|             if self.fill_value.lower() in ['normal', 'n']: | ||||
|                 self._array = np.random.normal(size=self._array.shape) | ||||
|             else: | ||||
|                 raise ValueError('Choose one of: ["normal", "N"]') | ||||
|         else: | ||||
|             raise TypeError('An object of type "str" or "number" is required here.') | ||||
|  | ||||
|         if self.individual_slices: | ||||
|             return self._array | ||||
| @@ -183,10 +186,12 @@ class Entities(Register): | ||||
|  | ||||
|     @property | ||||
|     def observable_arrays(self): | ||||
|         # FIXME: Find a better name | ||||
|         return {key: val.as_array() for key, val in self.items() if val.is_observable} | ||||
|  | ||||
|     @property | ||||
|     def obs_arrays(self): | ||||
|         # FIXME: Find a better name | ||||
|         return {key: val.as_array() for key, val in self.items() if val.is_observable and not val.hide_from_obs_builder} | ||||
|  | ||||
|     @property | ||||
| @@ -208,6 +213,10 @@ class Entities(Register): | ||||
|     def register_additional_items(self, others: Dict): | ||||
|         return self.register_item(others) | ||||
|  | ||||
|     def by_pos(self, pos: (int, int)): | ||||
|         found_entities = [y for y in (x.by_pos(pos) for x in self.values() if hasattr(x, 'by_pos')) if y is not None] | ||||
|         return found_entities | ||||
|  | ||||
|  | ||||
| class WallTiles(EntityObjectRegister): | ||||
|     _accepted_objects = Wall | ||||
| @@ -289,6 +298,10 @@ class Agents(MovingEntityObjectRegister): | ||||
|  | ||||
|     _accepted_objects = Agent | ||||
|  | ||||
|     def __init__(self, *args, hide_from_obs_builder=False, **kwargs): | ||||
|         super().__init__(*args, **kwargs) | ||||
|         self.hide_from_obs_builder = hide_from_obs_builder | ||||
|  | ||||
|     # noinspection DuplicatedCode | ||||
|     def as_array(self): | ||||
|         self._array[:] = c.FREE_CELL.value | ||||
|   | ||||
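With the register changes above, PlaceHolders.as_array now accepts any number as a constant fill, or the strings 'normal'/'n' for Gaussian noise. A standalone sketch of that dispatch, assuming a (1, x, y) placeholder slice:

import numbers
import numpy as np

def placeholder_array(fill_value, shape=(1, 5, 5)) -> np.ndarray:
    if isinstance(fill_value, numbers.Number):
        return np.full(shape, fill_value, dtype=np.float32)  # constant fill, e.g. 0 or 1
    if isinstance(fill_value, str) and fill_value.lower() in ('normal', 'n'):
        return np.random.normal(size=shape)                  # fresh N(0, 1) noise per call
    raise TypeError('fill_value must be a number or one of ["normal", "N"]')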
| @@ -14,7 +14,7 @@ from environments.factory.base.registers import Entities, MovingEntityObjectRegi | ||||
|  | ||||
| from environments.factory.renderer import RenderEntity | ||||
| from environments.logging.recorder import RecorderCallback | ||||
|  | ||||
| from environments.utility_classes import ObservationProperties | ||||
|  | ||||
| CLEAN_UP_ACTION = h.EnvActions.CLEAN_UP | ||||
|  | ||||
| @@ -65,9 +65,9 @@ class DirtRegister(MovingEntityObjectRegister): | ||||
|     def as_array(self): | ||||
|         if self._array is not None: | ||||
|             self._array[:] = c.FREE_CELL.value | ||||
|             for key, dirt in self.items(): | ||||
|             for dirt in self.values(): | ||||
|                 if dirt.amount == 0: | ||||
|                     self.delete_item(key) | ||||
|                     self.delete_item(dirt) | ||||
|                 self._array[0, dirt.x, dirt.y] = dirt.amount | ||||
|         else: | ||||
|             self._array = np.zeros((1, *self._level_shape)) | ||||
| @@ -124,21 +124,21 @@ class DirtFactory(BaseFactory): | ||||
|     @property | ||||
|     def additional_actions(self) -> Union[Action, List[Action]]: | ||||
|         super_actions = super().additional_actions | ||||
|         if self.dirt_properties.agent_can_interact: | ||||
|         if self.dirt_prop.agent_can_interact: | ||||
|             super_actions.append(Action(enum_ident=CLEAN_UP_ACTION)) | ||||
|         return super_actions | ||||
|  | ||||
|     @property | ||||
|     def additional_entities(self) -> Dict[(Enum, Entities)]: | ||||
|         super_entities = super().additional_entities | ||||
|         dirt_register = DirtRegister(self.dirt_properties, self._level_shape) | ||||
|         dirt_register = DirtRegister(self.dirt_prop, self._level_shape) | ||||
|         super_entities.update(({c.DIRT: dirt_register})) | ||||
|         return super_entities | ||||
|  | ||||
|     def __init__(self, *args, dirt_properties: DirtProperties = DirtProperties(), env_seed=time.time_ns(), **kwargs): | ||||
|         if isinstance(dirt_properties, dict): | ||||
|             dirt_properties = DirtProperties(**dirt_properties) | ||||
|         self.dirt_properties = dirt_properties | ||||
|     def __init__(self, *args, dirt_prop: DirtProperties = DirtProperties(), env_seed=time.time_ns(), **kwargs): | ||||
|         if isinstance(dirt_prop, dict): | ||||
|             dirt_prop = DirtProperties(**dirt_prop) | ||||
|         self.dirt_prop = dirt_prop | ||||
|         self._dirt_rng = np.random.default_rng(env_seed) | ||||
|         self._dirt: DirtRegister | ||||
|         kwargs.update(env_seed=env_seed) | ||||
| @@ -153,7 +153,7 @@ class DirtFactory(BaseFactory): | ||||
|  | ||||
|     def clean_up(self, agent: Agent) -> c: | ||||
|         if dirt := self[c.DIRT].by_pos(agent.pos): | ||||
|             new_dirt_amount = dirt.amount - self.dirt_properties.clean_amount | ||||
|             new_dirt_amount = dirt.amount - self.dirt_prop.clean_amount | ||||
|  | ||||
|             if new_dirt_amount <= 0: | ||||
|                 self[c.DIRT].delete_item(dirt) | ||||
| @@ -170,16 +170,16 @@ class DirtFactory(BaseFactory): | ||||
|                          ] | ||||
|         self._dirt_rng.shuffle(free_for_dirt) | ||||
|         if initial_spawn: | ||||
|             var = self.dirt_properties.initial_dirt_spawn_r_var | ||||
|             new_spawn = self.dirt_properties.initial_dirt_ratio + dirt_rng.uniform(-var, var) | ||||
|             var = self.dirt_prop.initial_dirt_spawn_r_var | ||||
|             new_spawn = self.dirt_prop.initial_dirt_ratio + dirt_rng.uniform(-var, var) | ||||
|         else: | ||||
|             new_spawn = dirt_rng.uniform(0, self.dirt_properties.max_spawn_ratio) | ||||
|             new_spawn = dirt_rng.uniform(0, self.dirt_prop.max_spawn_ratio) | ||||
|         n_dirt_tiles = max(0, int(new_spawn * len(free_for_dirt))) | ||||
|         self[c.DIRT].spawn_dirt(free_for_dirt[:n_dirt_tiles]) | ||||
|  | ||||
|     def do_additional_step(self) -> dict: | ||||
|         info_dict = super().do_additional_step() | ||||
|         if smear_amount := self.dirt_properties.dirt_smear_amount: | ||||
|         if smear_amount := self.dirt_prop.dirt_smear_amount: | ||||
|             for agent in self[c.AGENT]: | ||||
|                 if agent.temp_valid and agent.last_pos != c.NO_POS: | ||||
|                     if self._actions.is_moving_action(agent.temp_action): | ||||
| @@ -196,7 +196,7 @@ class DirtFactory(BaseFactory): | ||||
|             pass  # No Dirt Spawn | ||||
|         elif not self._next_dirt_spawn: | ||||
|             self.trigger_dirt_spawn() | ||||
|             self._next_dirt_spawn = self.dirt_properties.spawn_frequency | ||||
|             self._next_dirt_spawn = self.dirt_prop.spawn_frequency | ||||
|         else: | ||||
|             self._next_dirt_spawn -= 1 | ||||
|         return info_dict | ||||
| @@ -205,7 +205,7 @@ class DirtFactory(BaseFactory): | ||||
|         valid = super().do_additional_actions(agent, action) | ||||
|         if valid is None: | ||||
|             if action == CLEAN_UP_ACTION: | ||||
|                 if self.dirt_properties.agent_can_interact: | ||||
|                 if self.dirt_prop.agent_can_interact: | ||||
|                     valid = self.clean_up(agent) | ||||
|                     return valid | ||||
|                 else: | ||||
| @@ -218,11 +218,11 @@ class DirtFactory(BaseFactory): | ||||
|     def do_additional_reset(self) -> None: | ||||
|         super().do_additional_reset() | ||||
|         self.trigger_dirt_spawn(initial_spawn=True) | ||||
|         self._next_dirt_spawn = self.dirt_properties.spawn_frequency if self.dirt_properties.spawn_frequency else -1 | ||||
|         self._next_dirt_spawn = self.dirt_prop.spawn_frequency if self.dirt_prop.spawn_frequency else -1 | ||||
|  | ||||
|     def check_additional_done(self): | ||||
|         super_done = super().check_additional_done() | ||||
|         done = self.dirt_properties.done_when_clean and (len(self[c.DIRT]) == 0) | ||||
|         done = self.dirt_prop.done_when_clean and (len(self[c.DIRT]) == 0) | ||||
|         return super_done or done | ||||
|  | ||||
|     def calculate_additional_reward(self, agent: Agent) -> (int, dict): | ||||
| @@ -256,41 +256,40 @@ class DirtFactory(BaseFactory): | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     from environments.utility_classes import AgentRenderOptions as ARO | ||||
|     render = True | ||||
|  | ||||
|     dirt_props = DirtProperties(1, 0.05, 0.1, 3, 1, 20, 0.0) | ||||
|     dirt_props = DirtProperties(1, 0.05, 0.1, 3, 1, 20, 0) | ||||
|  | ||||
|     obs_props = ObservationProperties(render_agents=ARO.COMBINED, omit_agent_self=True, pomdp_r=2, additional_agent_placeholder=None) | ||||
|  | ||||
|     move_props = {'allow_square_movement': True, | ||||
|                   'allow_diagonal_movement': False, | ||||
|                   'allow_no_op': False} #MovementProperties(True, True, False) | ||||
|                   'allow_no_op': False} | ||||
|  | ||||
|     with RecorderCallback(filepath=Path('debug_out') / f'recorder_xxxx.json', occupation_map=False, | ||||
|                           trajectory_map=False) as recorder: | ||||
|     factory = DirtFactory(n_agents=3, done_at_collision=False, | ||||
|                           level_name='rooms', max_steps=400, | ||||
|                           obs_prop=obs_props, parse_doors=True, | ||||
|                           record_episodes=True, verbose=True, | ||||
|                           mv_prop=move_props, dirt_prop=dirt_props | ||||
|                           ) | ||||
|  | ||||
|         factory = DirtFactory(n_agents=1, done_at_collision=False, frames_to_stack=0, | ||||
|                               level_name='rooms', max_steps=400, combin_agent_obs=True, | ||||
|                               omit_agent_in_obs=True, parse_doors=True, pomdp_r=3, | ||||
|                               record_episodes=True, verbose=True, cast_shadows=True, | ||||
|                               movement_properties=move_props, dirt_properties=dirt_props | ||||
|                               ) | ||||
|     # noinspection DuplicatedCode | ||||
|     n_actions = factory.action_space.n - 1 | ||||
|     _ = factory.observation_space | ||||
|  | ||||
|         # noinspection DuplicatedCode | ||||
|         n_actions = factory.action_space.n - 1 | ||||
|         _ = factory.observation_space | ||||
|  | ||||
|         for epoch in range(4): | ||||
|             random_actions = [[random.randint(0, n_actions) for _ | ||||
|                                in range(factory.n_agents)] for _ | ||||
|                               in range(factory.max_steps+1)] | ||||
|             env_state = factory.reset() | ||||
|             r = 0 | ||||
|             for agent_i_action in random_actions: | ||||
|                 env_state, step_r, done_bool, info_obj = factory.step(agent_i_action) | ||||
|                 #recorder.read_info(0, info_obj) | ||||
|                 r += step_r | ||||
|                 if render: | ||||
|                     factory.render() | ||||
|                 if done_bool: | ||||
|                 #    recorder.read_done(0, done_bool) | ||||
|                     break | ||||
|             print(f'Factory run {epoch} done, reward is:\n    {r}') | ||||
|     pass | ||||
|     for epoch in range(4): | ||||
|         random_actions = [[random.randint(0, n_actions) for _ | ||||
|                            in range(factory.n_agents)] for _ | ||||
|                           in range(factory.max_steps+1)] | ||||
|         env_state = factory.reset() | ||||
|         r = 0 | ||||
|         for agent_i_action in random_actions: | ||||
|             env_state, step_r, done_bool, info_obj = factory.step(agent_i_action) | ||||
|             r += step_r | ||||
|             if render: | ||||
|                 factory.render() | ||||
|             if done_bool: | ||||
|                 break | ||||
|         print(f'Factory run {epoch} done, reward is:\n    {r}') | ||||
| pass | ||||
|   | ||||
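A side note on the DirtRegister.as_array hunk above: it still deletes fully cleaned dirt entries while iterating over the register. If the register is dict-backed (an assumption here), that pattern fails at runtime, which is why iterating over a snapshot is the usual fix:

# Mutating a dict while iterating over it raises:
dirt = {'dirt#0': 0.0, 'dirt#1': 0.3}
try:
    for key, amount in dirt.items():
        if amount == 0:
            del dirt[key]
except RuntimeError as e:
    print(e)  # dictionary changed size during iteration

# Iterating over a snapshot makes deletion safe mid-loop:
for key in list(dirt):
    if dirt[key] == 0:
        del dirt[key]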
| @@ -3,6 +3,7 @@ from collections import deque, UserList | ||||
| from enum import Enum | ||||
| from typing import List, Union, NamedTuple, Dict | ||||
| import numpy as np | ||||
| import random | ||||
|  | ||||
| from environments.factory.base.base_factory import BaseFactory | ||||
| from environments.helpers import Constants as c | ||||
| @@ -18,13 +19,6 @@ NO_ITEM = 0 | ||||
| ITEM_DROP_OFF = 1 | ||||
|  | ||||
|  | ||||
| def inventory_slice_name(agent_i): | ||||
|     if isinstance(agent_i, int): | ||||
|         return f'{c.INVENTORY.name}_{c.AGENT.value}#{agent_i}' | ||||
|     else: | ||||
|         return f'{c.INVENTORY.name}_{agent_i}' | ||||
|  | ||||
|  | ||||
| class Item(MoveableEntity): | ||||
|  | ||||
|     def __init__(self, *args, **kwargs): | ||||
| @@ -77,7 +71,7 @@ class Inventory(UserList): | ||||
|  | ||||
|     @property | ||||
|     def name(self): | ||||
|         return self.agent.name | ||||
|         return f'{self.__class__.__name__}({self.agent.name})' | ||||
|  | ||||
|     def __init__(self, pomdp_r: int, level_shape: (int, int), agent: Agent, capacity: int): | ||||
|         super(Inventory, self).__init__() | ||||
| @@ -111,7 +105,8 @@ class Inventory(UserList): | ||||
|  | ||||
|     def summarize_state(self, **kwargs): | ||||
|         attr_dict = {key: str(val) for key, val in self.__dict__.items() if not key.startswith('_') and key != 'data'} | ||||
|         attr_dict.update({val.name: val.summarize_state(**kwargs) for val in self}) | ||||
|         attr_dict.update(dict(items={val.name: val.summarize_state(**kwargs) for val in self})) | ||||
|         attr_dict.update(dict(name=self.name)) | ||||
|         return attr_dict | ||||
|  | ||||
|  | ||||
| @@ -149,6 +144,11 @@ class Inventories(ObjectRegister): | ||||
|         except StopIteration: | ||||
|             return None | ||||
|  | ||||
|     def summarize_states(self, n_steps=None): | ||||
|         # as dict with additional nesting | ||||
|         # return dict(items=super(Inventories, self).summarize_states()) | ||||
|         return super(Inventories, self).summarize_states(n_steps=n_steps) | ||||
|  | ||||
|  | ||||
| class DropOffLocation(Entity): | ||||
|  | ||||
| @@ -194,6 +194,9 @@ class DropOffLocations(EntityObjectRegister): | ||||
|                 self._array[0, item.x, item.y] = item.encoding | ||||
|         return self._array | ||||
|  | ||||
|     def __repr__(self): | ||||
|         return super(DropOffLocations, self).__repr__() | ||||
|  | ||||
|  | ||||
| class ItemProperties(NamedTuple): | ||||
|     n_items:                   int  = 5     # How many items are there at the same time | ||||
| @@ -207,13 +210,13 @@ class ItemProperties(NamedTuple): | ||||
| # noinspection PyAttributeOutsideInit, PyAbstractClass | ||||
| class ItemFactory(BaseFactory): | ||||
|     # noinspection PyMissingConstructor | ||||
|     def __init__(self, *args, item_properties: ItemProperties = ItemProperties(),  env_seed=time.time_ns(), **kwargs): | ||||
|         if isinstance(item_properties, dict): | ||||
|             item_properties = ItemProperties(**item_properties) | ||||
|         self.item_properties = item_properties | ||||
|     def __init__(self, *args, item_prop: ItemProperties = ItemProperties(), env_seed=time.time_ns(), **kwargs): | ||||
|         if isinstance(item_prop, dict): | ||||
|             item_prop = ItemProperties(**item_prop) | ||||
|         self.item_prop = item_prop | ||||
|         kwargs.update(env_seed=env_seed) | ||||
|         self._item_rng = np.random.default_rng(env_seed) | ||||
|         assert (item_properties.n_items <= ((1 + kwargs.get('pomdp_r', 0) * 2) ** 2)) or not kwargs.get('pomdp_r', 0) | ||||
|         assert (item_prop.n_items <= ((1 + kwargs.get('_pomdp_r', 0) * 2) ** 2)) or not kwargs.get('_pomdp_r', 0) | ||||
|         super().__init__(*args, **kwargs) | ||||
|  | ||||
|     @property | ||||
| @@ -228,16 +231,19 @@ class ItemFactory(BaseFactory): | ||||
|         # noinspection PyUnresolvedReferences | ||||
|         super_entities = super().additional_entities | ||||
|  | ||||
|         empty_tiles = self[c.FLOOR].empty_tiles[:self.item_properties.n_drop_off_locations] | ||||
|         drop_offs = DropOffLocations.from_tiles(empty_tiles, self._level_shape, | ||||
|                                                 storage_size_until_full=self.item_properties.max_dropoff_storage_size) | ||||
|         empty_tiles = self[c.FLOOR].empty_tiles[:self.item_prop.n_drop_off_locations] | ||||
|         drop_offs = DropOffLocations.from_tiles( | ||||
|             empty_tiles, self._level_shape, | ||||
|             entity_kwargs=dict( | ||||
|                 storage_size_until_full=self.item_prop.max_dropoff_storage_size) | ||||
|         ) | ||||
|         item_register = ItemRegister(self._level_shape) | ||||
|         empty_tiles = self[c.FLOOR].empty_tiles[:self.item_properties.n_items] | ||||
|         empty_tiles = self[c.FLOOR].empty_tiles[:self.item_prop.n_items] | ||||
|         item_register.spawn_items(empty_tiles) | ||||
|  | ||||
|         inventories = Inventories(self._level_shape if not self.pomdp_r else ((self.pomdp_diameter,) * 2)) | ||||
|         inventories.spawn_inventories(self[c.AGENT], self.pomdp_r, | ||||
|                                       self.item_properties.max_agent_inventory_capacity) | ||||
|         inventories = Inventories(self._level_shape if not self._pomdp_r else ((self.pomdp_diameter,) * 2)) | ||||
|         inventories.spawn_inventories(self[c.AGENT], self._pomdp_r, | ||||
|                                       self.item_prop.max_agent_inventory_capacity) | ||||
|  | ||||
|         super_entities.update({c.DROP_OFF: drop_offs, c.ITEM: item_register, c.INVENTORY: inventories}) | ||||
|         return super_entities | ||||
| @@ -270,7 +276,7 @@ class ItemFactory(BaseFactory): | ||||
|         valid = super().do_additional_actions(agent, action) | ||||
|         if valid is None: | ||||
|             if action == h.EnvActions.ITEM_ACTION: | ||||
|                 if self.item_properties.agent_can_interact: | ||||
|                 if self.item_prop.agent_can_interact: | ||||
|                     valid = self.do_item_action(agent) | ||||
|                     return valid | ||||
|                 else: | ||||
| @@ -283,14 +289,14 @@ class ItemFactory(BaseFactory): | ||||
|     def do_additional_reset(self) -> None: | ||||
|         # noinspection PyUnresolvedReferences | ||||
|         super().do_additional_reset() | ||||
|         self._next_item_spawn = self.item_properties.spawn_frequency | ||||
|         self._next_item_spawn = self.item_prop.spawn_frequency | ||||
|         self.trigger_item_spawn() | ||||
|  | ||||
|     def trigger_item_spawn(self): | ||||
|         if item_to_spawns := max(0, (self.item_properties.n_items - len(self[c.ITEM]))): | ||||
|         if item_to_spawns := max(0, (self.item_prop.n_items - len(self[c.ITEM]))): | ||||
|             empty_tiles = self[c.FLOOR].empty_tiles[:item_to_spawns] | ||||
|             self[c.ITEM].spawn_items(empty_tiles) | ||||
|             self._next_item_spawn = self.item_properties.spawn_frequency | ||||
|             self._next_item_spawn = self.item_prop.spawn_frequency | ||||
|             self.print(f'{item_to_spawns} new items have been spawned; next spawn in {self._next_item_spawn}') | ||||
|         else: | ||||
|             self.print('No Items are spawning, limit is reached.') | ||||
| @@ -351,30 +357,41 @@ class ItemFactory(BaseFactory): | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     import random | ||||
|     from environments.utility_classes import AgentRenderOptions as ARO, ObservationProperties | ||||
|  | ||||
|     render = True | ||||
|  | ||||
|     item_props = ItemProperties() | ||||
|     item_probs = ItemProperties() | ||||
|  | ||||
|     factory = ItemFactory(item_properties=item_props, n_agents=3, done_at_collision=False, frames_to_stack=0, | ||||
|                           level_name='rooms', max_steps=4000, | ||||
|                           omit_agent_in_obs=True, parse_doors=True, pomdp_r=3, | ||||
|                           record_episodes=False, verbose=False | ||||
|     obs_props = ObservationProperties(render_agents=ARO.LEVEL, omit_agent_self=True, pomdp_r=2) | ||||
|  | ||||
|     move_props = {'allow_square_movement': True, | ||||
|                   'allow_diagonal_movement': False, | ||||
|                   'allow_no_op': False} | ||||
|  | ||||
|     factory = ItemFactory(n_agents=3, done_at_collision=False, | ||||
|                           level_name='rooms', max_steps=400, | ||||
|                           obs_prop=obs_props, parse_doors=True, | ||||
|                           record_episodes=True, verbose=True, | ||||
|                           mv_prop=move_props, item_prop=item_probs | ||||
|                           ) | ||||
|  | ||||
|     # noinspection DuplicatedCode | ||||
|     n_actions = factory.action_space.n - 1 | ||||
|     _ = factory.observation_space | ||||
|  | ||||
|     for epoch in range(100): | ||||
|         random_actions = [[random.randint(0, n_actions) for _ in range(factory.n_agents)] for _ in range(200)] | ||||
|     for epoch in range(4): | ||||
|         random_actions = [[random.randint(0, n_actions) for _ | ||||
|                            in range(factory.n_agents)] for _ | ||||
|                           in range(factory.max_steps + 1)] | ||||
|         env_state = factory.reset() | ||||
|         rew = 0 | ||||
|         r = 0 | ||||
|         for agent_i_action in random_actions: | ||||
|             env_state, step_r, done_bool, info_obj = factory.step(agent_i_action) | ||||
|             rew += step_r | ||||
|             r += step_r | ||||
|             if render: | ||||
|                 factory.render() | ||||
|             if done_bool: | ||||
|                 break | ||||
|         print(f'Factory run {epoch} done, reward is:\n    {rew}') | ||||
|         print(f'Factory run {epoch} done, reward is:\n    {r}') | ||||
| pass | ||||
|   | ||||
| @@ -1,7 +1,24 @@ | ||||
| from typing import NamedTuple | ||||
| from enum import Enum | ||||
| from typing import NamedTuple, Union | ||||
|  | ||||
|  | ||||
| class AgentRenderOptions(object): | ||||
|     SEPERATE = 'each' | ||||
|     COMBINED = 'combined' | ||||
|     LEVEL = 'lvl' | ||||
|     NOT = 'not' | ||||
|  | ||||
|  | ||||
| class MovementProperties(NamedTuple): | ||||
|     allow_square_movement: bool = True | ||||
|     allow_diagonal_movement: bool = False | ||||
|     allow_no_op: bool = False | ||||
|  | ||||
|  | ||||
| class ObservationProperties(NamedTuple): | ||||
|     render_agents: AgentRenderOptions = AgentRenderOptions.SEPERATE | ||||
|     omit_agent_self: bool = True | ||||
|     additional_agent_placeholder: Union[None, str, int] = None | ||||
|     cast_shadows: bool = True | ||||
|     frames_to_stack: int = 0 | ||||
|     pomdp_r: int = 0 | ||||
|   | ||||
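The four AgentRenderOptions values above map onto the Agents register flags wired up in the BaseFactory hunk further up. A summary sketch of that mapping (assumes the repo is importable; this is not a library API):

from environments.utility_classes import AgentRenderOptions as a_obs

def agent_register_flags(render_agents):
    # Mirrors the flag wiring in the Agents.from_tiles call shown above.
    return dict(individual_slices=render_agents == a_obs.SEPERATE,   # one obs slice per agent
                hide_from_obs_builder=render_agents == a_obs.LEVEL,  # drawn into the level instead
                is_observable=render_agents != a_obs.NOT)            # NOT hides agents entirely

print(agent_register_flags(a_obs.COMBINED))
# {'individual_slices': False, 'hide_from_obs_builder': False, 'is_observable': True}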
main.py (2 changes)
							| @@ -56,7 +56,7 @@ if __name__ == '__main__': | ||||
|     for modeL_type in [A2C, PPO, DQN]:  # ,RegDQN, QRDQN]: | ||||
|         for seed in range(3): | ||||
|             env_kwargs = dict(n_agents=1, | ||||
|                               # item_properties=item_props, | ||||
|                               # item_prop=item_props, | ||||
|                               dirt_properties=dirt_props, | ||||
|                               movement_properties=move_props, | ||||
|                               pomdp_r=2, max_steps=1000, parse_doors=False, | ||||
|   | ||||
| @@ -48,7 +48,7 @@ if __name__ == '__main__': | ||||
|             env_kwargs = yaml.load(f, Loader=yaml.FullLoader) | ||||
|         dirt_props = DirtProperties(clean_amount=3, gain_amount=0.2, max_global_amount=30, | ||||
|                                     max_local_amount=3, spawn_frequency=1, max_spawn_ratio=0.05) | ||||
|         # env_kwargs.update(n_agents=1, dirt_properties=dirt_props) | ||||
|         # env_kwargs.update(n_agents=1, dirt_prop=dirt_props) | ||||
|         env = DirtFactory(**env_kwargs) | ||||
|  | ||||
|         env = FrameStack(env, 4) | ||||
|   | ||||
| @@ -5,6 +5,7 @@ import numpy as np | ||||
| import yaml | ||||
|  | ||||
| from environments import helpers as h | ||||
| from environments.helpers import Constants as c | ||||
| from environments.factory.factory_dirt import DirtFactory | ||||
| from environments.factory.factory_dirt_item import DirtItemFactory | ||||
| from environments.logging.recorder import RecorderCallback | ||||
| @@ -15,29 +16,30 @@ warnings.filterwarnings('ignore', category=UserWarning) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|  | ||||
|     model_name = 'DQN_1631187073' | ||||
|     model_name = 'DQN_163519000' | ||||
|     run_id = 0 | ||||
|     seed = 69 | ||||
|     out_path = Path('debug_out/DQN_1635176929/0_DQN_1635176929') | ||||
|     n_agents = 2 | ||||
|     out_path = Path('debug_out/DQN_163519000/1_DQN_163519000') | ||||
|     model_path = out_path | ||||
|  | ||||
|     with (out_path / f'env_params.json').open('r') as f: | ||||
|         env_kwargs = yaml.load(f, Loader=yaml.FullLoader) | ||||
|         env_kwargs.update(additional_agent_placeholder=None, n_agents=4) | ||||
|         if gain_amount := env_kwargs.get('dirt_properties', {}).get('gain_amount', None): | ||||
|             env_kwargs['dirt_properties']['max_spawn_amount'] = gain_amount | ||||
|             del env_kwargs['dirt_properties']['gain_amount'] | ||||
|         env_kwargs.update(additional_agent_placeholder=None, n_agents=n_agents) | ||||
|         if gain_amount := env_kwargs.get('dirt_prop', {}).get('gain_amount', None): | ||||
|             env_kwargs['dirt_prop']['max_spawn_amount'] = gain_amount | ||||
|             del env_kwargs['dirt_prop']['gain_amount'] | ||||
|  | ||||
|         env_kwargs.update(record_episodes=True) | ||||
|         env_kwargs.update(record_episodes=False) | ||||
|  | ||||
|     this_model = out_path / 'model.zip' | ||||
|  | ||||
|     model_cls = next(val for key, val in h.MODEL_MAP.items() if key in model_name) | ||||
|     models = [model_cls.load(this_model) for _ in range(4)] | ||||
|     models = [model_cls.load(this_model) for _ in range(n_agents)] | ||||
|  | ||||
|     with RecorderCallback(filepath=Path() / 'recorder_out_DQN.json') as recorder: | ||||
|         # Init Env | ||||
|         with DirtItemFactory(**env_kwargs) as env: | ||||
|         with DirtFactory(**env_kwargs) as env: | ||||
|             obs_shape = env.observation_space.shape | ||||
|             # Evaluation Loop for i in range(n Episodes) | ||||
|             for episode in range(5): | ||||
| @@ -46,11 +48,11 @@ if __name__ == '__main__': | ||||
|                 while not done_bool: | ||||
|                     actions = [model.predict( | ||||
|                         np.stack([env_state[i][j] for i in range(env_state.shape[0])]), | ||||
|                         deterministic=True)[0] for j, model in enumerate(models)] | ||||
|                         deterministic=False)[0] for j, model in enumerate(models)] | ||||
|                     env_state, step_r, done_bool, info_obj = env.step(actions) | ||||
|                     recorder.read_info(0, info_obj) | ||||
|                     rew += step_r | ||||
|                     # env.render() | ||||
|                     env.render() | ||||
|                     if done_bool: | ||||
|                         recorder.read_done(0, done_bool) | ||||
|                         break | ||||
|   | ||||
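The per-agent prediction in the loop above slices each agent's frame stack out of the stacked observation before calling model.predict. A small NumPy check of that reshuffling, with the (frames, agents, w, h) layout assumed from the comprehension:

import numpy as np

env_state = np.zeros((3, 2, 5, 5))  # (stacked frames, agents, w, h) -- assumed layout
j = 1                               # agent index
per_agent = np.stack([env_state[i][j] for i in range(env_state.shape[0])])
assert per_agent.shape == (3, 5, 5)
assert (per_agent == env_state[:, j]).all()  # equivalent, simpler indexing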
studies/e_1.py (254 changes)
							| @@ -26,16 +26,12 @@ from environments.factory.factory_dirt import DirtProperties, DirtFactory | ||||
| from environments.factory.factory_dirt_item import DirtItemFactory | ||||
| from environments.factory.factory_item import ItemProperties, ItemFactory | ||||
| from environments.logging.monitor import MonitorCallback | ||||
| from environments.utility_classes import MovementProperties | ||||
| from environments.utility_classes import MovementProperties, ObservationProperties, AgentRenderOptions | ||||
| import pickle | ||||
| from plotting.compare_runs import compare_seed_runs, compare_model_runs, compare_all_parameter_runs | ||||
| import pandas as pd | ||||
| import seaborn as sns | ||||
|  | ||||
| # Define a global study save path | ||||
| start_time = 163519000  # int(time.time()) | ||||
| study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}' | ||||
|  | ||||
| """ | ||||
| In this study, we want to explore the macro behaviour of multiple agents that are trained on the same task, | ||||
| but never saw each other during training. | ||||
| @@ -68,6 +64,10 @@ There are further distinctions to be made: | ||||
| - We are out of distribution. | ||||
| """ | ||||
|  | ||||
| n_agents = 4 | ||||
| ood_monitor_file = f'e_1_monitor_{n_agents}_agents.pick' | ||||
| baseline_monitor_file = 'e_1_baseline_monitor.pick' | ||||
|  | ||||
|  | ||||
| def policy_model_kwargs(): | ||||
|     return dict(ent_coef=0.05) | ||||
| @@ -92,11 +92,96 @@ def encapsule_env_factory(env_fctry, env_kwrgs): | ||||
|     return _init | ||||
|  | ||||
|  | ||||
| def load_model_run_baseline(seed_path, env_to_run): | ||||
|     # retrieve model class | ||||
|     model_cls = next(val for key, val in h.MODEL_MAP.items() if key in seed_path.parent.name) | ||||
|     # Load both agents | ||||
|     model = model_cls.load(seed_path / 'model.zip') | ||||
|     # Load old env kwargs | ||||
|     with next(seed_path.glob('*.json')).open('r') as f: | ||||
|         env_kwargs = simplejson.load(f) | ||||
|     # Monitor Init | ||||
|     with MonitorCallback(filepath=seed_path / baseline_monitor_file) as monitor: | ||||
|         # Init Env | ||||
|         with env_to_run(**env_kwargs) as env_factory: | ||||
|             # Evaluation Loop for i in range(n Episodes) | ||||
|             for episode in range(100): | ||||
|                 env_state = env_factory.reset() | ||||
|                 rew, done_bool = 0, False | ||||
|                 while not done_bool: | ||||
|                     action = model.predict(env_state, deterministic=True)[0] | ||||
|                     env_state, step_r, done_bool, info_obj = env_factory.step(action) | ||||
|                     monitor.read_info(0, info_obj) | ||||
|                     rew += step_r | ||||
|                     if done_bool: | ||||
|                         monitor.read_done(0, done_bool) | ||||
|                         break | ||||
|                 print(f'Factory run {episode} done, reward is:\n    {rew}') | ||||
|         # Eval monitor outputs are automatically stored by the monitor object | ||||
|         # del model, env_kwargs, env_factory | ||||
|         # import gc | ||||
|         # gc.collect() | ||||
|  | ||||
|  | ||||
| def load_model_run_study(seed_path, env_to_run, additional_kwargs_dict): | ||||
|     global model_cls | ||||
|     # retrieve model class | ||||
|     model_cls = next(val for key, val in h.MODEL_MAP.items() if key in seed_path.parent.name) | ||||
|     # Load both agents | ||||
|     models = [model_cls.load(seed_path / 'model.zip') for _ in range(n_agents)] | ||||
|     # Load old env kwargs | ||||
|     with next(seed_path.glob('*.json')).open('r') as f: | ||||
|         env_kwargs = simplejson.load(f) | ||||
|         env_kwargs.update( | ||||
|             n_agents=n_agents, | ||||
|             **additional_kwargs_dict.get('post_training_kwargs', {})) | ||||
|     # Monitor Init | ||||
|     with MonitorCallback(filepath=seed_path / ood_monitor_file) as monitor: | ||||
|         # Init Env | ||||
|         with env_to_run(**env_kwargs) as env_factory: | ||||
|             # Evaluation Loop for i in range(n Episodes) | ||||
|             for episode in range(50): | ||||
|                 env_state = env_factory.reset() | ||||
|                 rew, done_bool = 0, False | ||||
|                 while not done_bool: | ||||
|                     try: | ||||
|                         actions = [model.predict( | ||||
|                             np.stack([env_state[i][j] for i in range(env_state.shape[0])]), | ||||
|                             deterministic=False)[0] for j, model in enumerate(models)] | ||||
|                     except ValueError as e: | ||||
|                         print(e) | ||||
|                         print('Env_Kwargs are:\n') | ||||
|                         print(env_kwargs) | ||||
|                         print('Path is:\n') | ||||
|                         print(seed_path) | ||||
|                         exit() | ||||
|                     env_state, step_r, done_bool, info_obj = env_factory.step(actions) | ||||
|                     monitor.read_info(0, info_obj) | ||||
|                     rew += step_r | ||||
|                     if done_bool: | ||||
|                         monitor.read_done(0, done_bool) | ||||
|                         break | ||||
|                 print(f'Factory run {episode} done, reward is:\n    {rew}') | ||||
|     # Eval monitor outputs are automatically stored by the monitor object | ||||
|     del models, env_kwargs, env_factory | ||||
|     import gc | ||||
|     gc.collect() | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     train_steps = 8e5 | ||||
|  | ||||
|     # Define a global study save path | ||||
|     start_time = '900000'  # int(time.time()) | ||||
|     study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}' | ||||
|  | ||||
|     # Define Global Env Parameters | ||||
|     # Define properties object parameters | ||||
|     obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT, | ||||
|                                       omit_agent_self=True, | ||||
|                                       frames_to_stack=3, | ||||
|                                       pomdp_r=2 | ||||
|                                       ) | ||||
|     move_props = MovementProperties(allow_diagonal_movement=True, | ||||
|                                     allow_square_movement=True, | ||||
|                                     allow_no_op=False) | ||||
| @@ -108,33 +193,67 @@ if __name__ == '__main__': | ||||
|     item_props = ItemProperties(n_items=10, agent_can_interact=True, | ||||
|                                 spawn_frequency=30, n_drop_off_locations=2, | ||||
|                                 max_agent_inventory_capacity=15) | ||||
|     factory_kwargs = dict(n_agents=1, | ||||
|                           pomdp_r=2, max_steps=400, parse_doors=True, | ||||
|                           level_name='rooms', frames_to_stack=3, | ||||
|                           omit_agent_in_obs=True, combin_agent_obs=True, record_episodes=False, | ||||
|                           cast_shadows=True, doors_have_area=False, verbose=False, | ||||
|                           movement_properties=move_props | ||||
|     factory_kwargs = dict(n_agents=1, max_steps=400, parse_doors=True, | ||||
|                           level_name='rooms', record_episodes=False, doors_have_area=False, | ||||
|                           verbose=False, | ||||
|                           mv_prop=move_props, | ||||
|                           obs_prop=obs_props | ||||
|                           ) | ||||
|  | ||||
|     # Bundle the three environment variants with the global kwargs and properties | ||||
|     env_map = {'dirt': (DirtFactory, dict(dirt_properties=dirt_props, **factory_kwargs)), | ||||
|                'item': (ItemFactory, dict(item_properties=item_props, **factory_kwargs)), | ||||
|                'itemdirt': (DirtItemFactory, dict(dirt_properties=dirt_props, item_properties=item_props, | ||||
|     env_map = {'dirt': (DirtFactory, dict(dirt_prop=dirt_props, | ||||
|                                           **factory_kwargs)), | ||||
|                'item': (ItemFactory, dict(item_prop=item_props, | ||||
|                                           **factory_kwargs)), | ||||
|                'itemdirt': (DirtItemFactory, dict(dirt_prop=dirt_props, | ||||
|                                                   item_prop=item_props, | ||||
|                                                   **factory_kwargs))} | ||||
|     env_names = list(env_map.keys()) | ||||
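|     # Sketch of how an env_map entry is consumed (mirroring the evaluation | ||||
|     # loops below): index 0 is the factory class, index 1 its kwargs. | ||||
|     #   env_cls, env_kw = env_map['itemdirt'] | ||||
|     #   with env_cls(**env_kw) as env: | ||||
|     #       state = env.reset() | ||||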
|  | ||||
|     # Define parameter versions according to #1,2[1,0,N],3 | ||||
|     observation_modes = { | ||||
|         #  Fill-value = 0 | ||||
|         # DEACTIVATED 'seperate_0': dict(additional_env_kwargs=dict(additional_agent_placeholder=0)), | ||||
|         #  Fill-value = 1 | ||||
|         # DEACTIVATED 'seperate_1': dict(additional_env_kwargs=dict(additional_agent_placeholder=1)), | ||||
|         #  Fill-value = N(0, 1) | ||||
|         'seperate_N': dict(additional_env_kwargs=dict(additional_agent_placeholder='N')), | ||||
|         #  Further adjustments are done post-training | ||||
|         'in_lvl_obs': dict(post_training_kwargs=dict(other_agent_obs='in_lvl')), | ||||
|         'seperate_N': dict( | ||||
|             post_training_kwargs=dict( | ||||
|                 obs_prop=ObservationProperties( | ||||
|                     render_agents=AgentRenderOptions.COMBINED, | ||||
|                     additional_agent_placeholder=None, | ||||
|                     omit_agent_self=True, | ||||
|                     frames_to_stack=3, | ||||
|                     pomdp_r=2) | ||||
|             ), | ||||
|             additional_env_kwargs=dict( | ||||
|                 obs_prop=ObservationProperties( | ||||
|                     render_agents=AgentRenderOptions.NOT, | ||||
|                     additional_agent_placeholder='N', | ||||
|                     omit_agent_self=True, | ||||
|                     frames_to_stack=3, | ||||
|                     pomdp_r=2) | ||||
|             ) | ||||
|         ), | ||||
|         'in_lvl_obs': dict( | ||||
|             post_training_kwargs=dict( | ||||
|                 obs_prop=ObservationProperties( | ||||
|                     render_agents=AgentRenderOptions.LEVEL, | ||||
|                     omit_agent_self=True, | ||||
|                     frames_to_stack=3, | ||||
|                     pomdp_r=2) | ||||
|             ) | ||||
|         ), | ||||
|         #  No further adjustment needed | ||||
|         'no_obs': {} | ||||
|         'no_obs': dict( | ||||
|             post_training_kwargs=dict( | ||||
|                 obs_prop=ObservationProperties( | ||||
|                     render_agents=AgentRenderOptions.NOT, | ||||
|                     omit_agent_self=True, | ||||
|                     frames_to_stack=3, | ||||
|                     pomdp_r=2) | ||||
|             ) | ||||
|         ) | ||||
|     } | ||||
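|     # How these modes are applied (a sketch; the exact merge points are an | ||||
|     # assumption based on the evaluation loops below): 'additional_env_kwargs' | ||||
|     # extends the env kwargs when the env is built, while 'post_training_kwargs' | ||||
|     # overrides the stored kwargs at evaluation time, e.g.: | ||||
|     #   env_kwargs.update(**observation_modes[mode].get('post_training_kwargs', {})) | ||||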
|  | ||||
|     # Train starts here ############################################################ | ||||
| @@ -223,52 +342,27 @@ if __name__ == '__main__': | ||||
|  | ||||
|     # Evaluation starts here ##################################################### | ||||
|     # First iterate over every model and monitor "as trained" | ||||
|     baseline_monitor_file = 'e_1_baseline_monitor.pick' | ||||
|     if True: | ||||
|         render = False | ||||
|         for observation_mode in observation_modes: | ||||
|             obs_mode_path = next(x for x in study_root_path.iterdir() if x.is_dir() and x.name == observation_mode) | ||||
|             # For trained policy in study_root_path / identifier | ||||
|             for env_path in [x for x in obs_mode_path.iterdir() if x.is_dir()]: | ||||
|                 for policy_path in [x for x in env_path.iterdir() if x.is_dir()]: | ||||
|                     # Iteration | ||||
|                     for seed_path in (y for y in policy_path.iterdir() if y.is_dir()): | ||||
|                         # retrieve model class | ||||
|                         for model_cls in (val for key, val in h.MODEL_MAP.items() if key in policy_path.name): | ||||
|                             # Load both agents | ||||
|                             model = model_cls.load(seed_path / 'model.zip') | ||||
|                             # Load old env kwargs | ||||
|                             with next(seed_path.glob('*.json')).open('r') as f: | ||||
|                                 env_kwargs = simplejson.load(f) | ||||
|                             # Monitor Init | ||||
|                             with MonitorCallback(filepath=seed_path / baseline_monitor_file) as monitor: | ||||
|                                 # Init Env | ||||
|                                 with env_map[env_path.name][0](**env_kwargs) as env_factory: | ||||
|                                     # Evaluation loop over a fixed number of episodes | ||||
|                                     for episode in range(100): | ||||
|                                         env_state = env_factory.reset() | ||||
|                                         rew, done_bool = 0, False | ||||
|                                         while not done_bool: | ||||
|                                             action = model.predict(env_state, deterministic=True)[0] | ||||
|                                             env_state, step_r, done_bool, info_obj = env_factory.step(action) | ||||
|                                             monitor.read_info(0, info_obj) | ||||
|                                             rew += step_r | ||||
|                                             if render: | ||||
|                                                 env_factory.render() | ||||
|                                             if done_bool: | ||||
|                                                 monitor.read_done(0, done_bool) | ||||
|                                                 break | ||||
|                                         print(f'Factory run {episode} done, reward is:\n    {rew}') | ||||
|                                 # Eval monitor outputs are automatically stored by the monitor object | ||||
|                             del model, env_kwargs, env_factory | ||||
|                             import gc | ||||
|                     paths = list(y for y in policy_path.iterdir() if y.is_dir() | ||||
|                                  and not (y / baseline_monitor_file).exists()) | ||||
|                     import multiprocessing as mp | ||||
|                     import itertools as it | ||||
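|                     # Dispatch note: it.product pairs every pending seed path | ||||
|                     # with the single env class, so each worker call receives | ||||
|                     # one (seed path, env class) tuple. | ||||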
|                     pool = mp.Pool(mp.cpu_count()) | ||||
|                     result = pool.starmap(load_model_run_baseline, | ||||
|                                           it.product(paths, | ||||
|                                                      (env_map[env_path.name][0],)) | ||||
|                                           ) | ||||
|  | ||||
|                             gc.collect() | ||||
|                     # for seed_path in (y for y in policy_path.iterdir() if y.is_dir()): | ||||
|                     #    load_model_run_baseline(seed_path) | ||||
|  | ||||
|     # Then iterate over every model and monitor "ood behavior" - "is it ood?" | ||||
|     n_agents = 4 | ||||
|     ood_monitor_file = f'e_1_monitor_{n_agents}_agents.pick' | ||||
|  | ||||
|     if True: | ||||
|         for observation_mode in observation_modes: | ||||
|             obs_mode_path = next(x for x in study_root_path.iterdir() if x.is_dir() and x.name == observation_mode) | ||||
| @@ -279,44 +373,18 @@ if __name__ == '__main__': | ||||
|                     # First seed path version | ||||
|                     # seed_path = next((y for y in policy_path.iterdir() if y.is_dir())) | ||||
|                     # Iteration | ||||
|                     for seed_path in (y for y in policy_path.iterdir() if y.is_dir()): | ||||
|                         if (seed_path / ood_monitor_file).exists(): | ||||
|                             continue | ||||
|                         # retrieve model class | ||||
|                         for model_cls in (val for key, val in h.MODEL_MAP.items() if key in policy_path.name): | ||||
|                             # Load both agents | ||||
|                             models = [model_cls.load(seed_path / 'model.zip') for _ in range(n_agents)] | ||||
|                             # Load old env kwargs | ||||
|                             with next(seed_path.glob('*.json')).open('r') as f: | ||||
|                                 env_kwargs = simplejson.load(f) | ||||
|                                 env_kwargs.update( | ||||
|                                     n_agents=n_agents, additional_agent_placeholder=None, | ||||
|                                     **observation_modes[observation_mode].get('post_training_kwargs', {})) | ||||
|  | ||||
|                             # Monitor Init | ||||
|                             with MonitorCallback(filepath=seed_path / ood_monitor_file) as monitor: | ||||
|                                 # Init Env | ||||
|                                 with env_map[env_path.name][0](**env_kwargs) as env_factory: | ||||
|                                     # Evaluation loop over a fixed number of episodes | ||||
|                                     for episode in range(50): | ||||
|                                         env_state = env_factory.reset() | ||||
|                                         rew, done_bool = 0, False | ||||
|                                         while not done_bool: | ||||
|                                             actions = [model.predict( | ||||
|                                                 np.stack([env_state[i][j] for i in range(env_state.shape[0])]), | ||||
|                                                 deterministic=False)[0] for j, model in enumerate(models)] | ||||
|                                             env_state, step_r, done_bool, info_obj = env_factory.step(actions) | ||||
|                                             monitor.read_info(0, info_obj) | ||||
|                                             rew += step_r | ||||
|                                             if done_bool: | ||||
|                                                 monitor.read_done(0, done_bool) | ||||
|                                                 break | ||||
|                                         print(f'Factory run {episode} done, reward is:\n    {rew}') | ||||
|                                     # Eval monitor outputs are automatically stored by the monitor object | ||||
|                             del models, env_kwargs, env_factory | ||||
|                             import gc | ||||
|  | ||||
|                             gc.collect() | ||||
|                     import multiprocessing as mp | ||||
|                     import itertools as it | ||||
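|                     # As in the baseline pass, it.product yields one | ||||
|                     # (seed path, env class, observation mode) triple per | ||||
|                     # pending seed directory. | ||||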
|                     pool = mp.Pool(mp.cpu_count()) | ||||
|                     paths = list(y for y in policy_path.iterdir() if y.is_dir() | ||||
|                                  and not (y / ood_monitor_file).exists()) | ||||
|                     result = pool.starmap(load_model_run_study, | ||||
|                                           it.product(paths, | ||||
|                                                      (env_map[env_path.name][0],), | ||||
|                                                      (observation_modes[observation_mode],)) | ||||
|                                           ) | ||||
|                     # for seed_path in (y for y in policy_path.iterdir() if y.is_dir()): | ||||
|                     #     load_model_run_study(seed_path) | ||||
|  | ||||
|     # Plotting | ||||
|     if True: | ||||
|   | ||||