diff --git a/environments/factory/base/base_factory.py b/environments/factory/base/base_factory.py index ac0be21..36d58ed 100644 --- a/environments/factory/base/base_factory.py +++ b/environments/factory/base/base_factory.py @@ -16,7 +16,8 @@ from environments.helpers import Constants as c, Constants from environments import helpers as h from environments.factory.base.objects import Agent, Tile, Action from environments.factory.base.registers import Actions, Entities, Agents, Doors, FloorTiles, WallTiles, PlaceHolders -from environments.utility_classes import MovementProperties +from environments.utility_classes import MovementProperties, ObservationProperties +from environments.utility_classes import AgentRenderOptions as a_obs import simplejson @@ -33,7 +34,7 @@ class BaseFactory(gym.Env): @property def observation_space(self): - if r := self.pomdp_r: + if r := self._pomdp_r: z = self._obs_cube.shape[0] xy = r*2 + 1 level_shape = (z, xy, xy) @@ -44,24 +45,32 @@ class BaseFactory(gym.Env): @property def pomdp_diameter(self): - return self.pomdp_r * 2 + 1 + return self._pomdp_r * 2 + 1 @property def movement_actions(self): return self._actions.movement_actions def __enter__(self): - return self if self.frames_to_stack == 0 else FrameStack(self, self.frames_to_stack) + return self if self.obs_prop.frames_to_stack == 0 else \ + FrameStack(self, self.obs_prop.frames_to_stack) def __exit__(self, exc_type, exc_val, exc_tb): self.close() - def __init__(self, level_name='simple', n_agents=1, max_steps=int(5e2), pomdp_r: Union[None, int] = 0, - movement_properties: MovementProperties = MovementProperties(), parse_doors=False, - combin_agent_obs: bool = False, frames_to_stack=0, record_episodes=False, - omit_agent_in_obs=False, done_at_collision=False, cast_shadows=True, additional_agent_placeholder=None, + def __init__(self, level_name='simple', n_agents=1, max_steps=int(5e2), + mv_prop: MovementProperties = MovementProperties(), + obs_prop: ObservationProperties = ObservationProperties(), + parse_doors=False, record_episodes=False, done_at_collision=False, verbose=False, doors_have_area=True, env_seed=time.time_ns(), **kwargs): - assert frames_to_stack != 1 and frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1." + + if isinstance(mv_prop, dict): + mv_prop = MovementProperties(**mv_prop) + if isinstance(obs_prop, dict): + obs_prop = ObservationProperties(**obs_prop) + + assert obs_prop.frames_to_stack != 1 and \ + obs_prop.frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1." if kwargs: print(f'Following kwargs were passed, but ignored: {kwargs}') @@ -69,24 +78,18 @@ class BaseFactory(gym.Env): self.env_seed = env_seed self.seed(env_seed) self._base_rng = np.random.default_rng(self.env_seed) - if isinstance(movement_properties, dict): - movement_properties = MovementProperties(**movement_properties) - self.movement_properties = movement_properties + self.mv_prop = mv_prop + self.obs_prop = obs_prop self.level_name = level_name self._level_shape = None self.verbose = verbose - self.additional_agent_placeholder = additional_agent_placeholder self._renderer = None # expensive - don't use it when not required ! 
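Note on the new configuration objects: the refactored __init__ is driven by an ObservationProperties and a MovementProperties instance instead of the previous loose keyword arguments, and plain dicts are coerced into the NamedTuples as shown above. A minimal usage sketch, modelled on the __main__ block of factory_dirt.py later in this diff (parameter values are illustrative):

from environments.factory.factory_dirt import DirtFactory, DirtProperties
from environments.utility_classes import AgentRenderOptions, ObservationProperties

obs_props = ObservationProperties(render_agents=AgentRenderOptions.COMBINED,
                                  omit_agent_self=True, pomdp_r=2)
# A plain dict is also accepted; __init__ converts it into a MovementProperties tuple.
move_props = dict(allow_square_movement=True, allow_diagonal_movement=False,
                  allow_no_op=False)

factory = DirtFactory(n_agents=2, level_name='rooms', max_steps=400, parse_doors=True,
                      obs_prop=obs_props, mv_prop=move_props, dirt_prop=DirtProperties())

with factory as env:   # returns a FrameStack wrapper when obs_props.frames_to_stack > 0
    env_state = env.reset()
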
self._entities = Entities() self.n_agents = n_agents self.max_steps = max_steps - self.pomdp_r = pomdp_r - self.combin_agent_obs = combin_agent_obs - self.omit_agent_in_obs = omit_agent_in_obs - self.cast_shadows = cast_shadows - self.frames_to_stack = frames_to_stack + self._pomdp_r = self.obs_prop.pomdp_r self.done_at_collision = done_at_collision self.record_episodes = record_episodes @@ -130,24 +133,32 @@ class BaseFactory(gym.Env): parsed_doors = h.one_hot_level(parsed_level, c.DOOR) if np.any(parsed_doors): door_tiles = [floor.by_pos(pos) for pos in np.argwhere(parsed_doors == c.OCCUPIED_CELL.value)] - doors = Doors.from_tiles(door_tiles, self._level_shape, context=floor) + doors = Doors.from_tiles(door_tiles, self._level_shape, + entity_kwargs=dict(context=floor) + ) entities.update({c.DOORS: doors}) # Actions - self._actions = Actions(self.movement_properties, can_use_doors=self.parse_doors) + self._actions = Actions(self.mv_prop, can_use_doors=self.parse_doors) if additional_actions := self.additional_actions: self._actions.register_additional_items(additional_actions) # Agents agents = Agents.from_tiles(floor.empty_tiles[:self.n_agents], self._level_shape, - individual_slices=not self.combin_agent_obs) + individual_slices=self.obs_prop.render_agents == a_obs.SEPERATE, + hide_from_obs_builder=self.obs_prop.render_agents == a_obs.LEVEL, + is_observable=self.obs_prop.render_agents != a_obs.NOT + ) entities.update({c.AGENT: agents}) - if self.additional_agent_placeholder is not None: + if self.obs_prop.additional_agent_placeholder is not None: + # TODO: Make this accept Lists for multiple placeholders # Empty Observations with either [0, 1, N(0, 1)] placeholder = PlaceHolders.from_tiles([self._NO_POS_TILE], self._level_shape, - fill_value=self.additional_agent_placeholder) + entity_kwargs=dict( + fill_value=self.obs_prop.additional_agent_placeholder) + ) entities.update({c.AGENT_PLACEHOLDER: placeholder}) @@ -163,24 +174,11 @@ class BaseFactory(gym.Env): return self._entities def _init_obs_cube(self): - arrays = self._entities.observable_arrays + arrays = self._entities.obs_arrays - # FIXME: Move logic to Register - if self.omit_agent_in_obs and self.n_agents == 1: - del arrays[c.AGENT] - # This does not seem to be necesarry, because this case is allready handled by the Agent Register Class - # elif self.omit_agent_in_obs: - # arrays[c.AGENT] = np.delete(arrays[c.AGENT], 0, axis=0) obs_cube_z = sum([a.shape[0] if not self[key].is_per_agent else 1 for key, a in arrays.items()]) self._obs_cube = np.zeros((obs_cube_z, *self._level_shape), dtype=np.float32) - # Optionally Pad this obs cube for pomdp cases - if r := self.pomdp_r: - x, y = self._level_shape - # was c.SHADOW - self._padded_obs_cube = np.full((obs_cube_z, x + r*2, y + r*2), c.SHADOWED_CELL.value, dtype=np.float32) - self._padded_obs_cube[:, r:r+x, r:r+y] = self._obs_cube - def reset(self) -> (np.ndarray, int, bool, dict): _ = self._base_init_env() self._init_obs_cube() @@ -198,7 +196,6 @@ class BaseFactory(gym.Env): assert isinstance(actions, Iterable), f'"actions" has to be in [{int, list}]' self._steps += 1 - done = False # Pre step Hook for later use self.hook_pre_step() @@ -285,17 +282,22 @@ class BaseFactory(gym.Env): def _build_per_agent_obs(self, agent: Agent, state_array_dict) -> np.ndarray: agent_pos_is_omitted = False agent_omit_idx = None - if self.omit_agent_in_obs and self.n_agents == 1: + + if self.obs_prop.omit_agent_self and self.n_agents == 1: # There is only a single agent and we want to omit the agent obs, 
so just remove the array. - del state_array_dict[c.AGENT] - elif self.omit_agent_in_obs and self.combin_agent_obs and self.n_agents > 1: + # del state_array_dict[c.AGENT] + # Not Needed any more, + pass + elif self.obs_prop.omit_agent_self and self.obs_prop.render_agents in [a_obs.COMBINED, ] and self.n_agents > 1: state_array_dict[c.AGENT][0, agent.x, agent.y] -= agent.encoding agent_pos_is_omitted = True - elif self.omit_agent_in_obs and not self.combin_agent_obs and self.n_agents > 1: + elif self.obs_prop.omit_agent_self and self.obs_prop.render_agents == a_obs.SEPERATE and self.n_agents > 1: agent_omit_idx = next((i for i, a in enumerate(self[c.AGENT]) if a == agent)) running_idx, shadowing_idxs, can_be_shadowed_idxs = 0, [], [] + self._obs_cube[:] = 0 + # FIXME: Refactor this! Make a globally build observation, then add individual per-agent-obs for key, array in state_array_dict.items(): # Flush state array object representation to obs cube if not self[key].hide_from_obs_builder: @@ -309,12 +311,15 @@ class BaseFactory(gym.Env): for array_idx in range(array.shape[0]): self._obs_cube[running_idx: running_idx+z] = array[[x for x in range(array.shape[0]) if x != agent_omit_idx]] - elif key == c.AGENT and self.omit_agent_in_obs and self.combin_agent_obs: + # Agent OBS are combined + elif key == c.AGENT and self.obs_prop.omit_agent_self \ + and self.obs_prop.render_agents == a_obs.COMBINED: z = 1 self._obs_cube[running_idx: running_idx + z] = array + # Each Agent is rendered on a seperate array slice else: z = array.shape[0] - self._obs_cube[running_idx: running_idx+z] = array + self._obs_cube[running_idx: running_idx + z] = array # Define which OBS SLices cast a Shadow if self[key].is_blocking_light: for i in range(z): @@ -328,19 +333,14 @@ class BaseFactory(gym.Env): if agent_pos_is_omitted: state_array_dict[c.AGENT][0, agent.x, agent.y] += agent.encoding - if r := self.pomdp_r: - self._padded_obs_cube[:] = c.SHADOWED_CELL.value # Was c.SHADOW - # self._padded_obs_cube[0] = c.OCCUPIED_CELL.value - x, y = self._level_shape - self._padded_obs_cube[:, r:r + x, r:r + y] = self._obs_cube - global_x, global_y = map(sum, zip(agent.pos, (r, r))) - x0, x1 = max(0, global_x - self.pomdp_r), global_x + self.pomdp_r + 1 - y0, y1 = max(0, global_y - self.pomdp_r), global_y + self.pomdp_r + 1 - obs = self._padded_obs_cube[:, x0:x1, y0:y1] + if self._pomdp_r: + obs = self._do_pomdp_obs_cutout(agent, self._obs_cube) else: obs = self._obs_cube - if self.cast_shadows: + obs = obs.copy() + + if self.obs_prop.cast_shadows: obs_block_light = [obs[idx] != c.OCCUPIED_CELL.value for idx in shadowing_idxs] door_shadowing = False if self.parse_doors: @@ -350,8 +350,8 @@ class BaseFactory(gym.Env): for group in door.connectivity_subgroups: if agent.last_pos not in group: door_shadowing = True - if self.pomdp_r: - blocking = [tuple(np.subtract(x, agent.pos) + (self.pomdp_r, self.pomdp_r)) + if self._pomdp_r: + blocking = [tuple(np.subtract(x, agent.pos) + (self._pomdp_r, self._pomdp_r)) for x in group] xs, ys = zip(*blocking) else: @@ -361,8 +361,8 @@ class BaseFactory(gym.Env): obs_block_light[0][xs, ys] = False light_block_map = Map((np.prod(obs_block_light, axis=0) != True).astype(int)) - if self.pomdp_r: - light_block_map = light_block_map.do_fov(self.pomdp_r, self.pomdp_r, max(self._level_shape)) + if self._pomdp_r: + light_block_map = light_block_map.do_fov(self._pomdp_r, self._pomdp_r, max(self._level_shape)) else: light_block_map = light_block_map.do_fov(*agent.pos, max(self._level_shape)) if 
door_shadowing: @@ -374,6 +374,20 @@ class BaseFactory(gym.Env): else: pass + # Agents observe other agents as wall + if self.obs_prop.render_agents == a_obs.LEVEL and self.n_agents > 1: + other_agent_obs = self[c.AGENT].as_array() + if self.obs_prop.omit_agent_self: + other_agent_obs[:, agent.x, agent.y] -= agent.encoding + + if self.obs_prop.pomdp_r: + oobs = self._do_pomdp_obs_cutout(agent, other_agent_obs)[0] + mask = (oobs != c.SHADOWED_CELL.value).astype(int) + obs[0] += oobs * mask + + else: + obs[0] += other_agent_obs + # Additional Observation: for additional_obs in self.additional_obs_build(): obs[running_idx:running_idx+additional_obs.shape[0]] = additional_obs @@ -384,6 +398,37 @@ class BaseFactory(gym.Env): return obs + def _do_pomdp_obs_cutout(self, agent, obs_to_be_padded): + assert obs_to_be_padded.ndim == 3 + r, d = self._pomdp_r, self.pomdp_diameter + x0, x1 = max(0, agent.x - r), min(agent.x + r + 1, self._level_shape[0]) + y0, y1 = max(0, agent.y - r), min(agent.y + r + 1, self._level_shape[1]) + # Other Agent Obs = oobs + oobs = obs_to_be_padded[:, x0:x1, y0:y1] + if oobs.shape[0:] != (d,) * 2: + if xd := oobs.shape[1] % d: + if agent.x > r: + x0_pad = 0 + x1_pad = (d - xd) + else: + x0_pad = r - agent.x + x1_pad = 0 + else: + x0_pad, x1_pad = 0, 0 + + if yd := oobs.shape[2] % d: + if agent.y > r: + y0_pad = 0 + y1_pad = (d - yd) + else: + y0_pad = r - agent.y + y1_pad = 0 + else: + y0_pad, y1_pad = 0, 0 + + oobs = np.pad(oobs, ((0, 0), (x0_pad, x1_pad), (y0_pad, y1_pad)), 'constant') + return oobs + def get_all_tiles_with_collisions(self) -> List[Tile]: tiles_with_collisions = list() for tile in self[c.FLOOR]: @@ -449,7 +494,7 @@ class BaseFactory(gym.Env): if self._actions.is_moving_action(agent.temp_action): if agent.temp_valid: # info_dict.update(movement=1) - # reward += 0.00 + reward -= 0.001 pass else: reward -= 0.01 @@ -501,7 +546,7 @@ class BaseFactory(gym.Env): def render(self, mode='human'): if not self._renderer: # lazy init height, width = self._obs_cube.shape[1:] - self._renderer = Renderer(width, height, view_radius=self.pomdp_r, fps=5) + self._renderer = Renderer(width, height, view_radius=self._pomdp_r, fps=5) walls = [RenderEntity('wall', wall.pos) for wall in self[c.WALLS]] diff --git a/environments/factory/base/registers.py b/environments/factory/base/registers.py index ef1bcd2..b058f0d 100644 --- a/environments/factory/base/registers.py +++ b/environments/factory/base/registers.py @@ -1,3 +1,4 @@ +import numbers import random from abc import ABC from typing import List, Union, Dict @@ -91,21 +92,18 @@ class EntityObjectRegister(ObjectRegister, ABC): raise NotImplementedError @classmethod - def from_tiles(cls, tiles, *args, **kwargs): + def from_tiles(cls, tiles, *args, entity_kwargs=None, **kwargs): # objects_name = cls._accepted_objects.__name__ register_obj = cls(*args, **kwargs) - try: - del kwargs['individual_slices'] - except KeyError: - pass - entities = [cls._accepted_objects(tile, str_ident=i, **kwargs) + entities = [cls._accepted_objects(tile, str_ident=i, **entity_kwargs if entity_kwargs is not None else {}) for i, tile in enumerate(tiles)] register_obj.register_additional_items(entities) return register_obj @classmethod - def from_argwhere_coordinates(cls, positions: [(int, int)], tiles, *args, **kwargs): - return cls.from_tiles([tiles.by_pos(position) for position in positions], *args, **kwargs) + def from_argwhere_coordinates(cls, positions: [(int, int)], tiles, *args, entity_kwargs=None, **kwargs, ): + return 
cls.from_tiles([tiles.by_pos(position) for position in positions], *args, entity_kwargs=entity_kwargs, + **kwargs) @property def positions(self): @@ -166,10 +164,15 @@ class PlaceHolders(MovingEntityObjectRegister): # noinspection DuplicatedCode def as_array(self): - if isinstance(self.fill_value, int): + if isinstance(self.fill_value, numbers.Number): self._array[:] = self.fill_value - elif self.fill_value == "normal": - self._array = np.random.normal(size=self._array.shape) + elif isinstance(self.fill_value, str): + if self.fill_value.lower() in ['normal', 'n']: + self._array = np.random.normal(size=self._array.shape) + else: + raise ValueError('Choose one of: ["normal", "N"]') + else: + raise TypeError('Objects of type "str" or "number" is required here.') if self.individual_slices: return self._array @@ -183,10 +186,12 @@ class Entities(Register): @property def observable_arrays(self): + # FIXME: Find a better name return {key: val.as_array() for key, val in self.items() if val.is_observable} @property def obs_arrays(self): + # FIXME: Find a better name return {key: val.as_array() for key, val in self.items() if val.is_observable and not val.hide_from_obs_builder} @property @@ -208,6 +213,10 @@ class Entities(Register): def register_additional_items(self, others: Dict): return self.register_item(others) + def by_pos(self, pos: (int, int)): + found_entities = [y for y in (x.by_pos(pos) for x in self.values() if hasattr(x, 'by_pos')) if y is not None] + return found_entities + class WallTiles(EntityObjectRegister): _accepted_objects = Wall @@ -289,6 +298,10 @@ class Agents(MovingEntityObjectRegister): _accepted_objects = Agent + def __init__(self, *args, hide_from_obs_builder=False, **kwargs): + super().__init__(*args, **kwargs) + self.hide_from_obs_builder = hide_from_obs_builder + # noinspection DuplicatedCode def as_array(self): self._array[:] = c.FREE_CELL.value diff --git a/environments/factory/factory_dirt.py b/environments/factory/factory_dirt.py index 15a7f3d..449e5aa 100644 --- a/environments/factory/factory_dirt.py +++ b/environments/factory/factory_dirt.py @@ -14,7 +14,7 @@ from environments.factory.base.registers import Entities, MovingEntityObjectRegi from environments.factory.renderer import RenderEntity from environments.logging.recorder import RecorderCallback - +from environments.utility_classes import ObservationProperties CLEAN_UP_ACTION = h.EnvActions.CLEAN_UP @@ -65,9 +65,9 @@ class DirtRegister(MovingEntityObjectRegister): def as_array(self): if self._array is not None: self._array[:] = c.FREE_CELL.value - for key, dirt in self.items(): + for dirt in self.values(): if dirt.amount == 0: - self.delete_item(key) + self.delete_item(dirt) self._array[0, dirt.x, dirt.y] = dirt.amount else: self._array = np.zeros((1, *self._level_shape)) @@ -124,21 +124,21 @@ class DirtFactory(BaseFactory): @property def additional_actions(self) -> Union[Action, List[Action]]: super_actions = super().additional_actions - if self.dirt_properties.agent_can_interact: + if self.dirt_prop.agent_can_interact: super_actions.append(Action(enum_ident=CLEAN_UP_ACTION)) return super_actions @property def additional_entities(self) -> Dict[(Enum, Entities)]: super_entities = super().additional_entities - dirt_register = DirtRegister(self.dirt_properties, self._level_shape) + dirt_register = DirtRegister(self.dirt_prop, self._level_shape) super_entities.update(({c.DIRT: dirt_register})) return super_entities - def __init__(self, *args, dirt_properties: DirtProperties = DirtProperties(), 
env_seed=time.time_ns(), **kwargs): - if isinstance(dirt_properties, dict): - dirt_properties = DirtProperties(**dirt_properties) - self.dirt_properties = dirt_properties + def __init__(self, *args, dirt_prop: DirtProperties = DirtProperties(), env_seed=time.time_ns(), **kwargs): + if isinstance(dirt_prop, dict): + dirt_prop = DirtProperties(**dirt_prop) + self.dirt_prop = dirt_prop self._dirt_rng = np.random.default_rng(env_seed) self._dirt: DirtRegister kwargs.update(env_seed=env_seed) @@ -153,7 +153,7 @@ class DirtFactory(BaseFactory): def clean_up(self, agent: Agent) -> c: if dirt := self[c.DIRT].by_pos(agent.pos): - new_dirt_amount = dirt.amount - self.dirt_properties.clean_amount + new_dirt_amount = dirt.amount - self.dirt_prop.clean_amount if new_dirt_amount <= 0: self[c.DIRT].delete_item(dirt) @@ -170,16 +170,16 @@ class DirtFactory(BaseFactory): ] self._dirt_rng.shuffle(free_for_dirt) if initial_spawn: - var = self.dirt_properties.initial_dirt_spawn_r_var - new_spawn = self.dirt_properties.initial_dirt_ratio + dirt_rng.uniform(-var, var) + var = self.dirt_prop.initial_dirt_spawn_r_var + new_spawn = self.dirt_prop.initial_dirt_ratio + dirt_rng.uniform(-var, var) else: - new_spawn = dirt_rng.uniform(0, self.dirt_properties.max_spawn_ratio) + new_spawn = dirt_rng.uniform(0, self.dirt_prop.max_spawn_ratio) n_dirt_tiles = max(0, int(new_spawn * len(free_for_dirt))) self[c.DIRT].spawn_dirt(free_for_dirt[:n_dirt_tiles]) def do_additional_step(self) -> dict: info_dict = super().do_additional_step() - if smear_amount := self.dirt_properties.dirt_smear_amount: + if smear_amount := self.dirt_prop.dirt_smear_amount: for agent in self[c.AGENT]: if agent.temp_valid and agent.last_pos != c.NO_POS: if self._actions.is_moving_action(agent.temp_action): @@ -196,7 +196,7 @@ class DirtFactory(BaseFactory): pass # No Dirt Spawn elif not self._next_dirt_spawn: self.trigger_dirt_spawn() - self._next_dirt_spawn = self.dirt_properties.spawn_frequency + self._next_dirt_spawn = self.dirt_prop.spawn_frequency else: self._next_dirt_spawn -= 1 return info_dict @@ -205,7 +205,7 @@ class DirtFactory(BaseFactory): valid = super().do_additional_actions(agent, action) if valid is None: if action == CLEAN_UP_ACTION: - if self.dirt_properties.agent_can_interact: + if self.dirt_prop.agent_can_interact: valid = self.clean_up(agent) return valid else: @@ -218,11 +218,11 @@ class DirtFactory(BaseFactory): def do_additional_reset(self) -> None: super().do_additional_reset() self.trigger_dirt_spawn(initial_spawn=True) - self._next_dirt_spawn = self.dirt_properties.spawn_frequency if self.dirt_properties.spawn_frequency else -1 + self._next_dirt_spawn = self.dirt_prop.spawn_frequency if self.dirt_prop.spawn_frequency else -1 def check_additional_done(self): super_done = super().check_additional_done() - done = self.dirt_properties.done_when_clean and (len(self[c.DIRT]) == 0) + done = self.dirt_prop.done_when_clean and (len(self[c.DIRT]) == 0) return super_done or done def calculate_additional_reward(self, agent: Agent) -> (int, dict): @@ -256,41 +256,40 @@ class DirtFactory(BaseFactory): if __name__ == '__main__': + from environments.utility_classes import AgentRenderOptions as ARO render = True - dirt_props = DirtProperties(1, 0.05, 0.1, 3, 1, 20, 0.0) + dirt_props = DirtProperties(1, 0.05, 0.1, 3, 1, 20, 0) + + obs_props = ObservationProperties(render_agents=ARO.COMBINED, omit_agent_self=True, pomdp_r=2, additional_agent_placeholder=None) + move_props = {'allow_square_movement': True, 'allow_diagonal_movement': False, - 
'allow_no_op': False} #MovementProperties(True, True, False) + 'allow_no_op': False} - with RecorderCallback(filepath=Path('debug_out') / f'recorder_xxxx.json', occupation_map=False, - trajectory_map=False) as recorder: + factory = DirtFactory(n_agents=3, done_at_collision=False, + level_name='rooms', max_steps=400, + obs_prop=obs_props, parse_doors=True, + record_episodes=True, verbose=True, + mv_prop=move_props, dirt_prop=dirt_props + ) - factory = DirtFactory(n_agents=1, done_at_collision=False, frames_to_stack=0, - level_name='rooms', max_steps=400, combin_agent_obs=True, - omit_agent_in_obs=True, parse_doors=True, pomdp_r=3, - record_episodes=True, verbose=True, cast_shadows=True, - movement_properties=move_props, dirt_properties=dirt_props - ) + # noinspection DuplicatedCode + n_actions = factory.action_space.n - 1 + _ = factory.observation_space - # noinspection DuplicatedCode - n_actions = factory.action_space.n - 1 - _ = factory.observation_space - - for epoch in range(4): - random_actions = [[random.randint(0, n_actions) for _ - in range(factory.n_agents)] for _ - in range(factory.max_steps+1)] - env_state = factory.reset() - r = 0 - for agent_i_action in random_actions: - env_state, step_r, done_bool, info_obj = factory.step(agent_i_action) - #recorder.read_info(0, info_obj) - r += step_r - if render: - factory.render() - if done_bool: - # recorder.read_done(0, done_bool) - break - print(f'Factory run {epoch} done, reward is:\n {r}') - pass + for epoch in range(4): + random_actions = [[random.randint(0, n_actions) for _ + in range(factory.n_agents)] for _ + in range(factory.max_steps+1)] + env_state = factory.reset() + r = 0 + for agent_i_action in random_actions: + env_state, step_r, done_bool, info_obj = factory.step(agent_i_action) + r += step_r + if render: + factory.render() + if done_bool: + break + print(f'Factory run {epoch} done, reward is:\n {r}') +pass diff --git a/environments/factory/factory_item.py b/environments/factory/factory_item.py index 18fd4a5..7b135af 100644 --- a/environments/factory/factory_item.py +++ b/environments/factory/factory_item.py @@ -3,6 +3,7 @@ from collections import deque, UserList from enum import Enum from typing import List, Union, NamedTuple, Dict import numpy as np +import random from environments.factory.base.base_factory import BaseFactory from environments.helpers import Constants as c @@ -18,13 +19,6 @@ NO_ITEM = 0 ITEM_DROP_OFF = 1 -def inventory_slice_name(agent_i): - if isinstance(agent_i, int): - return f'{c.INVENTORY.name}_{c.AGENT.value}#{agent_i}' - else: - return f'{c.INVENTORY.name}_{agent_i}' - - class Item(MoveableEntity): def __init__(self, *args, **kwargs): @@ -77,7 +71,7 @@ class Inventory(UserList): @property def name(self): - return self.agent.name + return f'{self.__class__.__name__}({self.agent.name})' def __init__(self, pomdp_r: int, level_shape: (int, int), agent: Agent, capacity: int): super(Inventory, self).__init__() @@ -111,7 +105,8 @@ class Inventory(UserList): def summarize_state(self, **kwargs): attr_dict = {key: str(val) for key, val in self.__dict__.items() if not key.startswith('_') and key != 'data'} - attr_dict.update({val.name: val.summarize_state(**kwargs) for val in self}) + attr_dict.update(dict(items={val.name: val.summarize_state(**kwargs) for val in self})) + attr_dict.update(dict(name=self.name)) return attr_dict @@ -149,6 +144,11 @@ class Inventories(ObjectRegister): except StopIteration: return None + def summarize_states(self, n_steps=None): + # as dict with additional nesting + # return 
dict(items=super(Inventories, self).summarize_states()) + return super(Inventories, self).summarize_states(n_steps=n_steps) + class DropOffLocation(Entity): @@ -194,6 +194,9 @@ class DropOffLocations(EntityObjectRegister): self._array[0, item.x, item.y] = item.encoding return self._array + def __repr__(self): + super(DropOffLocations, self).__repr__() + class ItemProperties(NamedTuple): n_items: int = 5 # How many items are there at the same time @@ -207,13 +210,13 @@ class ItemProperties(NamedTuple): # noinspection PyAttributeOutsideInit, PyAbstractClass class ItemFactory(BaseFactory): # noinspection PyMissingConstructor - def __init__(self, *args, item_properties: ItemProperties = ItemProperties(), env_seed=time.time_ns(), **kwargs): - if isinstance(item_properties, dict): - item_properties = ItemProperties(**item_properties) - self.item_properties = item_properties + def __init__(self, *args, item_prop: ItemProperties = ItemProperties(), env_seed=time.time_ns(), **kwargs): + if isinstance(item_prop, dict): + item_prop = ItemProperties(**item_prop) + self.item_prop = item_prop kwargs.update(env_seed=env_seed) self._item_rng = np.random.default_rng(env_seed) - assert (item_properties.n_items <= ((1 + kwargs.get('pomdp_r', 0) * 2) ** 2)) or not kwargs.get('pomdp_r', 0) + assert (item_prop.n_items <= ((1 + kwargs.get('_pomdp_r', 0) * 2) ** 2)) or not kwargs.get('_pomdp_r', 0) super().__init__(*args, **kwargs) @property @@ -228,16 +231,19 @@ class ItemFactory(BaseFactory): # noinspection PyUnresolvedReferences super_entities = super().additional_entities - empty_tiles = self[c.FLOOR].empty_tiles[:self.item_properties.n_drop_off_locations] - drop_offs = DropOffLocations.from_tiles(empty_tiles, self._level_shape, - storage_size_until_full=self.item_properties.max_dropoff_storage_size) + empty_tiles = self[c.FLOOR].empty_tiles[:self.item_prop.n_drop_off_locations] + drop_offs = DropOffLocations.from_tiles( + empty_tiles, self._level_shape, + entity_kwargs=dict( + storage_size_until_full=self.item_prop.max_dropoff_storage_size) + ) item_register = ItemRegister(self._level_shape) - empty_tiles = self[c.FLOOR].empty_tiles[:self.item_properties.n_items] + empty_tiles = self[c.FLOOR].empty_tiles[:self.item_prop.n_items] item_register.spawn_items(empty_tiles) - inventories = Inventories(self._level_shape if not self.pomdp_r else ((self.pomdp_diameter,) * 2)) - inventories.spawn_inventories(self[c.AGENT], self.pomdp_r, - self.item_properties.max_agent_inventory_capacity) + inventories = Inventories(self._level_shape if not self._pomdp_r else ((self.pomdp_diameter,) * 2)) + inventories.spawn_inventories(self[c.AGENT], self._pomdp_r, + self.item_prop.max_agent_inventory_capacity) super_entities.update({c.DROP_OFF: drop_offs, c.ITEM: item_register, c.INVENTORY: inventories}) return super_entities @@ -270,7 +276,7 @@ class ItemFactory(BaseFactory): valid = super().do_additional_actions(agent, action) if valid is None: if action == h.EnvActions.ITEM_ACTION: - if self.item_properties.agent_can_interact: + if self.item_prop.agent_can_interact: valid = self.do_item_action(agent) return valid else: @@ -283,14 +289,14 @@ class ItemFactory(BaseFactory): def do_additional_reset(self) -> None: # noinspection PyUnresolvedReferences super().do_additional_reset() - self._next_item_spawn = self.item_properties.spawn_frequency + self._next_item_spawn = self.item_prop.spawn_frequency self.trigger_item_spawn() def trigger_item_spawn(self): - if item_to_spawns := max(0, (self.item_properties.n_items - len(self[c.ITEM]))): 
+ if item_to_spawns := max(0, (self.item_prop.n_items - len(self[c.ITEM]))): empty_tiles = self[c.FLOOR].empty_tiles[:item_to_spawns] self[c.ITEM].spawn_items(empty_tiles) - self._next_item_spawn = self.item_properties.spawn_frequency + self._next_item_spawn = self.item_prop.spawn_frequency self.print(f'{item_to_spawns} new items have been spawned; next spawn in {self._next_item_spawn}') else: self.print('No Items are spawning, limit is reached.') @@ -351,30 +357,41 @@ class ItemFactory(BaseFactory): if __name__ == '__main__': - import random + from environments.utility_classes import AgentRenderOptions as ARO, ObservationProperties + render = True - item_props = ItemProperties() + item_probs = ItemProperties() - factory = ItemFactory(item_properties=item_props, n_agents=3, done_at_collision=False, frames_to_stack=0, - level_name='rooms', max_steps=4000, - omit_agent_in_obs=True, parse_doors=True, pomdp_r=3, - record_episodes=False, verbose=False + obs_props = ObservationProperties(render_agents=ARO.LEVEL, omit_agent_self=True, pomdp_r=2) + + move_props = {'allow_square_movement': True, + 'allow_diagonal_movement': False, + 'allow_no_op': False} + + factory = ItemFactory(n_agents=3, done_at_collision=False, + level_name='rooms', max_steps=400, + obs_prop=obs_props, parse_doors=True, + record_episodes=True, verbose=True, + mv_prop=move_props, item_prop=item_probs ) # noinspection DuplicatedCode n_actions = factory.action_space.n - 1 _ = factory.observation_space - for epoch in range(100): - random_actions = [[random.randint(0, n_actions) for _ in range(factory.n_agents)] for _ in range(200)] + for epoch in range(4): + random_actions = [[random.randint(0, n_actions) for _ + in range(factory.n_agents)] for _ + in range(factory.max_steps + 1)] env_state = factory.reset() - rew = 0 + r = 0 for agent_i_action in random_actions: env_state, step_r, done_bool, info_obj = factory.step(agent_i_action) - rew += step_r + r += step_r if render: factory.render() if done_bool: break - print(f'Factory run {epoch} done, reward is:\n {rew}') + print(f'Factory run {epoch} done, reward is:\n {r}') +pass diff --git a/environments/utility_classes.py b/environments/utility_classes.py index ea7128b..c5ecd7f 100644 --- a/environments/utility_classes.py +++ b/environments/utility_classes.py @@ -1,7 +1,24 @@ -from typing import NamedTuple +from enum import Enum +from typing import NamedTuple, Union + + +class AgentRenderOptions(object): + SEPERATE = 'each' + COMBINED = 'combined' + LEVEL = 'lvl' + NOT = 'not' class MovementProperties(NamedTuple): allow_square_movement: bool = True allow_diagonal_movement: bool = False allow_no_op: bool = False + + +class ObservationProperties(NamedTuple): + render_agents: AgentRenderOptions = AgentRenderOptions.SEPERATE + omit_agent_self: bool = True + additional_agent_placeholder: Union[None, str, int] = None + cast_shadows = True + frames_to_stack: int = 0 + pomdp_r: int = 0 diff --git a/main.py b/main.py index 4f54ab6..6c96296 100644 --- a/main.py +++ b/main.py @@ -56,7 +56,7 @@ if __name__ == '__main__': for modeL_type in [A2C, PPO, DQN]: # ,RegDQN, QRDQN]: for seed in range(3): env_kwargs = dict(n_agents=1, - # item_properties=item_props, + # item_prop=item_props, dirt_properties=dirt_props, movement_properties=move_props, pomdp_r=2, max_steps=1000, parse_doors=False, diff --git a/main_test.py b/main_test.py index ed502ef..2834288 100644 --- a/main_test.py +++ b/main_test.py @@ -48,7 +48,7 @@ if __name__ == '__main__': env_kwargs = yaml.load(f, Loader=yaml.FullLoader) dirt_props 
= DirtProperties(clean_amount=3, gain_amount=0.2, max_global_amount=30, max_local_amount=3, spawn_frequency=1, max_spawn_ratio=0.05) - # env_kwargs.update(n_agents=1, dirt_properties=dirt_props) + # env_kwargs.update(n_agents=1, dirt_prop=dirt_props) env = DirtFactory(**env_kwargs) env = FrameStack(env, 4) diff --git a/reload_agent.py b/reload_agent.py index 9f9342b..acc75a9 100644 --- a/reload_agent.py +++ b/reload_agent.py @@ -5,6 +5,7 @@ import numpy as np import yaml from environments import helpers as h +from environments.helpers import Constants as c from environments.factory.factory_dirt import DirtFactory from environments.factory.factory_dirt_item import DirtItemFactory from environments.logging.recorder import RecorderCallback @@ -15,29 +16,30 @@ warnings.filterwarnings('ignore', category=UserWarning) if __name__ == '__main__': - model_name = 'DQN_1631187073' + model_name = 'DQN_163519000' run_id = 0 seed = 69 - out_path = Path('debug_out/DQN_1635176929/0_DQN_1635176929') + n_agents = 2 + out_path = Path('debug_out/DQN_163519000/1_DQN_163519000') model_path = out_path with (out_path / f'env_params.json').open('r') as f: env_kwargs = yaml.load(f, Loader=yaml.FullLoader) - env_kwargs.update(additional_agent_placeholder=None, n_agents=4) - if gain_amount := env_kwargs.get('dirt_properties', {}).get('gain_amount', None): - env_kwargs['dirt_properties']['max_spawn_amount'] = gain_amount - del env_kwargs['dirt_properties']['gain_amount'] + env_kwargs.update(additional_agent_placeholder=None, n_agents=n_agents) + if gain_amount := env_kwargs.get('dirt_prop', {}).get('gain_amount', None): + env_kwargs['dirt_prop']['max_spawn_amount'] = gain_amount + del env_kwargs['dirt_prop']['gain_amount'] - env_kwargs.update(record_episodes=True) + env_kwargs.update(record_episodes=False) this_model = out_path / 'model.zip' model_cls = next(val for key, val in h.MODEL_MAP.items() if key in model_name) - models = [model_cls.load(this_model) for _ in range(4)] + models = [model_cls.load(this_model) for _ in range(n_agents)] with RecorderCallback(filepath=Path() / 'recorder_out_DQN.json') as recorder: # Init Env - with DirtItemFactory(**env_kwargs) as env: + with DirtFactory(**env_kwargs) as env: obs_shape = env.observation_space.shape # Evaluation Loop for i in range(n Episodes) for episode in range(5): @@ -46,11 +48,11 @@ if __name__ == '__main__': while not done_bool: actions = [model.predict( np.stack([env_state[i][j] for i in range(env_state.shape[0])]), - deterministic=True)[0] for j, model in enumerate(models)] + deterministic=False)[0] for j, model in enumerate(models)] env_state, step_r, done_bool, info_obj = env.step(actions) recorder.read_info(0, info_obj) rew += step_r - # env.render() + env.render() if done_bool: recorder.read_done(0, done_bool) break diff --git a/studies/e_1.py b/studies/e_1.py index 5acc40e..26c63e8 100644 --- a/studies/e_1.py +++ b/studies/e_1.py @@ -26,16 +26,12 @@ from environments.factory.factory_dirt import DirtProperties, DirtFactory from environments.factory.factory_dirt_item import DirtItemFactory from environments.factory.factory_item import ItemProperties, ItemFactory from environments.logging.monitor import MonitorCallback -from environments.utility_classes import MovementProperties +from environments.utility_classes import MovementProperties, ObservationProperties, AgentRenderOptions import pickle from plotting.compare_runs import compare_seed_runs, compare_model_runs, compare_all_parameter_runs import pandas as pd import seaborn as sns -# Define a global studi 
save path -start_time = 163519000 # int(time.time()) -study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}' - """ In this studie, we want to explore the macro behaviour of multi agents which are trained on the same task, but never saw each other in training. @@ -68,6 +64,10 @@ There are further distinctions to be made: - We are out of distribution. """ +n_agents = 4 +ood_monitor_file = f'e_1_monitor_{n_agents}_agents.pick' +baseline_monitor_file = 'e_1_baseline_monitor.pick' + def policy_model_kwargs(): return dict(ent_coef=0.05) @@ -92,11 +92,96 @@ def encapsule_env_factory(env_fctry, env_kwrgs): return _init +def load_model_run_baseline(seed_path, env_to_run): + # retrieve model class + model_cls = next(val for key, val in h.MODEL_MAP.items() if key in seed_path.parent.name) + # Load both agents + model = model_cls.load(seed_path / 'model.zip') + # Load old env kwargs + with next(seed_path.glob('*.json')).open('r') as f: + env_kwargs = simplejson.load(f) + # Monitor Init + with MonitorCallback(filepath=seed_path / baseline_monitor_file) as monitor: + # Init Env + with env_to_run(**env_kwargs) as env_factory: + # Evaluation Loop for i in range(n Episodes) + for episode in range(100): + env_state = env_factory.reset() + rew, done_bool = 0, False + while not done_bool: + action = model.predict(env_state, deterministic=True)[0] + env_state, step_r, done_bool, info_obj = env_factory.step(action) + monitor.read_info(0, info_obj) + rew += step_r + if done_bool: + monitor.read_done(0, done_bool) + break + print(f'Factory run {episode} done, reward is:\n {rew}') + # Eval monitor outputs are automatically stored by the monitor object + # del model, env_kwargs, env_factory + # import gc + # gc.collect() + + +def load_model_run_study(seed_path, env_to_run, additional_kwargs_dict): + global model_cls + # retrieve model class + model_cls = next(val for key, val in h.MODEL_MAP.items() if key in seed_path.parent.name) + # Load both agents + models = [model_cls.load(seed_path / 'model.zip') for _ in range(n_agents)] + # Load old env kwargs + with next(seed_path.glob('*.json')).open('r') as f: + env_kwargs = simplejson.load(f) + env_kwargs.update( + n_agents=n_agents, + **additional_kwargs_dict.get('post_training_kwargs', {})) + # Monitor Init + with MonitorCallback(filepath=seed_path / ood_monitor_file) as monitor: + # Init Env + with env_to_run(**env_kwargs) as env_factory: + # Evaluation Loop for i in range(n Episodes) + for episode in range(50): + env_state = env_factory.reset() + rew, done_bool = 0, False + while not done_bool: + try: + actions = [model.predict( + np.stack([env_state[i][j] for i in range(env_state.shape[0])]), + deterministic=False)[0] for j, model in enumerate(models)] + except ValueError as e: + print(e) + print('Env_Kwargs are:\n') + print(env_kwargs) + print('Path is:\n') + print(seed_path) + exit() + env_state, step_r, done_bool, info_obj = env_factory.step(actions) + monitor.read_info(0, info_obj) + rew += step_r + if done_bool: + monitor.read_done(0, done_bool) + break + print(f'Factory run {episode} done, reward is:\n {rew}') + # Eval monitor outputs are automatically stored by the monitor object + del models, env_kwargs, env_factory + import gc + gc.collect() + + if __name__ == '__main__': train_steps = 8e5 + # Define a global studi save path + start_time = '900000' # int(time.time()) + study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}' + # Define Global Env Parameters # Define 
properties object parameters + obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT, + omit_agent_self=True, + frames_to_stack=3, + pomdp_r=2 + ) move_props = MovementProperties(allow_diagonal_movement=True, allow_square_movement=True, allow_no_op=False) @@ -108,33 +193,67 @@ if __name__ == '__main__': item_props = ItemProperties(n_items=10, agent_can_interact=True, spawn_frequency=30, n_drop_off_locations=2, max_agent_inventory_capacity=15) - factory_kwargs = dict(n_agents=1, - pomdp_r=2, max_steps=400, parse_doors=True, - level_name='rooms', frames_to_stack=3, - omit_agent_in_obs=True, combin_agent_obs=True, record_episodes=False, - cast_shadows=True, doors_have_area=False, verbose=False, - movement_properties=move_props + factory_kwargs = dict(n_agents=1, max_steps=400, parse_doors=True, + level_name='rooms', record_episodes=False, doors_have_area=False, + verbose=False, + mv_prop=move_props, + obs_prop=obs_props ) # Bundle both environments with global kwargs and parameters - env_map = {'dirt': (DirtFactory, dict(dirt_properties=dirt_props, **factory_kwargs)), - 'item': (ItemFactory, dict(item_properties=item_props, **factory_kwargs)), - 'itemdirt': (DirtItemFactory, dict(dirt_properties=dirt_props, item_properties=item_props, + env_map = {'dirt': (DirtFactory, dict(dirt_prop=dirt_props, + **factory_kwargs)), + 'item': (ItemFactory, dict(item_prop=item_props, + **factory_kwargs)), + 'itemdirt': (DirtItemFactory, dict(dirt_prop=dirt_props, + item_prop=item_props, **factory_kwargs))} env_names = list(env_map.keys()) # Define parameter versions according with #1,2[1,0,N],3 observation_modes = { # Fill-value = 0 - # DEACTIVATED 'seperate_0': dict(additional_env_kwargs=dict(additional_agent_placeholder=0)), + # DEACTIVATED 'seperate_0': dict(additional_env_kwargs=dict(additional_agent_placeholder=0)), # Fill-value = 1 # DEACTIVATED 'seperate_1': dict(additional_env_kwargs=dict(additional_agent_placeholder=1)), # Fill-value = N(0, 1) - 'seperate_N': dict(additional_env_kwargs=dict(additional_agent_placeholder='N')), - # Further Adjustments are done post-training - 'in_lvl_obs': dict(post_training_kwargs=dict(other_agent_obs='in_lvl')), + 'seperate_N': dict( + post_training_kwargs= + dict(obs_prop=ObservationProperties( + render_agents=AgentRenderOptions.COMBINED, + additional_agent_placeholder=None, + omit_agent_self=True, + frames_to_stack=3, + pomdp_r=2) + ), + additional_env_kwargs= + dict(obs_prop=ObservationProperties( + render_agents=AgentRenderOptions.NOT, + additional_agent_placeholder='N', + omit_agent_self=True, + frames_to_stack=3, + pomdp_r=2) + ) + ), + 'in_lvl_obs': dict( + post_training_kwargs= + dict(obs_prop=ObservationProperties( + render_agents=AgentRenderOptions.LEVEL, + omit_agent_self=True, + frames_to_stack=3, + pomdp_r=2) + ) + ), # No further adjustment needed - 'no_obs': {} + 'no_obs': dict( + post_training_kwargs= + dict(obs_prop=ObservationProperties( + render_agents=AgentRenderOptions.NOT, + omit_agent_self=True, + frames_to_stack=3, + pomdp_r=2) + ) + ) } # Train starts here ############################################################ @@ -223,52 +342,27 @@ if __name__ == '__main__': # Evaluation starts here ##################################################### # First Iterate over every model and monitor "as trained" - baseline_monitor_file = 'e_1_baseline_monitor.pick' if True: - render = False for observation_mode in observation_modes: obs_mode_path = next(x for x in study_root_path.iterdir() if x.is_dir() and x.name == observation_mode) # 
For trained policy in study_root_path / identifier for env_path in [x for x in obs_mode_path.iterdir() if x.is_dir()]: for policy_path in [x for x in env_path.iterdir() if x. is_dir()]: # Iteration - for seed_path in (y for y in policy_path.iterdir() if y.is_dir()): - # retrieve model class - for model_cls in (val for key, val in h.MODEL_MAP.items() if key in policy_path.name): - # Load both agents - model = model_cls.load(seed_path / 'model.zip') - # Load old env kwargs - with next(seed_path.glob('*.json')).open('r') as f: - env_kwargs = simplejson.load(f) - # Monitor Init - with MonitorCallback(filepath=seed_path / baseline_monitor_file) as monitor: - # Init Env - with env_map[env_path.name][0](**env_kwargs) as env_factory: - # Evaluation Loop for i in range(n Episodes) - for episode in range(100): - env_state = env_factory.reset() - rew, done_bool = 0, False - while not done_bool: - action = model.predict(env_state, deterministic=True)[0] - env_state, step_r, done_bool, info_obj = env_factory.step(action) - monitor.read_info(0, info_obj) - rew += step_r - if render: - env_factory.render() - if done_bool: - monitor.read_done(0, done_bool) - break - print(f'Factory run {episode} done, reward is:\n {rew}') - # Eval monitor outputs are automatically stored by the monitor object - del model, env_kwargs, env_factory - import gc + paths = list(y for y in policy_path.iterdir() if y.is_dir() \ + and not (y / baseline_monitor_file).exists()) + import multiprocessing as mp + import itertools as it + pool = mp.Pool(mp.cpu_count()) + result = pool.starmap(load_model_run_baseline, + it.product(paths, + (env_map[env_path.name][0],)) + ) - gc.collect() + # for seed_path in (y for y in policy_path.iterdir() if y.is_dir()): + # load_model_run_baseline(seed_path) # Then iterate over every model and monitor "ood behavior" - "is it ood?" 
- n_agents = 4 - ood_monitor_file = f'e_1_monitor_{n_agents}_agents.pick' - if True: for observation_mode in observation_modes: obs_mode_path = next(x for x in study_root_path.iterdir() if x.is_dir() and x.name == observation_mode) @@ -279,44 +373,18 @@ if __name__ == '__main__': # First seed path version # seed_path = next((y for y in policy_path.iterdir() if y.is_dir())) # Iteration - for seed_path in (y for y in policy_path.iterdir() if y.is_dir()): - if (seed_path / ood_monitor_file).exists(): - continue - # retrieve model class - for model_cls in (val for key, val in h.MODEL_MAP.items() if key in policy_path.name): - # Load both agents - models = [model_cls.load(seed_path / 'model.zip') for _ in range(n_agents)] - # Load old env kwargs - with next(seed_path.glob('*.json')).open('r') as f: - env_kwargs = simplejson.load(f) - env_kwargs.update( - n_agents=n_agents, additional_agent_placeholder=None, - **observation_modes[observation_mode].get('post_training_env_kwargs', {})) - - # Monitor Init - with MonitorCallback(filepath=seed_path / ood_monitor_file) as monitor: - # Init Env - with env_map[env_path.name][0](**env_kwargs) as env_factory: - # Evaluation Loop for i in range(n Episodes) - for episode in range(50): - env_state = env_factory.reset() - rew, done_bool = 0, False - while not done_bool: - actions = [model.predict( - np.stack([env_state[i][j] for i in range(env_state.shape[0])]), - deterministic=False)[0] for j, model in enumerate(models)] - env_state, step_r, done_bool, info_obj = env_factory.step(actions) - monitor.read_info(0, info_obj) - rew += step_r - if done_bool: - monitor.read_done(0, done_bool) - break - print(f'Factory run {episode} done, reward is:\n {rew}') - # Eval monitor outputs are automatically stored by the monitor object - del models, env_kwargs, env_factory - import gc - - gc.collect() + import multiprocessing as mp + import itertools as it + pool = mp.Pool(mp.cpu_count()) + paths = list(y for y in policy_path.iterdir() if y.is_dir() \ + and not (y / ood_monitor_file).exists()) + result = pool.starmap(load_model_run_study, + it.product(paths, + (env_map[env_path.name][0],), + (observation_modes[observation_mode],)) + ) + # for seed_path in (y for y in policy_path.iterdir() if y.is_dir()): + # load_model_run_study(seed_path) # Plotting if True:
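
The new _do_pomdp_obs_cutout helper clips the (z, x, y) observation cube to the agent-centred window of side pomdp_diameter and pads it back up when the agent stands near a level border. A standalone sketch of that clipping-and-padding arithmetic (simplified: it pads by the missing width directly instead of via the modulo check used above, and the function name is illustrative):

import numpy as np

def pomdp_cutout(obs, x, y, r):
    """Clip obs of shape (z, X, Y) to a (2r+1)x(2r+1) window around (x, y), zero-padding at borders."""
    d = 2 * r + 1
    x0, x1 = max(0, x - r), min(x + r + 1, obs.shape[1])
    y0, y1 = max(0, y - r), min(y + r + 1, obs.shape[2])
    window = obs[:, x0:x1, y0:y1]
    # Whatever the level border clipped away is re-added as zero padding.
    pad_x = (max(0, r - x), d - (x1 - x0) - max(0, r - x))
    pad_y = (max(0, r - y), d - (y1 - y0) - max(0, r - y))
    return np.pad(window, ((0, 0), pad_x, pad_y), 'constant')

obs = np.arange(2 * 5 * 5, dtype=np.float32).reshape(2, 5, 5)
assert pomdp_cutout(obs, 0, 0, 2).shape == (2, 5, 5)   # top-left corner
assert pomdp_cutout(obs, 4, 4, 2).shape == (2, 5, 5)   # bottom-right corner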
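
DirtRegister.as_array now drops fully-cleaned piles while iterating over self.values(); assuming the register stores its entities in a dict (it exposes items()/values() and delete_item), removing entries during that iteration raises at runtime, so taking a snapshot of the candidates first is the safer pattern. Illustrated on a plain dict:

amounts = {'dirt_0': 0.0, 'dirt_1': 0.3, 'dirt_2': 0.0}

# for name, amount in amounts.items():
#     if amount == 0:
#         del amounts[name]      # RuntimeError: dictionary changed size during iteration

for name in [n for n, amount in amounts.items() if amount == 0]:
    del amounts[name]            # snapshot the keys first, then delete

assert amounts == {'dirt_1': 0.3}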
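
One detail in the new ObservationProperties: a typing.NamedTuple only turns annotated names into fields, so cast_shadows = True (without an annotation) becomes a plain class attribute that always reads as True and cannot be set through the constructor or serialised with the other fields. The annotated form makes it a real, configurable field:

from typing import NamedTuple, Union

class AgentRenderOptions(object):
    SEPERATE = 'each'
    COMBINED = 'combined'
    LEVEL = 'lvl'
    NOT = 'not'

class ObservationProperties(NamedTuple):
    render_agents: AgentRenderOptions = AgentRenderOptions.SEPERATE
    omit_agent_self: bool = True
    additional_agent_placeholder: Union[None, str, int] = None
    cast_shadows: bool = True        # the annotation is what makes this a tuple field
    frames_to_stack: int = 0
    pomdp_r: int = 0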
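
In studies/e_1.py the per-seed evaluation loops are now fanned out over a process pool: starmap combined with itertools.product pairs every seed directory with the (single) environment class and calls load_model_run_baseline / load_model_run_study once per pair. A stripped-down sketch of that dispatch pattern, with a dummy worker and env class standing in for the real ones:

import itertools as it
import multiprocessing as mp

class DummyEnv:
    pass

def evaluate(seed_path, env_cls):
    # stand-in for load_model_run_baseline(seed_path, env_to_run)
    return f'evaluated {seed_path} with {env_cls.__name__}'

if __name__ == '__main__':
    paths = ['seed_0', 'seed_1', 'seed_2']
    with mp.Pool(mp.cpu_count()) as pool:
        # product(paths, (cls,)) yields one (path, cls) tuple per path; starmap unpacks each tuple
        results = pool.starmap(evaluate, it.product(paths, (DummyEnv,)))
    print(results)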