From 35f5bdeed4d21f759a023107e1d26402d1b530a7 Mon Sep 17 00:00:00 2001 From: steffen-illium Date: Tue, 13 Jul 2021 11:12:03 +0200 Subject: [PATCH] Restructuring and Testing Done --- .../{oo_factory => factory/base}/__init__.py | 0 environments/factory/base/base_factory.py | 370 ++++++++++++++++++ environments/factory/base/objects.py | 266 +++++++++++++ environments/factory/base/registers.py | 292 ++++++++++++++ environments/factory/base_factory.py | 364 ----------------- environments/factory/renderer.py | 2 +- environments/factory/simple_factory.py | 178 +++++---- environments/helpers.py | 61 ++- environments/logging/monitor.py | 13 +- environments/logging/recorder.py | 74 ++++ environments/oo_factory/_base_factory.py | 68 ---- environments/utility_classes.py | 293 +------------- main.py | 19 +- reload_agent.py | 2 +- 14 files changed, 1160 insertions(+), 842 deletions(-) rename environments/{oo_factory => factory/base}/__init__.py (100%) create mode 100644 environments/factory/base/base_factory.py create mode 100644 environments/factory/base/objects.py create mode 100644 environments/factory/base/registers.py delete mode 100644 environments/factory/base_factory.py create mode 100644 environments/logging/recorder.py delete mode 100644 environments/oo_factory/_base_factory.py diff --git a/environments/oo_factory/__init__.py b/environments/factory/base/__init__.py similarity index 100% rename from environments/oo_factory/__init__.py rename to environments/factory/base/__init__.py diff --git a/environments/factory/base/base_factory.py b/environments/factory/base/base_factory.py new file mode 100644 index 0000000..224be7f --- /dev/null +++ b/environments/factory/base/base_factory.py @@ -0,0 +1,370 @@ +from pathlib import Path +from typing import List, Union, Iterable + +import gym +import numpy as np +from gym import spaces + +import yaml +from gym.wrappers import FrameStack + +from environments.helpers import Constants as c, Constants +from environments import helpers as h +from environments.factory.base.objects import Slice, Agent, Tile, Action, MoveableEntity +from environments.factory.base.registers import StateSlices, Actions, Entities, Agents, Doors, FloorTiles +from environments.utility_classes import MovementProperties + +REC_TAC = 'rec' + + +# noinspection PyAttributeOutsideInit +class BaseFactory(gym.Env): + + @property + def action_space(self): + return spaces.Discrete(self._actions.n) + + @property + def observation_space(self): + agent_slice = self.n_agents if self.omit_agent_slice_in_obs else 0 + agent_slice = (self.n_agents - 1) if self.combin_agent_slices_in_obs else agent_slice + if self.pomdp_radius: + shape = (self._obs_cube.shape[0] - agent_slice, self.pomdp_radius * 2 + 1, self.pomdp_radius * 2 + 1) + space = spaces.Box(low=0, high=1, shape=shape, dtype=np.float32) + return space + else: + shape = [x-agent_slice if idx == 0 else x for idx, x in enumerate(self._obs_cube.shape)] + space = spaces.Box(low=0, high=1, shape=shape, dtype=np.float32) + return space + + @property + def pomdp_diameter(self): + return self.pomdp_radius * 2 + 1 + + @property + def movement_actions(self): + return self._actions.movement_actions + + @property + def additional_actions(self) -> Union[str, List[str]]: + """ + When heriting from this Base Class, you musst implement this methode!!! + + :return: A list of Actions-object holding all additional actions. + :rtype: List[Action] + """ + raise NotImplementedError('Please register additional actions ') + + @property + def additional_entities(self) -> Union[Entities, List[Entities]]: + """ + When heriting from this Base Class, you musst implement this methode!!! + + :return: A single Entites collection or a list of such. + :rtype: Union[Entities, List[Entities]] + """ + raise NotImplementedError('Please register additional entities.') + + @property + def additional_slices(self) -> Union[Slice, List[Slice]]: + """ + When heriting from this Base Class, you musst implement this methode!!! + + :return: A list of Slice-objects. + :rtype: List[Slice] + """ + raise NotImplementedError('Please register additional slices.') + + def __enter__(self): + return self if self.frames_to_stack == 0 else FrameStack(self, self.frames_to_stack) + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + + def __init__(self, level_name='simple', n_agents=1, max_steps=int(5e2), pomdp_radius: Union[None, int] = 0, + movement_properties: MovementProperties = MovementProperties(), parse_doors=False, + combin_agent_slices_in_obs: bool = False, frames_to_stack=0, record_episodes=False, + omit_agent_slice_in_obs=False, done_at_collision=False, **kwargs): + assert (combin_agent_slices_in_obs != omit_agent_slice_in_obs) or \ + (not combin_agent_slices_in_obs and not omit_agent_slice_in_obs), \ + 'Both options are exclusive' + assert frames_to_stack != 1 and frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1." + + # Attribute Assignment + self.movement_properties = movement_properties + self.level_name = level_name + self._level_shape = None + + self.n_agents = n_agents + self.max_steps = max_steps + self.pomdp_radius = pomdp_radius + self.combin_agent_slices_in_obs = combin_agent_slices_in_obs + self.omit_agent_slice_in_obs = omit_agent_slice_in_obs + self.frames_to_stack = frames_to_stack + + self.done_at_collision = done_at_collision + self.record_episodes = record_episodes + self.parse_doors = parse_doors + + # Actions + self._actions = Actions(self.movement_properties, can_use_doors=self.parse_doors) + if additional_actions := self.additional_actions: + self._actions.register_additional_items(additional_actions) + + self.reset() + + def _init_state_slices(self) -> StateSlices: + state_slices = StateSlices() + + # Objects + # Level + level_filepath = Path(__file__).parent.parent / h.LEVELS_DIR / f'{self.level_name}.txt' + parsed_level = h.parse_level(level_filepath) + level = [Slice(c.LEVEL.name, h.one_hot_level(parsed_level))] + self._level_shape = level[0].shape + + # Doors + parsed_doors = h.one_hot_level(parsed_level, c.DOOR) + doors = [Slice(c.DOORS.value, parsed_doors)] if parsed_doors.any() and self.parse_doors else [] + + # Agents + agents = [] + for i in range(self.n_agents): + agents.append(Slice(f'{c.AGENT.name}#{i}', np.zeros_like(level[0].slice))) + state_slices.register_additional_items(level+doors+agents) + + # Additional Slices from SubDomains + if additional_slices := self.additional_slices: + state_slices.register_additional_items(additional_slices) + return state_slices + + def _init_obs_cube(self) -> np.ndarray: + x, y = self._slices.by_enum(c.LEVEL).shape + state = np.zeros((len(self._slices), x, y)) + state[0] = self._slices.by_enum(c.LEVEL).slice + if r := self.pomdp_radius: + self._padded_obs_cube = np.full((len(self._slices), x + r*2, y + r*2), c.FREE_CELL.value) + self._padded_obs_cube[0] = c.OCCUPIED_CELL.value + self._padded_obs_cube[:, r:r+x, r:r+y] = state + return state + + def _init_entities(self): + # Tile Init + self._tiles = FloorTiles.from_argwhere_coordinates(self._slices.by_enum(c.LEVEL).free_tiles) + + # Door Init + if self.parse_doors: + tiles = [self._tiles.by_pos(x) for x in self._slices.by_enum(c.DOORS).occupied_tiles] + self._doors = Doors.from_tiles(tiles, context=self._tiles) + + # Agent Init on random positions + self._agents = Agents.from_tiles(np.random.choice(self._tiles, self.n_agents)) + entities = Entities() + entities.register_additional_items([self._agents]) + + if self.parse_doors: + entities.register_additional_items([self._doors]) + + if additional_entities := self.additional_entities: + entities.register_additional_items([additional_entities]) + + return entities + + def reset(self) -> (np.ndarray, int, bool, dict): + self._slices = self._init_state_slices() + self._obs_cube = self._init_obs_cube() + self._entitites = self._init_entities() + self._flush_state() + self._steps = 0 + + info = self._summarize_state() if self.record_episodes else {} + return None, None, None, info + + def pre_step(self) -> None: + pass + + def post_step(self) -> dict: + pass + + def step(self, actions): + actions = [actions] if isinstance(actions, int) or np.isscalar(actions) else actions + assert isinstance(actions, Iterable), f'"actions" has to be in [{int, list}]' + self._steps += 1 + done = False + + # Pre step Hook for later use + self.pre_step() + + # Move this in a seperate function? + for action, agent in zip(actions, self._agents): + agent.clear_temp_sate() + action_name = self._actions[action] + if self._actions.is_moving_action(action): + valid = self._move_or_colide(agent, action_name) + elif self._actions.is_no_op(action): + valid = c.VALID.value + elif self._actions.is_door_usage(action): + # Check if agent raly stands on a door: + if door := self._doors.by_pos(agent.pos): + door.use() + valid = c.VALID.value + # When he doesn't... + else: + valid = c.NOT_VALID.value + else: + valid = self.do_additional_actions(agent, action) + agent.temp_action = action + agent.temp_valid = valid + + self._flush_state() + + tiles_with_collisions = self.get_all_tiles_with_collisions() + for tile in tiles_with_collisions: + guests = tile.guests_that_can_collide + for i, guest in enumerate(guests): + this_collisions = guests[:] + del this_collisions[i] + guest.temp_collisions = this_collisions + + if self.done_at_collision and tiles_with_collisions: + done = True + + # Step the door close intervall + if self.parse_doors: + self._doors.tick_doors() + + # Finalize + reward, info = self.calculate_reward() + if self._steps >= self.max_steps: + done = True + info.update(step_reward=reward, step=self._steps) + if self.record_episodes: + info.update(self._summarize_state()) + + # Post step Hook for later use + info.update(self.post_step()) + + obs = self._get_observations() + + return obs, reward, done, info + + def _flush_state(self): + self._obs_cube[np.arange(len(self._slices)) != self._slices.get_idx(c.LEVEL)] = c.FREE_CELL.value + if self.parse_doors: + for door in self._doors: + if door.is_open: + self._obs_cube[self._slices.get_idx(c.DOORS)][door.pos] = c.IS_OPEN_DOOR.value + else: + self._obs_cube[self._slices.get_idx(c.DOORS)][door.pos] = c.IS_CLOSED_DOOR.value + for agent in self._agents: + self._obs_cube[self._slices.get_idx_by_name(agent.name)][agent.pos] = c.OCCUPIED_CELL.value + if agent.last_pos != h.NO_POS: + self._obs_cube[self._slices.get_idx_by_name(agent.name)][agent.last_pos] = c.FREE_CELL.value + + def _get_observations(self) -> np.ndarray: + if self.n_agents == 1: + obs = self._build_per_agent_obs(self._agents[0]) + elif self.n_agents >= 2: + obs = np.stack([self._build_per_agent_obs(agent) for agent in self._agents]) + else: + raise ValueError('n_agents cannot be smaller than 1!!') + return obs + + def _build_per_agent_obs(self, agent: Agent) -> np.ndarray: + first_agent_slice = self._slices.AGENTSTARTIDX + if r := self.pomdp_radius: + x, y = self._level_shape + self._padded_obs_cube[:, r:r + x, r:r + y] = self._obs_cube + global_x, global_y = agent.pos + global_x += r + global_y += r + x0, x1 = max(0, global_x - self.pomdp_radius), global_x + self.pomdp_radius + 1 + y0, y1 = max(0, global_y - self.pomdp_radius), global_y + self.pomdp_radius + 1 + obs = self._padded_obs_cube[:, x0:x1, y0:y1] + else: + obs = self._obs_cube + if self.omit_agent_slice_in_obs: + obs_new = obs[[key for key, val in self._slices.items() if c.AGENT.value not in val]] + return obs_new + else: + if self.combin_agent_slices_in_obs: + agent_obs = np.sum(obs[[key for key, slice in self._slices.items() if c.AGENT.name in slice.name]], + axis=0, keepdims=True) + obs = np.concatenate((obs[:first_agent_slice], agent_obs, obs[first_agent_slice+self.n_agents:])) + return obs + else: + return obs + + def do_additional_actions(self, agent_i: int, action: int) -> bool: + raise NotImplementedError + + def get_all_tiles_with_collisions(self) -> List[Tile]: + tiles_with_collisions = list() + for tile in self._tiles: + if tile.is_occupied(): + guests = [guest for guest in tile.guests if guest.can_collide] + if len(guests) >= 2: + tiles_with_collisions.append(tile) + return tiles_with_collisions + + def _move_or_colide(self, agent: Agent, action: Action) -> Constants: + new_tile, valid = self._check_agent_move(agent, action) + if valid: + # Does not collide width level boundaries + return agent.move(new_tile) + else: + # Agent seems to be trying to collide in this step + return c.NOT_VALID + + def _check_agent_move(self, agent, action: Action) -> (Tile, bool): + # Actions + x_diff, y_diff = h.ACTIONMAP[action.name] + x_new = agent.x + x_diff + y_new = agent.y + y_diff + + new_tile = self._tiles.by_pos((x_new, y_new)) + if new_tile: + valid = c.VALID + else: + tile = agent.tile + valid = c.VALID + return tile, valid + + if self.parse_doors and agent.last_pos != h.NO_POS: + if door := self._doors.by_pos(agent.pos): + if door.is_open: + pass + else: # door.is_closed: + if door.is_linked(agent.last_pos, new_tile.pos): + pass + else: + return agent.tile, c.NOT_VALID + else: + pass + else: + pass + + return new_tile, valid + + def calculate_reward(self) -> (int, dict): + # Returns: Reward, Info + raise NotImplementedError + + def render(self, mode='human'): + raise NotImplementedError + + def save_params(self, filepath: Path): + # noinspection PyProtectedMember + # d = {key: val._asdict() if hasattr(val, '_asdict') else val for key, val in self.__dict__.items() + d = {key: val for key, val in self.__dict__.items() if not key.startswith('_') and not key.startswith('__')} + filepath.parent.mkdir(parents=True, exist_ok=True) + with filepath.open('w') as f: + yaml.dump(d, f) + # pickle.dump(d, f, protocol=pickle.HIGHEST_PROTOCOL) + + def _summarize_state(self): + summary = {f'{REC_TAC}_step': self._steps} + for entity in self._entitites: + if hasattr(entity, 'summarize_state'): + summary.update({f'{REC_TAC}_{entity.name}': entity.summarize_state()}) + return summary diff --git a/environments/factory/base/objects.py b/environments/factory/base/objects.py new file mode 100644 index 0000000..8fbffb6 --- /dev/null +++ b/environments/factory/base/objects.py @@ -0,0 +1,266 @@ +import itertools + +import networkx as nx +import numpy as np +from environments import helpers as h +from environments.helpers import Constants as c +import itertools + + +def sub(p, q): + return p - q + + +class Object: + + def __bool__(self): + return True + + @property + def i(self): + return self._identifier + + @property + def name(self): + return self._identifier + + def __init__(self, identifier, **kwargs): + self._identifier = identifier + if kwargs: + print(f'Following kwargs were passed, but ignored: {kwargs}') + + def __repr__(self): + return f'{self.__class__.__name__}({self._identifier})' + + +class Action(Object): + + @property + def name(self): + return self.i + + def __init__(self, *args): + super(Action, self).__init__(*args) + + +class Slice(Object): + + @property + def shape(self): + return self.slice.shape + + @property + def occupied_tiles(self): + return np.argwhere(self.slice == c.OCCUPIED_CELL.value) + + @property + def free_tiles(self): + return np.argwhere(self.slice == c.FREE_CELL.value) + + def __init__(self, identifier, arrayslice): + super(Slice, self).__init__(identifier) + self.slice = arrayslice + + +class Wall(Object): + pass + + +class Tile(Object): + + @property + def guests_that_can_collide(self): + return [x for x in self.guests if x.can_collide] + + @property + def guests(self): + return self._guests.values() + + @property + def x(self): + return self.pos[0] + + @property + def y(self): + return self.pos[1] + + @property + def pos(self): + return self._pos + + def __init__(self, i, pos): + super(Tile, self).__init__(i) + self._guests = dict() + self._pos = tuple(pos) + + def __len__(self): + return len(self._guests) + + def is_empty(self): + return not len(self._guests) + + def is_occupied(self): + return len(self._guests) + + def enter(self, guest): + if guest.name not in self._guests: + self._guests.update({guest.name: guest}) + return True + else: + return False + + def leave(self, guest): + try: + del self._guests[guest.name] + except (ValueError, KeyError): + return False + return True + + +class Entity(Object): + + @property + def can_collide(self): + return True + + @property + def encoding(self): + return 1 + + @property + def x(self): + return self.pos[0] + + @property + def y(self): + return self.pos[1] + + @property + def pos(self): + return self._tile.pos + + @property + def tile(self): + return self._tile + + def __init__(self, identifier, tile: Tile, **kwargs): + super(Entity, self).__init__(identifier, **kwargs) + self._tile = tile + + def summarize_state(self): + return self.__dict__.copy() + + +class MoveableEntity(Entity): + + @property + def last_tile(self): + return self._last_tile + + @property + def last_pos(self): + if self._last_tile: + return self._last_tile.pos + else: + return h.NO_POS + + @property + def direction_of_view(self): + last_x, last_y = self.last_pos + curr_x, curr_y = self.pos + return last_x-curr_x, last_y-curr_y + + def __init__(self, *args, **kwargs): + super(MoveableEntity, self).__init__(*args, **kwargs) + self._last_tile = None + + def move(self, next_tile): + curr_tile = self.tile + if curr_tile != next_tile: + next_tile.enter(self) + curr_tile.leave(self) + self._tile = next_tile + self._last_tile = curr_tile + return True + else: + return False + + +class Door(Entity): + + @property + def can_collide(self): + return False + + @property + def encoding(self): + return 1 if self.is_closed else -1 + + def __init__(self, *args, context, closed_on_init=True, auto_close_interval=500): + super(Door, self).__init__(*args) + self._state = c.IS_CLOSED_DOOR + self.auto_close_interval = auto_close_interval + self.time_to_close = -1 + neighbor_pos = list(itertools.product([-1, 1, 0], repeat=2))[:-1] + neighbor_tiles = [context.by_pos(tuple([sum(x) for x in zip(self.pos, diff)])) for diff in neighbor_pos] + neighbor_pos = [x.pos for x in neighbor_tiles if x] + possible_connections = itertools.combinations(neighbor_pos, 2) + self.connectivity = nx.Graph() + for a, b in possible_connections: + if not max(abs(np.subtract(a, b))) > 1: + self.connectivity.add_edge(a, b) + if not closed_on_init: + self._open() + + @property + def is_closed(self): + return self._state == c.IS_CLOSED_DOOR + + @property + def is_open(self): + return self._state == c.IS_OPEN_DOOR + + @property + def status(self): + return self._state + + def use(self): + if self._state == c.IS_OPEN_DOOR: + self._close() + else: + self._open() + + def tick(self): + if self.is_open and len(self.tile) == 1 and self.time_to_close: + self.time_to_close -= 1 + elif self.is_open and not self.time_to_close and len(self.tile) == 1: + self.use() + + def _open(self): + self.connectivity.add_edges_from([(self.pos, x) for x in self.connectivity.nodes]) + self._state = c.IS_OPEN_DOOR + self.time_to_close = self.auto_close_interval + + def _close(self): + self.connectivity.remove_node(self.pos) + self._state = c.IS_CLOSED_DOOR + + def is_linked(self, old_pos, new_pos): + try: + _ = nx.shortest_path(self.connectivity, old_pos, new_pos) + return True + except nx.exception.NetworkXNoPath: + return False + + +class Agent(MoveableEntity): + + def __init__(self, *args): + super(Agent, self).__init__(*args) + self.clear_temp_sate() + + # noinspection PyAttributeOutsideInit + def clear_temp_sate(self): + self.temp_collisions = [] + self.temp_valid = None + self.temp_action = -1 \ No newline at end of file diff --git a/environments/factory/base/registers.py b/environments/factory/base/registers.py new file mode 100644 index 0000000..a83efbe --- /dev/null +++ b/environments/factory/base/registers.py @@ -0,0 +1,292 @@ +import itertools +import random +from enum import Enum +from typing import List, Union + +import networkx as nx +import numpy as np + +from environments.factory.base.objects import Entity, Tile, Agent, Door, Slice, Action +from environments.utility_classes import MovementProperties +from environments import helpers as h +from environments.helpers import Constants as c + + +class Register: + _accepted_objects = Entity + + @classmethod + def from_argwhere_coordinates(cls, positions: (int, int), tiles): + entities = [cls._accepted_objects(i, tiles.by_pos(position)) for i, position in enumerate(positions)] + registered_obj = cls() + registered_obj.register_additional_items(entities) + return registered_obj + + @property + def name(self): + return self.__class__.__name__ + + @property + def n(self): + return len(self) + + def __init__(self): + self._register = dict() + self._names = dict() + + def __len__(self): + return len(self._register) + + def __iter__(self): + return iter(self.values()) + + def __add__(self, other: _accepted_objects): + assert isinstance(other, self._accepted_objects), f'All item names have to be of type ' \ + f'{self._accepted_objects}, ' \ + f'but were {other.__class__}.,' + self._names.update({other.name: len(self._register)}) + self._register.update({len(self._register): other}) + return self + + def register_additional_items(self, others: List[_accepted_objects]): + for other in others: + self + other + return self + + def keys(self): + return self._register.keys() + + def values(self): + return self._register.values() + + def items(self): + return self._register.items() + + def __getitem__(self, item): + try: + return self._register[item] + except KeyError: + print('NO') + raise + + def by_name(self, item): + return self[self._names[item]] + + def by_enum(self, enum: Enum): + return self[self._names[enum.name]] + + def __repr__(self): + return f'{self.__class__.__name__}({self._register})' + + def get_name(self, item): + return self._register[item].name + + def get_idx_by_name(self, item): + return self._names[item] + + def get_idx(self, enum: Enum): + return self._names[enum.name] + + @classmethod + def from_tiles(cls, tiles, **kwargs): + entities = [cls._accepted_objects(f'{cls._accepted_objects.__name__.upper()}#{i}', tile, **kwargs) + for i, tile in enumerate(tiles)] + registered_obj = cls() + registered_obj.register_additional_items(entities) + return registered_obj + + +class EntityRegister(Register): + + @classmethod + def from_argwhere_coordinates(cls, argwhere_coordinates): + tiles = cls() + tiles.register_additional_items([cls._accepted_objects(i, pos) for i, pos in enumerate(argwhere_coordinates)]) + return tiles + + def __init__(self): + super(EntityRegister, self).__init__() + self._tiles = dict() + + def __add__(self, other): + super(EntityRegister, self).__add__(other) + self._tiles[other.pos] = other + + def by_pos(self, pos): + if isinstance(pos, np.ndarray): + pos = tuple(pos) + try: + return self._tiles[pos] + except KeyError: + return None + + +class Entities(Register): + + _accepted_objects = Register + + def __init__(self): + super(Entities, self).__init__() + + def __iter__(self): + return iter([x for sublist in self.values() for x in sublist]) + + @classmethod + def from_argwhere_coordinates(cls, positions): + raise AttributeError() + + +class FloorTiles(EntityRegister): + _accepted_objects = Tile + + @property + def occupied_tiles(self): + tiles = [tile for tile in self if tile.is_occupied()] + random.shuffle(tiles) + return tiles + + @property + def empty_tiles(self): + tiles = [tile for tile in self if tile.is_empty()] + random.shuffle(tiles) + return tiles + + +class Agents(Register): + + _accepted_objects = Agent + + @property + def positions(self): + return [agent.pos for agent in self] + + +class Doors(EntityRegister): + _accepted_objects = Door + + def tick_doors(self): + for door in self: + door.tick() + + +class Actions(Register): + + _accepted_objects = Action + + @property + def movement_actions(self): + return self._movement_actions + + def __init__(self, movement_properties: MovementProperties, can_use_doors=False): + self.allow_no_op = movement_properties.allow_no_op + self.allow_diagonal_movement = movement_properties.allow_diagonal_movement + self.allow_square_movement = movement_properties.allow_square_movement + self.can_use_doors = can_use_doors + super(Actions, self).__init__() + + if self.allow_square_movement: + self.register_additional_items([self._accepted_objects(direction) for direction in h.MANHATTAN_MOVES]) + if self.allow_diagonal_movement: + self.register_additional_items([self._accepted_objects(direction) for direction in h.DIAGONAL_MOVES]) + self._movement_actions = self._register.copy() + if self.can_use_doors: + self.register_additional_items([self._accepted_objects('use_door')]) + if self.allow_no_op: + self.register_additional_items([self._accepted_objects('no-op')]) + + def is_moving_action(self, action: Union[int]): + #if isinstance(action, Action): + # return (action.name in h.MANHATTAN_MOVES and self.allow_square_movement) or \ + # (action.name in h.DIAGONAL_MOVES and self.allow_diagonal_movement) + #else: + return action in self.movement_actions.keys() + + def is_no_op(self, action: Union[str, int]): + if isinstance(action, str): + action = self.by_name(action) + return self[action].name == 'no-op' + + def is_door_usage(self, action: Union[str, int]): + if isinstance(action, str): + action = self.by_name(action) + return self[action].name == 'use_door' + + +class StateSlices(Register): + + _accepted_objects = Slice + + @property + def AGENTSTARTIDX(self): + if self._agent_start_idx: + return self._agent_start_idx + else: + self._agent_start_idx = min([idx for idx, x in self.items() if c.AGENT.name in x.name]) + return self._agent_start_idx + + def __init__(self): + super(StateSlices, self).__init__() + self._agent_start_idx = None + + def _gather_occupation(self, excluded_slices): + exclusion = excluded_slices or [] + assert isinstance(exclusion, (int, list)) + exclusion = exclusion if isinstance(exclusion, list) else [exclusion] + + result = np.sum([x for i, x in self.items() if i not in exclusion], axis=0) + return result + + def free_cells(self, excluded_slices: Union[None, List[int], int] = None) -> np.array: + occupation = self._gather_occupation(excluded_slices) + free_cells = np.argwhere(occupation == c.IS_FREE_CELL) + np.random.shuffle(free_cells) + return free_cells + + def occupied_cells(self, excluded_slices: Union[None, List[int], int] = None) -> np.array: + occupation = self._gather_occupation(excluded_slices) + occupied_cells = np.argwhere(occupation == c.IS_OCCUPIED_CELL.value) + np.random.shuffle(occupied_cells) + return occupied_cells + + +class Zones(Register): + + @property + def danger_zone(self): + return self._zone_slices[self.by_enum(c.DANGER_ZONE)] + + @property + def accounting_zones(self): + return [self[idx] for idx, name in self.items() if name != c.DANGER_ZONE.value] + + def __init__(self, parsed_level): + raise NotImplementedError('This needs a Rework') + super(Zones, self).__init__() + slices = list() + self._accounting_zones = list() + self._danger_zones = list() + for symbol in np.unique(parsed_level): + if symbol == h.WALL: + continue + elif symbol == h.DANGER_ZONE: + self + symbol + slices.append(h.one_hot_level(parsed_level, symbol)) + self._danger_zones.append(symbol) + else: + self + symbol + slices.append(h.one_hot_level(parsed_level, symbol)) + self._accounting_zones.append(symbol) + + self._zone_slices = np.stack(slices) + + def __getitem__(self, item): + return self._zone_slices[item] + + def get_name(self, item): + return self._register[item] + + def by_name(self, item): + return self[super(Zones, self).by_name(item)] + + def register_additional_items(self, other: Union[str, List[str]]): + raise AttributeError('You are not allowed to add additional Zones in runtime.') \ No newline at end of file diff --git a/environments/factory/base_factory.py b/environments/factory/base_factory.py deleted file mode 100644 index 09968a4..0000000 --- a/environments/factory/base_factory.py +++ /dev/null @@ -1,364 +0,0 @@ -from pathlib import Path -from typing import List, Union, Iterable - -import gym -import numpy as np -from gym import spaces - -import yaml -from gym.wrappers import FrameStack - -from environments import helpers as h -from environments.utility_classes import Actions, StateSlices, AgentState, MovementProperties, Zones, DoorState - - -# noinspection PyAttributeOutsideInit -class BaseFactory(gym.Env): - - @property - def action_space(self): - return spaces.Discrete(self._actions.n) - - @property - def observation_space(self): - agent_slice = self.n_agents if self.omit_agent_slice_in_obs else 0 - agent_slice = (self.n_agents - 1) if self.combin_agent_slices_in_obs else agent_slice - if self.pomdp_radius: - shape = (self._state.shape[0] - agent_slice, self.pomdp_radius * 2 + 1, self.pomdp_radius * 2 + 1) - space = spaces.Box(low=0, high=1, shape=shape, dtype=np.float32) - return space - else: - shape = [x-agent_slice if idx == 0 else x for idx, x in enumerate(self._state.shape)] - space = spaces.Box(low=0, high=1, shape=shape, dtype=np.float32) - return space - - @property - def movement_actions(self): - return self._actions.movement_actions - - @property - def has_doors(self): - return hasattr(self, '_doors') - - def __enter__(self): - return self if self.frames_to_stack == 0 else FrameStack(self, self.frames_to_stack) - - def __exit__(self, exc_type, exc_val, exc_tb): - self.close() - - def __init__(self, level_name='simple', n_agents=1, max_steps=int(5e2), pomdp_radius: Union[None, int] = 0, - movement_properties: MovementProperties = MovementProperties(), parse_doors=False, - combin_agent_slices_in_obs: bool = False, frames_to_stack=0, - omit_agent_slice_in_obs=False, **kwargs): - assert (combin_agent_slices_in_obs != omit_agent_slice_in_obs) or \ - (not combin_agent_slices_in_obs and not omit_agent_slice_in_obs), \ - 'Both options are exclusive' - assert frames_to_stack != 1 and frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1." - - self.movement_properties = movement_properties - self.level_name = level_name - - self.n_agents = n_agents - self.max_steps = max_steps - self.pomdp_radius = pomdp_radius - self.combin_agent_slices_in_obs = combin_agent_slices_in_obs - self.omit_agent_slice_in_obs = omit_agent_slice_in_obs - self.frames_to_stack = frames_to_stack - - self.done_at_collision = False - - self._state_slices = StateSlices() - - # Level - level_filepath = Path(__file__).parent / h.LEVELS_DIR / f'{self.level_name}.txt' - parsed_level = h.parse_level(level_filepath) - self._level = h.one_hot_level(parsed_level) - level_slices = [h.LEVEL] - - # Doors - if parse_doors: - parsed_doors = h.one_hot_level(parsed_level, h.DOOR) - if parsed_doors.any(): - self._doors = parsed_doors - level_slices.append(h.DOORS) - - # Agents - offset = len(level_slices) - self._state_slices.register_additional_items([*level_slices, - *[f'agent#{i}' for i in range(offset, n_agents + offset)]]) - - # Additional Slices from SubDomains - if 'additional_slices' in kwargs: - self._state_slices.register_additional_items(kwargs.get('additional_slices')) - self._zones = Zones(parsed_level) - - self._actions = Actions(self.movement_properties, can_use_doors=self.has_doors) - self._actions.register_additional_items(self.additional_actions) - self.reset() - - @property - def additional_actions(self) -> Union[str, List[str]]: - """ - When heriting from this Base Class, you musst implement this methode!!! - Please return a dict with the given types -> {int: str}. - The int should start at 0. - - :return: An Actions-object holding all actions with keys in range 0-n. - :rtype: Actions - """ - raise NotImplementedError('Please register additional actions ') - - def reset(self) -> (np.ndarray, int, bool, dict): - slices = [np.expand_dims(self._level, 0)] - self._steps = 0 - self._agent_states = list() - - # Door Init - if self.has_doors: - self._door_states = [DoorState(i, tuple(pos)) for i, pos - in enumerate(np.argwhere(self._doors == h.IS_OCCUPIED_CELL))] - slices.append(np.expand_dims(self._doors, 0)) - - # Agent placement ... - floor_tiles = np.argwhere(self._level == h.IS_FREE_CELL) - # ... on random positions - np.random.shuffle(floor_tiles) - agents = np.zeros((self.n_agents, *self._level.shape), dtype=np.int8) - for i, (x, y) in enumerate(floor_tiles[:self.n_agents]): - agents[i, x, y] = h.IS_OCCUPIED_CELL - agent_state = AgentState(i, -1, pos=(x, y)) - self._agent_states.append(agent_state) - slices.append(agents) - - # GLOBAL STATE - self._state = np.concatenate(slices, axis=0) - return None - - def _get_observations(self) -> np.ndarray: - if self.n_agents == 1: - obs = self._build_per_agent_obs(0) - elif self.n_agents >= 2: - obs = np.stack([self._build_per_agent_obs(agent_i) for agent_i in range(self.n_agents)]) - else: - raise ValueError('n_agents cannot be smaller than 1!!') - return obs - - def _build_per_agent_obs(self, agent_i: int) -> np.ndarray: - first_agent_slice = self._state_slices.AGENTSTARTIDX - # Todo: make this more efficient! - if self.pomdp_radius: - pomdp_diameter = self.pomdp_radius * 2 + 1 - global_x, global_y = self._agent_states[agent_i].pos - x0, x1 = max(0, global_x - self.pomdp_radius), global_x + self.pomdp_radius + 1 - y0, y1 = max(0, global_y - self.pomdp_radius), global_y + self.pomdp_radius + 1 - obs = self._state[:, x0:x1, y0:y1] - if obs.shape[1] != pomdp_diameter or obs.shape[2] != pomdp_diameter: - obs_padded = np.full((obs.shape[0], pomdp_diameter, pomdp_diameter), h.IS_OCCUPIED_CELL) - local_x, local_y = np.argwhere(obs[first_agent_slice + agent_i] == h.IS_OCCUPIED_CELL)[0] - obs_padded[:, - abs(local_x-self.pomdp_radius):abs(local_x-self.pomdp_radius)+obs.shape[1], - abs(local_y-self.pomdp_radius):abs(local_y-self.pomdp_radius)+obs.shape[2]] = obs - obs = obs_padded - else: - obs = self._state - if self.omit_agent_slice_in_obs: - obs_new = obs[[key for key, val in self._state_slices.items() if h.AGENT not in val]] - return obs_new - else: - if self.combin_agent_slices_in_obs: - agent_obs = np.sum(obs[[key for key, val in self._state_slices.items() if 'agent' in val]], - axis=0, keepdims=True) - obs = np.concatenate((obs[:first_agent_slice], agent_obs, obs[first_agent_slice+self.n_agents:])) - return obs - else: - return obs - - def do_additional_actions(self, agent_i: int, action: int) -> ((int, int), bool): - raise NotImplementedError - - def step(self, actions): - actions = [actions] if isinstance(actions, int) or np.isscalar(actions) else actions - assert isinstance(actions, Iterable), f'"actions" has to be in [{int, list}]' - self._steps += 1 - done = False - - # Move this in a seperate function? - for agent_i, action in enumerate(actions): - agent = self._agent_states[agent_i] - if self._actions.is_moving_action(action): - pos, valid = self.move_or_colide(agent_i, action) - elif self._actions.is_no_op(action): - pos, valid = agent.pos, h.VALID - elif self._actions.is_door_usage(action): - # Check if agent raly stands on a door: - if self._state[self._state_slices.by_name(h.DOORS)][agent.pos] in [h.IS_OCCUPIED_CELL, ]: - door = [door for door in self._door_states if door.pos == self._agent_states[agent_i].pos][0] - door.use() - pos, valid = self._agent_states[agent_i].pos, h.VALID - # When he doesn't... - else: - pos, valid = self._agent_states[agent_i].pos, h.NOT_VALID - else: - pos, valid = self.do_additional_actions(agent_i, action) - # Update state accordingly - self._agent_states[agent_i].update(pos=pos, action_valid=valid, action=action) - - for i, collision_vec in enumerate(self.check_all_collisions(self._agent_states, self._state.shape[0])): - self._agent_states[i].update(collision_vector=collision_vec) - if self.done_at_collision and collision_vec.any(): - done = True - - # Step the door close intervall - agents_pos = [agent.pos for agent in self._agent_states] - if self.has_doors: - for door_i, door in enumerate(self._door_states): - if door.is_open and door.time_to_close and door.pos not in agents_pos: - door.time_to_close -= 1 - elif door.is_open and not door.time_to_close and door.pos not in agents_pos: - door.use() - self._state[self._state_slices.by_name(h.DOORS)] = 1 if door.is_closed else -1 - - reward, info = self.calculate_reward(self._agent_states) - - if self._steps >= self.max_steps: - done = True - - info.update(step_reward=reward, step=self._steps) - - return None, reward, done, info - - def check_all_collisions(self, agent_states: List[AgentState], collisions: int) -> np.ndarray: - collision_vecs = np.zeros((len(agent_states), collisions)) # n_agents x n_slices - for agent_state in agent_states: - # Register only collisions of moving agents - if self._actions.is_moving_action(agent_state.action): - collision_vecs[agent_state.i] = self.check_collisions(agent_state) - return collision_vecs - - def check_collisions(self, agent_state: AgentState) -> np.ndarray: - pos_x, pos_y = agent_state.pos - # FixMe: We need to find a way to spare out some dimensions, eg. an info dimension etc... a[?,] - # https://numpy.org/doc/stable/reference/arrays.indexing.html#boolean-array-indexing - collisions_vec = self._state[:, pos_x, pos_y].copy() # "vertical fiber" at position of agent i - collisions_vec[self._state_slices.AGENTSTARTIDX + agent_state.i] = h.IS_FREE_CELL # no self-collisions - if 'door' in self._state_slices.values(): - collisions_vec[self._state_slices.by_name('doors')] = h.IS_FREE_CELL # no door-collisions - - if agent_state.action_valid: - # All well, no collision. - # Place a function hook here if needed. - pass - else: - # Place a marker to indicate a collision with the level boundrys - collisions_vec[self._state_slices.by_name(h.LEVEL)] = h.IS_OCCUPIED_CELL - return collisions_vec - - def do_move(self, agent_i: int, old_pos: (int, int), new_pos: (int, int)) -> None: - (x, y), (x_new, y_new) = old_pos, new_pos - self._state[agent_i + self._state_slices.AGENTSTARTIDX, x, y] = h.IS_FREE_CELL - self._state[agent_i + self._state_slices.AGENTSTARTIDX, x_new, y_new] = h.IS_OCCUPIED_CELL - - def move_or_colide(self, agent_i: int, action: int) -> ((int, int), bool): - old_pos, new_pos, valid = self._check_agent_move(agent_i=agent_i, action=self._actions[action]) - if valid: - # Does not collide width level boundaries - self.do_move(agent_i, old_pos, new_pos) - return new_pos, valid - else: - # Agent seems to be trying to collide in this step - return old_pos, valid - - def _check_agent_move(self, agent_i, action: str): - agent_slice_idx = self._state_slices.AGENTSTARTIDX + agent_i - agent_slice = self._state[agent_slice_idx] # horizontal slice from state tensor - agent_pos = np.argwhere(agent_slice == 1) - if len(agent_pos) > 1: - raise AssertionError('Only one agent per slice is allowed.') - x, y = agent_pos[0] - - # Actions - x_diff, y_diff = h.ACTIONMAP[action] - x_new = x + x_diff - y_new = y + y_diff - - if self.has_doors and self._agent_states[agent_i]._last_pos != (-1, -1): - door = [door for door in self._door_states if door.pos == (x, y)] - if door: - door = door[0] - if door.is_open: - pass - else: # door.is_closed: - local_door_map = self._state[self._state_slices.by_name(h.LEVEL)][door.pos[0]-1:door.pos[0]+2, - door.pos[1]-1:door.pos[1]+2] - local_agent_map = np.zeros_like(local_door_map) - local_agent_map[tuple(np.subtract(door.pos, self._agent_states[agent_i]._last_pos))] += 1 - local_agent_map[tuple(np.subtract(door.pos, (x_new, y_new)))] += 1 - if np.all(local_door_map == h.HORIZONTAL_DOOR_MAP): - # This is a horizontal Door Configuration - if np.sum(local_agent_map[0]) >= 2 or np.sum(local_agent_map[-1]) >= 2: - # The Agent goes back to where he came from - pass - else: - # The Agent tries to go through a closed door - return (x, y), (x, y), h.NOT_VALID - else: - # This is a vertical Door Configuration - if np.sum(local_agent_map[:, 0]) >= 2 or np.sum(local_agent_map[:, -1]) >= 2: - # The Agent goes back to where he came from - pass - else: - # The Agent tries to go through a closed door - return (x, y), (x, y), h.NOT_VALID - else: - pass - else: - pass - - valid = h.check_position(self._state[self._state_slices.by_name(h.LEVEL)], (x_new, y_new)) - - return (x, y), (x_new, y_new), valid - - def agent_i_position(self, agent_i: int) -> (int, int): - positions = np.argwhere(self._state[self._state_slices.AGENTSTARTIDX + agent_i] == h.IS_OCCUPIED_CELL) - assert positions.shape[0] == 1 - pos_x, pos_y = positions[0] # a.flatten() - return pos_x, pos_y - - def free_cells(self, excluded_slices: Union[None, List[int], int] = None) -> np.array: - - excluded_slices = excluded_slices or [] - assert isinstance(excluded_slices, (int, list)) - excluded_slices = excluded_slices if isinstance(excluded_slices, list) else [excluded_slices] - - state = self._state - - if excluded_slices: - # Todo: Is there a cleaner way? - # inds = list(range(self._state.shape[0])) - # excluded_slices = [inds[x] if x < 0 else x for x in excluded_slices] - # state = self._state[[x for x in inds if x not in excluded_slices]] - - # Yes there is! - bool_array = np.full(self._state.shape[0], True) - bool_array[excluded_slices] = False - state = self._state[bool_array] - - free_cells = np.argwhere(state.sum(0) == h.IS_FREE_CELL) - np.random.shuffle(free_cells) - return free_cells - - def calculate_reward(self, agent_states: List[AgentState]) -> (int, dict): - # Returns: Reward, Info - raise NotImplementedError - - def render(self, mode='human'): - raise NotImplementedError - - def save_params(self, filepath: Path): - # noinspection PyProtectedMember - # d = {key: val._asdict() if hasattr(val, '_asdict') else val for key, val in self.__dict__.items() - d = {key: val for key, val in self.__dict__.items() if not key.startswith('_') and not key.startswith('__')} - filepath.parent.mkdir(parents=True, exist_ok=True) - with filepath.open('w') as f: - yaml.dump(d, f) - # pickle.dump(d, f, protocol=pickle.HIGHEST_PROTOCOL) diff --git a/environments/factory/renderer.py b/environments/factory/renderer.py index fcc01a6..1f0374e 100644 --- a/environments/factory/renderer.py +++ b/environments/factory/renderer.py @@ -53,7 +53,7 @@ class Renderer: def blit_params(self, entity): r, c = entity.pos - img = self.assets[entity.name] + img = self.assets[entity.name.lower()] if entity.value_operation == 'opacity': img.set_alpha(255*entity.value) elif entity.value_operation == 'scale': diff --git a/environments/factory/simple_factory.py b/environments/factory/simple_factory.py index f92af26..f73c396 100644 --- a/environments/factory/simple_factory.py +++ b/environments/factory/simple_factory.py @@ -3,13 +3,17 @@ import random import numpy as np -from environments.factory.base_factory import BaseFactory + from environments import helpers as h +from environments.helpers import Constants as c +from environments.factory.base.base_factory import BaseFactory +from environments.factory.base.objects import Agent, Action, Object, Slice +from environments.factory.base.registers import Entities from environments.factory.renderer import Renderer, Entity -from environments.utility_classes import AgentState, MovementProperties +from environments.utility_classes import MovementProperties -DIRT_INDEX = -1 +DIRT = "dirt" CLEAN_UP_ACTION = 'clean_up' @@ -26,95 +30,104 @@ class DirtProperties(NamedTuple): class SimpleFactory(BaseFactory): @property - def additional_actions(self) -> List[str]: - return [CLEAN_UP_ACTION] + def additional_actions(self) -> List[Object]: + return [Action(CLEAN_UP_ACTION)] + + @property + def additional_entities(self) -> Union[Entities, List[Entities]]: + return [] + + @property + def additional_slices(self) -> List[Slice]: + return [Slice('dirt', np.zeros(self._level_shape))] def _is_clean_up_action(self, action: Union[str, int]): if isinstance(action, str): action = self._actions.by_name(action) - return self._actions[action] == CLEAN_UP_ACTION + return self._actions[action].name == CLEAN_UP_ACTION def __init__(self, *args, dirt_properties: DirtProperties = DirtProperties(), verbose=False, **kwargs): self.dirt_properties = dirt_properties self.verbose = verbose - self.max_dirt = 20 self._renderer = None # expensive - don't use it when not required ! - super(SimpleFactory, self).__init__(*args, additional_slices=['dirt'], **kwargs) + super(SimpleFactory, self).__init__(*args, **kwargs) + + def _flush_state(self): + super(SimpleFactory, self)._flush_state() + self._obs_cube[self._slices.get_idx_by_name(DIRT)] = self._slices.by_name(DIRT).slice def render(self, mode='human'): if not self._renderer: # lazy init - height, width = self._state.shape[1:] + height, width = self._obs_cube.shape[1:] self._renderer = Renderer(width, height, view_radius=self.pomdp_radius, fps=5) - - dirt = [Entity('dirt', [x, y], min(0.15 + self._state[DIRT_INDEX, x, y], 1.5), 'scale') - for x, y in np.argwhere(self._state[DIRT_INDEX] > h.IS_FREE_CELL)] + dirt_slice = self._slices.by_name(DIRT).slice + dirt = [Entity('dirt', tile.pos, min(0.15 + dirt_slice[tile.pos], 1.5), 'scale') + for tile in [tile for tile in self._tiles if dirt_slice[tile.pos]]] walls = [Entity('wall', pos) - for pos in np.argwhere(self._state[self._state_slices.by_name(h.LEVEL)] > h.IS_FREE_CELL)] + for pos in np.argwhere(self._slices.by_enum(c.LEVEL).slice == c.OCCUPIED_CELL.value)] def asset_str(agent): - if any([x is None for x in [self._state_slices[j] for j in agent.collisions]]): - print('error') - cols = ' '.join([self._state_slices[j] for j in agent.collisions]) - if h.AGENT in cols: + # What does this abonimation do? + # if any([x is None for x in [self._slices[j] for j in agent.collisions]]): + # print('error') + col_names = [x.name for x in agent.temp_collisions] + if c.AGENT.value in col_names: return 'agent_collision', 'blank' - elif not agent.action_valid or 'level' in cols or h.AGENT in cols: - return h.AGENT, 'invalid' - elif self._is_clean_up_action(agent.action): - return h.AGENT, 'valid' + elif not agent.temp_valid or c.LEVEL.name in col_names or c.AGENT.name in col_names: + return c.AGENT.value, 'invalid' + elif self._is_clean_up_action(agent.temp_action): + return c.AGENT.value, 'valid' else: - return h.AGENT, 'idle' + return c.AGENT.value, 'idle' agents = [] - for i, agent in enumerate(self._agent_states): + for i, agent in enumerate(self._agents): name, state = asset_str(agent) agents.append(Entity(name, agent.pos, 1, 'none', state, i+1)) doors = [] - if self.has_doors: - for i, door in enumerate(self._door_states): + if self.parse_doors: + for i, door in enumerate(self._doors): name, state = 'door_open' if door.is_open else 'door_closed', 'blank' agents.append(Entity(name, door.pos, 1, 'none', state, i+1)) self._renderer.render(dirt+walls+agents+doors) def spawn_dirt(self) -> None: - if not np.argwhere(self._state[DIRT_INDEX] != h.IS_FREE_CELL).shape[0] > self.dirt_properties.max_global_amount: - free_for_dirt = self.free_cells(excluded_slices=DIRT_INDEX) + dirt_slice = self._slices.by_name(DIRT).slice + # dirty_tiles = [tile for tile in self._tiles if dirt_slice[tile.pos]] + curr_dirt_amount = dirt_slice.sum() + if not curr_dirt_amount > self.dirt_properties.max_global_amount: + free_for_dirt = self._tiles.empty_tiles # randomly distribute dirt across the grid n_dirt_tiles = int(random.uniform(0, self.dirt_properties.max_spawn_ratio) * len(free_for_dirt)) - for x, y in free_for_dirt[:n_dirt_tiles]: - new_value = self._state[DIRT_INDEX, x, y] + self.dirt_properties.gain_amount - self._state[DIRT_INDEX, x, y] = max(new_value, self.dirt_properties.max_local_amount) + for tile in free_for_dirt[:n_dirt_tiles]: + new_value = dirt_slice[tile.pos] + self.dirt_properties.gain_amount + dirt_slice[tile.pos] = min(new_value, self.dirt_properties.max_local_amount) else: pass - def clean_up(self, pos: (int, int)) -> ((int, int), bool): - new_dirt_amount = self._state[DIRT_INDEX][pos] - self.dirt_properties.clean_amount - cleanup_was_sucessfull: bool - if self._state[DIRT_INDEX][pos] == h.IS_FREE_CELL: - cleanup_was_sucessfull = False - return pos, cleanup_was_sucessfull + def clean_up(self, agent: Agent) -> bool: + dirt_slice = self._slices.by_name(DIRT).slice + if dirt_slice[agent.pos]: + new_dirt_amount = dirt_slice[agent.pos] - self.dirt_properties.clean_amount + dirt_slice[agent.pos] = max(new_dirt_amount, c.FREE_CELL.value) + return True else: - cleanup_was_sucessfull = True - self._state[DIRT_INDEX][pos] = max(new_dirt_amount, h.IS_FREE_CELL) - return pos, cleanup_was_sucessfull + return False - def step(self, actions): - _, reward, done, info = super(SimpleFactory, self).step(actions) + def post_step(self) -> dict: if not self._next_dirt_spawn: self.spawn_dirt() self._next_dirt_spawn = self.dirt_properties.spawn_frequency else: self._next_dirt_spawn -= 1 + return {} - obs = self._get_observations() - return obs, reward, done, info - - def do_additional_actions(self, agent_i: int, action: int) -> ((int, int), bool): + def do_additional_actions(self, agent: Agent, action: int) -> bool: if action != self._actions.is_moving_action(action): if self._is_clean_up_action(action): - agent_i_pos = self.agent_i_position(agent_i) - _, valid = self.clean_up(agent_i_pos) - return agent_i_pos, valid + valid = self.clean_up(agent) + return valid else: raise RuntimeError('This should not happen!!!') else: @@ -122,19 +135,21 @@ class SimpleFactory(BaseFactory): def reset(self) -> (np.ndarray, int, bool, dict): _ = super().reset() # state, reward, done, info ... = - dirt_slice = np.zeros((1, *self._state.shape[1:])) - self._state = np.concatenate((self._state, dirt_slice)) # dirt is now the last slice self.spawn_dirt() self._next_dirt_spawn = self.dirt_properties.spawn_frequency obs = self._get_observations() return obs - def calculate_reward(self, agent_states: List[AgentState]) -> (int, dict): + def calculate_reward(self) -> (int, dict): info_dict = dict() - current_dirt_amount = self._state[DIRT_INDEX].sum() - dirty_tiles = np.argwhere(self._state[DIRT_INDEX] != h.IS_FREE_CELL).shape[0] + + dirt_slice = self._slices.by_name(DIRT).slice + dirty_tiles = [dirt_slice[tile.pos] for tile in self._tiles if dirt_slice[tile.pos]] + current_dirt_amount = sum(dirty_tiles) + dirty_tile_count = len(dirty_tiles) + info_dict.update(dirt_amount=current_dirt_amount) - info_dict.update(dirty_tile_count=dirty_tiles) + info_dict.update(dirty_tile_count=dirty_tile_count) try: # penalty = current_dirt_amount @@ -142,52 +157,47 @@ class SimpleFactory(BaseFactory): except (ZeroDivisionError, RuntimeWarning): reward = 0 - for agent_state in agent_states: - agent_name = f'{h.AGENT.capitalize()} {agent_state.i}' - cols = agent_state.collisions + for agent in self._agents: + if agent.temp_collisions: + self.print(f't = {self._steps}\t{agent.name} has collisions with {agent.temp_collisions}') - list_of_collisions = [self._state_slices[entity] for entity in cols - if entity != self._state_slices.by_name('dirt')] - - if list_of_collisions: - self.print(f't = {self._steps}\t{agent_name} has collisions with {list_of_collisions}') - - if self._is_clean_up_action(agent_state.action): - if agent_state.action_valid: + if self._is_clean_up_action(agent.temp_action): + if agent.temp_valid: reward += 1 - self.print(f'{agent_name} did just clean up some dirt at {agent_state.pos}.') + self.print(f'{agent.name} did just clean up some dirt at {agent.pos}.') info_dict.update(dirt_cleaned=1) else: reward -= 0.01 - self.print(f'{agent_name} just tried to clean up some dirt at {agent_state.pos}, but failed.') - info_dict.update({f'{h.AGENT}_{agent_state.i}_failed_action': 1}) - info_dict.update({f'{h.AGENT}_{agent_state.i}_failed_dirt_cleanup': 1}) + self.print(f'{agent.name} just tried to clean up some dirt at {agent.pos}, but failed.') + info_dict.update({f'{agent.name}_failed_action': 1}) + info_dict.update({f'{agent.name}_failed_dirt_cleanup': 1}) - elif self._actions.is_moving_action(agent_state.action): - if agent_state.action_valid: + elif self._actions.is_moving_action(agent.temp_action): + if agent.temp_valid: # info_dict.update(movement=1) reward -= 0.00 else: # self.print('collision') - reward -= 0.01 + reward -= 0.05 + self.print(f'{agent.name} just hit the wall at {agent.pos}.') + info_dict.update({f'{agent.name}_vs_LEVEL': 1}) - elif self._actions.is_door_usage(agent_state.action): - if agent_state.action_valid: - reward += 0.1 - self.print(f'{agent_name} did just use the door at {agent_state.pos}.') + elif self._actions.is_door_usage(agent.temp_action): + if agent.temp_valid: + self.print(f'{agent.name} did just use the door at {agent.pos}.') info_dict.update(door_used=1) else: - self.print(f'{agent_name} just tried to use a door at {agent_state.pos}, but failed.') - info_dict.update({f'{h.AGENT}_{agent_state.i}_failed_action': 1}) - info_dict.update({f'{h.AGENT}_{agent_state.i}_failed_door_open': 1}) + reward -= 0.01 + self.print(f'{agent.name} just tried to use a door at {agent.pos}, but failed.') + info_dict.update({f'{agent.name}_failed_action': 1}) + info_dict.update({f'{agent.name}_failed_door_open': 1}) else: info_dict.update(no_op=1) reward -= 0.00 - for entity in list_of_collisions: - entity = h.AGENT if h.AGENT in entity else entity - info_dict.update({f'{h.AGENT}_{agent_state.i}_vs_{entity}': 1}) + for other_agent in agent.temp_collisions: + info_dict.update({f'{agent.name}_vs_{other_agent.name}': 1}) self.print(f"reward is {reward}") # Potential based rewards -> @@ -205,13 +215,13 @@ if __name__ == '__main__': move_props = MovementProperties(allow_diagonal_movement=True, allow_square_movement=True) dirt_props = DirtProperties() factory = SimpleFactory(movement_properties=move_props, dirt_properties=dirt_props, n_agents=10, - combin_agent_slices_in_obs=True, level_name='rooms', + combin_agent_slices_in_obs=False, level_name='rooms', parse_doors=True, pomdp_radius=3) n_actions = factory.action_space.n - 1 _ = factory.observation_space - for epoch in range(10000): + for epoch in range(100): random_actions = [[random.randint(0, n_actions) for _ in range(factory.n_agents)] for _ in range(200)] env_state = factory.reset() r = 0 diff --git a/environments/helpers.py b/environments/helpers.py index fca3a92..fc6a514 100644 --- a/environments/helpers.py +++ b/environments/helpers.py @@ -1,27 +1,46 @@ from collections import defaultdict -from typing import Tuple +from enum import Enum, auto +from typing import Tuple, Union import numpy as np from pathlib import Path + # Constants -WALL = '#' -DOOR = 'D' -DANGER_ZONE = 'x' +class Constants(Enum): + WALL = '#' + DOOR = 'D' + DANGER_ZONE = 'x' + LEVEL = 'level' + AGENT = 'Agent' + FREE_CELL = 0 + OCCUPIED_CELL = 1 + + DOORS = 'doors' + IS_CLOSED_DOOR = 1 + IS_OPEN_DOOR = -1 + + LEVEL_IDX = 0 + + ACTION = auto() + COLLISIONS = auto() + VALID = True + NOT_VALID = False + + def __bool__(self): + return bool(self.value) + + LEVELS_DIR = 'levels' -LEVEL = 'level' -AGENT = 'agent' -IS_FREE_CELL = 0 -IS_OCCUPIED_CELL = 1 - -DOORS = 'doors' -IS_CLOSED_DOOR = IS_OCCUPIED_CELL -IS_OPEN_DOOR = -1 - -LEVEL_IDX = 0 TO_BE_AVERAGED = ['dirt_amount', 'dirty_tiles'] -IGNORED_DF_COLUMNS = ['Episode', 'Run', 'train_step', 'step', 'index', 'dirt_amount', 'dirty_tile_count'] +IGNORED_DF_COLUMNS = ['Episode', 'Run', 'train_step', 'step', 'index', 'dirt_amount', + 'dirty_tile_count', 'terminal_observation', 'episode'] + +MANHATTAN_MOVES = ['north', 'east', 'south', 'west'] +DIAGONAL_MOVES = ['north_east', 'south_east', 'south_west', 'north_west'] + +NO_POS = (-9999, -9999) ACTIONMAP = defaultdict(lambda: (0, 0), dict(north=(-1, 0), east=(0, 1), south=(1, 0), west=(0, -1), @@ -38,8 +57,7 @@ HORIZONTAL_DOOR_ZONE_2 = np.asarray([[0, 0, 0], [0, 0, 0], [1, 1, 1]]) VERTICAL_DOOR_ZONE_1 = np.asarray([[1, 0, 0], [0, 0, 0], [0, 0, 1]]) VERTICAL_DOOR_ZONE_2 = np.asarray([[1, 0, 0], [0, 0, 0], [0, 0, 1]]) -NOT_VALID = False -VALID = True + # Utility functions @@ -51,10 +69,13 @@ def parse_level(path): return level -def one_hot_level(level, wall_char=WALL): +def one_hot_level(level, wall_char: Union[Constants, str] = Constants.WALL): grid = np.array(level) binary_grid = np.zeros(grid.shape, dtype=np.int8) - binary_grid[grid == wall_char] = 1 + if wall_char in Constants: + binary_grid[grid == wall_char.value] = Constants.OCCUPIED_CELL.value + else: + binary_grid[grid == wall_char] = Constants.OCCUPIED_CELL.value return binary_grid @@ -70,7 +91,7 @@ def check_position(slice_to_check_against: np.ndarray, position_to_check: Tuple[ # Check for collision with level walls valid = valid and not slice_to_check_against[x_pos, y_pos] - return valid + return Constants.VALID if valid else Constants.NOT_VALID if __name__ == '__main__': diff --git a/environments/logging/monitor.py b/environments/logging/monitor.py index 48d9d7d..93786d0 100644 --- a/environments/logging/monitor.py +++ b/environments/logging/monitor.py @@ -1,5 +1,6 @@ import pickle from pathlib import Path +from typing import List, Dict from stable_baselines3.common.callbacks import BaseCallback @@ -66,13 +67,15 @@ class MonitorCallback(BaseCallback): print('Plotting done.') self.closed = True - def _on_step(self) -> bool: - for _, info in enumerate(self.locals.get('infos', [])): + def _on_step(self, alt_infos: List[Dict] = None, alt_dones: List[bool] = None) -> bool: + infos = alt_infos or self.locals.get('infos', []) + dones = alt_dones or self.locals.get('dones', None) or self.locals.get('done', [None]) + for _, info in enumerate(infos): self._monitor_dict[self.num_timesteps] = {key: val for key, val in info.items() - if key not in ['terminal_observation', 'episode']} + if key not in ['terminal_observation', 'episode'] + and not key.startswith('rec_')} - for env_idx, done in list(enumerate(self.locals.get('dones', []))) + \ - list(enumerate(self.locals.get('done', []))): + for env_idx, done in enumerate(dones): if done: env_monitor_df = pd.DataFrame.from_dict(self._monitor_dict, orient='index') self._monitor_dict = dict() diff --git a/environments/logging/recorder.py b/environments/logging/recorder.py new file mode 100644 index 0000000..dec9ee1 --- /dev/null +++ b/environments/logging/recorder.py @@ -0,0 +1,74 @@ +import json +from pathlib import Path +from typing import Union + +import pandas as pd +from stable_baselines3.common.callbacks import BaseCallback + +from environments.factory.base.base_factory import REC_TAC +from environments.helpers import IGNORED_DF_COLUMNS + + +class RecorderCallback(BaseCallback): + + def __init__(self, filepath: Union[str, Path], occupation_map: bool = False, trajectory_map: bool = False): + super(RecorderCallback, self).__init__() + self.trajectory_map = trajectory_map + self.occupation_map = occupation_map + self.filepath = Path(filepath) + self._recorder_dict = dict() + self._recorder_df = pd.DataFrame() + self.started = False + self.closed = False + + def _on_step(self) -> bool: + for _, info in enumerate(self.locals.get('infos', [])): + self._recorder_dict[self.num_timesteps] = {key: val for key, val in info.items() + if not key.startswith(f'{REC_TAC}_')} + + for env_idx, done in list(enumerate(self.locals.get('dones', []))) + \ + list(enumerate(self.locals.get('done', []))): + if done: + env_monitor_df = pd.DataFrame.from_dict(self._recorder_dict, orient='index') + self._recorder_dict = dict() + columns = [col for col in env_monitor_df.columns if col not in IGNORED_DF_COLUMNS] + env_monitor_df = env_monitor_df.aggregate( + {col: 'mean' if col.endswith('ount') else 'sum' for col in columns} + ) + env_monitor_df['episode'] = len(self._recorder_df) + self._recorder_df = self._recorder_df.append([env_monitor_df]) + else: + pass + return True + + def __enter__(self): + self._on_training_start() + + def __exit__(self, exc_type, exc_val, exc_tb): + self._on_training_end() + + def _on_training_start(self) -> None: + if self.started: + pass + else: + self.filepath.parent.mkdir(exist_ok=True, parents=True) + self.started = True + pass + + def _on_training_end(self) -> None: + if self.closed: + pass + else: + # self.out_file.unlink(missing_ok=True) + with self.filepath.open('w') as f: + json_df = self._recorder_df.to_json(orient="table") + parsed = json.loads(json_df) + json.dump(parsed, f, indent=4) + + if self.occupation_map: + print('Recorder files were dumped to disk, now plotting the occupation map...') + + if self.trajectory_map: + print('Recorder files were dumped to disk, now plotting the occupation map...') + + self.closed = True \ No newline at end of file diff --git a/environments/oo_factory/_base_factory.py b/environments/oo_factory/_base_factory.py deleted file mode 100644 index 5fe2091..0000000 --- a/environments/oo_factory/_base_factory.py +++ /dev/null @@ -1,68 +0,0 @@ -from typing import List, Union - -import gym - - -class Entities(): - - def __init__(self): - pass - - -# noinspection PyAttributeOutsideInit -class BaseFactory(gym.Env): - - def __enter__(self): - return self if self.frames_to_stack == 0 else FrameStack(self, self.frames_to_stack) - - def __exit__(self, exc_type, exc_val, exc_tb): - self.close() - - def __init__(self, level_name='simple', n_agents=1, max_steps=int(5e2), pomdp_radius: Union[None, int] = 0, - movement_properties: MovementProperties = MovementProperties(), - combin_agent_slices_in_obs: bool = False, frames_to_stack=0, - omit_agent_slice_in_obs=False, **kwargs): - assert (combin_agent_slices_in_obs != omit_agent_slice_in_obs) or \ - (not combin_agent_slices_in_obs and not omit_agent_slice_in_obs), \ - 'Both options are exclusive' - assert frames_to_stack != 1 and frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1." - - self.movement_properties = movement_properties - self.level_name = level_name - - self.n_agents = n_agents - self.max_steps = max_steps - self.pomdp_radius = pomdp_radius - self.combin_agent_slices_in_obs = combin_agent_slices_in_obs - self.omit_agent_slice_in_obs = omit_agent_slice_in_obs - self.frames_to_stack = frames_to_stack - - self.done_at_collision = False - - self._state_slices = StateSlices() - level_filepath = Path(__file__).parent / h.LEVELS_DIR / f'{self.level_name}.txt' - parsed_level = h.parse_level(level_filepath) - self._level = h.one_hot_level(parsed_level) - parsed_doors = h.one_hot_level(parsed_level, h.DOOR) - if parsed_doors.any(): - self._doors = parsed_doors - level_slices = ['level', 'doors'] - can_use_doors = True - else: - level_slices = ['level'] - can_use_doors = False - offset = len(level_slices) - self._state_slices.register_additional_items([*level_slices, - *[f'agent#{i}' for i in range(offset, n_agents + offset)]]) - if 'additional_slices' in kwargs: - self._state_slices.register_additional_items(kwargs.get('additional_slices')) - self._zones = Zones(parsed_level) - - self._actions = Actions(self.movement_properties, can_use_doors=can_use_doors) - self._actions.register_additional_items(self.additional_actions) - self.reset() - - - def step(self, actions: Union[int, List[int]]): - actions = actions if isinstance(actions, list) else [actions] - self.entities.step() \ No newline at end of file diff --git a/environments/utility_classes.py b/environments/utility_classes.py index 98eb061..ea7128b 100644 --- a/environments/utility_classes.py +++ b/environments/utility_classes.py @@ -1,298 +1,7 @@ -from typing import Union, List, NamedTuple, Tuple -import numpy as np - -from environments import helpers as h - - -IS_CLOSED = 'CLOSED' -IS_OPEN = 'OPEN' +from typing import NamedTuple class MovementProperties(NamedTuple): allow_square_movement: bool = True allow_diagonal_movement: bool = False allow_no_op: bool = False - - -# Preperations for Entities (not used yet) -class Entity: - - @property - def pos(self): - return self._pos - - @property - def identifier(self): - return self._identifier - - def __init__(self, identifier, pos): - self._pos = pos - self._identifier = identifier - - -class Door(Entity): - - @property - def is_closed(self): - return self._state == IS_CLOSED - - @property - def is_open(self): - return self._state == IS_OPEN - - @property - def status(self): - return self._state - - def __init__(self, *args, closed_on_init=True, **kwargs): - super(Door, self).__init__(*args, **kwargs) - self._state = IS_CLOSED if closed_on_init else IS_OPEN - - def use(self): - self._state: str = IS_CLOSED if self._state == IS_OPEN else IS_OPEN - pass - - -class Agent(Entity): - - @property - def direction_of_vision(self): - return self._direction_of_vision - - def __init__(self, *args, **kwargs): - super(Agent, self).__init__(*args, **kwargs) - self._direction_of_vision = (None, None) - - def move(self, new_pos: Tuple[int, int]): - x_old, y_old = self.pos - self._pos = new_pos - x_new, y_new = new_pos - self._direction_of_vision = (x_old-x_new, y_old-y_new) - return self.pos - - -class AgentState: - - @property - def collisions(self): - return np.argwhere(self.collision_vector != 0).flatten() - - @property - def direction_of_view(self): - last_x, last_y = self._last_pos - curr_x, curr_y = self.pos - return last_x-curr_x, last_y-curr_y - - def __init__(self, i: int, action: int, pos=None): - self.i = i - self.action = action - - self.collision_vector = None - self.action_valid = None - self.pos = pos - self._last_pos = (-1, -1) - - def update(self, **kwargs): # is this hacky?? o.0 - last_pos = self.pos - for key, value in kwargs.items(): - if hasattr(self, key): - self.__setattr__(key, value) - else: - raise AttributeError(f'"{key}" cannot be updated, this attr is not a part of {self.__name__}') - if self.action_valid and last_pos != self.pos: - self._last_pos = last_pos - - def reset(self): - self.__init__(self.i, self.action) - - -class DoorState: - - def __init__(self, i: int, pos: Tuple[int, int], closed_on_init=True, auto_close_interval=10): - self.i = i - self.pos = pos - self._state = self._state = IS_CLOSED if closed_on_init else IS_OPEN - self.auto_close_interval = auto_close_interval - self.time_to_close = -1 - - @property - def is_closed(self): - return self._state == IS_CLOSED - - @property - def is_open(self): - return self._state == IS_OPEN - - @property - def status(self): - return self._state - - def use(self): - if self._state == IS_OPEN: - self._state = IS_CLOSED - else: - self._state = IS_OPEN - self.time_to_close = self.auto_close_interval - -class Register: - - @property - def n(self): - return len(self) - - def __init__(self): - self._register = dict() - - def __len__(self): - return len(self._register) - - def __add__(self, other: str): - assert isinstance(other, str), f'All item names have to be of type {str}' - self._register.update({len(self._register): other}) - return self - - def register_additional_items(self, others: List[str]): - for other in others: - self + other - return self - - def keys(self): - return self._register.keys() - - def values(self): - return self._register.values() - - def items(self): - return self._register.items() - - def __getitem__(self, item): - try: - return self._register[item] - except KeyError: - print('NO') - raise - - def by_name(self, item): - return list(self._register.keys())[list(self._register.values()).index(item)] - - def __repr__(self): - return f'{self.__class__.__name__}({self._register})' - - -class Agents(Register): - - def __init__(self, n_agents): - super(Agents, self).__init__() - self.register_additional_items([f'agent#{i}' for i in range(n_agents)]) - self._agents = [Agent(x, (-1, -1)) for x in self.keys()] - pass - - def __getitem__(self, item): - return self._agents[item] - - def get_name(self, item): - return self._register[item] - - def by_name(self, item): - return self[super(Agents, self).by_name(item)] - - def __add__(self, other): - super(Agents, self).__add__(other) - self._agents.append(Agent(len(self)+1, (-1, -1))) - - -class Actions(Register): - - @property - def movement_actions(self): - return self._movement_actions - - def __init__(self, movement_properties: MovementProperties, can_use_doors=False): - self.allow_no_op = movement_properties.allow_no_op - self.allow_diagonal_movement = movement_properties.allow_diagonal_movement - self.allow_square_movement = movement_properties.allow_square_movement - self.can_use_doors = can_use_doors - super(Actions, self).__init__() - - if self.allow_square_movement: - self.register_additional_items(['north', 'east', 'south', 'west']) - if self.allow_diagonal_movement: - self.register_additional_items(['north_east', 'south_east', 'south_west', 'north_west']) - self._movement_actions = self._register.copy() - if self.can_use_doors: - self.register_additional_items(['use_door']) - if self.allow_no_op: - self.register_additional_items(['no-op']) - - def is_moving_action(self, action: Union[str, int]): - if isinstance(action, str): - return action in self.movement_actions.values() - else: - return self[action] in self.movement_actions.values() - - def is_no_op(self, action: Union[str, int]): - if isinstance(action, str): - action = self.by_name(action) - return self[action] == 'no-op' - - def is_door_usage(self, action: Union[str, int]): - if isinstance(action, str): - action = self.by_name(action) - return self[action] == 'use_door' - - -class StateSlices(Register): - - @property - def AGENTSTARTIDX(self): - if self._agent_start_idx: - return self._agent_start_idx - else: - self._agent_start_idx = min([idx for idx, x in self.items() if h.AGENT in x]) - return self._agent_start_idx - - def __init__(self): - super(StateSlices, self).__init__() - self._agent_start_idx = None - - -class Zones(Register): - - @property - def danger_zone(self): - return self._zone_slices[self.by_name(h.DANGER_ZONE)] - - @property - def accounting_zones(self): - return [self[idx] for idx, name in self.items() if name != h.DANGER_ZONE] - - def __init__(self, parsed_level): - super(Zones, self).__init__() - slices = list() - self._accounting_zones = list() - self._danger_zones = list() - for symbol in np.unique(parsed_level): - if symbol == h.WALL: - continue - elif symbol == h.DANGER_ZONE: - self + symbol - slices.append(h.one_hot_level(parsed_level, symbol)) - self._danger_zones.append(symbol) - else: - self + symbol - slices.append(h.one_hot_level(parsed_level, symbol)) - self._accounting_zones.append(symbol) - - self._zone_slices = np.stack(slices) - - def __getitem__(self, item): - return self._zone_slices[item] - - def get_name(self, item): - return self._register[item] - - def by_name(self, item): - return self[super(Zones, self).by_name(item)] - - def register_additional_items(self, other: Union[str, List[str]]): - raise AttributeError('You are not allowed to add additional Zones in runtime.') diff --git a/main.py b/main.py index 92fc8a4..c53275d 100644 --- a/main.py +++ b/main.py @@ -9,11 +9,12 @@ import pandas as pd from stable_baselines3.common.callbacks import CallbackList -from environments.factory.base_factory import MovementProperties from environments.factory.simple_factory import DirtProperties, SimpleFactory from environments.helpers import IGNORED_DF_COLUMNS from environments.logging.monitor import MonitorCallback from environments.logging.plotting import prepare_plot +from environments.logging.recorder import RecorderCallback +from environments.utility_classes import MovementProperties warnings.filterwarnings('ignore', category=FutureWarning) warnings.filterwarnings('ignore', category=UserWarning) @@ -91,8 +92,8 @@ if __name__ == '__main__': from algorithms.reg_dqn import RegDQN # from sb3_contrib import QRDQN - dirt_props = DirtProperties(clean_amount=3, gain_amount=0.2, max_global_amount=30, - max_local_amount=5, spawn_frequency=1, max_spawn_ratio=0.05) + dirt_props = DirtProperties(clean_amount=3, gain_amount=1, max_global_amount=30, + max_local_amount=5, spawn_frequency=3, max_spawn_ratio=0.05) move_props = MovementProperties(allow_diagonal_movement=True, allow_square_movement=True, allow_no_op=False) @@ -103,9 +104,10 @@ if __name__ == '__main__': for modeL_type in [A2C, PPO, RegDQN, DQN]: # , QRDQN]: for seed in range(3): - with SimpleFactory(n_agents=1, dirt_properties=dirt_props, pomdp_radius=2, max_steps=400, + with SimpleFactory(n_agents=1, dirt_properties=dirt_props, pomdp_radius=2, max_steps=400, parse_doors=False, movement_properties=move_props, level_name='rooms', frames_to_stack=4, - omit_agent_slice_in_obs=False, combin_agent_slices_in_obs=True) as env: + omit_agent_slice_in_obs=False, combin_agent_slices_in_obs=True, record_episodes=False + ) as env: if modeL_type.__name__ in ["PPO", "A2C"]: kwargs = dict(ent_coef=0.01) @@ -127,10 +129,13 @@ if __name__ == '__main__': out_path /= identifier callbacks = CallbackList( - [MonitorCallback(filepath=out_path / f'monitor_{identifier}.pick', plotting=False)] + [MonitorCallback(filepath=out_path / f'monitor_{identifier}.pick', plotting=False), + RecorderCallback(filepath=out_path / f'recorder_{identifier}.json', occupation_map=False, + trajectory_map=False + )] ) - model.learn(total_timesteps=int(1e5), callback=callbacks) + model.learn(total_timesteps=int(5e5), callback=callbacks) save_path = out_path / f'model_{identifier}.zip' save_path.parent.mkdir(parents=True, exist_ok=True) diff --git a/reload_agent.py b/reload_agent.py index faffb44..cf5313f 100644 --- a/reload_agent.py +++ b/reload_agent.py @@ -14,7 +14,7 @@ warnings.filterwarnings('ignore', category=UserWarning) if __name__ == '__main__': - model_name = 'A2C_1623923982' + model_name = 'PPO_1626075586' run_id = 0 out_path = Path(__file__).parent / 'debug_out' model_path = out_path / model_name