Restructuring and Testing Done

2025-07-06 17:41:36 +02:00 · 2021-07-13 11:12:03 +02:00
parent eee4760e72
commit 35f5bdeed4
14 changed files with 1160 additions and 842 deletions
--- a/environments/factory/base/init.py
+++ b/environments/factory/base/init.py
--- a/environments/factory/base/base_factory.py
+++ b/environments/factory/base/base_factory.py
@ -0,0 +1,370 @@
 from pathlib import Path
 from typing import List, Union, Iterable
 import gym
 import numpy as np
 from gym import spaces
 import yaml
 from gym.wrappers import FrameStack
 from environments.helpers import Constants as c, Constants
 from environments import helpers as h
 from environments.factory.base.objects import Slice, Agent, Tile, Action, MoveableEntity
 from environments.factory.base.registers import StateSlices, Actions, Entities, Agents, Doors, FloorTiles
 from environments.utility_classes import MovementProperties
 REC_TAC = 'rec'
 # noinspection PyAttributeOutsideInit
 class BaseFactory(gym.Env):
    @property
    def action_space(self):
        """Discrete action space with one entry per registered action."""
        n_actions = self._actions.n
        return spaces.Discrete(n_actions)
    @property
    def observation_space(self):
        """Box space (low=0, high=1, float32) describing one observation.

        The first axis is the number of state slices minus the agent slices
        that are hidden from the observation: ``n_agents - 1`` when agent
        slices are combined, ``n_agents`` when they are omitted, else 0.
        With a POMDP radius the two remaining axes are ``2 * radius + 1``;
        otherwise they are taken from the observation cube.
        """
        if self.combin_agent_slices_in_obs:
            hidden_slices = self.n_agents - 1
        elif self.omit_agent_slice_in_obs:
            hidden_slices = self.n_agents
        else:
            hidden_slices = 0
        n_slices, *level_dims = self._obs_cube.shape
        if self.pomdp_radius:
            view = self.pomdp_radius * 2 + 1
            shape = (n_slices - hidden_slices, view, view)
        else:
            shape = [n_slices - hidden_slices, *level_dims]
        return spaces.Box(low=0, high=1, shape=shape, dtype=np.float32)
    @property
    def pomdp_diameter(self):
        """Edge length of the partial view: the radius on both sides plus the centre cell."""
        radius = self.pomdp_radius
        return radius * 2 + 1
    @property
    def movement_actions(self):
        """Movement actions as exposed by the action register."""
        action_register = self._actions
        return action_register.movement_actions
    @property
    def additional_actions(self) -> Union[str, List[str]]:
        """
        When inheriting from this base class, you must implement this property.

        :return:            A list of Action objects holding all additional actions.
        :rtype:             List[Action]
        """
        message = 'Please register additional actions '
        raise NotImplementedError(message)
    @property
    def additional_entities(self) -> Union[Entities, List[Entities]]:
        """
        When inheriting from this base class, you must implement this property.

        :return:            A single Entities collection or a list of such.
        :rtype:             Union[Entities, List[Entities]]
        """
        message = 'Please register additional entities.'
        raise NotImplementedError(message)
    @property
    def additional_slices(self) -> Union[Slice, List[Slice]]:
        """
        When inheriting from this base class, you must implement this property.

        :return:            A list of Slice objects.
        :rtype:             List[Slice]
        """
        message = 'Please register additional slices.'
        raise NotImplementedError(message)
    def __enter__(self):
        """Return the environment itself, or a ``FrameStack`` wrapper when frame stacking is enabled."""
        if self.frames_to_stack == 0:
            return self
        return FrameStack(self, self.frames_to_stack)
    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the environment when the ``with`` block is left."""
        self.close()
        # Returning None lets an exception raised inside the block propagate.
        return None
    def __init__(self, level_name='simple', n_agents=1, max_steps=int(5e2), pomdp_radius: Union[None, int] = 0,
                 movement_properties: MovementProperties = MovementProperties(), parse_doors=False,
                 combin_agent_slices_in_obs: bool = False, frames_to_stack=0, record_episodes=False,
                 omit_agent_slice_in_obs=False, done_at_collision=False, **kwargs):
        """
        Store the configuration, build the action register and reset the environment.

        :param level_name:                  Stem of the level text file that is parsed on reset.
        :param n_agents:                    Number of agents (one state slice is created per agent).
        :param max_steps:                   Step count at which an episode is flagged as done.
        :param pomdp_radius:                Radius of the partial view; a falsy value yields full observations.
        :param movement_properties:         Passed to the ``Actions`` register.
        :param parse_doors:                 Whether doors are parsed and the door action is registered.
        :param combin_agent_slices_in_obs:  Sum all agent slices into one observation slice.
        :param frames_to_stack:             0 disables frame stacking; 1 and negative values are rejected.
        :param record_episodes:             Add a state summary to the ``info`` dict.
        :param omit_agent_slice_in_obs:     Drop the agent slices from the observation.
        :param done_at_collision:           End the episode as soon as a collision is detected.
        :param kwargs:                      Accepted, but not read anywhere in this constructor.
        """
        # The assertion fails only when both flags are set at the same time.
        assert (combin_agent_slices_in_obs != omit_agent_slice_in_obs) or \
               (not combin_agent_slices_in_obs and not omit_agent_slice_in_obs), \
               'Both options are exclusive'
        assert frames_to_stack != 1 and frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1."
        # Attribute Assignment
        self.movement_properties = movement_properties
        self.level_name = level_name
        self._level_shape = None
        self.n_agents = n_agents
        self.max_steps = max_steps
        self.pomdp_radius = pomdp_radius
        self.combin_agent_slices_in_obs = combin_agent_slices_in_obs
        self.omit_agent_slice_in_obs = omit_agent_slice_in_obs
        self.frames_to_stack = frames_to_stack
        self.done_at_collision = done_at_collision
        self.record_episodes = record_episodes
        self.parse_doors = parse_doors
        # Actions
        self._actions = Actions(self.movement_properties, can_use_doors=self.parse_doors)
        # NOTE(review): the base-class property raises NotImplementedError, so a subclass
        # has to override `additional_actions` before it can be instantiated.
        if additional_actions := self.additional_actions:
            self._actions.register_additional_items(additional_actions)
        # Builds slices, observation cube and entities (see `reset`).
        self.reset()
    def _init_state_slices(self) -> StateSlices:
        """
        Parse the level file and register one state slice per layer.

        Registration order is: level, doors (only if ``parse_doors`` is set and
        the level contains any), one slice per agent, then the slices supplied
        by ``additional_slices``.

        :return:    The populated slice register.
        :rtype:     StateSlices
        """
        state_slices = StateSlices()
        # Level
        level_filepath = Path(__file__).parent.parent / h.LEVELS_DIR / f'{self.level_name}.txt'
        parsed_level = h.parse_level(level_filepath)
        level = [Slice(c.LEVEL.name, h.one_hot_level(parsed_level))]
        self._level_shape = level[0].shape
        # Doors
        parsed_doors = h.one_hot_level(parsed_level, c.DOOR)
        # Registered under the enum *name*, because `by_enum` / `get_idx` look slices up
        # by `enum.name` (the level and agent slices are registered the same way).
        doors = [Slice(c.DOORS.name, parsed_doors)] if parsed_doors.any() and self.parse_doors else []
        # Agents
        agents = [Slice(f'{c.AGENT.name}#{i}', np.zeros_like(level[0].slice)) for i in range(self.n_agents)]
        state_slices.register_additional_items(level + doors + agents)
        # Additional Slices from SubDomains
        if additional_slices := self.additional_slices:
            # The property may hand back a single Slice; wrap it so it can be iterated.
            if isinstance(additional_slices, Slice):
                additional_slices = [additional_slices]
            state_slices.register_additional_items(additional_slices)
        return state_slices
    def _init_obs_cube(self) -> np.ndarray:
        """
        Allocate the observation cube with one layer per registered slice.

        Layer 0 receives the level slice. When a POMDP radius is set, a second
        cube padded by that radius on each side of the two spatial axes is
        stored in ``self._padded_obs_cube``; its layer 0 is pre-filled with the
        occupied-cell value before the cube is copied into its centre.

        :return:    The unpadded cube.
        :rtype:     np.ndarray
        """
        level_slice = self._slices.by_enum(c.LEVEL)
        rows, cols = level_slice.shape
        n_layers = len(self._slices)
        cube = np.zeros((n_layers, rows, cols))
        cube[0] = self._slices.by_enum(c.LEVEL).slice
        radius = self.pomdp_radius
        if radius:
            padded = np.full((n_layers, rows + radius*2, cols + radius*2), c.FREE_CELL.value)
            self._padded_obs_cube = padded
            padded[0] = c.OCCUPIED_CELL.value
            padded[:, radius:radius+rows, radius:radius+cols] = cube
        return cube
    def _init_entities(self):
        """
        Create tiles, doors (optional) and agents and collect them in one register.

        Agents are placed on tiles drawn with ``np.random.choice``.
        NOTE(review): the draw uses numpy's default (with replacement), so two
        agents may start on the same tile - confirm this is intended.

        :return:    Register holding the agent register, the door register (if
                    doors are parsed) and the registers from ``additional_entities``.
        :rtype:     Entities
        """
        # Tile Init
        self._tiles = FloorTiles.from_argwhere_coordinates(self._slices.by_enum(c.LEVEL).free_tiles)
        # Door Init
        if self.parse_doors:
            tiles = [self._tiles.by_pos(x) for x in self._slices.by_enum(c.DOORS).occupied_tiles]
            self._doors = Doors.from_tiles(tiles, context=self._tiles)
        # Agent Init on random positions
        self._agents = Agents.from_tiles(np.random.choice(self._tiles, self.n_agents))
        entities = Entities()
        entities.register_additional_items([self._agents])
        if self.parse_doors:
            entities.register_additional_items([self._doors])
        if additional_entities := self.additional_entities:
            # The property may return one register or a list of registers; only a
            # single register needs wrapping, a list is registered element-wise.
            if not isinstance(additional_entities, (list, tuple)):
                additional_entities = [additional_entities]
            entities.register_additional_items(additional_entities)
        return entities
    def reset(self) -> (np.ndarray, int, bool, dict):
        """
        Rebuild slices, observation cube and entities and restart the step counter.

        :return:    ``(None, None, None, info)``; ``info`` is the state summary when
                    ``record_episodes`` is set, otherwise an empty dict.
        """
        # Order matters: the cube needs the slices, the entities need both.
        self._slices = self._init_state_slices()
        self._obs_cube = self._init_obs_cube()
        self._entitites = self._init_entities()
        self._flush_state()
        self._steps = 0
        if self.record_episodes:
            info = self._summarize_state()
        else:
            info = {}
        return None, None, None, info
    def pre_step(self) -> None:
        """Hook called by ``step`` before any action is applied; does nothing here."""
        return None
    def post_step(self) -> dict:
        """
        Hook called at the end of ``step``; its result is merged into ``info``.

        :return:    Extra info entries. The base implementation contributes none,
                    but must still return a dict because ``step`` passes the
                    result to ``dict.update``.
        :rtype:     dict
        """
        return {}
    def step(self, actions):
        """
        Apply one action per agent, detect collisions and advance the episode.

        :param actions:     A single action index or an iterable of indices, paired
                            with the agents in register order via ``zip``.
        :return:            ``(obs, reward, done, info)``. ``done`` is set when a
                            collision occurs and ``done_at_collision`` is enabled,
                            or when ``max_steps`` is reached.
        """
        actions = [actions] if isinstance(actions, int) or np.isscalar(actions) else actions
        assert isinstance(actions, Iterable), f'"actions" has to be in [{int, list}]'
        self._steps += 1
        done = False
        # Pre step hook for later use
        self.pre_step()
        for action, agent in zip(actions, self._agents):
            agent.clear_temp_sate()
            # The register maps the action index to its Action object.
            action_obj = self._actions[action]
            if self._actions.is_moving_action(action):
                valid = self._move_or_colide(agent, action_obj)
            elif self._actions.is_no_op(action):
                valid = c.VALID.value
            elif self._actions.is_door_usage(action):
                # Using a door is only valid while the agent stands on one.
                if door := self._doors.by_pos(agent.pos):
                    door.use()
                    valid = c.VALID.value
                else:
                    valid = c.NOT_VALID.value
            else:
                valid = self.do_additional_actions(agent, action)
            agent.temp_action = action
            agent.temp_valid = valid
        self._flush_state()
        tiles_with_collisions = self.get_all_tiles_with_collisions()
        for tile in tiles_with_collisions:
            guests = tile.guests_that_can_collide
            for i, guest in enumerate(guests):
                # Every guest records all *other* colliding guests on its tile.
                this_collisions = guests[:]
                del this_collisions[i]
                guest.temp_collisions = this_collisions
        if self.done_at_collision and tiles_with_collisions:
            done = True
        # Step the door close interval
        if self.parse_doors:
            self._doors.tick_doors()
        # Finalize
        reward, info = self.calculate_reward()
        if self._steps >= self.max_steps:
            done = True
        info.update(step_reward=reward, step=self._steps)
        if self.record_episodes:
            info.update(self._summarize_state())
        # Post step hook for later use. A hook that returns nothing (None) must
        # not break the step, so only a non-empty result is merged.
        hook_info = self.post_step()
        if hook_info:
            info.update(hook_info)
        obs = self._get_observations()
        return obs, reward, done, info
    def _flush_state(self):
        """Rewrite every non-level layer of the observation cube from the current entity state."""
        level_idx = self._slices.get_idx(c.LEVEL)
        not_level = np.arange(len(self._slices)) != level_idx
        # Wipe everything except the level layer before redrawing.
        self._obs_cube[not_level] = c.FREE_CELL.value
        if self.parse_doors:
            for door in self._doors:
                door_layer = self._obs_cube[self._slices.get_idx(c.DOORS)]
                if door.is_open:
                    door_layer[door.pos] = c.IS_OPEN_DOOR.value
                else:
                    door_layer[door.pos] = c.IS_CLOSED_DOOR.value
        for agent in self._agents:
            # Indexing the first axis yields a view, so writes land in the cube.
            agent_layer = self._obs_cube[self._slices.get_idx_by_name(agent.name)]
            agent_layer[agent.pos] = c.OCCUPIED_CELL.value
            previous_pos = agent.last_pos
            if previous_pos != h.NO_POS:
                agent_layer[previous_pos] = c.FREE_CELL.value
    def _get_observations(self) -> np.ndarray:
        """
        Build the observation for all agents.

        :return:    The single agent's observation for one agent, otherwise the
                    per-agent observations stacked along a new first axis.
        :raises ValueError:     If ``n_agents`` is smaller than 1.
        """
        if self.n_agents == 1:
            return self._build_per_agent_obs(self._agents[0])
        if self.n_agents >= 2:
            per_agent = [self._build_per_agent_obs(agent) for agent in self._agents]
            return np.stack(per_agent)
        raise ValueError('n_agents cannot be smaller than 1!!')
    def _build_per_agent_obs(self, agent: Agent) -> np.ndarray:
        """
        Build the observation of a single agent.

        With a POMDP radius the observation is the window of
        ``2 * radius + 1`` cells per spatial axis around the agent, cut from the
        padded cube; otherwise it is the full observation cube. Agent layers
        are then omitted or summed into one layer, depending on the flags.

        :param agent:   The agent whose position centres the partial view.
        :return:        The observation array. Without omit/combine it is a view
                        of (or the very same object as) the internal cube.
        """
        first_agent_slice = self._slices.AGENTSTARTIDX
        if r := self.pomdp_radius:
            x, y = self._level_shape
            # Refresh the centre of the padded cube with the current state.
            self._padded_obs_cube[:, r:r + x, r:r + y] = self._obs_cube
            global_x, global_y = agent.pos
            # Shift into the coordinate system of the padded cube.
            global_x += r
            global_y += r
            x0, x1 = max(0, global_x - r), global_x + r + 1
            y0, y1 = max(0, global_y - r), global_y + r + 1
            obs = self._padded_obs_cube[:, x0:x1, y0:y1]
        else:
            obs = self._obs_cube
        if self.omit_agent_slice_in_obs:
            # Agent slices are registered as '<AGENT name>#<i>', so match on the
            # slice *name* (testing membership on the Slice object itself fails).
            kept = [idx for idx, state_slice in self._slices.items() if c.AGENT.name not in state_slice.name]
            return obs[kept]
        if self.combin_agent_slices_in_obs:
            agent_idxs = [idx for idx, state_slice in self._slices.items() if c.AGENT.name in state_slice.name]
            agent_obs = np.sum(obs[agent_idxs], axis=0, keepdims=True)
            return np.concatenate((obs[:first_agent_slice], agent_obs, obs[first_agent_slice + self.n_agents:]))
        return obs
    def do_additional_actions(self, agent_i: int, action: int) -> bool:
        """
        Handle an action that is neither movement, no-op nor door usage.

        Must be provided by subclasses.
        NOTE(review): ``step`` passes the agent object as the first argument,
        not an index - confirm the ``int`` annotation.
        """
        raise NotImplementedError()
    def get_all_tiles_with_collisions(self) -> List[Tile]:
        """Return every tile that currently hosts at least two guests that can collide."""
        def n_colliding(tile):
            return len([guest for guest in tile.guests if guest.can_collide])

        return [tile for tile in self._tiles if tile.is_occupied() and n_colliding(tile) >= 2]
    def _move_or_colide(self, agent: Agent, action: Action) -> Constants:
        """
        Move the agent if the move check passes.

        :return:    The result of ``agent.move`` when the check is truthy,
                    otherwise ``c.NOT_VALID``.
        """
        target_tile, is_valid = self._check_agent_move(agent, action)
        if not is_valid:
            # Agent seems to be trying to collide in this step
            return c.NOT_VALID
        # Does not collide with level boundaries
        return agent.move(target_tile)
    def _check_agent_move(self, agent, action: Action) -> (Tile, bool):
        """
        Determine the tile an agent would reach with a movement action.

        :param agent:   The moving agent.
        :param action:  Movement action; its name is looked up in ``h.ACTIONMAP``.
        :return:        ``(tile, validity)``. If the target position has no tile,
                        or a closed door under the agent does not link the
                        agent's previous position with the target, the agent's
                        current tile and ``c.NOT_VALID`` are returned. Otherwise
                        the target tile and ``c.VALID``.
        """
        x_diff, y_diff = h.ACTIONMAP[action.name]
        new_tile = self._tiles.by_pos((agent.x + x_diff, agent.y + y_diff))
        if not new_tile:
            # No tile at the target position: the move cannot be carried out,
            # so it must not be reported as valid.
            return agent.tile, c.NOT_VALID
        if self.parse_doors and agent.last_pos != h.NO_POS:
            door = self._doors.by_pos(agent.pos)
            # Standing in a closed door, the agent may only leave towards a
            # position that is connected to the one it came from.
            if door and not door.is_open and not door.is_linked(agent.last_pos, new_tile.pos):
                return agent.tile, c.NOT_VALID
        return new_tile, c.VALID
    def calculate_reward(self) -> (int, dict):
        """
        Compute the reward of the current step; must be provided by subclasses.

        :return:    Tuple of ``(reward, info)``.
        """
        raise NotImplementedError()
    def render(self, mode='human'):
        """Render the environment; must be provided by subclasses."""
        raise NotImplementedError()
    def save_params(self, filepath: Path):
        """
        Dump all public instance attributes to a YAML file.

        Attributes whose name starts with an underscore are skipped. Missing
        parent directories of ``filepath`` are created.

        :param filepath:    Destination file.
        """
        public_params = {}
        for key, val in self.__dict__.items():
            if key.startswith('_'):
                continue
            public_params[key] = val
        filepath.parent.mkdir(parents=True, exist_ok=True)
        with filepath.open('w') as f:
            yaml.dump(public_params, f)
    def _summarize_state(self):
        """
        Collect the current step and the state of every summarizable entity.

        :return:    Dict with ``'<REC_TAC>_step'`` plus one ``'<REC_TAC>_<entity name>'``
                    entry per entity that offers ``summarize_state``.
        """
        summary = {f'{REC_TAC}_step': self._steps}
        summarizable = (entity for entity in self._entitites if hasattr(entity, 'summarize_state'))
        for entity in summarizable:
            summary[f'{REC_TAC}_{entity.name}'] = entity.summarize_state()
        return summary
--- a/environments/factory/base/objects.py
+++ b/environments/factory/base/objects.py
@ -0,0 +1,266 @@
 import itertools
 import networkx as nx
 import numpy as np
 from environments import helpers as h
 from environments.helpers import Constants as c
 import itertools
 def sub(p, q):
    """Return the difference ``p - q``."""
    difference = p - q
    return difference
 class Object:
    """Base class for everything that carries an identifier."""
    def __init__(self, identifier, **kwargs):
        self._identifier = identifier
        # Unknown keyword arguments are reported, not rejected.
        if kwargs:
            print(f'Following kwargs were passed, but ignored: {kwargs}')
    def __repr__(self):
        class_name = self.__class__.__name__
        return f'{class_name}({self._identifier})'
    def __bool__(self):
        # Always truthy, even for subclasses that define __len__ and are empty.
        return True
    @property
    def i(self):
        """The identifier handed to the constructor."""
        return self._identifier
    @property
    def name(self):
        """Same value as ``i``."""
        return self._identifier
 class Action(Object):
    """An action, identified by the value passed to the constructor."""
    def __init__(self, *args):
        super().__init__(*args)
    @property
    def name(self):
        """The identifier of this action."""
        identifier = self.i
        return identifier
 class Slice(Object):
    """A named array layer of the environment state."""
    def __init__(self, identifier, arrayslice):
        super().__init__(identifier)
        self.slice = arrayslice
    def _coordinates_of(self, cell_value):
        # Index tuples of all cells holding `cell_value`, one row per cell.
        return np.argwhere(self.slice == cell_value)
    @property
    def shape(self):
        """Shape of the wrapped array."""
        return self.slice.shape
    @property
    def occupied_tiles(self):
        """Coordinates of all cells equal to the occupied-cell value."""
        return self._coordinates_of(c.OCCUPIED_CELL.value)
    @property
    def free_tiles(self):
        """Coordinates of all cells equal to the free-cell value."""
        return self._coordinates_of(c.FREE_CELL.value)
 class Wall(Object):
    """Placeholder type for walls; adds no behaviour on top of ``Object``."""
    pass
 class Tile(Object):
    """A position in the level that keeps track of its guests by name."""
    def __init__(self, i, pos):
        super().__init__(i)
        self._pos = tuple(pos)
        self._guests = dict()
    def __len__(self):
        return len(self._guests)
    @property
    def pos(self):
        """Position as a tuple."""
        return self._pos
    @property
    def x(self):
        return self.pos[0]
    @property
    def y(self):
        return self.pos[1]
    @property
    def guests(self):
        """View on all guest objects currently registered on this tile."""
        return self._guests.values()
    @property
    def guests_that_can_collide(self):
        """List of the guests whose ``can_collide`` is truthy."""
        return [guest for guest in self.guests if guest.can_collide]
    def is_empty(self):
        return not len(self._guests)
    def is_occupied(self):
        # Returns the guest count, which is truthy as soon as there is one guest.
        return len(self._guests)
    def enter(self, guest):
        """Register ``guest`` by name; return False if that name is already present."""
        if guest.name in self._guests:
            return False
        self._guests.update({guest.name: guest})
        return True
    def leave(self, guest):
        """Remove ``guest`` by name; return False if it was not registered."""
        if guest.name not in self._guests:
            return False
        del self._guests[guest.name]
        return True
 class Entity(Object):
    """An object that is located on a tile."""
    def __init__(self, identifier, tile: Tile, **kwargs):
        super().__init__(identifier, **kwargs)
        # NOTE(review): the entity only stores the tile; it is not registered as a
        # guest via `tile.enter` here - confirm callers do not rely on that.
        self._tile = tile
    @property
    def tile(self):
        """The tile this entity is currently located on."""
        return self._tile
    @property
    def pos(self):
        """Position of the current tile."""
        return self._tile.pos
    @property
    def x(self):
        return self.pos[0]
    @property
    def y(self):
        return self.pos[1]
    @property
    def can_collide(self):
        return True
    @property
    def encoding(self):
        return 1
    def summarize_state(self):
        """Return a shallow copy of the instance attribute dict."""
        return dict(self.__dict__)
 class MoveableEntity(Entity):
    """An entity that can change tiles and remembers the tile it came from."""
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._last_tile = None
    @property
    def last_tile(self):
        """The tile left by the most recent successful move, or None."""
        return self._last_tile
    @property
    def last_pos(self):
        """Position of the last tile, or ``h.NO_POS`` if there is none."""
        return self._last_tile.pos if self._last_tile else h.NO_POS
    @property
    def direction_of_view(self):
        """Component-wise difference ``last_pos - pos``."""
        previous_x, previous_y = self.last_pos
        current_x, current_y = self.pos
        return previous_x - current_x, previous_y - current_y
    def move(self, next_tile):
        """
        Move to ``next_tile``.

        :return:    True if the tile changed, False if ``next_tile`` is the current tile.
        """
        origin = self.tile
        if origin == next_tile:
            return False
        next_tile.enter(self)
        origin.leave(self)
        self._tile = next_tile
        self._last_tile = origin
        return True
 class Door(Entity):
    """
    A door entity with an open/closed state and a connectivity graph.

    The graph holds the existing neighbour positions of the door (including
    diagonals). Two neighbours are connected if they are at most one cell
    apart on each axis. While the door is open, the door position itself is
    connected to every node; closing removes it again.
    """
    @property
    def can_collide(self):
        return False
    @property
    def encoding(self):
        return 1 if self.is_closed else -1
    def __init__(self, *args, context, closed_on_init=True, auto_close_interval=500):
        """
        :param args:                    Forwarded to ``Entity`` (identifier, tile).
        :param context:                 Register offering ``by_pos``; used to find neighbour tiles.
        :param closed_on_init:          Open the door right away when False.
        :param auto_close_interval:     Value ``time_to_close`` is set to whenever the door opens.
        """
        super(Door, self).__init__(*args)
        self._state = c.IS_CLOSED_DOOR
        self.auto_close_interval = auto_close_interval
        self.time_to_close = -1
        # All 8 offsets around the door; the product ends with (0, 0), which is dropped.
        neighbor_offsets = list(itertools.product([-1, 1, 0], repeat=2))[:-1]
        neighbor_tiles = [context.by_pos(tuple([sum(x) for x in zip(self.pos, diff)])) for diff in neighbor_offsets]
        neighbor_pos = [x.pos for x in neighbor_tiles if x]
        self.connectivity = nx.Graph()
        # Register every neighbour as a node, even if it has no adjacent neighbour.
        # Otherwise a door with floor on two opposite sides only has an empty graph
        # and every path query fails with NodeNotFound.
        self.connectivity.add_nodes_from(neighbor_pos)
        for a, b in itertools.combinations(neighbor_pos, 2):
            if not max(abs(np.subtract(a, b))) > 1:
                self.connectivity.add_edge(a, b)
        if not closed_on_init:
            self._open()
    @property
    def is_closed(self):
        return self._state == c.IS_CLOSED_DOOR
    @property
    def is_open(self):
        return self._state == c.IS_OPEN_DOOR
    @property
    def status(self):
        return self._state
    def use(self):
        """Toggle the door between open and closed."""
        if self._state == c.IS_OPEN_DOOR:
            self._close()
        else:
            self._open()
    def tick(self):
        """
        Advance the auto-close countdown by one step.

        Only acts while the door is open and its tile holds exactly one guest:
        a non-zero ``time_to_close`` is decremented, a zero one closes the door.
        """
        if self.is_open and len(self.tile) == 1:
            if self.time_to_close:
                self.time_to_close -= 1
            else:
                self.use()
    def _open(self):
        # Add the door node explicitly so it exists even without any neighbour.
        self.connectivity.add_node(self.pos)
        self.connectivity.add_edges_from([(self.pos, x) for x in self.connectivity.nodes])
        self._state = c.IS_OPEN_DOOR
        self.time_to_close = self.auto_close_interval
    def _close(self):
        # remove_node raises for unknown nodes; guard against a door that was never opened.
        if self.pos in self.connectivity:
            self.connectivity.remove_node(self.pos)
        self._state = c.IS_CLOSED_DOOR
    def is_linked(self, old_pos, new_pos):
        """
        Tell whether ``old_pos`` and ``new_pos`` are connected in the door graph.

        :return:    True if a path exists, False if there is none or if either
                    position is not part of the graph.
        """
        try:
            nx.shortest_path(self.connectivity, old_pos, new_pos)
        except (nx.exception.NetworkXNoPath, nx.exception.NodeNotFound):
            return False
        return True
 class Agent(MoveableEntity):
    """A moveable entity that carries per-step bookkeeping attributes."""
    def __init__(self, *args):
        super().__init__(*args)
        self.clear_temp_sate()
    # noinspection PyAttributeOutsideInit
    def clear_temp_sate(self):
        """Reset the per-step attributes (collisions, validity, action) to their defaults."""
        self.temp_collisions, self.temp_valid, self.temp_action = [], None, -1
--- a/environments/factory/base/registers.py
+++ b/environments/factory/base/registers.py
@ -0,0 +1,292 @@
 import itertools
 import random
 from enum import Enum
 from typing import List, Union
 import networkx as nx
 import numpy as np
 from environments.factory.base.objects import Entity, Tile, Agent, Door, Slice, Action
 from environments.utility_classes import MovementProperties
 from environments import helpers as h
 from environments.helpers import Constants as c
 class Register:
    """
    Ordered collection of objects, addressable by running index or by name.

    ``_register`` maps the insertion index to the object, ``_names`` maps the
    object's name to that index. Subclasses set ``_accepted_objects`` to the
    type they accept.
    """
    _accepted_objects = Entity
    @classmethod
    def from_argwhere_coordinates(cls, positions: (int, int), tiles):
        """Create one accepted object per position, located on the tile found at that position."""
        entities = [cls._accepted_objects(i, tiles.by_pos(position)) for i, position in enumerate(positions)]
        registered_obj = cls()
        registered_obj.register_additional_items(entities)
        return registered_obj
    @classmethod
    def from_tiles(cls, tiles, **kwargs):
        """Create one accepted object per tile, named ``'<TYPENAME>#<i>'``; kwargs go to its constructor."""
        entities = [cls._accepted_objects(f'{cls._accepted_objects.__name__.upper()}#{i}', tile, **kwargs)
                    for i, tile in enumerate(tiles)]
        registered_obj = cls()
        registered_obj.register_additional_items(entities)
        return registered_obj
    @property
    def name(self):
        return self.__class__.__name__
    @property
    def n(self):
        return len(self)
    def __init__(self):
        self._register = dict()
        self._names = dict()
    def __len__(self):
        return len(self._register)
    def __iter__(self):
        return iter(self.values())
    def __repr__(self):
        return f'{self.__class__.__name__}({self._register})'
    def __add__(self, other: _accepted_objects):
        """Register ``other`` under the next free index (in place) and return ``self``."""
        assert isinstance(other, self._accepted_objects), f'All item names have to be of type ' \
                                                          f'{self._accepted_objects}, ' \
                                                          f'but were {other.__class__}.,'
        next_idx = len(self._register)
        self._names.update({other.name: next_idx})
        self._register.update({next_idx: other})
        return self
    def register_additional_items(self, others: List[_accepted_objects]):
        """Register every item of ``others`` in order and return ``self``."""
        for other in others:
            # Goes through `+` so that subclass overrides of __add__ take part.
            self + other
        return self
    def keys(self):
        return self._register.keys()
    def values(self):
        return self._register.values()
    def items(self):
        return self._register.items()
    def __getitem__(self, item):
        """
        Return the object registered under index ``item``.

        :raises KeyError:   If nothing is registered under ``item``.
        """
        try:
            return self._register[item]
        except KeyError as err:
            # Name the register and the key instead of printing a bare marker to stdout.
            raise KeyError(f'{item!r} is not registered in {self.name}') from err
    def by_name(self, item):
        """Return the object registered under the name ``item``."""
        return self[self._names[item]]
    def by_enum(self, enum: Enum):
        """Return the object registered under ``enum.name``."""
        return self[self._names[enum.name]]
    def get_name(self, item):
        """Return the name of the object registered under index ``item``."""
        return self._register[item].name
    def get_idx_by_name(self, item):
        """Return the index of the object registered under the name ``item``."""
        return self._names[item]
    def get_idx(self, enum: Enum):
        """Return the index of the object registered under ``enum.name``."""
        return self._names[enum.name]
 class EntityRegister(Register):
    """Register that additionally indexes its items by their position."""
    @classmethod
    def from_argwhere_coordinates(cls, argwhere_coordinates):
        """Create one accepted object per coordinate, using the running index as identifier."""
        tiles = cls()
        tiles.register_additional_items([cls._accepted_objects(i, pos) for i, pos in enumerate(argwhere_coordinates)])
        return tiles
    def __init__(self):
        super(EntityRegister, self).__init__()
        self._tiles = dict()
    def __add__(self, other):
        """Register ``other``, index it by ``other.pos`` and return ``self`` like the base class."""
        super(EntityRegister, self).__add__(other)
        self._tiles[other.pos] = other
        return self
    def by_pos(self, pos):
        """
        Return the item located at ``pos``, or None if there is none.

        :param pos:     Position as tuple; a numpy array is converted to a tuple first.
        """
        if isinstance(pos, np.ndarray):
            pos = tuple(pos)
        return self._tiles.get(pos)
 class Entities(Register):
    """Register of registers; iterating yields the members of all contained registers."""
    _accepted_objects = Register
    def __init__(self):
        super().__init__()
    def __iter__(self):
        flattened = []
        for sub_register in self.values():
            for member in sub_register:
                flattened.append(member)
        return iter(flattened)
    @classmethod
    def from_argwhere_coordinates(cls, positions):
        # Not supported for a register of registers.
        raise AttributeError()
 class FloorTiles(EntityRegister):
    """Position-indexed register of ``Tile`` objects."""
    _accepted_objects = Tile
    def _shuffled(self, predicate):
        # Collect the tiles matching `predicate` and return them in random order.
        selection = [tile for tile in self if predicate(tile)]
        random.shuffle(selection)
        return selection
    @property
    def occupied_tiles(self):
        """All tiles for which ``is_occupied()`` is truthy, shuffled."""
        return self._shuffled(lambda tile: tile.is_occupied())
    @property
    def empty_tiles(self):
        """All tiles for which ``is_empty()`` is truthy, shuffled."""
        return self._shuffled(lambda tile: tile.is_empty())
 class Agents(Register):
    """Register of ``Agent`` objects."""
    _accepted_objects = Agent
    @property
    def positions(self):
        """Positions of all registered agents, in registration order."""
        collected = []
        for agent in self:
            collected.append(agent.pos)
        return collected
 class Doors(EntityRegister):
    """Position-indexed register of ``Door`` objects."""
    _accepted_objects = Door
    def tick_doors(self):
        """Call ``tick`` on every registered door, in registration order."""
        for registered_door in self.values():
            registered_door.tick()
 class Actions(Register):
    """
    Register of all actions an agent can take.

    Registration order is: square moves, diagonal moves, ``'use_door'``,
    ``'no-op'`` - each only if enabled. The movement actions are snapshotted
    before the non-movement actions are added.
    """
    _accepted_objects = Action
    @property
    def movement_actions(self):
        """Index-to-action mapping of the movement actions only."""
        return self._movement_actions
    def __init__(self, movement_properties: MovementProperties, can_use_doors=False):
        """
        :param movement_properties:     Provides ``allow_no_op``, ``allow_diagonal_movement``
                                        and ``allow_square_movement``.
        :param can_use_doors:           Register the ``'use_door'`` action.
        """
        self.allow_no_op = movement_properties.allow_no_op
        self.allow_diagonal_movement = movement_properties.allow_diagonal_movement
        self.allow_square_movement = movement_properties.allow_square_movement
        self.can_use_doors = can_use_doors
        super(Actions, self).__init__()
        if self.allow_square_movement:
            self.register_additional_items([self._accepted_objects(direction) for direction in h.MANHATTAN_MOVES])
        if self.allow_diagonal_movement:
            self.register_additional_items([self._accepted_objects(direction) for direction in h.DIAGONAL_MOVES])
        # Snapshot taken before door / no-op actions are appended.
        self._movement_actions = self._register.copy()
        if self.can_use_doors:
            self.register_additional_items([self._accepted_objects('use_door')])
        if self.allow_no_op:
            self.register_additional_items([self._accepted_objects('no-op')])
    def _as_index(self, action: Union[str, int]):
        # Translate an action name into its register index; indices pass through.
        # `by_name` would return the Action object, which is not a valid register key.
        if isinstance(action, str):
            return self.get_idx_by_name(action)
        return action
    def is_moving_action(self, action: Union[int]):
        """Tell whether the index ``action`` belongs to a movement action."""
        return action in self.movement_actions.keys()
    def is_no_op(self, action: Union[str, int]):
        """Tell whether ``action`` (name or index) is the ``'no-op'`` action."""
        return self[self._as_index(action)].name == 'no-op'
    def is_door_usage(self, action: Union[str, int]):
        """Tell whether ``action`` (name or index) is the ``'use_door'`` action."""
        return self[self._as_index(action)].name == 'use_door'
 class StateSlices(Register):
    """Register of the ``Slice`` layers that make up the environment state."""
    _accepted_objects = Slice
    @property
    def AGENTSTARTIDX(self):
        """
        Lowest index of a slice whose name contains the agent tag (cached).

        :raises ValueError:     If no such slice is registered.
        """
        # Compare against None: index 0 is a valid, cacheable result.
        if self._agent_start_idx is None:
            self._agent_start_idx = min([idx for idx, x in self.items() if c.AGENT.name in x.name])
        return self._agent_start_idx
    def __init__(self):
        super(StateSlices, self).__init__()
        self._agent_start_idx = None
    def _gather_occupation(self, excluded_slices):
        """
        Sum the arrays of all slices that are not excluded.

        :param excluded_slices:     None, a single slice index or a list of indices.
        """
        # `is None` instead of truthiness, so that index 0 can be excluded as well.
        exclusion = [] if excluded_slices is None else excluded_slices
        assert isinstance(exclusion, (int, list))
        exclusion = exclusion if isinstance(exclusion, list) else [exclusion]
        # Sum the wrapped arrays; Slice objects themselves cannot be added.
        result = np.sum([x.slice for i, x in self.items() if i not in exclusion], axis=0)
        return result
    def free_cells(self, excluded_slices: Union[None, List[int], int] = None) -> np.array:
        """Return the shuffled coordinates whose summed occupation equals the free-cell value."""
        occupation = self._gather_occupation(excluded_slices)
        # Same constants as used by `Slice.free_tiles` / `Slice.occupied_tiles`.
        free_cells = np.argwhere(occupation == c.FREE_CELL.value)
        np.random.shuffle(free_cells)
        return free_cells
    def occupied_cells(self, excluded_slices: Union[None, List[int], int] = None) -> np.array:
        """Return the shuffled coordinates whose summed occupation equals the occupied-cell value."""
        occupation = self._gather_occupation(excluded_slices)
        occupied_cells = np.argwhere(occupation == c.OCCUPIED_CELL.value)
        np.random.shuffle(occupied_cells)
        return occupied_cells
 class Zones(Register):
    """
    Register of level zones backed by a stacked array of one-hot zone slices.

    NOTE(review): the constructor currently raises NotImplementedError as its
    first statement, so this class cannot be instantiated and the code below
    that statement is unreachable until the rework is done.
    """
    @property
    def danger_zone(self):
        return self._zone_slices[self.by_enum(c.DANGER_ZONE)]
    @property
    def accounting_zones(self):
        return [self[idx] for idx, name in self.items() if name != c.DANGER_ZONE.value]
    def __init__(self, parsed_level):
        # Deliberately disabled; everything after this line is dead code.
        raise NotImplementedError('This needs a Rework')
        super(Zones, self).__init__()
        slices = list()
        self._accounting_zones = list()
        self._danger_zones = list()
        for symbol in np.unique(parsed_level):
            if symbol == h.WALL:
                continue
            elif symbol == h.DANGER_ZONE:
                self + symbol
                slices.append(h.one_hot_level(parsed_level, symbol))
                self._danger_zones.append(symbol)
            else:
                self + symbol
                slices.append(h.one_hot_level(parsed_level, symbol))
                self._accounting_zones.append(symbol)
        self._zone_slices = np.stack(slices)
    def __getitem__(self, item):
        # Indexes the stacked zone array instead of the register dict.
        return self._zone_slices[item]
    def get_name(self, item):
        return self._register[item]
    def by_name(self, item):
        return self[super(Zones, self).by_name(item)]
    def register_additional_items(self, other: Union[str, List[str]]):
        # Zones are fixed once constructed.
        raise AttributeError('You are not allowed to add additional Zones in runtime.')
--- a/environments/factory/base_factory.py
+++ b/environments/factory/base_factory.py
@ -1,364 +0,0 @@
 from pathlib import Path
 from typing import List, Union, Iterable
 import gym
 import numpy as np
 from gym import spaces
 import yaml
 from gym.wrappers import FrameStack
 from environments import helpers as h
 from environments.utility_classes import Actions, StateSlices, AgentState, MovementProperties, Zones, DoorState
 # noinspection PyAttributeOutsideInit
 class BaseFactory(gym.Env):
    @property
    def action_space(self):
        """Discrete gym action space with one entry per registered action (``self._actions.n``)."""
        return spaces.Discrete(self._actions.n)
    @property
    def observation_space(self):
        """
        Box space (values in [0, 1], float32) describing a single agent's observation.

        The first dimension is the number of state slices minus the agent slices that are hidden:
        ``n_agents - 1`` when the agent slices are combined into one, ``n_agents`` when they are
        omitted, otherwise none. With a ``pomdp_radius`` set, the spatial dimensions shrink to the
        agent's square view window; otherwise they equal the state's spatial dimensions.
        """
        # Combining takes precedence over omitting, mirroring the order of evaluation used so far.
        if self.combin_agent_slices_in_obs:
            hidden_slices = self.n_agents - 1
        elif self.omit_agent_slice_in_obs:
            hidden_slices = self.n_agents
        else:
            hidden_slices = 0
        if self.pomdp_radius:
            view_size = self.pomdp_radius * 2 + 1
            shape = (self._state.shape[0] - hidden_slices, view_size, view_size)
        else:
            n_slices, *spatial_dims = self._state.shape
            shape = [n_slices - hidden_slices, *spatial_dims]
        return spaces.Box(low=0, high=1, shape=shape, dtype=np.float32)
    @property
    def movement_actions(self):
        """The movement actions as exposed by the action register (``self._actions.movement_actions``)."""
        return self._actions.movement_actions
    @property
    def has_doors(self):
        """
        Whether a door slice exists for this level.

        ``_doors`` is only assigned in ``__init__`` when door parsing is enabled and the level contains
        at least one door, so the mere presence of the attribute is used as the flag.
        """
        return hasattr(self, '_doors')
    def __enter__(self):
        """Enter the context: the plain environment, or a ``FrameStack`` wrapper if frames are stacked."""
        if self.frames_to_stack == 0:
            return self
        return FrameStack(self, self.frames_to_stack)
    def __exit__(self, exc_type, exc_val, exc_tb):
        """Leave the context by closing the environment; exceptions are not suppressed (returns None)."""
        self.close()
    def __init__(self, level_name='simple', n_agents=1, max_steps=int(5e2), pomdp_radius: Union[None, int] = 0,
                 movement_properties: MovementProperties = MovementProperties(), parse_doors=False,
                 combin_agent_slices_in_obs: bool = False, frames_to_stack=0,
                 omit_agent_slice_in_obs=False, **kwargs):
        """
        Load the level, register all state slices and actions, and reset the environment once.

        :param level_name:                  Name of the level file (without ``.txt``) inside the
                                            ``h.LEVELS_DIR`` directory next to this module.
        :param n_agents:                    Number of agents; one state slice is registered per agent.
        :param max_steps:                   ``step`` reports ``done`` once this many steps were taken.
        :param pomdp_radius:                If truthy, agents observe a square window of side
                                            ``2 * pomdp_radius + 1`` around themselves.
        :param movement_properties:         Forwarded to the ``Actions`` register.
        :param parse_doors:                 Parse door symbols from the level into their own slice.
        :param combin_agent_slices_in_obs:  Sum all agent slices into a single observation slice.
        :param frames_to_stack:             0 for no stacking, otherwise >= 2 (used in ``__enter__``).
        :param omit_agent_slice_in_obs:     Drop all agent slices from the observation.
        :param kwargs:                      Only ``additional_slices`` is read here: extra slice names
                                            contributed by subclasses.
        """
        # At most one of the two agent-slice options may be enabled.
        assert (combin_agent_slices_in_obs != omit_agent_slice_in_obs) or \
               (not combin_agent_slices_in_obs and not omit_agent_slice_in_obs), \
            'Both options are exclusive'
        assert frames_to_stack != 1 and frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1."
        self.movement_properties = movement_properties
        self.level_name = level_name
        self.n_agents = n_agents
        self.max_steps = max_steps
        self.pomdp_radius = pomdp_radius
        self.combin_agent_slices_in_obs = combin_agent_slices_in_obs
        self.omit_agent_slice_in_obs = omit_agent_slice_in_obs
        self.frames_to_stack = frames_to_stack
        self.done_at_collision = False
        self._state_slices = StateSlices()
        # Level: read the text file and turn it into a binary wall map.
        level_filepath = Path(__file__).parent / h.LEVELS_DIR / f'{self.level_name}.txt'
        parsed_level = h.parse_level(level_filepath)
        self._level = h.one_hot_level(parsed_level)
        level_slices = [h.LEVEL]
        # Doors: the slice (and therefore ``has_doors``) only exists if the level contains a door.
        if parse_doors:
            parsed_doors = h.one_hot_level(parsed_level, h.DOOR)
            if parsed_doors.any():
                self._doors = parsed_doors
                level_slices.append(h.DOORS)
        # Agents: their slices follow the level (and door) slices; names are numbered from ``offset``.
        offset = len(level_slices)
        self._state_slices.register_additional_items([*level_slices,
                                                      *[f'agent#{i}' for i in range(offset, n_agents + offset)]])
        # Additional Slices from SubDomains
        if 'additional_slices' in kwargs:
            self._state_slices.register_additional_items(kwargs.get('additional_slices'))
        self._zones = Zones(parsed_level)
        # ``has_doors`` must be evaluated after the door parsing above.
        self._actions = Actions(self.movement_properties, can_use_doors=self.has_doors)
        self._actions.register_additional_items(self.additional_actions)
        # Build the initial state; the return value of ``reset`` is discarded here.
        self.reset()
    @property
    def additional_actions(self) -> Union[str, List[str]]:
        """
        Hook for subclasses: the names of the domain-specific (non-movement) actions.

        The returned value is handed to ``self._actions.register_additional_items`` in ``__init__``.
        This base implementation always raises, so every subclass has to override it.

        :return:            A single action name or a list of action names.
        :rtype:             Union[str, List[str]]
        """
        raise NotImplementedError('Please register additional actions ')
    def reset(self) -> (np.ndarray, int, bool, dict):
        """
        Rebuild the global state: level slice, optional door slice and one slice per agent.

        Agents are placed on distinct, randomly chosen free level cells. The step counter and the
        agent (and door) state lists are re-created.

        :return:    Always ``None``; subclasses are expected to build the observation themselves.
        """
        state_layers = [np.expand_dims(self._level, 0)]
        self._steps = 0
        self._agent_states = list()

        # Doors: one DoorState per door cell, plus the door slice itself.
        if self.has_doors:
            door_cells = np.argwhere(self._doors == h.IS_OCCUPIED_CELL)
            self._door_states = [DoorState(door_idx, tuple(cell)) for door_idx, cell in enumerate(door_cells)]
            state_layers.append(np.expand_dims(self._doors, 0))

        # Agents: shuffle all free cells and hand the first n_agents of them out.
        spawn_cells = np.argwhere(self._level == h.IS_FREE_CELL)
        np.random.shuffle(spawn_cells)
        agent_layers = np.zeros((self.n_agents, *self._level.shape), dtype=np.int8)
        for agent_idx, (x, y) in enumerate(spawn_cells[:self.n_agents]):
            agent_layers[agent_idx, x, y] = h.IS_OCCUPIED_CELL
            self._agent_states.append(AgentState(agent_idx, -1, pos=(x, y)))
        state_layers.append(agent_layers)

        # Global state: all layers stacked along the slice axis.
        self._state = np.concatenate(state_layers, axis=0)
        return None
    def _get_observations(self) -> np.ndarray:
        """
        Collect the observation(s) of all agents.

        :return:    The single agent's observation for one agent, otherwise the per-agent
                    observations stacked along a new leading axis.
        :raises ValueError: If ``n_agents`` is smaller than 1.
        """
        if self.n_agents == 1:
            return self._build_per_agent_obs(0)
        if self.n_agents >= 2:
            per_agent = [self._build_per_agent_obs(agent_i) for agent_i in range(self.n_agents)]
            return np.stack(per_agent)
        raise ValueError('n_agents cannot be smaller than 1!!')
    def _build_per_agent_obs(self, agent_i: int) -> np.ndarray:
        """
        Build the observation of agent ``agent_i`` from the global state.

        With a ``pomdp_radius`` the state is cropped to a square window centred on the agent; parts of
        the window that lie outside the level are filled with ``h.IS_OCCUPIED_CELL`` in every slice.
        Afterwards the agent slices are either removed, summed into a single slice, or kept as they
        are, depending on ``omit_agent_slice_in_obs`` / ``combin_agent_slices_in_obs``.

        Note: without a radius and without omitting or combining, the state array itself is returned
        (no copy).

        :param agent_i: Index of the agent the observation is built for.
        :return:        Array of shape (slices, x, y).
        """
        first_agent_slice = self._state_slices.AGENTSTARTIDX
        # Todo: make this more efficient!
        if self.pomdp_radius:
            pomdp_diameter = self.pomdp_radius * 2 + 1
            global_x, global_y = self._agent_states[agent_i].pos
            # The lower bounds are clamped at 0; slicing clamps the upper bounds implicitly.
            x0, x1 = max(0, global_x - self.pomdp_radius), global_x + self.pomdp_radius + 1
            y0, y1 = max(0, global_y - self.pomdp_radius), global_y + self.pomdp_radius + 1
            obs = self._state[:, x0:x1, y0:y1]
            # The crop is smaller than the window whenever the agent stands close to a level border.
            if obs.shape[1] != pomdp_diameter or obs.shape[2] != pomdp_diameter:
                obs_padded = np.full((obs.shape[0], pomdp_diameter, pomdp_diameter), h.IS_OCCUPIED_CELL)
                # Position of the agent inside the crop; its distance to the radius is the offset at
                # which the crop has to be pasted so that the agent ends up in the window centre.
                local_x, local_y = np.argwhere(obs[first_agent_slice + agent_i] == h.IS_OCCUPIED_CELL)[0]
                obs_padded[:,
                abs(local_x-self.pomdp_radius):abs(local_x-self.pomdp_radius)+obs.shape[1],
                abs(local_y-self.pomdp_radius):abs(local_y-self.pomdp_radius)+obs.shape[2]] = obs
                obs = obs_padded
        else:
            obs = self._state
        if self.omit_agent_slice_in_obs:
            # Keep every slice whose registered name does not contain the agent marker.
            obs_new = obs[[key for key, val in self._state_slices.items() if h.AGENT not in val]]
            return obs_new
        else:
            if self.combin_agent_slices_in_obs:
                # Collapse all agent slices into one and splice it in where the agent slices were.
                agent_obs = np.sum(obs[[key for key, val in self._state_slices.items() if 'agent' in val]],
                                   axis=0, keepdims=True)
                obs = np.concatenate((obs[:first_agent_slice], agent_obs, obs[first_agent_slice+self.n_agents:]))
                return obs
            else:
                return obs
    def do_additional_actions(self, agent_i: int, action: int) -> ((int, int), bool):
        """
        Hook for subclasses: execute a domain-specific action for agent ``agent_i``.

        ``step`` calls this for every action that is neither a movement, a no-op nor a door usage and
        unpacks the result as ``pos, valid``.

        :raises NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError
    def step(self, actions):
        """
        Advance the environment by one time step.

        Every agent's action is executed in order (movement, no-op, door usage or a subclass specific
        action), collisions are evaluated, open doors that are not blocked by an agent count down and
        close, and finally the reward is calculated.

        :param actions: A single action (int / scalar) or an iterable with one action per agent.
        :return:        ``(None, reward, done, info)``; the observation is not built here, subclasses
                        are expected to provide it.
        """
        actions = [actions] if isinstance(actions, int) or np.isscalar(actions) else actions
        assert isinstance(actions, Iterable), f'"actions" has to be in [{int, list}]'
        self._steps += 1
        done = False

        # Move this in a separate function?
        for agent_i, action in enumerate(actions):
            agent = self._agent_states[agent_i]
            if self._actions.is_moving_action(action):
                pos, valid = self.move_or_colide(agent_i, action)
            elif self._actions.is_no_op(action):
                pos, valid = agent.pos, h.VALID
            elif self._actions.is_door_usage(action):
                # Check if the agent really stands on a door:
                if self._state[self._state_slices.by_name(h.DOORS)][agent.pos] in [h.IS_OCCUPIED_CELL, ]:
                    door = [door for door in self._door_states if door.pos == agent.pos][0]
                    door.use()
                    pos, valid = agent.pos, h.VALID
                # When he doesn't...
                else:
                    pos, valid = agent.pos, h.NOT_VALID
            else:
                pos, valid = self.do_additional_actions(agent_i, action)
            # Update state accordingly
            agent.update(pos=pos, action_valid=valid, action=action)

        for i, collision_vec in enumerate(self.check_all_collisions(self._agent_states, self._state.shape[0])):
            self._agent_states[i].update(collision_vector=collision_vec)
            if self.done_at_collision and collision_vec.any():
                done = True

        # Step the door close interval; doors occupied by an agent neither count down nor close.
        agents_pos = [agent.pos for agent in self._agent_states]
        if self.has_doors:
            for door in self._door_states:
                if not door.is_open or door.pos in agents_pos:
                    continue
                if door.time_to_close:
                    door.time_to_close -= 1
                else:
                    door.use()
                    # Only the cell of this door is updated. Assigning to the slice as a whole would
                    # overwrite every cell of the door layer with the marker of a single door.
                    self._state[self._state_slices.by_name(h.DOORS)][door.pos] = 1 if door.is_closed else -1

        reward, info = self.calculate_reward(self._agent_states)
        if self._steps >= self.max_steps:
            done = True
        info.update(step_reward=reward, step=self._steps)
        return None, reward, done, info
    def check_all_collisions(self, agent_states: List[AgentState], collisions: int) -> np.ndarray:
        """
        Compute the collision vectors of all agents.

        :param agent_states:    The current state of every agent.
        :param collisions:      Length of a single collision vector (number of state slices).
        :return:                Array of shape (n_agents, collisions); rows of agents whose last action
                                was not a movement stay all-zero.
        """
        collision_matrix = np.zeros((len(agent_states), collisions))  # n_agents x n_slices
        for state in agent_states:
            # Register only collisions of moving agents
            if not self._actions.is_moving_action(state.action):
                continue
            collision_matrix[state.i] = self.check_collisions(state)
        return collision_matrix
    def check_collisions(self, agent_state: AgentState) -> np.ndarray:
        """
        Compute the collision vector of a single agent.

        The vector is a copy of the state's "vertical fiber" at the agent's position, i.e. one entry
        per state slice. The agent's own slice and - if registered - the door slice are cleared, so
        that neither counts as a collision. If the agent's last action was not valid, the level entry
        is set to mark a collision with the level boundaries.

        :param agent_state: State of the agent to check.
        :return:            Collision vector with one entry per state slice.
        """
        pos_x, pos_y = agent_state.pos
        # FixMe: We need to find a way to spare out some dimensions, eg. an info dimension etc... a[?,]
        #  https://numpy.org/doc/stable/reference/arrays.indexing.html#boolean-array-indexing
        collisions_vec = self._state[:, pos_x, pos_y].copy()                 # "vertical fiber" at position of agent i
        collisions_vec[self._state_slices.AGENTSTARTIDX + agent_state.i] = h.IS_FREE_CELL   # no self-collisions
        # The door slice is registered under ``h.DOORS``; test for and look up exactly that name so the
        # mask is actually applied whenever the level has doors.
        if h.DOORS in self._state_slices.values():
            collisions_vec[self._state_slices.by_name(h.DOORS)] = h.IS_FREE_CELL          # no door-collisions
        if not agent_state.action_valid:
            # Place a marker to indicate a collision with the level boundaries
            collisions_vec[self._state_slices.by_name(h.LEVEL)] = h.IS_OCCUPIED_CELL
        # A valid action needs no marker; place a function hook here if needed.
        return collisions_vec
    def do_move(self, agent_i: int, old_pos: (int, int), new_pos: (int, int)) -> None:
        """
        Move agent ``agent_i`` inside its state slice.

        :param agent_i: Index of the agent to move.
        :param old_pos: Cell the agent leaves; it is marked as free.
        :param new_pos: Cell the agent enters; it is marked as occupied.
        """
        slice_idx = agent_i + self._state_slices.AGENTSTARTIDX
        old_x, old_y = old_pos
        new_x, new_y = new_pos
        self._state[slice_idx, old_x, old_y] = h.IS_FREE_CELL
        self._state[slice_idx, new_x, new_y] = h.IS_OCCUPIED_CELL
    def move_or_colide(self, agent_i: int, action: int) -> ((int, int), bool):
        """
        Try to move agent ``agent_i`` according to ``action``.

        :param agent_i: Index of the agent.
        :param action:  Index of the movement action; it is resolved through ``self._actions``.
        :return:        ``(position, valid)``: the new position if the move was valid and has been
                        executed, otherwise the unchanged old position.
        """
        old_pos, new_pos, valid = self._check_agent_move(agent_i=agent_i, action=self._actions[action])
        if not valid:
            # Agent seems to be trying to collide in this step; it stays where it is.
            return old_pos, valid
        # Does not collide with level boundaries
        self.do_move(agent_i, old_pos, new_pos)
        return new_pos, valid
    def _check_agent_move(self, agent_i, action: str):
        """
        Check whether agent ``agent_i`` may perform the movement ``action``.

        The agent's position is read from its state slice and the target cell is derived from
        ``h.ACTIONMAP``. An agent standing on a closed door may only step back to where it came from;
        any other move is rejected. All remaining moves are validated against the level slice via
        ``h.check_position``.

        :param agent_i: Index of the agent.
        :param action:  Name of the movement action (a key of ``h.ACTIONMAP``).
        :return:        ``(old_pos, new_pos, valid)``; for a move rejected by a closed door both
                        positions equal the current position.
        :raises AssertionError: If the agent's slice marks more than one cell.
        """
        agent_slice_idx = self._state_slices.AGENTSTARTIDX + agent_i
        agent_slice = self._state[agent_slice_idx]  # horizontal slice from state tensor
        agent_pos = np.argwhere(agent_slice == 1)
        if len(agent_pos) > 1:
            raise AssertionError('Only one agent per slice is allowed.')
        # NOTE(review): an empty slice is not caught above and would fail here with an IndexError.
        x, y = agent_pos[0]
        # Actions
        x_diff, y_diff = h.ACTIONMAP[action]
        x_new = x + x_diff
        y_new = y + y_diff
        # The door logic needs a previous position, so it is skipped while ``_last_pos`` is (-1, -1).
        if self.has_doors and self._agent_states[agent_i]._last_pos != (-1, -1):
            door = [door for door in self._door_states if door.pos == (x, y)]
            if door:
                door = door[0]
                if door.is_open:
                    pass
                else:  # door.is_closed:
                    # 3x3 neighbourhood of the door in the level slice.
                    local_door_map = self._state[self._state_slices.by_name(h.LEVEL)][door.pos[0]-1:door.pos[0]+2,
                                                                                      door.pos[1]-1:door.pos[1]+2]
                    # Mark where the agent came from and where it wants to go, both as offsets relative
                    # to the door. If both marks fall on the same side of the door, the agent turns back.
                    local_agent_map = np.zeros_like(local_door_map)
                    local_agent_map[tuple(np.subtract(door.pos, self._agent_states[agent_i]._last_pos))] += 1
                    local_agent_map[tuple(np.subtract(door.pos, (x_new, y_new)))] += 1
                    if np.all(local_door_map == h.HORIZONTAL_DOOR_MAP):
                        # This is a horizontal Door Configuration
                        if np.sum(local_agent_map[0]) >= 2 or np.sum(local_agent_map[-1]) >= 2:
                            # The Agent goes back to where he came from
                            pass
                        else:
                            # The Agent tries to go through a closed door
                            return (x, y), (x, y), h.NOT_VALID
                    else:
                        # This is a vertical Door Configuration
                        if np.sum(local_agent_map[:, 0]) >= 2 or np.sum(local_agent_map[:, -1]) >= 2:
                            # The Agent goes back to where he came from
                            pass
                        else:
                            # The Agent tries to go through a closed door
                            return (x, y), (x, y), h.NOT_VALID
            else:
                pass
        else:
            pass
        valid = h.check_position(self._state[self._state_slices.by_name(h.LEVEL)], (x_new, y_new))
        return (x, y), (x_new, y_new), valid
    def agent_i_position(self, agent_i: int) -> (int, int):
        """
        Look up the position of agent ``agent_i`` in its state slice.

        :param agent_i: Index of the agent.
        :return:        ``(x, y)`` of the single occupied cell in the agent's slice.
        :raises AssertionError: If the slice does not contain exactly one occupied cell.
        """
        agent_layer = self._state[self._state_slices.AGENTSTARTIDX + agent_i]
        hits = np.argwhere(agent_layer == h.IS_OCCUPIED_CELL)
        assert hits.shape[0] == 1
        row, col = hits[0]
        return row, col
    def free_cells(self, excluded_slices: Union[None, List[int], int] = None) -> np.array:
        """
        Return the coordinates of all free cells in random order.

        A cell counts as free if the sum over all considered state slices equals ``h.IS_FREE_CELL``
        at that position.

        :param excluded_slices: Index or list of indices (negative indices allowed) of state slices
                                that are ignored when checking for occupation. ``None`` considers all.
        :return:                Shuffled array of ``(x, y)`` index rows.
        :raises AssertionError: If ``excluded_slices`` is neither ``None``, an int nor a list.
        """
        # Test for None explicitly: a plain truthiness check would discard the valid slice index 0.
        if excluded_slices is None:
            excluded_slices = []
        assert isinstance(excluded_slices, (int, list))
        if not isinstance(excluded_slices, list):
            excluded_slices = [excluded_slices]

        state = self._state
        if excluded_slices:
            # Boolean mask over the slice axis; integer indexing handles negative indices for us.
            keep_slice = np.full(self._state.shape[0], True)
            keep_slice[excluded_slices] = False
            state = self._state[keep_slice]

        free_cells = np.argwhere(state.sum(0) == h.IS_FREE_CELL)
        # Shuffle in place so callers can simply take the first k entries as a random sample.
        np.random.shuffle(free_cells)
        return free_cells
    def calculate_reward(self, agent_states: List[AgentState]) -> (int, dict):
        """
        Hook for subclasses: compute the reward of the current step.

        ``step`` unpacks the result as ``reward, info`` and calls ``info.update(...)`` on the second
        element.

        :param agent_states:    The current state of every agent.
        :raises NotImplementedError: Always, in this base implementation.
        """
        # Returns: Reward, Info
        raise NotImplementedError
    def render(self, mode='human'):
        """
        Hook for subclasses: visualise the current state.

        :raises NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError
    def save_params(self, filepath: Path):
        """
        Dump all public instance attributes as YAML.

        Attributes whose name starts with an underscore are skipped. Missing parent directories of
        ``filepath`` are created; an existing file is overwritten.

        :param filepath:    Target file.
        """
        public_params = {}
        for key, val in self.__dict__.items():
            # A single leading underscore already covers dunder names as well.
            if key.startswith('_'):
                continue
            public_params[key] = val
        filepath.parent.mkdir(parents=True, exist_ok=True)
        with filepath.open('w') as f:
            yaml.dump(public_params, f)
--- a/environments/factory/renderer.py
+++ b/environments/factory/renderer.py
@ -53,7 +53,7 @@ class Renderer:
    def blit_params(self, entity):
        r, c = entity.pos
-        img = self.assets[entity.name]
+        img = self.assets[entity.name.lower()]
        if entity.value_operation == 'opacity':
            img.set_alpha(255*entity.value)
        elif entity.value_operation == 'scale':
--- a/environments/factory/simple_factory.py
+++ b/environments/factory/simple_factory.py
@ -3,13 +3,17 @@ import random
 import numpy as np
-from environments.factory.base_factory import BaseFactory
+
 from environments import helpers as h
 from environments.helpers import Constants as c
 from environments.factory.base.base_factory import BaseFactory
 from environments.factory.base.objects import Agent, Action, Object, Slice
 from environments.factory.base.registers import Entities
 from environments.factory.renderer import Renderer, Entity
-from environments.utility_classes import AgentState, MovementProperties
+from environments.utility_classes import MovementProperties
-DIRT_INDEX = -1
+DIRT = "dirt"
 CLEAN_UP_ACTION = 'clean_up'
@ -26,95 +30,104 @@ class DirtProperties(NamedTuple):
 class SimpleFactory(BaseFactory):
    @property
-    def additional_actions(self) -> List[str]:
+    def additional_actions(self) -> List[Object]:
-        return [CLEAN_UP_ACTION]
+        return [Action(CLEAN_UP_ACTION)]
    @property
    def additional_entities(self) -> Union[Entities, List[Entities]]:
        return []
    @property
    def additional_slices(self) -> List[Slice]:
        return [Slice('dirt', np.zeros(self._level_shape))]
    def _is_clean_up_action(self, action: Union[str, int]):
        if isinstance(action, str):
            action = self._actions.by_name(action)
-        return self._actions[action] == CLEAN_UP_ACTION
+        return self._actions[action].name == CLEAN_UP_ACTION
    def __init__(self, *args, dirt_properties: DirtProperties = DirtProperties(), verbose=False, **kwargs):
        self.dirt_properties = dirt_properties
        self.verbose = verbose
        self.max_dirt = 20
        self._renderer = None  # expensive - don't use it when not required !
-        super(SimpleFactory, self).__init__(*args, additional_slices=['dirt'], **kwargs)
+        super(SimpleFactory, self).__init__(*args, **kwargs)
    def _flush_state(self):
        super(SimpleFactory, self)._flush_state()
        self._obs_cube[self._slices.get_idx_by_name(DIRT)] = self._slices.by_name(DIRT).slice
    def render(self, mode='human'):
        if not self._renderer:  # lazy init
-            height, width = self._state.shape[1:]
+            height, width = self._obs_cube.shape[1:]
            self._renderer = Renderer(width, height, view_radius=self.pomdp_radius, fps=5)
-
+        dirt_slice = self._slices.by_name(DIRT).slice
-        dirt = [Entity('dirt', [x, y], min(0.15 + self._state[DIRT_INDEX, x, y], 1.5), 'scale')
+        dirt = [Entity('dirt', tile.pos, min(0.15 + dirt_slice[tile.pos], 1.5), 'scale')
-                for x, y in np.argwhere(self._state[DIRT_INDEX] > h.IS_FREE_CELL)]
+                for tile in [tile for tile in self._tiles if dirt_slice[tile.pos]]]
        walls = [Entity('wall', pos)
-                 for pos in np.argwhere(self._state[self._state_slices.by_name(h.LEVEL)] > h.IS_FREE_CELL)]
+                 for pos in np.argwhere(self._slices.by_enum(c.LEVEL).slice == c.OCCUPIED_CELL.value)]
        def asset_str(agent):
-            if any([x is None for x in [self._state_slices[j] for j in agent.collisions]]):
+            # What does this abonimation do?
-                print('error')
+            # if any([x is None for x in [self._slices[j] for j in agent.collisions]]):
-            cols = ' '.join([self._state_slices[j] for j in agent.collisions])
+            #     print('error')
-            if h.AGENT in cols:
+            col_names = [x.name for x in agent.temp_collisions]
            if c.AGENT.value in col_names:
                return 'agent_collision', 'blank'
-            elif not agent.action_valid or 'level' in cols or h.AGENT in cols:
+            elif not agent.temp_valid or c.LEVEL.name in col_names or c.AGENT.name in col_names:
-                return h.AGENT, 'invalid'
+                return c.AGENT.value, 'invalid'
-            elif self._is_clean_up_action(agent.action):
+            elif self._is_clean_up_action(agent.temp_action):
-                return h.AGENT, 'valid'
+                return c.AGENT.value, 'valid'
            else:
-                return h.AGENT, 'idle'
+                return c.AGENT.value, 'idle'
        agents = []
-        for i, agent in enumerate(self._agent_states):
+        for i, agent in enumerate(self._agents):
            name, state = asset_str(agent)
            agents.append(Entity(name, agent.pos, 1, 'none', state, i+1))
        doors = []
-        if self.has_doors:
+        if self.parse_doors:
-            for i, door in enumerate(self._door_states):
+            for i, door in enumerate(self._doors):
                name, state = 'door_open' if door.is_open else 'door_closed', 'blank'
                agents.append(Entity(name, door.pos, 1, 'none', state, i+1))
        self._renderer.render(dirt+walls+agents+doors)
    def spawn_dirt(self) -> None:
-        if not np.argwhere(self._state[DIRT_INDEX] != h.IS_FREE_CELL).shape[0] > self.dirt_properties.max_global_amount:
+        dirt_slice = self._slices.by_name(DIRT).slice
-            free_for_dirt = self.free_cells(excluded_slices=DIRT_INDEX)
+        # dirty_tiles = [tile for tile in self._tiles if dirt_slice[tile.pos]]
        curr_dirt_amount = dirt_slice.sum()
        if not curr_dirt_amount > self.dirt_properties.max_global_amount:
            free_for_dirt = self._tiles.empty_tiles
            # randomly distribute dirt across the grid
            n_dirt_tiles = int(random.uniform(0, self.dirt_properties.max_spawn_ratio) * len(free_for_dirt))
-            for x, y in free_for_dirt[:n_dirt_tiles]:
+            for tile in free_for_dirt[:n_dirt_tiles]:
-                new_value = self._state[DIRT_INDEX, x, y] + self.dirt_properties.gain_amount
+                new_value = dirt_slice[tile.pos] + self.dirt_properties.gain_amount
-                self._state[DIRT_INDEX, x, y] = max(new_value, self.dirt_properties.max_local_amount)
+                dirt_slice[tile.pos] = min(new_value, self.dirt_properties.max_local_amount)
        else:
            pass
-    def clean_up(self, pos: (int, int)) -> ((int, int), bool):
+    def clean_up(self, agent: Agent) -> bool:
-        new_dirt_amount = self._state[DIRT_INDEX][pos] - self.dirt_properties.clean_amount
+        dirt_slice = self._slices.by_name(DIRT).slice
-        cleanup_was_sucessfull: bool
+        if dirt_slice[agent.pos]:
-        if self._state[DIRT_INDEX][pos] == h.IS_FREE_CELL:
+            new_dirt_amount = dirt_slice[agent.pos] - self.dirt_properties.clean_amount
-            cleanup_was_sucessfull = False
+            dirt_slice[agent.pos] = max(new_dirt_amount, c.FREE_CELL.value)
-            return pos, cleanup_was_sucessfull
+            return True
        else:
-            cleanup_was_sucessfull = True
+            return False
            self._state[DIRT_INDEX][pos] = max(new_dirt_amount, h.IS_FREE_CELL)
            return pos, cleanup_was_sucessfull
-    def step(self, actions):
+    def post_step(self) -> dict:
        _, reward, done, info = super(SimpleFactory, self).step(actions)
        if not self._next_dirt_spawn:
            self.spawn_dirt()
            self._next_dirt_spawn = self.dirt_properties.spawn_frequency
        else:
            self._next_dirt_spawn -= 1
        return {}
-        obs = self._get_observations()
+    def do_additional_actions(self, agent: Agent, action: int) -> bool:
        return obs, reward, done, info
    def do_additional_actions(self, agent_i: int, action: int) -> ((int, int), bool):
        if action != self._actions.is_moving_action(action):
            if self._is_clean_up_action(action):
-                agent_i_pos = self.agent_i_position(agent_i)
+                valid = self.clean_up(agent)
-                _, valid = self.clean_up(agent_i_pos)
+                return valid
                return agent_i_pos, valid
            else:
                raise RuntimeError('This should not happen!!!')
        else:
@ -122,19 +135,21 @@ class SimpleFactory(BaseFactory):
    def reset(self) -> (np.ndarray, int, bool, dict):
        _ = super().reset()  # state, reward, done, info ... =
        dirt_slice = np.zeros((1, *self._state.shape[1:]))
        self._state = np.concatenate((self._state, dirt_slice))  # dirt is now the last slice
        self.spawn_dirt()
        self._next_dirt_spawn = self.dirt_properties.spawn_frequency
        obs = self._get_observations()
        return obs
-    def calculate_reward(self, agent_states: List[AgentState]) -> (int, dict):
+    def calculate_reward(self) -> (int, dict):
        info_dict = dict()
-        current_dirt_amount = self._state[DIRT_INDEX].sum()
+
-        dirty_tiles = np.argwhere(self._state[DIRT_INDEX] != h.IS_FREE_CELL).shape[0]
+        dirt_slice = self._slices.by_name(DIRT).slice
        dirty_tiles = [dirt_slice[tile.pos] for tile in self._tiles if dirt_slice[tile.pos]]
        current_dirt_amount = sum(dirty_tiles)
        dirty_tile_count = len(dirty_tiles)
        info_dict.update(dirt_amount=current_dirt_amount)
-        info_dict.update(dirty_tile_count=dirty_tiles)
+        info_dict.update(dirty_tile_count=dirty_tile_count)
        try:
            # penalty = current_dirt_amount
@ -142,52 +157,47 @@ class SimpleFactory(BaseFactory):
        except (ZeroDivisionError, RuntimeWarning):
            reward = 0
-        for agent_state in agent_states:
+        for agent in self._agents:
-            agent_name = f'{h.AGENT.capitalize()} {agent_state.i}'
+            if agent.temp_collisions:
-            cols = agent_state.collisions
+                self.print(f't = {self._steps}\t{agent.name} has collisions with {agent.temp_collisions}')
-            list_of_collisions = [self._state_slices[entity] for entity in cols
+            if self._is_clean_up_action(agent.temp_action):
-                                  if entity != self._state_slices.by_name('dirt')]
+                if agent.temp_valid:
            if list_of_collisions:
                self.print(f't = {self._steps}\t{agent_name} has collisions with {list_of_collisions}')
            if self._is_clean_up_action(agent_state.action):
                if agent_state.action_valid:
                    reward += 1
-                    self.print(f'{agent_name} did just clean up some dirt at {agent_state.pos}.')
+                    self.print(f'{agent.name} did just clean up some dirt at {agent.pos}.')
                    info_dict.update(dirt_cleaned=1)
                else:
                    reward -= 0.01
-                    self.print(f'{agent_name} just tried to clean up some dirt at {agent_state.pos}, but failed.')
+                    self.print(f'{agent.name} just tried to clean up some dirt at {agent.pos}, but failed.')
-                    info_dict.update({f'{h.AGENT}_{agent_state.i}_failed_action': 1})
+                    info_dict.update({f'{agent.name}_failed_action': 1})
-                    info_dict.update({f'{h.AGENT}_{agent_state.i}_failed_dirt_cleanup': 1})
+                    info_dict.update({f'{agent.name}_failed_dirt_cleanup': 1})
-            elif self._actions.is_moving_action(agent_state.action):
+            elif self._actions.is_moving_action(agent.temp_action):
-                if agent_state.action_valid:
+                if agent.temp_valid:
                    # info_dict.update(movement=1)
                    reward -= 0.00
                else:
                    # self.print('collision')
-                    reward -= 0.01
+                    reward -= 0.05
                    self.print(f'{agent.name} just hit the wall at {agent.pos}.')
                    info_dict.update({f'{agent.name}_vs_LEVEL': 1})
-            elif self._actions.is_door_usage(agent_state.action):
+            elif self._actions.is_door_usage(agent.temp_action):
-                if agent_state.action_valid:
+                if agent.temp_valid:
-                    reward += 0.1
+                    self.print(f'{agent.name} did just use the door at {agent.pos}.')
                    self.print(f'{agent_name} did just use the door at {agent_state.pos}.')
                    info_dict.update(door_used=1)
                else:
-                    self.print(f'{agent_name} just tried to use a door at {agent_state.pos}, but failed.')
+                    reward -= 0.01
-                    info_dict.update({f'{h.AGENT}_{agent_state.i}_failed_action': 1})
+                    self.print(f'{agent.name} just tried to use a door at {agent.pos}, but failed.')
-                    info_dict.update({f'{h.AGENT}_{agent_state.i}_failed_door_open': 1})
+                    info_dict.update({f'{agent.name}_failed_action': 1})
                    info_dict.update({f'{agent.name}_failed_door_open': 1})
            else:
                info_dict.update(no_op=1)
                reward -= 0.00
-            for entity in list_of_collisions:
+            for other_agent in agent.temp_collisions:
-                entity = h.AGENT if h.AGENT in entity else entity
+                info_dict.update({f'{agent.name}_vs_{other_agent.name}': 1})
                info_dict.update({f'{h.AGENT}_{agent_state.i}_vs_{entity}': 1})
        self.print(f"reward is {reward}")
        # Potential based rewards ->
@ -205,13 +215,13 @@ if __name__ == '__main__':
    move_props = MovementProperties(allow_diagonal_movement=True, allow_square_movement=True)
    dirt_props = DirtProperties()
    factory = SimpleFactory(movement_properties=move_props, dirt_properties=dirt_props, n_agents=10,
-                            combin_agent_slices_in_obs=True, level_name='rooms',
+                            combin_agent_slices_in_obs=False, level_name='rooms', parse_doors=True,
                            pomdp_radius=3)
    n_actions = factory.action_space.n - 1
    _ = factory.observation_space
-    for epoch in range(10000):
+    for epoch in range(100):
        random_actions = [[random.randint(0, n_actions) for _ in range(factory.n_agents)] for _ in range(200)]
        env_state = factory.reset()
        r = 0
--- a/environments/helpers.py
+++ b/environments/helpers.py
@ -1,27 +1,46 @@
 from collections import defaultdict
-from typing import Tuple
+from enum import Enum, auto
 from typing import Tuple, Union
 import numpy as np
 from pathlib import Path
 # Constants
-WALL = '#'
+class Constants(Enum):
-DOOR = 'D'
+    WALL = '#'
-DANGER_ZONE = 'x'
+    DOOR = 'D'
    DANGER_ZONE = 'x'
    LEVEL = 'level'
    AGENT = 'Agent'
    FREE_CELL = 0
    OCCUPIED_CELL = 1
    DOORS = 'doors'
    IS_CLOSED_DOOR = 1
    IS_OPEN_DOOR = -1
    LEVEL_IDX = 0
    ACTION = auto()
    COLLISIONS = auto()
    VALID = True
    NOT_VALID = False
    def __bool__(self):
        return bool(self.value)
 LEVELS_DIR = 'levels'
 LEVEL = 'level'
 AGENT = 'agent'
 IS_FREE_CELL = 0
 IS_OCCUPIED_CELL = 1
 DOORS = 'doors'
 IS_CLOSED_DOOR = IS_OCCUPIED_CELL
 IS_OPEN_DOOR = -1
 LEVEL_IDX = 0
 TO_BE_AVERAGED = ['dirt_amount', 'dirty_tiles']
-IGNORED_DF_COLUMNS = ['Episode', 'Run', 'train_step', 'step', 'index', 'dirt_amount', 'dirty_tile_count']
+IGNORED_DF_COLUMNS = ['Episode', 'Run', 'train_step', 'step', 'index', 'dirt_amount',
                      'dirty_tile_count', 'terminal_observation', 'episode']
 MANHATTAN_MOVES = ['north', 'east', 'south', 'west']
 DIAGONAL_MOVES = ['north_east', 'south_east', 'south_west', 'north_west']
 NO_POS = (-9999, -9999)
 ACTIONMAP = defaultdict(lambda: (0, 0), dict(north=(-1, 0), east=(0, 1),
                                             south=(1, 0), west=(0, -1),
@ -38,8 +57,7 @@ HORIZONTAL_DOOR_ZONE_2 = np.asarray([[0, 0, 0], [0, 0, 0], [1, 1, 1]])
 VERTICAL_DOOR_ZONE_1 = np.asarray([[1, 0, 0], [0, 0, 0], [0, 0, 1]])
 VERTICAL_DOOR_ZONE_2 = np.asarray([[1, 0, 0], [0, 0, 0], [0, 0, 1]])
-NOT_VALID = False
+
 VALID = True
 # Utility functions
@ -51,10 +69,13 @@ def parse_level(path):
    return level
-def one_hot_level(level, wall_char=WALL):
+def one_hot_level(level, wall_char: Union[Constants, str] = Constants.WALL):
    grid = np.array(level)
    binary_grid = np.zeros(grid.shape, dtype=np.int8)
-    binary_grid[grid == wall_char] = 1
+    if wall_char in Constants:
        binary_grid[grid == wall_char.value] = Constants.OCCUPIED_CELL.value
    else:
        binary_grid[grid == wall_char] = Constants.OCCUPIED_CELL.value
    return binary_grid
@ -70,7 +91,7 @@ def check_position(slice_to_check_against: np.ndarray, position_to_check: Tuple[
    # Check for collision with level walls
    valid = valid and not slice_to_check_against[x_pos, y_pos]
-    return valid
+    return Constants.VALID if valid else Constants.NOT_VALID
 if __name__ == '__main__':
--- a/environments/logging/monitor.py
+++ b/environments/logging/monitor.py
@ -1,5 +1,6 @@
 import pickle
 from pathlib import Path
 from typing import List, Dict
 from stable_baselines3.common.callbacks import BaseCallback
@ -66,13 +67,15 @@ class MonitorCallback(BaseCallback):
                print('Plotting done.')
            self.closed = True
-    def _on_step(self) -> bool:
+    def _on_step(self, alt_infos: List[Dict] = None, alt_dones: List[bool] = None) -> bool:
-        for _, info in enumerate(self.locals.get('infos', [])):
+        infos = alt_infos or self.locals.get('infos', [])
        dones = alt_dones or self.locals.get('dones', None) or self.locals.get('done', [None])
        for _, info in enumerate(infos):
            self._monitor_dict[self.num_timesteps] = {key: val for key, val in info.items()
-                                                      if key not in ['terminal_observation', 'episode']}
+                                                      if key not in ['terminal_observation', 'episode']
                                                      and not key.startswith('rec_')}
-        for env_idx, done in list(enumerate(self.locals.get('dones', []))) + \
+        for env_idx, done in enumerate(dones):
                             list(enumerate(self.locals.get('done', []))):
            if done:
                env_monitor_df = pd.DataFrame.from_dict(self._monitor_dict, orient='index')
                self._monitor_dict = dict()
--- a/environments/logging/recorder.py
+++ b/environments/logging/recorder.py
@ -0,0 +1,74 @@
 import json
 from pathlib import Path
 from typing import Union
 import pandas as pd
 from stable_baselines3.common.callbacks import BaseCallback
 from environments.factory.base.base_factory import REC_TAC
 from environments.helpers import IGNORED_DF_COLUMNS
 class RecorderCallback(BaseCallback):
    """Stable-Baselines3 callback that gathers per-step ``info`` entries and dumps them as JSON.

    Step infos are buffered in a dict keyed by ``num_timesteps``. Whenever a
    ``done`` flag is seen, the buffer is aggregated into one row that is added
    to an internal DataFrame. The frame is written to ``filepath`` when
    training ends. The instance can also be used as a context manager.

    Args:
        filepath: Target JSON file; its parent directory is created on start.
        occupation_map: If set, a notice is printed after the dump.
        trajectory_map: If set, a notice is printed after the dump.
    """

    def __init__(self, filepath: Union[str, Path], occupation_map: bool = False, trajectory_map: bool = False):
        super(RecorderCallback, self).__init__()
        self.trajectory_map = trajectory_map
        self.occupation_map = occupation_map
        self.filepath = Path(filepath)
        self._recorder_dict = dict()
        self._recorder_df = pd.DataFrame()
        self.started = False
        self.closed = False

    def _on_step(self) -> bool:
        """Buffer the current step infos and aggregate them once an episode is done.

        Returns:
            Always ``True``.
        """
        # NOTE(review): this keeps every key that does NOT start with f'{REC_TAC}_'.
        # That looks inverted for a recorder - confirm the intended filter.
        # NOTE(review): all infos of one step share the key ``num_timesteps``, so
        # with several environments only the last info survives.
        for info in self.locals.get('infos', []):
            self._recorder_dict[self.num_timesteps] = {key: val for key, val in info.items()
                                                       if not key.startswith(f'{REC_TAC}_')}

        # Both spellings ('dones' and 'done') are looked up in ``self.locals``.
        done_flags = list(self.locals.get('dones', [])) + list(self.locals.get('done', []))
        for done in done_flags:
            if not done:
                continue
            env_monitor_df = pd.DataFrame.from_dict(self._recorder_dict, orient='index')
            self._recorder_dict = dict()
            columns = [col for col in env_monitor_df.columns if col not in IGNORED_DF_COLUMNS]
            # Columns whose name ends in 'ount' are averaged, everything else is summed.
            env_monitor_df = env_monitor_df.aggregate(
                {col: 'mean' if col.endswith('ount') else 'sum' for col in columns}
            )
            env_monitor_df['episode'] = len(self._recorder_df)
            # ``DataFrame.append`` was removed in pandas 2.0; ``concat`` works on all versions.
            self._recorder_df = pd.concat([self._recorder_df, pd.DataFrame([env_monitor_df])])
        return True

    def __enter__(self):
        """Prepare the output location and return the callback for ``with ... as``."""
        self._on_training_start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Dump the recorded data when the ``with`` block is left."""
        self._on_training_end()

    def _on_training_start(self) -> None:
        """Create the output directory once; later calls are no-ops."""
        if not self.started:
            self.filepath.parent.mkdir(exist_ok=True, parents=True)
            self.started = True

    def _on_training_end(self) -> None:
        """Write the recorded frame to ``filepath`` as indented JSON; only done once."""
        if self.closed:
            return
        # Serialise before opening the file, so a failure here cannot leave a truncated file.
        parsed = json.loads(self._recorder_df.to_json(orient="table"))
        with self.filepath.open('w') as f:
            json.dump(parsed, f, indent=4)
        if self.occupation_map:
            print('Recorder files were dumped to disk, now plotting the occupation map...')
        if self.trajectory_map:
            print('Recorder files were dumped to disk, now plotting the trajectory map...')
        self.closed = True
--- a/environments/oo_factory/_base_factory.py
+++ b/environments/oo_factory/_base_factory.py
@ -1,68 +0,0 @@
 from typing import List, Union
 import gym
 class Entities:
    """Placeholder container; it holds no state."""

    def __init__(self):
        """Intentionally does nothing."""
 # noinspection PyAttributeOutsideInit
 class BaseFactory(gym.Env):
    """Gym environment skeleton that builds its state slices, zones and actions from a level file."""

    def __init__(self, level_name='simple', n_agents=1, max_steps=int(5e2), pomdp_radius: Union[None, int] = 0,
                 movement_properties: MovementProperties = MovementProperties(),
                 combin_agent_slices_in_obs: bool = False, frames_to_stack=0,
                 omit_agent_slice_in_obs=False, **kwargs):
        """Store the configuration, parse the level and register slices, zones and actions.

        An optional ``additional_slices`` keyword is registered after the level
        and agent slices. ``reset`` is called at the end of construction.
        """
        options_are_exclusive = (combin_agent_slices_in_obs != omit_agent_slice_in_obs) or \
                                (not combin_agent_slices_in_obs and not omit_agent_slice_in_obs)
        assert options_are_exclusive, 'Both options are exclusive'
        assert frames_to_stack != 1 and frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1."

        # Plain configuration values.
        self.movement_properties = movement_properties
        self.level_name = level_name
        self.n_agents = n_agents
        self.max_steps = max_steps
        self.pomdp_radius = pomdp_radius
        self.combin_agent_slices_in_obs = combin_agent_slices_in_obs
        self.omit_agent_slice_in_obs = omit_agent_slice_in_obs
        self.frames_to_stack = frames_to_stack
        self.done_at_collision = False

        self._state_slices = StateSlices()

        # Levels are looked up next to this module: <LEVELS_DIR>/<level_name>.txt
        level_filepath = Path(__file__).parent / h.LEVELS_DIR / f'{self.level_name}.txt'
        parsed_level = h.parse_level(level_filepath)
        self._level = h.one_hot_level(parsed_level)

        # A door slice is only kept when the level contains at least one door.
        parsed_doors = h.one_hot_level(parsed_level, h.DOOR)
        level_slices = ['level']
        can_use_doors = bool(parsed_doors.any())
        if can_use_doors:
            self._doors = parsed_doors
            level_slices.append('doors')

        # Agent slices are numbered starting right after the level slices.
        offset = len(level_slices)
        agent_slices = [f'agent#{i}' for i in range(offset, n_agents + offset)]
        self._state_slices.register_additional_items([*level_slices, *agent_slices])
        if 'additional_slices' in kwargs:
            self._state_slices.register_additional_items(kwargs['additional_slices'])

        self._zones = Zones(parsed_level)
        self._actions = Actions(self.movement_properties, can_use_doors=can_use_doors)
        self._actions.register_additional_items(self.additional_actions)
        self.reset()

    def __enter__(self):
        """Return the environment, wrapped in ``FrameStack`` when frames are stacked."""
        if self.frames_to_stack == 0:
            return self
        return FrameStack(self, self.frames_to_stack)

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the environment when the ``with`` block is left."""
        self.close()

    def step(self, actions: Union[int, List[int]]):
        """Normalise ``actions`` to a list and advance the entities by one step."""
        if not isinstance(actions, list):
            actions = [actions]
        self.entities.step()
--- a/environments/utility_classes.py
+++ b/environments/utility_classes.py
@ -1,298 +1,7 @@
-from typing import Union, List, NamedTuple, Tuple
+from typing import NamedTuple
 import numpy as np
 from environments import helpers as h
 # Labels stored in the ``_state`` attribute of doors in this module.
 IS_CLOSED = 'CLOSED'
 IS_OPEN = 'OPEN'
 class MovementProperties(NamedTuple):
    """Immutable group of flags selecting which movement actions are available."""
    # Axis-aligned moves; enabled by default.
    allow_square_movement: bool = True
    # Diagonal moves; disabled by default.
    allow_diagonal_movement: bool = False
    # Explicit no-op action; disabled by default.
    allow_no_op: bool = False
 # Preparations for Entities (not used yet)
 class Entity:
    """Base record that pairs an identifier with a position."""

    def __init__(self, identifier, pos):
        """Remember ``identifier`` and ``pos``; both are exposed read-only."""
        self._identifier = identifier
        self._pos = pos

    @property
    def identifier(self):
        """Identifier passed at construction."""
        return self._identifier

    @property
    def pos(self):
        """Currently stored position."""
        return self._pos
 class Door(Entity):
    """Entity whose state toggles between ``IS_CLOSED`` and ``IS_OPEN``."""

    def __init__(self, *args, closed_on_init=True, **kwargs):
        """Forward positional and keyword arguments to ``Entity`` and set the initial state."""
        super(Door, self).__init__(*args, **kwargs)
        if closed_on_init:
            self._state = IS_CLOSED
        else:
            self._state = IS_OPEN

    @property
    def status(self):
        """Raw state label."""
        return self._state

    @property
    def is_open(self):
        """True while the state equals ``IS_OPEN``."""
        return self._state == IS_OPEN

    @property
    def is_closed(self):
        """True while the state equals ``IS_CLOSED``."""
        return self._state == IS_CLOSED

    def use(self):
        """Toggle the door: an open door closes, anything else opens."""
        if self._state == IS_OPEN:
            self._state = IS_CLOSED
        else:
            self._state = IS_OPEN
 class Agent(Entity):
    """Entity that also remembers the offset of its most recent move."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``Entity``; the vision direction starts as ``(None, None)``."""
        super(Agent, self).__init__(*args, **kwargs)
        self._direction_of_vision = (None, None)

    @property
    def direction_of_vision(self):
        """Offset ``old - new`` of the last move, or ``(None, None)`` before any move."""
        return self._direction_of_vision

    def move(self, new_pos: Tuple[int, int]):
        """Store ``new_pos``, update the vision direction and return the stored position."""
        prev_x, prev_y = self.pos
        self._pos = new_pos
        next_x, next_y = new_pos
        # The direction is previous minus new position, component-wise.
        offset = (prev_x - next_x, prev_y - next_y)
        self._direction_of_vision = offset
        return self._pos
 class AgentState:
    """Mutable per-agent record of the last action, its validity, collisions and position.

    Args:
        i: Index of the agent.
        action: Action stored for the agent.
        pos: Optional initial position.
    """

    @property
    def collisions(self):
        """Flat array of the indices where ``collision_vector`` is non-zero."""
        return np.argwhere(self.collision_vector != 0).flatten()

    @property
    def direction_of_view(self):
        """Component-wise difference ``last position - current position``."""
        last_x, last_y = self._last_pos
        curr_x, curr_y = self.pos
        return last_x-curr_x, last_y-curr_y

    def __init__(self, i: int, action: int, pos=None):
        self.i = i
        self.action = action
        self.collision_vector = None
        self.action_valid = None
        self.pos = pos
        self._last_pos = (-1, -1)

    def update(self, **kwargs):
        """Overwrite existing attributes with the given keyword values.

        The previous position is remembered in ``_last_pos`` when the action
        is valid and the position changed.

        Raises:
            AttributeError: If a keyword names an attribute this object does
                not have. Keywords processed before it stay applied.
        """
        last_pos = self.pos
        for key, value in kwargs.items():
            if not hasattr(self, key):
                # Instances have no ``__name__``; the class name lives on the type.
                raise AttributeError(
                    f'"{key}" cannot be updated, this attr is not a part of {self.__class__.__name__}'
                )
            setattr(self, key, value)
        if self.action_valid and last_pos != self.pos:
            self._last_pos = last_pos

    def reset(self):
        """Re-initialise the state, keeping ``i`` and ``action``; the position becomes ``None``."""
        self.__init__(self.i, self.action)
 class DoorState:
    """Mutable record of a door: index, position, open/closed state and auto-close settings."""

    def __init__(self, i: int, pos: Tuple[int, int], closed_on_init=True, auto_close_interval=10):
        """Store the arguments; ``time_to_close`` starts at -1."""
        self.i = i
        self.pos = pos
        if closed_on_init:
            self._state = IS_CLOSED
        else:
            self._state = IS_OPEN
        self.auto_close_interval = auto_close_interval
        self.time_to_close = -1

    @property
    def status(self):
        """Raw state label."""
        return self._state

    @property
    def is_open(self):
        """True while the state equals ``IS_OPEN``."""
        return self._state == IS_OPEN

    @property
    def is_closed(self):
        """True while the state equals ``IS_CLOSED``."""
        return self._state == IS_CLOSED

    def use(self):
        """Toggle the door; opening it re-arms ``time_to_close`` with the auto-close interval."""
        if self.is_open:
            self._state = IS_CLOSED
            return
        self._state = IS_OPEN
        self.time_to_close = self.auto_close_interval
 class Register:
    """Ordered registry that maps consecutive integer indices to string names."""

    def __init__(self):
        """Start with an empty index -> name mapping."""
        self._register = dict()

    @property
    def n(self):
        """Number of registered items."""
        return len(self)

    def __len__(self):
        return len(self._register)

    def __repr__(self):
        return f'{self.__class__.__name__}({self._register})'

    def __getitem__(self, item):
        """Return the name stored under index ``item``; prints 'NO' before re-raising ``KeyError``."""
        try:
            name = self._register[item]
        except KeyError:
            print('NO')
            raise
        else:
            return name

    def __add__(self, other: str):
        """Register ``other`` under the next free index and return the registry itself."""
        assert isinstance(other, str), f'All item names have to be of type {str}'
        next_index = len(self._register)
        self._register[next_index] = other
        return self

    def register_additional_items(self, others: List[str]):
        """Register every name in ``others`` in order and return the registry itself."""
        for name in others:
            self.__add__(name)
        return self

    def by_name(self, item):
        """Return the index of the first entry equal to ``item`` (``ValueError`` if absent)."""
        names = list(self._register.values())
        position = names.index(item)
        indices = list(self._register)
        return indices[position]

    def keys(self):
        """View of the registered indices."""
        return self._register.keys()

    def values(self):
        """View of the registered names."""
        return self._register.values()

    def items(self):
        """View of ``(index, name)`` pairs."""
        return self._register.items()
 class Agents(Register):
    """Register of agent names that keeps one ``Agent`` object per registered name.

    Indexing returns the ``Agent`` object; ``get_name`` returns the name.
    """

    def __init__(self, n_agents):
        """Register ``agent#0`` .. ``agent#<n_agents-1>`` and create their ``Agent`` objects."""
        super(Agents, self).__init__()
        # The list must exist before anything is registered: registering goes
        # through ``__add__``, which appends to it.
        self._agents = []
        self.register_additional_items([f'agent#{i}' for i in range(n_agents)])

    def __getitem__(self, item):
        """Return the ``Agent`` object at index ``item``."""
        return self._agents[item]

    def get_name(self, item):
        """Return the registered name at index ``item``."""
        return self._register[item]

    def by_name(self, item):
        """Return the ``Agent`` object registered under the name ``item``."""
        return self[super(Agents, self).by_name(item)]

    def __add__(self, other):
        """Register the name ``other``, create its ``Agent`` and return the register itself."""
        super(Agents, self).__add__(other)
        # The name was just stored under index ``len(self) - 1``; the agent's
        # identifier uses the same index so both stay in step.
        self._agents.append(Agent(len(self) - 1, (-1, -1)))
        return self
 class Actions(Register):
    """Register of action names built from movement properties and door usage."""

    def __init__(self, movement_properties: MovementProperties, can_use_doors=False):
        """Register movement actions first, then 'use_door' and 'no-op' when enabled."""
        self.allow_no_op = movement_properties.allow_no_op
        self.allow_diagonal_movement = movement_properties.allow_diagonal_movement
        self.allow_square_movement = movement_properties.allow_square_movement
        self.can_use_doors = can_use_doors
        super(Actions, self).__init__()

        movement_names = []
        if self.allow_square_movement:
            movement_names.extend(['north', 'east', 'south', 'west'])
        if self.allow_diagonal_movement:
            movement_names.extend(['north_east', 'south_east', 'south_west', 'north_west'])
        self.register_additional_items(movement_names)
        # Snapshot taken before the non-movement actions are added.
        self._movement_actions = self._register.copy()

        extra_names = []
        if self.can_use_doors:
            extra_names.append('use_door')
        if self.allow_no_op:
            extra_names.append('no-op')
        self.register_additional_items(extra_names)

    @property
    def movement_actions(self):
        """Index -> name mapping of the movement actions only."""
        return self._movement_actions

    def is_moving_action(self, action: Union[str, int]):
        """Tell whether ``action`` (name or index) is a movement action."""
        name = action if isinstance(action, str) else self[action]
        return name in self.movement_actions.values()

    def is_no_op(self, action: Union[str, int]):
        """Tell whether ``action`` (name or index) is the 'no-op' action."""
        index = self.by_name(action) if isinstance(action, str) else action
        return self[index] == 'no-op'

    def is_door_usage(self, action: Union[str, int]):
        """Tell whether ``action`` (name or index) is the 'use_door' action."""
        index = self.by_name(action) if isinstance(action, str) else action
        return self[index] == 'use_door'
 class StateSlices(Register):
    """Register of state slice names with a cached lookup of the first agent slice."""

    @property
    def AGENTSTARTIDX(self):
        """Smallest index whose name contains ``h.AGENT``; computed once, then cached.

        Raises:
            ValueError: If no registered name contains ``h.AGENT``.
        """
        # Compare against None: a start index of 0 is valid and must count as cached.
        if self._agent_start_idx is None:
            self._agent_start_idx = min(idx for idx, x in self.items() if h.AGENT in x)
        return self._agent_start_idx

    def __init__(self):
        super(StateSlices, self).__init__()
        self._agent_start_idx = None
 class Zones(Register):
    """Register of the zone symbols found in a parsed level, with one one-hot slice per zone.

    Indexing returns the slice of a zone; ``get_name`` returns its symbol.
    Zones are fixed at construction and cannot be extended afterwards.
    """

    @property
    def danger_zone(self):
        """Slice of the zone registered under ``h.DANGER_ZONE``."""
        # ``by_name`` already returns the slice. Indexing ``_zone_slices`` with
        # that array again would trigger fancy indexing.
        return self.by_name(h.DANGER_ZONE)

    @property
    def accounting_zones(self):
        """Slices of every zone except the danger zone."""
        return [self[idx] for idx, name in self.items() if name != h.DANGER_ZONE]

    def __init__(self, parsed_level):
        """Register every non-wall symbol of ``parsed_level`` and stack their one-hot slices."""
        super(Zones, self).__init__()
        slices = list()
        self._accounting_zones = list()
        self._danger_zones = list()
        for symbol in np.unique(parsed_level):
            if symbol == h.WALL:
                continue
            # ``+`` registers the symbol; ``register_additional_items`` is blocked below.
            self + symbol
            slices.append(h.one_hot_level(parsed_level, symbol))
            if symbol == h.DANGER_ZONE:
                self._danger_zones.append(symbol)
            else:
                self._accounting_zones.append(symbol)
        self._zone_slices = np.stack(slices)

    def __getitem__(self, item):
        """Return the slice stored at index ``item``."""
        return self._zone_slices[item]

    def get_name(self, item):
        """Return the zone symbol registered at index ``item``."""
        return self._register[item]

    def by_name(self, item):
        """Return the slice of the zone registered under the symbol ``item``."""
        return self[super(Zones, self).by_name(item)]

    def register_additional_items(self, other: Union[str, List[str]]):
        """Always raises: zones cannot be added at runtime."""
        raise AttributeError('You are not allowed to add additional Zones in runtime.')
--- a/main.py
+++ b/main.py
@ -9,11 +9,12 @@ import pandas as pd
 from stable_baselines3.common.callbacks import CallbackList
 from environments.factory.base_factory import MovementProperties
 from environments.factory.simple_factory import DirtProperties, SimpleFactory
 from environments.helpers import IGNORED_DF_COLUMNS
 from environments.logging.monitor import MonitorCallback
 from environments.logging.plotting import prepare_plot
 from environments.logging.recorder import RecorderCallback
 from environments.utility_classes import MovementProperties
 warnings.filterwarnings('ignore', category=FutureWarning)
 warnings.filterwarnings('ignore', category=UserWarning)
@ -91,8 +92,8 @@ if __name__ == '__main__':
    from algorithms.reg_dqn import RegDQN
    # from sb3_contrib import QRDQN
-    dirt_props = DirtProperties(clean_amount=3, gain_amount=0.2, max_global_amount=30,
+    dirt_props = DirtProperties(clean_amount=3, gain_amount=1, max_global_amount=30,
-                                max_local_amount=5, spawn_frequency=1, max_spawn_ratio=0.05)
+                                max_local_amount=5, spawn_frequency=3, max_spawn_ratio=0.05)
    move_props = MovementProperties(allow_diagonal_movement=True,
                                    allow_square_movement=True,
                                    allow_no_op=False)
@ -103,9 +104,10 @@ if __name__ == '__main__':
    for modeL_type in [A2C, PPO, RegDQN, DQN]:  # , QRDQN]:
        for seed in range(3):
-            with SimpleFactory(n_agents=1, dirt_properties=dirt_props, pomdp_radius=2, max_steps=400,
+            with SimpleFactory(n_agents=1, dirt_properties=dirt_props, pomdp_radius=2, max_steps=400, parse_doors=False,
                               movement_properties=move_props, level_name='rooms', frames_to_stack=4,
-                               omit_agent_slice_in_obs=False, combin_agent_slices_in_obs=True) as env:
+                               omit_agent_slice_in_obs=False, combin_agent_slices_in_obs=True, record_episodes=False
                               ) as env:
                if modeL_type.__name__ in ["PPO", "A2C"]:
                    kwargs = dict(ent_coef=0.01)
@ -127,10 +129,13 @@ if __name__ == '__main__':
                out_path /= identifier
                callbacks = CallbackList(
-                    [MonitorCallback(filepath=out_path / f'monitor_{identifier}.pick', plotting=False)]
+                    [MonitorCallback(filepath=out_path / f'monitor_{identifier}.pick', plotting=False),
                     RecorderCallback(filepath=out_path / f'recorder_{identifier}.json', occupation_map=False,
                                      trajectory_map=False
                                      )]
                )
-                model.learn(total_timesteps=int(1e5), callback=callbacks)
+                model.learn(total_timesteps=int(5e5), callback=callbacks)
                save_path = out_path / f'model_{identifier}.zip'
                save_path.parent.mkdir(parents=True, exist_ok=True)
--- a/reload_agent.py
+++ b/reload_agent.py
@ -14,7 +14,7 @@ warnings.filterwarnings('ignore', category=UserWarning)
 if __name__ == '__main__':
-    model_name = 'A2C_1623923982'
+    model_name = 'PPO_1626075586'
    run_id = 0
    out_path = Path(__file__).parent / 'debug_out'
    model_path = out_path / model_name