Rework of Observations and Entity Differentiation, lazy obs build by notification

2025-12-26 08:26:07 +01:00 · 2021-12-22 10:48:36 +01:00
parent 7f7a3d9a3b
commit b43f595207
14 changed files with 961 additions and 487 deletions
--- a/environments/factory/base/base_factory.py
+++ b/environments/factory/base/base_factory.py
@@ -1,7 +1,9 @@
 import abc
 import enum
 import time
 from collections import defaultdict
 from enum import Enum
 from itertools import chain
 from pathlib import Path
 from typing import List, Union, Iterable, Dict
 import numpy as np
@@ -14,7 +16,8 @@ from environments.factory.base.shadow_casting import Map
 from environments.helpers import Constants as c, Constants
 from environments import helpers as h
 from environments.factory.base.objects import Agent, Tile, Action
-from environments.factory.base.registers import Actions, Entities, Agents, Doors, FloorTiles, WallTiles, PlaceHolders
+from environments.factory.base.registers import Actions, Entities, Agents, Doors, FloorTiles, WallTiles, PlaceHolders, \
    GlobalPositions
 from environments.utility_classes import MovementProperties, ObservationProperties, MarlFrameStack
 from environments.utility_classes import AgentRenderOptions as a_obs
@@ -30,17 +33,31 @@ class BaseFactory(gym.Env):
    def action_space(self):
        return spaces.Discrete(len(self._actions))
    @property
    def named_action_space(self):
        return {x.identifier.value: idx for idx, x in enumerate(self._actions.values())}
    @property
    def observation_space(self):
-        if r := self._pomdp_r:
+        obs, _ = self._build_observations()
-            z = self._obs_cube.shape[0]
+        if self.n_agents > 1:
-            xy = r*2 + 1
+            shape = obs[0].shape
            level_shape = (z, xy, xy)
        else:
-            level_shape = self._obs_cube.shape
+            shape = obs.shape
-        space = spaces.Box(low=0, high=1, shape=level_shape, dtype=np.float32)
+        space = spaces.Box(low=0, high=1, shape=shape, dtype=np.float32)
        return space
    @property
    def named_observation_space(self):
        # Build it
        _, named_obs = self._build_observations()
        if self.n_agents > 1:
            # Only return the first named obs space, as their structure at the moment is same.
            return [{key.name: val for key, val in named_ob.items()} for named_ob in named_obs.values()][0]
        else:
            return {key.name: val for key, val in named_obs.items()}
    @property
    def pomdp_diameter(self):
        return self._pomdp_r * 2 + 1
@@ -86,11 +103,14 @@ class BaseFactory(gym.Env):
        self.obs_prop = obs_prop
        self.level_name = level_name
        self._level_shape = None
        self._obs_shape = None
        self.verbose = verbose
        self._renderer = None  # expensive - don't use it when not required !
        self._entities = Entities()
        self.n_agents = n_agents
        level_filepath = Path(__file__).parent.parent / h.LEVELS_DIR / f'{self.level_name}.txt'
        self._parsed_level = h.parse_level(level_filepath)
        self.max_steps = max_steps
        self._pomdp_r = self.obs_prop.pomdp_r
@@ -114,10 +134,12 @@ class BaseFactory(gym.Env):
        # Objects
        self._entities = Entities()
        # Level
-        level_filepath = Path(__file__).parent.parent / h.LEVELS_DIR / f'{self.level_name}.txt'
+
-        parsed_level = h.parse_level(level_filepath)
+        level_array = h.one_hot_level(self._parsed_level)
-        level_array = h.one_hot_level(parsed_level)
+        level_array = np.pad(level_array, self.obs_prop.pomdp_r, 'constant', constant_values=1)
        self._level_shape = level_array.shape
        self._obs_shape = self._level_shape if not self.obs_prop.pomdp_r else (self.pomdp_diameter, ) * 2
        # Walls
        walls = WallTiles.from_argwhere_coordinates(
@@ -134,13 +156,14 @@ class BaseFactory(gym.Env):
        self._entities.register_additional_items({c.FLOOR: floor})
        # NOPOS
-        self._NO_POS_TILE = Tile(c.NO_POS.value)
+        self._NO_POS_TILE = Tile(c.NO_POS.value, None)
        # Doors
        if self.parse_doors:
-            parsed_doors = h.one_hot_level(parsed_level, c.DOOR)
+            parsed_doors = h.one_hot_level(self._parsed_level, c.DOOR)
            parsed_doors = np.pad(parsed_doors, self.obs_prop.pomdp_r, 'constant', constant_values=0)
            if np.any(parsed_doors):
-                door_tiles = [floor.by_pos(pos) for pos in np.argwhere(parsed_doors == c.OCCUPIED_CELL.value)]
+                door_tiles = [floor.by_pos(tuple(pos)) for pos in np.argwhere(parsed_doors == c.OCCUPIED_CELL.value)]
                doors = Doors.from_tiles(door_tiles, self._level_shape,
                                         entity_kwargs=dict(context=floor)
                                         )
@@ -153,12 +176,11 @@ class BaseFactory(gym.Env):
        # Agents
        agents_to_spawn = self.n_agents-len(self._injected_agents)
-        agents_kwargs = dict(level_shape=self._level_shape,
+        agents_kwargs = dict(individual_slices=self.obs_prop.render_agents == a_obs.SEPERATE,
-                             individual_slices=self.obs_prop.render_agents == a_obs.SEPERATE,
+                             hide_from_obs_builder=self.obs_prop.render_agents in [a_obs.NOT, a_obs.LEVEL],
-                             hide_from_obs_builder=self.obs_prop.render_agents == a_obs.LEVEL,
+                             )
                             is_observable=self.obs_prop.render_agents != a_obs.NOT)
        if agents_to_spawn:
-            agents = Agents.from_tiles(floor.empty_tiles[:agents_to_spawn], **agents_kwargs)
+            agents = Agents.from_tiles(floor.empty_tiles[:agents_to_spawn], self._level_shape, **agents_kwargs)
        else:
            agents = Agents(**agents_kwargs)
        if self._injected_agents:
@@ -173,10 +195,10 @@ class BaseFactory(gym.Env):
            # TODO: Make this accept Lists for multiple placeholders
            # Empty Observations with either [0, 1, N(0, 1)]
-            placeholder = PlaceHolders.from_tiles([self._NO_POS_TILE], self._level_shape,
+            placeholder = PlaceHolders.from_values(self.obs_prop.additional_agent_placeholder, self._level_shape,
-                                                  entity_kwargs=dict(
+                                                   entity_kwargs=dict(
-                                                      fill_value=self.obs_prop.additional_agent_placeholder)
+                                                       fill_value=self.obs_prop.additional_agent_placeholder)
-                                                  )
+                                                   )
            self._entities.register_additional_items({c.AGENT_PLACEHOLDER: placeholder})
@@ -184,24 +206,22 @@ class BaseFactory(gym.Env):
        if additional_entities := self.additional_entities:
            self._entities.register_additional_items(additional_entities)
        if self.obs_prop.show_global_position_info:
            global_positions = GlobalPositions(self._level_shape)
            obs_shape_2d = self._level_shape if not self._pomdp_r else ((self.pomdp_diameter,) * 2)
            global_positions.spawn_GlobalPositionObjects(obs_shape_2d, self[c.AGENT])
            self._entities.register_additional_items({c.GLOBAL_POSITION: global_positions})
        # Return
        return self._entities
-    def _init_obs_cube(self):
+    def reset(self) -> (np.typing.ArrayLike, int, bool, dict):
        arrays = self._entities.obs_arrays
        obs_cube_z = sum([a.shape[0] if not self[key].is_per_agent else 1 for key, a in arrays.items()])
        obs_cube_z += 1 if self.obs_prop.show_global_position_info else 0
        self._obs_cube = np.zeros((obs_cube_z, *self._level_shape), dtype=np.float32)
    def reset(self) -> (np.ndarray, int, bool, dict):
        _ = self._base_init_env()
        self._init_obs_cube()
        self.do_additional_reset()
        self._steps = 0
-        obs = self._get_observations()
+        obs, _ = self._build_observations()
        return obs
    def step(self, actions):
@@ -264,7 +284,7 @@ class BaseFactory(gym.Env):
        # Post step Hook for later use
        info.update(self.hook_post_step())
-        obs = self._get_observations()
+        obs, _ = self._build_observations()
        return obs, reward, done, info
@@ -284,141 +304,120 @@ class BaseFactory(gym.Env):
        else:
            return c.NOT_VALID
-    def _get_observations(self) -> np.ndarray:
+    def _build_observations(self) -> np.typing.ArrayLike:
-        state_array_dict = self._entities.obs_arrays
+        # Observation dict:
-        if self.n_agents == 1:
+        per_agent_expl_idx = dict()
-            obs = self._build_per_agent_obs(self[c.AGENT][0], state_array_dict)
+        per_agent_obsn = dict()
-        elif self.n_agents >= 2:
+        # Generel Observations
-            obs = np.stack([self._build_per_agent_obs(agent, state_array_dict) for agent in self[c.AGENT]])
+        lvl_obs = self[c.WALLS].as_array()
-        else:
+        door_obs = self[c.DOORS].as_array()
-            raise ValueError('n_agents cannot be smaller than 1!!')
+        agent_obs = self[c.AGENT].as_array() if self.obs_prop.render_agents != a_obs.NOT else None
-        return obs
+        placeholder_obs = self[c.AGENT_PLACEHOLDER].as_array() if self[c.AGENT_PLACEHOLDER] else None
        add_obs_dict = self._additional_observations()
-    def _build_per_agent_obs(self, agent: Agent, state_array_dict) -> np.ndarray:
+        for agent_idx, agent in enumerate(self[c.AGENT]):
-        agent_pos_is_omitted = False
+            obs_dict = dict()
-        agent_omit_idx = None
+            # Build Agent Observations
-
+            if self.obs_prop.render_agents != a_obs.NOT:
-        if self.obs_prop.omit_agent_self and self.n_agents == 1:
+                if self.obs_prop.omit_agent_self:
-            pass
+                    if self.obs_prop.render_agents == a_obs.SEPERATE:
-        elif self.obs_prop.omit_agent_self and self.obs_prop.render_agents in [a_obs.COMBINED, ] and self.n_agents > 1:
+                        agent_obs = np.take(agent_obs, [x for x in range(self.n_agents) if x != agent_idx], axis=0)
            state_array_dict[c.AGENT][0, agent.x, agent.y] -= agent.encoding
            agent_pos_is_omitted = True
        elif self.obs_prop.omit_agent_self and self.obs_prop.render_agents == a_obs.SEPERATE and self.n_agents > 1:
            agent_omit_idx = next((i for i, a in enumerate(self[c.AGENT]) if a == agent))
        running_idx, shadowing_idxs, can_be_shadowed_idxs = 0, [], []
        self._obs_cube[:] = 0
        # FIXME: Refactor this! Make a globally build observation, then add individual per-agent-obs
        for key, array in state_array_dict.items():
            # Flush state array object representation to obs cube
            if not self[key].hide_from_obs_builder:
                if self[key].is_per_agent:
                    per_agent_idx = self[key].idx_by_entity(agent)
                    z = 1
                    self._obs_cube[running_idx: running_idx+z] = array[per_agent_idx]
                else:
                    if key == c.AGENT and agent_omit_idx is not None:
                        z = array.shape[0] - 1
                        for array_idx in range(array.shape[0]):
                            self._obs_cube[running_idx: running_idx+z] = array[[x for x in range(array.shape[0])
                                                                                if x != agent_omit_idx]]
                    # Agent OBS are combined
                    elif key == c.AGENT and self.obs_prop.omit_agent_self \
                            and self.obs_prop.render_agents == a_obs.COMBINED:
                        z = 1
                        self._obs_cube[running_idx: running_idx + z] = array
                    # Each Agent is rendered on a seperate array slice
                    else:
-                        z = array.shape[0]
+                        agent_obs = agent_obs.copy()
-                        self._obs_cube[running_idx: running_idx + z] = array
+                        agent_obs[(0, *agent.pos)] -= agent.encoding
                # Define which OBS SLices cast a Shadow
                if self[key].is_blocking_light:
                    for i in range(z):
                        shadowing_idxs.append(running_idx + i)
                # Define which OBS SLices are effected by shadows
                if self[key].can_be_shadowed:
                    for i in range(z):
                        can_be_shadowed_idxs.append(running_idx + i)
                running_idx += z
-        if agent_pos_is_omitted:
+            # Build Level Observations
-            state_array_dict[c.AGENT][0, agent.x, agent.y] += agent.encoding
+            if self.obs_prop.render_agents == a_obs.LEVEL:
-
+                lvl_obs = lvl_obs.copy()
-        if self._pomdp_r:
+                lvl_obs += agent_obs
            obs = self._do_pomdp_obs_cutout(agent, self._obs_cube)
        else:
            obs = self._obs_cube
        obs = obs.copy()
        if self.obs_prop.cast_shadows:
            obs_block_light = [obs[idx] != c.OCCUPIED_CELL.value for idx in shadowing_idxs]
            door_shadowing = False
            if self.parse_doors:
                if doors := self[c.DOORS]:
                    if door := doors.by_pos(agent.pos):
                        if door.is_closed:
                            for group in door.connectivity_subgroups:
                                if agent.last_pos not in group:
                                    door_shadowing = True
                                    if self._pomdp_r:
                                        blocking = [tuple(np.subtract(x, agent.pos) + (self._pomdp_r, self._pomdp_r))
                                                    for x in group]
                                        xs, ys = zip(*blocking)
                                    else:
                                        xs, ys = zip(*group)
                                    # noinspection PyUnresolvedReferences
                                    obs_block_light[0][xs, ys] = False
            light_block_map = Map((np.prod(obs_block_light, axis=0) != True).astype(int))
            if self._pomdp_r:
                light_block_map = light_block_map.do_fov(self._pomdp_r, self._pomdp_r, max(self._level_shape))
            else:
                light_block_map = light_block_map.do_fov(*agent.pos, max(self._level_shape))
            if door_shadowing:
                # noinspection PyUnboundLocalVariable
                light_block_map[xs, ys] = 0
            agent.temp_light_map = light_block_map
            for obs_idx in can_be_shadowed_idxs:
                obs[obs_idx] = ((obs[obs_idx] * light_block_map) + 0.) - (1 - light_block_map)  # * obs[0])
        else:
            pass
        # Agents observe other agents as wall
        if self.obs_prop.render_agents == a_obs.LEVEL and self.n_agents > 1:
            other_agent_obs = self[c.AGENT].as_array()
            if self.obs_prop.omit_agent_self:
                other_agent_obs[:, agent.x, agent.y] -= agent.encoding
            obs_dict[c.WALLS] = lvl_obs
            if self.obs_prop.render_agents in [a_obs.SEPERATE, a_obs.COMBINED]:
                obs_dict[c.AGENT] = agent_obs
            if self[c.AGENT_PLACEHOLDER]:
                obs_dict[c.AGENT_PLACEHOLDER] = placeholder_obs
            obs_dict[c.DOORS] = door_obs
            obs_dict.update(add_obs_dict)
            observations = np.vstack(list(obs_dict.values()))
            if self.obs_prop.pomdp_r:
-                oobs = self._do_pomdp_obs_cutout(agent, other_agent_obs)[0]
+                observations = self._do_pomdp_cutout(agent, observations)
                # noinspection PyUnresolvedReferences
                mask = (oobs != c.SHADOWED_CELL.value).astype(int)
                obs[0] += oobs * mask
            raw_obs = self._additional_raw_observations(agent)
            observations = np.vstack((observations, *list(raw_obs.values())))
            keys = list(chain(obs_dict.keys(), raw_obs.keys()))
            idxs = np.cumsum([x.shape[0] for x in chain(obs_dict.values(), raw_obs.values())]) - 1
            per_agent_expl_idx[agent.name] = {key: list(range(a, b)) for key, a, b in
                                              zip(keys, idxs, list(idxs[1:]) + [idxs[-1]+1, ])}
            # Shadow Casting
            try:
                light_block_obs = [obs_idx for key, obs_idx in per_agent_expl_idx[agent.name].items()
                                   if self[key].is_blocking_light]
                # Flatten
                light_block_obs = [x for y in light_block_obs for x in y]
                shadowed_obs = [obs_idx for key, obs_idx in per_agent_expl_idx[agent.name].items()
                                if self[key].can_be_shadowed]
                # Flatten
                shadowed_obs = [x for y in shadowed_obs for x in y]
            except AttributeError as e:
                print('Check your Keys! Only use Constants as Keys!')
                print(e)
                raise e
            if self.obs_prop.cast_shadows:
                obs_block_light = observations[light_block_obs] != c.OCCUPIED_CELL.value
                door_shadowing = False
                if self.parse_doors:
                    if doors := self[c.DOORS]:
                        if door := doors.by_pos(agent.pos):
                            if door.is_closed:
                                for group in door.connectivity_subgroups:
                                    if agent.last_pos not in group:
                                        door_shadowing = True
                                        if self._pomdp_r:
                                            blocking = [
                                                tuple(np.subtract(x, agent.pos) + (self._pomdp_r, self._pomdp_r))
                                                for x in group]
                                            xs, ys = zip(*blocking)
                                        else:
                                            xs, ys = zip(*group)
                                        # noinspection PyUnresolvedReferences
                                        obs_block_light[:, xs, ys] = False
                light_block_map = Map((np.prod(obs_block_light, axis=0) != True).astype(int).squeeze())
                if self._pomdp_r:
                    light_block_map = light_block_map.do_fov(self._pomdp_r, self._pomdp_r, max(self._level_shape))
                else:
                    light_block_map = light_block_map.do_fov(*agent.pos, max(self._level_shape))
                if door_shadowing:
                    # noinspection PyUnboundLocalVariable
                    light_block_map[xs, ys] = 0
                agent.temp_light_map = light_block_map.copy()
                observations[shadowed_obs] = ((observations[shadowed_obs] * light_block_map) + 0.) - (1 - light_block_map)
            else:
-                obs[0] += other_agent_obs
+                pass
-        # Additional Observation:
+            per_agent_obsn[agent.name] = observations
        for additional_obs in self.additional_obs_build():
            obs[running_idx:running_idx+additional_obs.shape[0]] = additional_obs
            running_idx += additional_obs.shape[0]
        for additional_per_agent_obs in self.additional_per_agent_obs_build(agent):
            obs[running_idx:running_idx + additional_per_agent_obs.shape[0]] = additional_per_agent_obs
            running_idx += additional_per_agent_obs.shape[0]
-        return obs
+        if self.n_agents == 1:
            agent_name = self[c.AGENT][0].name
            obs, explained_idx = per_agent_obsn[agent_name], per_agent_expl_idx[agent_name]
        elif self.n_agents >= 2:
            obs, explained_idx = np.stack(list(per_agent_obsn.values())), per_agent_expl_idx
        else:
            raise ValueError
-    def _do_pomdp_obs_cutout(self, agent, obs_to_be_padded):
+        return obs, explained_idx
    def _do_pomdp_cutout(self, agent, obs_to_be_padded):
        assert obs_to_be_padded.ndim == 3
        r, d = self._pomdp_r, self.pomdp_diameter
        x0, x1 = max(0, agent.x - r), min(agent.x + r + 1, self._level_shape[0])
        y0, y1 = max(0, agent.y - r), min(agent.y + r + 1, self._level_shape[1])
        # Other Agent Obs = oobs
        oobs = obs_to_be_padded[:, x0:x1, y0:y1]
-        if oobs.shape[0:] != (d, d):
+        if oobs.shape[1:] != (d, d):
            if xd := oobs.shape[1] % d:
                if agent.x > r:
                    x0_pad = 0
@@ -478,7 +477,7 @@ class BaseFactory(gym.Env):
            if doors := self[c.DOORS]:
                if self.doors_have_area:
                    if door := doors.by_pos(new_tile.pos):
-                        if door.can_collide:
+                        if door.is_open:
                            return agent.tile, c.NOT_VALID
                        else:  # door.is_closed:
                            pass
@@ -569,7 +568,7 @@ class BaseFactory(gym.Env):
        if not self._renderer:  # lazy init
            from environments.factory.base.renderer import Renderer, RenderEntity
            global Renderer, RenderEntity
-            height, width = self._obs_cube.shape[1:]
+            height, width = self._level_shape
            self._renderer = Renderer(width, height, view_radius=self._pomdp_r, fps=5)
        # noinspection PyUnboundLocalVariable
@@ -636,20 +635,6 @@ class BaseFactory(gym.Env):
    # Functions which provide additions to functions of the base class
    #  Always call super!!!!!!
    @abc.abstractmethod
    def additional_obs_build(self) -> List[np.ndarray]:
        return []
    def additional_per_agent_obs_build(self, agent) -> List[np.ndarray]:
        additional_per_agent_obs = []
        if self.obs_prop.show_global_position_info:
            pos_array = np.zeros(self.observation_space.shape[1:])
            for xy in range(1):
                pos_array[0, xy] = agent.pos[xy] / self._level_shape[xy]
            additional_per_agent_obs.append(pos_array)
        return additional_per_agent_obs
    @abc.abstractmethod
    def do_additional_reset(self) -> None:
        pass
@@ -666,6 +651,17 @@ class BaseFactory(gym.Env):
    def check_additional_done(self) -> bool:
        return False
    @abc.abstractmethod
    def _additional_observations(self) -> Dict[Constants, np.typing.ArrayLike]:
        return {}
    @abc.abstractmethod
    def _additional_raw_observations(self, agent) -> Dict[Constants, np.typing.ArrayLike]:
        additional_raw_observations = {}
        if self.obs_prop.show_global_position_info:
            additional_raw_observations.update({c.GLOBAL_POSITION: self[c.GLOBAL_POSITION].by_entity(agent).as_array()})
        return additional_raw_observations
    @abc.abstractmethod
    def calculate_additional_reward(self, agent: Agent) -> (int, dict):
        return 0, {}
--- a/environments/factory/base/objects.py
+++ b/environments/factory/base/objects.py
@@ -3,22 +3,26 @@ from enum import Enum
 from typing import Union
 import networkx as nx
 import numpy as np
 from environments import helpers as h
 from environments.helpers import Constants as c
 import itertools
 ##########################################################################
 # ##################### Base Object Definition ######################### #
 ##########################################################################
 class Object:
    """Generell Objects for Organisation and Maintanance such as Actions etc..."""
    _u_idx = defaultdict(lambda: 0)
    def __bool__(self):
        return True
    @property
    def is_blocking_light(self):
        return self._is_blocking_light
    @property
    def name(self):
        return self._name
@@ -43,7 +47,7 @@ class Object:
        elif self._str_ident is not None and self._enum_ident is None:
            self._name = f'{self.__class__.__name__}[{self._str_ident}]'
        elif self._str_ident is None and self._enum_ident is None:
-            self._name = f'{self.__class__.__name__}#{self._u_idx[self.__class__.__name__]}'
+            self._name = f'{self.__class__.__name__}#{Object._u_idx[self.__class__.__name__]}'
            Object._u_idx[self.__class__.__name__] += 1
        else:
            raise ValueError('Please use either of the idents.')
@@ -68,16 +72,56 @@ class Object:
            return other.name == self.name
-class Entity(Object):
+class EnvObject(Object):
-    @property
+    """Objects that hold Information that are observable, but have no position on the env grid. Inventories etc..."""
-    def can_collide(self):
+
-        return True
+    _u_idx = defaultdict(lambda: 0)
    @property
    def encoding(self):
        return c.OCCUPIED_CELL.value
    def __init__(self, register, **kwargs):
        super(EnvObject, self).__init__(**kwargs)
        self._register = register
 class BoundingMixin:
    @property
    def bound_entity(self):
        return self._bound_entity
    def __init__(self, entity_to_be_bound, *args, **kwargs):
        super().__init__(*args, **kwargs)
        assert entity_to_be_bound is not None
        self._bound_entity = entity_to_be_bound
    def __repr__(self):
        s = super(BoundingMixin, self).__repr__()
        i = s[:s.find('(')]
        return f'{s[:i]}[{self.bound_entity.name}]{s[i:]}'
    @property
    def name(self):
        return f'{super(BoundingMixin, self).name}({self._bound_entity.name})'
    def belongs_to_entity(self, entity):
        return entity == self.bound_entity
 class Entity(EnvObject):
    """Full Env Entity that lives on the env Grid. Doors, Items, Dirt etc..."""
    @property
    def is_blocking_light(self):
        return self._is_blocking_light
    @property
    def can_collide(self):
        return True
    @property
    def x(self):
        return self.pos[0]
@@ -94,9 +138,10 @@ class Entity(Object):
    def tile(self):
        return self._tile
-    def __init__(self, tile, **kwargs):
+    def __init__(self, tile, *args, is_blocking_light=True,  **kwargs):
-        super().__init__(**kwargs)
+        super().__init__(*args, **kwargs)
        self._tile = tile
        self._is_blocking_light = is_blocking_light
        tile.enter(self)
    def summarize_state(self, **_) -> dict:
@@ -104,7 +149,7 @@ class Entity(Object):
                    tile=str(self.tile.name), can_collide=bool(self.can_collide))
    def __repr__(self):
-        return f'{self.name}(@{self.pos})'
+        return super(Entity, self).__repr__() + f'(@{self.pos})'
 class MoveableEntity(Entity):
@@ -118,7 +163,7 @@ class MoveableEntity(Entity):
        if self._last_tile:
            return self._last_tile.pos
        else:
-            return c.NO_POS
+            return c.NO_POS.value
    @property
    def direction_of_view(self):
@@ -137,45 +182,66 @@ class MoveableEntity(Entity):
            curr_tile.leave(self)
            self._tile = next_tile
            self._last_tile = curr_tile
            self._register.notify_change_to_value(self)
            return True
        else:
            return False
 ##########################################################################
 # ####################### Objects and Entitys ########################## #
 ##########################################################################
 class Action(Object):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
-class PlaceHolder(MoveableEntity):
+class PlaceHolder(Object):
    def __init__(self, *args, fill_value=0, **kwargs):
        super().__init__(*args, **kwargs)
        self._fill_value = fill_value
    @property
    def last_tile(self):
        return self.tile
    @property
    def direction_of_view(self):
        return self.pos
    @property
    def can_collide(self):
        return False
    @property
    def encoding(self):
-        return c.NO_POS.value[0]
+        return self._fill_value
    @property
    def name(self):
        return "PlaceHolder"
-class Tile(Object):
+class GlobalPosition(EnvObject):
    def belongs_to_entity(self, entity):
        return self._agent == entity
    def __init__(self, level_shape, obs_shape, agent, normalized: bool = True):
        super(GlobalPosition, self).__init__(self)
        self._obs_shape = (1, *obs_shape) if len(obs_shape) == 2 else obs_shape
        self._agent = agent
        self._level_shape = level_shape
        self._normalized = normalized
    def as_array(self):
        pos_array = np.zeros(self._obs_shape)
        for xy in range(1):
            pos_array[0, 0, xy] = self._agent.pos[xy] / self._level_shape[xy]
        return pos_array
 class Tile(EnvObject):
    @property
    def encoding(self):
        return c.FREE_CELL.value
    @property
    def guests_that_can_collide(self):
@@ -197,8 +263,8 @@ class Tile(Object):
    def pos(self):
        return self._pos
-    def __init__(self, pos, **kwargs):
+    def __init__(self, pos, *args, **kwargs):
-        super(Tile, self).__init__(**kwargs)
+        super(Tile, self).__init__(*args, **kwargs)
        self._guests = dict()
        self._pos = tuple(pos)
@@ -233,6 +299,11 @@ class Tile(Object):
 class Wall(Tile):
    @property
    def encoding(self):
        return c.OCCUPIED_CELL.value
    pass
@@ -247,7 +318,8 @@ class Door(Entity):
    @property
    def encoding(self):
-        return 1 if self.is_closed else 2
+        # This is important as it shadow is checked by occupation value
        return c.OCCUPIED_CELL.value if self.is_closed else 2
    @property
    def str_state(self):
@@ -307,11 +379,13 @@ class Door(Entity):
    def _open(self):
        self.connectivity.add_edges_from([(self.pos, x) for x in range(len(self.connectivity_subgroups))])
        self._state = c.OPEN_DOOR
        self._register.notify_change_to_value(self)
        self.time_to_close = self.auto_close_interval
    def _close(self):
        self.connectivity.remove_node(self.pos)
        self._state = c.CLOSED_DOOR
        self._register.notify_change_to_value(self)
    def is_linked(self, old_pos, new_pos):
        try:
--- a/environments/factory/base/registers.py
+++ b/environments/factory/base/registers.py
@@ -1,18 +1,23 @@
 import numbers
 import random
 from abc import ABC
-from typing import List, Union, Dict
+from typing import List, Union, Dict, Tuple
 import numpy as np
-from environments.factory.base.objects import Entity, Tile, Agent, Door, Action, Wall, Object, PlaceHolder
+from environments.factory.base.objects import Entity, Tile, Agent, Door, Action, Wall, PlaceHolder, GlobalPosition, \
    Object, EnvObject
 from environments.utility_classes import MovementProperties
 from environments import helpers as h
 from environments.helpers import Constants as c
 ##########################################################################
 # ##################### Base Register Definition ####################### #
 ##########################################################################
-class Register:
+
-    _accepted_objects = Entity
+class ObjectRegister:
    _accepted_objects = Object
    @property
    def name(self):
@@ -48,6 +53,12 @@ class Register:
    def items(self):
        return self._register.items()
    def _get_index(self, item):
        try:
            return next(i for i, v in enumerate(self._register.values()) if v == item)
        except (StopIteration, AssertionError):
            return None
    def __getitem__(self, item):
        if isinstance(item, (int, np.int64, np.int32)):
            if item < 0:
@@ -65,39 +76,66 @@ class Register:
        return f'{self.__class__.__name__}({self._register})'
-class ObjectRegister(Register):
+class EnvObjectRegister(ObjectRegister):
-    hide_from_obs_builder = False
+    _accepted_objects = EnvObject
-    def __init__(self, level_shape: (int, int), *args, individual_slices=False, is_per_agent=False, **kwargs):
+    def __init__(self, obs_shape: (int, int), *args, **kwargs):
-        super(ObjectRegister, self).__init__(*args, **kwargs)
+        super(EnvObjectRegister, self).__init__(*args, **kwargs)
-        self.is_per_agent = is_per_agent
+        self._shape = obs_shape
        self.individual_slices = individual_slices
        self._level_shape = level_shape
        self._array = None
        self.hide_from_obs_builder = False
        self._lazy_eval_transforms = []
-    def register_item(self, other):
+    def register_item(self, other: EnvObject):
-        super(ObjectRegister, self).register_item(other)
+        super(EnvObjectRegister, self).register_item(other)
        if self._array is None:
-            self._array = np.zeros((1, *self._level_shape))
+            self._array = np.zeros((1, *self._shape))
-        else:
+        self.notify_change_to_value(other)
-            if self.individual_slices:
+
-                self._array = np.concatenate((self._array, np.zeros((1, *self._array.shape[1:]))))
+    def as_array(self):
        if self._lazy_eval_transforms:
            idxs, values  = zip(*self._lazy_eval_transforms)
            # nuumpy put repects the ordering so that
            np.put(self._array, idxs, values)
            self._lazy_eval_transforms = []
        return self._array
    def summarize_states(self, n_steps=None):
        return [val.summarize_state(n_steps=n_steps) for val in self.values()]
    def notify_change_to_free(self, env_object: EnvObject):
        self._array_change_notifyer(env_object, value=c.FREE_CELL.value)
-class EntityObjectRegister(ObjectRegister, ABC):
+    def notify_change_to_value(self, env_object: EnvObject):
        self._array_change_notifyer(env_object)
-    def as_array(self):
+    def _array_change_notifyer(self, env_object: EnvObject, value=None):
-        raise NotImplementedError
+        pos = self._get_index(env_object)
        value = value if value is not None else env_object.encoding
        self._lazy_eval_transforms.append((pos, value))
    def __delitem__(self, name):
        self.notify_change_to_free(self._register[name])
        del self._register[name]
    def delete_env_object(self, env_object: EnvObject):
        del self[env_object.name]
    def delete_env_object_by_name(self, name):
        del self[name]
 class EntityRegister(EnvObjectRegister, ABC):
    _accepted_objects = Entity
    @classmethod
    def from_tiles(cls, tiles, *args, entity_kwargs=None, **kwargs):
        # objects_name = cls._accepted_objects.__name__
        register_obj = cls(*args, **kwargs)
-        entities = [cls._accepted_objects(tile, str_ident=i, **entity_kwargs if entity_kwargs is not None else {})
+        entities = [cls._accepted_objects(tile, register_obj, str_ident=i,
                                          **entity_kwargs if entity_kwargs is not None else {})
                    for i, tile in enumerate(tiles)]
        register_obj.register_additional_items(entities)
        return register_obj
@@ -115,86 +153,172 @@ class EntityObjectRegister(ObjectRegister, ABC):
    def tiles(self):
        return [entity.tile for entity in self]
-    def __init__(self, *args, is_blocking_light=False, is_observable=True, can_be_shadowed=True, **kwargs):
+    @property
-        super(EntityObjectRegister, self).__init__(*args, **kwargs)
+    def encodings(self):
-        self.can_be_shadowed = can_be_shadowed
+        return [x.encoding for x in self]
        self.is_blocking_light = is_blocking_light
        self.is_observable = is_observable
-    def by_pos(self, pos):
+    def __init__(self, level_shape, *args,
-        if isinstance(pos, np.ndarray):
+                 is_blocking_light: bool = False,
-            pos = tuple(pos)
+                 can_be_shadowed: bool = True,
                 individual_slices: bool = False, **kwargs):
        super(EntityRegister, self).__init__(level_shape, *args, **kwargs)
        self._lazy_eval_transforms = []
        self.can_be_shadowed = can_be_shadowed
        self.individual_slices = individual_slices
        self.is_blocking_light = is_blocking_light
    def __delitem__(self, name):
        idx, obj = next((i, obj) for i, obj in enumerate(self) if obj.name == name)
        obj.tile.leave(obj)
        super(EntityRegister, self).__delitem__(name)
        if self.individual_slices:
            self._array = np.delete(self._array, idx, axis=0)
    def as_array(self):
        if self._lazy_eval_transforms:
            idxs, values  = zip(*self._lazy_eval_transforms)
            # numpy put repects the ordering so that
            # Todo: Export the index building in a seperate function
            np.put(self._array, [np.ravel_multi_index(idx, self._array.shape) for idx in idxs], values)
            self._lazy_eval_transforms = []
        return self._array
    def _array_change_notifyer(self, entity, pos=None, value=None):
        # Todo: Export the contruction in a seperate function
        pos = pos if pos is not None else entity.pos
        value = value if value is not None else entity.encoding
        x, y = pos
        if self.individual_slices:
            idx = (self._get_index(entity), x, y)
        else:
            idx = (0, x, y)
        self._lazy_eval_transforms.append((idx, value))
    def by_pos(self, pos: Tuple[int, int]):
        try:
-            return next(item for item in self.values() if item.pos == pos)
+            return next(item for item in self if item.pos == tuple(pos))
        except StopIteration:
            return None
-class MovingEntityObjectRegister(EntityObjectRegister, ABC):
+class BoundRegisterMixin(EnvObjectRegister, ABC):
    @classmethod
    def from_entities_to_bind(self, entitites):
        def from_values(cls, values: Union[str, numbers.Number, List[Union[str, numbers.Number]]],
                        *args, object_kwargs=None, **kwargs):
            # objects_name = cls._accepted_objects.__name__
            if isinstance(values, (str, numbers.Number)):
                values = [values]
            register_obj = cls(*args, **kwargs)
            objects = [cls._accepted_objects(register_obj, str_ident=i, fill_value=value,
                                             **object_kwargs if object_kwargs is not None else {})
                       for i, value in enumerate(values)]
            register_obj.register_additional_items(objects)
            return register_obj
 class MovingEntityObjectRegister(EntityRegister, ABC):
    def __init__(self, *args, **kwargs):
        super(MovingEntityObjectRegister, self).__init__(*args, **kwargs)
-    def by_pos(self, pos):
+    def notify_change_to_value(self, entity):
-        if isinstance(pos, np.ndarray):
+        super(MovingEntityObjectRegister, self).notify_change_to_value(entity)
-            pos = tuple(pos)
+        if entity.last_pos != c.NO_POS.value:
            try:
                self._array_change_notifyer(entity, entity.last_pos, value=c.FREE_CELL.value)
            except AttributeError:
                pass
 ##########################################################################
 # ################# Objects and Entity Registers ####################### #
 ##########################################################################
 class GlobalPositions(EnvObjectRegister):
    _accepted_objects = GlobalPosition
    is_blocking_light = False
    can_be_shadowed = False
    hide_from_obs_builder = True
    def __init__(self, *args, **kwargs):
        super(GlobalPositions, self).__init__(*args, is_per_agent=True, individual_slices=True, **kwargs)
    def as_array(self):
        # Todo make this lazy?
        return np.stack([gp.as_array() for inv_idx, gp in enumerate(self)])
    def spawn_GlobalPositionObjects(self, obs_shape, agents):
        global_positions = [self._accepted_objects(self._shape, obs_shape, agent)
                            for _, agent in enumerate(agents)]
        # noinspection PyTypeChecker
        self.register_additional_items(global_positions)
    def summarize_states(self, n_steps=None):
        return {}
    def idx_by_entity(self, entity):
        try:
-            return next(x for x in self if x.pos == pos)
+            return next((idx for idx, inv in enumerate(self) if inv.belongs_to_entity(entity)))
        except StopIteration:
            return None
-    def __delitem__(self, name):
+    def by_entity(self, entity):
-        idx = next(i for i, entity in enumerate(self) if entity.name == name)
+        try:
-        del self._register[name]
+            return next((inv for inv in self if inv.belongs_to_entity(entity)))
-        if self.individual_slices:
+        except StopIteration:
-            self._array = np.delete(self._array, idx, axis=0)
+            return None
    def delete_entity(self, item):
        self.delete_entity_by_name(item.name)
    def delete_entity_by_name(self, name):
        del self[name]
-class PlaceHolders(MovingEntityObjectRegister):
+class PlaceHolders(EnvObjectRegister):
    _accepted_objects = PlaceHolder
-    def __init__(self, *args, fill_value: Union[str, int] = 0, **kwargs):
+    def __init__(self, *args, **kwargs):
        assert not 'individual_slices' in kwargs, 'Keyword - "individual_slices": "True" and must not be altered'
        kwargs.update(individual_slices=False)
        super().__init__(*args, **kwargs)
-        self.fill_value = fill_value
+
    @classmethod
    def from_values(cls, values: Union[str, numbers.Number, List[Union[str, numbers.Number]]],
                    *args, object_kwargs=None, **kwargs):
        # objects_name = cls._accepted_objects.__name__
        if isinstance(values, (str, numbers.Number)):
            values = [values]
        register_obj = cls(*args, **kwargs)
        objects = [cls._accepted_objects(register_obj, str_ident=i, fill_value=value,
                                         **object_kwargs if object_kwargs is not None else {})
                   for i, value in enumerate(values)]
        register_obj.register_additional_items(objects)
        return register_obj
    # noinspection DuplicatedCode
    def as_array(self):
-        if isinstance(self.fill_value, numbers.Number):
+        for idx, placeholder in enumerate(self):
-            self._array[:] = self.fill_value
+            if isinstance(placeholder.encoding, numbers.Number):
-        elif isinstance(self.fill_value, str):
+                self._array[idx][:] = placeholder.fill_value
-            if self.fill_value.lower() in ['normal', 'n']:
+            elif isinstance(placeholder.fill_value, str):
-                self._array = np.random.normal(size=self._array.shape)
+                if placeholder.fill_value.lower() in ['normal', 'n']:
                    self._array[:] = np.random.normal(size=self._array.shape)
                else:
                    raise ValueError('Choose one of: ["normal", "N"]')
            else:
-                raise ValueError('Choose one of: ["normal", "N"]')
+                raise TypeError('Objects of type "str" or "number" is required here.')
        else:
            raise TypeError('Objects of type "str" or "number" is required here.')
-        if self.individual_slices:
+        return self._array
            return self._array
        else:
            return self._array[None, 0]
-class Entities(Register):
+class Entities(ObjectRegister):
-
+    _accepted_objects = EntityRegister
    _accepted_objects = EntityObjectRegister
    @property
-    def observable_arrays(self):
+    def arrays(self):
-        # FIXME: Find a better name
+        return {key: val.as_array() for key, val in self.items()}
        return {key: val.as_array() for key, val in self.items() if val.is_observable}
    @property
    def obs_arrays(self):
-        # FIXME: Find a better name
+        return {key: val.as_array() for key, val in self.items() if not val.hide_from_obs_builder}
        return {key: val.as_array() for key, val in self.items() if val.is_observable and not val.hide_from_obs_builder}
    @property
    def names(self):
@@ -220,34 +344,34 @@ class Entities(Register):
        return found_entities
-class WallTiles(EntityObjectRegister):
+class WallTiles(EntityRegister):
    _accepted_objects = Wall
    _light_blocking = True
    hide_from_obs_builder = True
    def as_array(self):
        if not np.any(self._array):
            # Which is Faster?
            # indices = [x.pos for x in self]
            # np.put(self._array, [np.ravel_multi_index((0, *x), self._array.shape) for x in indices], self.encodings)
            x, y = zip(*[x.pos for x in self])
            self._array[0, x, y] = self.encoding
        return self._array
    def __init__(self, *args, **kwargs):
-        super(WallTiles, self).__init__(*args, individual_slices=False,
+        super(WallTiles, self).__init__(*args, is_blocking_light=self._light_blocking, individual_slices=False,
-                                        is_blocking_light=self._light_blocking, **kwargs)
+                                        **kwargs)
    @property
    def encoding(self):
        return c.OCCUPIED_CELL.value
    @property
    def array(self):
        return self._array
    @classmethod
    def from_argwhere_coordinates(cls, argwhere_coordinates, *args, **kwargs):
        tiles = cls(*args, **kwargs)
        # noinspection PyTypeChecker
        tiles.register_additional_items(
-            [cls._accepted_objects(pos, is_blocking_light=cls._light_blocking)
+            [cls._accepted_objects(pos, tiles, is_blocking_light=cls._light_blocking)
             for pos in argwhere_coordinates]
        )
        return tiles
@@ -264,12 +388,11 @@ class WallTiles(EntityObjectRegister):
 class FloorTiles(WallTiles):
    _accepted_objects = Tile
    _light_blocking = False
    def __init__(self, *args, **kwargs):
-        super(FloorTiles, self).__init__(*args, is_observable=False, **kwargs)
+        super(FloorTiles, self).__init__(*args, **kwargs)
    @property
    def encoding(self):
@@ -297,22 +420,21 @@ class FloorTiles(WallTiles):
 class Agents(MovingEntityObjectRegister):
    _accepted_objects = Agent
    def __init__(self, *args, hide_from_obs_builder=False, **kwargs):
        super().__init__(*args, **kwargs)
        self.hide_from_obs_builder = hide_from_obs_builder
-    # noinspection DuplicatedCode
+    @DeprecationWarning
-    def as_array(self):
+    def Xas_array(self):
-        self._array[:] = c.FREE_CELL.value
+        # Super Safe Version
-        # noinspection PyTupleAssignmentBalance
+        # self._array[:] = c.FREE_CELL.value
-        for z, x, y, v in zip(range(len(self)), *zip(*[x.pos for x in self]), [x.encoding for x in self]):
+        indices = list(zip(range(len(self)), *zip(*[x.last_pos for x in self])))
-            if self.individual_slices:
+        np.put(self._array, [np.ravel_multi_index(x, self._array.shape) for x in indices], c.FREE_CELL.value)
-                self._array[z, x, y] += v
+        indices = list(zip(range(len(self)), *zip(*[x.pos for x in self])))
-            else:
+        np.put(self._array, [np.ravel_multi_index(x, self._array.shape) for x in indices], self.encodings)
-                self._array[0, x, y] += v
+
        if self.individual_slices:
            return self._array
        else:
@@ -329,17 +451,11 @@ class Agents(MovingEntityObjectRegister):
        self._register[agent.name] = agent
-class Doors(EntityObjectRegister):
+class Doors(EntityRegister):
    def __init__(self, *args, **kwargs):
        super(Doors, self).__init__(*args, is_blocking_light=True, **kwargs)
    def as_array(self):
        self._array[:] = 0
        for door in self:
            self._array[0, door.x, door.y] = door.encoding
        return self._array
    _accepted_objects = Door
    def get_near_position(self, position: (int, int)) -> Union[None, Door]:
@@ -353,8 +469,7 @@ class Doors(EntityObjectRegister):
            door.tick()
-class Actions(Register):
+class Actions(ObjectRegister):
    _accepted_objects = Action
    @property
@@ -385,7 +500,7 @@ class Actions(Register):
        return action in self.movement_actions.values()
-class Zones(Register):
+class Zones(ObjectRegister):
    @property
    def accounting_zones(self):
--- a/environments/factory/base/shadow_casting.py
+++ b/environments/factory/base/shadow_casting.py
@@ -13,7 +13,7 @@ mult_array = np.asarray([
 class Map(object):
    # Multipliers for transforming coordinates to other octants:
-    def __init__(self, map_array: np.ndarray, diamond_slope: float = 0.9):
+    def __init__(self, map_array: np.typing.ArrayLike, diamond_slope: float = 0.9):
        self.data = map_array
        self.width, self.height = map_array.shape
        self.light = np.full_like(self.data, c.FREE_CELL.value)
--- a/environments/factory/combined_factories.py
+++ b/environments/factory/combined_factories.py
@@ -1,6 +1,7 @@
 import random
 from environments.factory.factory_battery import BatteryFactory, BatteryProperties
 from environments.factory.factory_dest import DestFactory
 from environments.factory.factory_dirt import DirtFactory, DirtProperties
 from environments.factory.factory_item import ItemFactory
@@ -17,6 +18,12 @@ class DirtBatteryFactory(DirtFactory, BatteryFactory):
        super().__init__(*args, **kwargs)
 # noinspection PyAbstractClass
 class DirtDestItemFactory(ItemFactory, DirtFactory, DestFactory):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
 if __name__ == '__main__':
    from environments.utility_classes import AgentRenderOptions as ARO, ObservationProperties
--- a/environments/factory/factory_battery.py
+++ b/environments/factory/factory_battery.py
@@ -1,18 +1,18 @@
-from typing import Union, NamedTuple
+from typing import Union, NamedTuple, Dict
 import numpy as np
 from environments.factory.base.base_factory import BaseFactory
-from environments.factory.base.objects import Agent, Action, Entity
+from environments.factory.base.objects import Agent, Action, Entity, EnvObject, BoundingMixin
-from environments.factory.base.registers import EntityObjectRegister, ObjectRegister
+from environments.factory.base.registers import EntityRegister, EnvObjectRegister
 from environments.factory.base.renderer import RenderEntity
-from environments.helpers import Constants as c
+from environments.helpers import Constants as c, Constants
 from environments import helpers as h
 CHARGE_ACTION = h.EnvActions.CHARGE
-ITEM_DROP_OFF = 1
+CHARGE_POD = 1
 class BatteryProperties(NamedTuple):
@@ -24,42 +24,18 @@ class BatteryProperties(NamedTuple):
    multi_charge: bool = False
-class Battery(object):
+class Battery(EnvObject, BoundingMixin):
    @property
    def is_discharged(self):
        return self.charge_level == 0
-    @property
+    def __init__(self, initial_charge_level: float, *args, **kwargs):
-    def is_blocking_light(self):
+        super(Battery, self).__init__(*args, **kwargs)
        return False
    @property
    def can_collide(self):
        return False
    @property
    def name(self):
        return f'{self.__class__.__name__}({self.agent.name})'
    def __init__(self, pomdp_r: int, level_shape: (int, int), agent: Agent, initial_charge_level: float):
        super().__init__()
        self.agent = agent
        self._pomdp_r = pomdp_r
        self._level_shape = level_shape
        if self._pomdp_r:
            self._array = np.zeros((1, pomdp_r * 2 + 1, pomdp_r * 2 + 1))
        else:
            self._array = np.zeros((1, *self._level_shape))
        self.charge_level = initial_charge_level
-    def as_array(self):
+    def encoding(self):
-        self._array[:] = c.FREE_CELL.value
+        return self.charge_level
        self._array[0, 0] = self.charge_level
        return self._array
    def __repr__(self):
        return f'{self.__class__.__name__}[{self.agent.name}]({self.charge_level})'
    def charge(self, amount) -> c:
        if self.charge_level < 1:
@@ -73,12 +49,10 @@ class Battery(object):
        if self.charge_level != 0:
            # noinspection PyTypeChecker
            self.charge_level = max(0, amount + self.charge_level)
            self._register.notify_change_to_value(self)
            return c.VALID
        else:
-            return  c.NOT_VALID
+            return c.NOT_VALID
    def belongs_to_entity(self, entity):
        return self.agent == entity
    def summarize_state(self, **kwargs):
        attr_dict = {key: str(val) for key, val in self.__dict__.items() if not key.startswith('_') and key != 'data'}
@@ -86,7 +60,7 @@ class Battery(object):
        return attr_dict
-class BatteriesRegister(ObjectRegister):
+class BatteriesRegister(EnvObjectRegister):
    _accepted_objects = Battery
    is_blocking_light = False
@@ -98,16 +72,17 @@ class BatteriesRegister(ObjectRegister):
        self.is_observable = True
    def as_array(self):
-        # self._array[:] = c.FREE_CELL.value
+        # ToDO: Make this Lazy
        self._array[:] = c.FREE_CELL.value
        for inv_idx, battery in enumerate(self):
            self._array[inv_idx] = battery.as_array()
        return self._array
    def spawn_batteries(self, agents, pomdp_r, initial_charge_level):
-        inventories = [self._accepted_objects(pomdp_r, self._level_shape, agent,
+        batteries = [self._accepted_objects(pomdp_r, self._shape, agent,
-                                              initial_charge_level)
+                                            initial_charge_level)
-                       for _, agent in enumerate(agents)]
+                     for _, agent in enumerate(agents)]
-        self.register_additional_items(inventories)
+        self.register_additional_items(batteries)
    def idx_by_entity(self, entity):
        try:
@@ -135,7 +110,7 @@ class ChargePod(Entity):
    @property
    def encoding(self):
-        return ITEM_DROP_OFF
+        return CHARGE_POD
    def __init__(self, *args, charge_rate: float = 0.4,
                 multi_charge: bool = False, **kwargs):
@@ -157,11 +132,12 @@ class ChargePod(Entity):
            return summary
-class ChargePods(EntityObjectRegister):
+class ChargePods(EntityRegister):
    _accepted_objects = ChargePod
-    def as_array(self):
+    @DeprecationWarning
    def Xas_array(self):
        self._array[:] = c.FREE_CELL.value
        for item in self:
            if item.pos != c.NO_POS.value:
@@ -180,6 +156,16 @@ class BatteryFactory(BaseFactory):
        self.btry_prop = btry_prop
        super().__init__(*args, **kwargs)
    def _additional_raw_observations(self, agent) -> Dict[Constants, np.typing.ArrayLike]:
        additional_raw_observations = super()._additional_raw_observations(agent)
        additional_raw_observations.update({c.BATTERIES: self[c.BATTERIES].by_entity(agent).as_array()})
        return additional_raw_observations
    def _additional_observations(self) -> Dict[Constants, np.typing.ArrayLike]:
        additional_observations = super()._additional_observations()
        additional_observations.update({c.CHARGE_POD: self[c.CHARGE_POD].as_array()})
        return additional_observations
    @property
    def additional_entities(self):
        super_entities = super().additional_entities
--- a/environments/factory/factory_destination.py
+++ b/environments/factory/factory_destination.py
@@ -6,10 +6,10 @@ import numpy as np
 import random
 from environments.factory.base.base_factory import BaseFactory
-from environments.helpers import Constants as c
+from environments.helpers import Constants as c, Constants
 from environments import helpers as h
-from environments.factory.base.objects import Agent, Entity, Action, Tile
+from environments.factory.base.objects import Agent, Entity, Action
-from environments.factory.base.registers import Entities, MovingEntityObjectRegister
+from environments.factory.base.registers import Entities, EntityRegister
 from environments.factory.base.renderer import RenderEntity
@@ -62,13 +62,16 @@ class Destination(Entity):
        return state_summary
-class Destinations(MovingEntityObjectRegister):
+class Destinations(EntityRegister):
    _accepted_objects = Destination
    _light_blocking = False
    def as_array(self):
        self._array[:] = c.FREE_CELL.value
        # ToDo: Switch to new Style Array Put
        # indices = list(zip(range(len(self)), *zip(*[x.pos for x in self])))
        # np.put(self._array, [np.ravel_multi_index(x, self._array.shape) for x in indices], self.encodings)
        for item in self:
            if item.pos != c.NO_POS.value:
                self._array[0, item.x, item.y] = item.encoding
@@ -83,39 +86,39 @@ class ReachedDestinations(Destinations):
    _light_blocking = False
    def __init__(self, *args, **kwargs):
-        super(ReachedDestinations, self).__init__(*args, is_observable=False, **kwargs)
+        super(ReachedDestinations, self).__init__(*args, **kwargs)
    def summarize_states(self, n_steps=None):
        return {}
-class DestSpawnMode(object):
+class DestModeOptions(object):
    DONE        = 'DONE'
    GROUPED     = 'GROUPED'
    PER_DEST    = 'PER_DEST'
-class DestinationProperties(NamedTuple):
+class DestProperties(NamedTuple):
    n_dests:                                     int = 1     # How many destinations are there
    dwell_time:                                  int = 0     # How long does the agent need to "wait" on a destination
    spawn_frequency:                             int = 0
    spawn_in_other_zone:                        bool = True  #
-    spawn_mode:                                  str = DestSpawnMode.DONE
+    spawn_mode:                                  str = DestModeOptions.DONE
    assert dwell_time >= 0, 'dwell_time cannot be < 0!'
    assert spawn_frequency >= 0, 'spawn_frequency cannot be < 0!'
    assert n_dests >= 0, 'n_destinations cannot be < 0!'
-    assert (spawn_mode == DestSpawnMode.DONE) != bool(spawn_frequency)
+    assert (spawn_mode == DestModeOptions.DONE) != bool(spawn_frequency)
 # noinspection PyAttributeOutsideInit, PyAbstractClass
-class DestinationFactory(BaseFactory):
+class DestFactory(BaseFactory):
    # noinspection PyMissingConstructor
-    def __init__(self, *args, dest_prop: DestinationProperties  = DestinationProperties(),
+    def __init__(self, *args, dest_prop: DestProperties  = DestProperties(),
                 env_seed=time.time_ns(), **kwargs):
        if isinstance(dest_prop, dict):
-            dest_prop = DestinationProperties(**dest_prop)
+            dest_prop = DestProperties(**dest_prop)
        self.dest_prop = dest_prop
        kwargs.update(env_seed=env_seed)
        self._dest_rng = np.random.default_rng(env_seed)
@@ -145,10 +148,6 @@ class DestinationFactory(BaseFactory):
        super_entities.update({c.DESTINATION: destinations, c.REACHEDDESTINATION: reached_destinations})
        return super_entities
    def additional_per_agent_obs_build(self, agent) -> List[np.ndarray]:
        additional_per_agent_obs_build = super().additional_per_agent_obs_build(agent)
        return additional_per_agent_obs_build
    def wait(self, agent: Agent):
        if destiantion := self[c.DESTINATION].by_pos(agent.pos):
            valid = destiantion.wait(agent)
@@ -178,13 +177,13 @@ class DestinationFactory(BaseFactory):
                                 if val == self.dest_prop.spawn_frequency]
        if destinations_to_spawn:
            n_dest_to_spawn = len(destinations_to_spawn)
-            if self.dest_prop.spawn_mode != DestSpawnMode.GROUPED:
+            if self.dest_prop.spawn_mode != DestModeOptions.GROUPED:
                destinations = [Destination(tile) for tile in self[c.FLOOR].empty_tiles[:n_dest_to_spawn]]
                self[c.DESTINATION].register_additional_items(destinations)
                for dest in destinations_to_spawn:
                    del self._dest_spawn_timer[dest]
                self.print(f'{n_dest_to_spawn} new destinations have been spawned')
-            elif self.dest_prop.spawn_mode == DestSpawnMode.GROUPED and n_dest_to_spawn == self.dest_prop.n_dests:
+            elif self.dest_prop.spawn_mode == DestModeOptions.GROUPED and n_dest_to_spawn == self.dest_prop.n_dests:
                destinations = [Destination(tile) for tile in self[c.FLOOR].empty_tiles[:n_dest_to_spawn]]
                self[c.DESTINATION].register_additional_items(destinations)
                for dest in destinations_to_spawn:
@@ -204,7 +203,7 @@ class DestinationFactory(BaseFactory):
        for dest in list(self[c.DESTINATION].values()):
            if dest.is_considered_reached:
                self[c.REACHEDDESTINATION].register_item(dest)
-                self[c.DESTINATION].delete_entity(dest)
+                self[c.DESTINATION].delete_env_object(dest)
                self._dest_spawn_timer[dest.name] = 0
                self.print(f'{dest.name} is reached now, removing...')
            else:
@@ -219,6 +218,11 @@ class DestinationFactory(BaseFactory):
        self.trigger_destination_spawn()
        return info_dict
    def _additional_observations(self) -> Dict[Constants, np.typing.ArrayLike]:
        additional_observations = super()._additional_observations()
        additional_observations.update({c.DESTINATION: self[c.DESTINATION].as_array()})
        return additional_observations
    def calculate_additional_reward(self, agent: Agent) -> (int, dict):
        # noinspection PyUnresolvedReferences
        reward, info_dict = super().calculate_additional_reward(agent)
@@ -240,7 +244,7 @@ class DestinationFactory(BaseFactory):
                    info_dict.update(agent_reached_destination=1)
                    self.print(f'{agent.name} just reached destination at {agent.pos}')
                    reward += 0.5
-                    self[c.REACHEDDESTINATION].delete_entity(reached_dest)
+                    self[c.REACHEDDESTINATION].delete_env_object(reached_dest)
        return reward, info_dict
    def render_additional_assets(self, mode='human'):
@@ -256,7 +260,7 @@ if __name__ == '__main__':
    render = True
-    dest_probs = DestinationProperties(n_dests=2, spawn_frequency=5, spawn_mode=DestSpawnMode.GROUPED)
+    dest_probs = DestProperties(n_dests=2, spawn_frequency=5, spawn_mode=DestModeOptions.GROUPED)
    obs_props = ObservationProperties(render_agents=ARO.LEVEL, omit_agent_self=True, pomdp_r=2)
@@ -264,12 +268,12 @@ if __name__ == '__main__':
                  'allow_diagonal_movement': False,
                  'allow_no_op': False}
-    factory = DestinationFactory(n_agents=10, done_at_collision=False,
+    factory = DestFactory(n_agents=10, done_at_collision=False,
-                                 level_name='rooms', max_steps=400,
+                          level_name='rooms', max_steps=400,
-                                 obs_prop=obs_props, parse_doors=True,
+                          obs_prop=obs_props, parse_doors=True,
-                                 verbose=True,
+                          verbose=True,
-                                 mv_prop=move_props, dest_prop=dest_probs
+                          mv_prop=move_props, dest_prop=dest_probs
-                                 )
+                          )
    # noinspection DuplicatedCode
    n_actions = factory.action_space.n - 1
--- a/environments/factory/factory_dirt.py
+++ b/environments/factory/factory_dirt.py
@@ -6,11 +6,11 @@ import random
 import numpy as np
 from algorithms.TSP_dirt_agent import TSPDirtAgent
-from environments.helpers import Constants as c
+from environments.helpers import Constants as c, Constants
 from environments import helpers as h
 from environments.factory.base.base_factory import BaseFactory
 from environments.factory.base.objects import Agent, Action, Entity, Tile
-from environments.factory.base.registers import Entities, MovingEntityObjectRegister
+from environments.factory.base.registers import Entities, MovingEntityObjectRegister, EntityRegister
 from environments.factory.base.renderer import RenderEntity
 from environments.utility_classes import ObservationProperties
@@ -42,6 +42,7 @@ class Dirt(Entity):
    def amount(self):
        return self._amount
    @property
    def encoding(self):
        # Edit this if you want items to be drawn in the ops differntly
        return self._amount
@@ -52,6 +53,7 @@ class Dirt(Entity):
    def set_new_amount(self, amount):
        self._amount = amount
        self._register.notify_change_to_value(self)
    def summarize_state(self, **kwargs):
        state_dict = super().summarize_state(**kwargs)
@@ -59,18 +61,7 @@ class Dirt(Entity):
        return state_dict
-class DirtRegister(MovingEntityObjectRegister):
+class DirtRegister(EntityRegister):
    def as_array(self):
        if self._array is not None:
            self._array[:] = c.FREE_CELL.value
            for dirt in list(self.values()):
                if dirt.amount == 0:
                    self.delete_entity(dirt)
                self._array[0, dirt.x, dirt.y] = dirt.amount
        else:
            self._array = np.zeros((1, *self._level_shape))
        return self._array
    _accepted_objects = Dirt
@@ -93,7 +84,7 @@ class DirtRegister(MovingEntityObjectRegister):
            if not self.amount > self.dirt_properties.max_global_amount:
                dirt = self.by_pos(tile.pos)
                if dirt is None:
-                    dirt = Dirt(tile, amount=self.dirt_properties.max_spawn_amount)
+                    dirt = Dirt(tile, self, amount=self.dirt_properties.max_spawn_amount)
                    self.register_item(dirt)
                else:
                    new_value = dirt.amount + self.dirt_properties.max_spawn_amount
@@ -155,7 +146,7 @@ class DirtFactory(BaseFactory):
            new_dirt_amount = dirt.amount - self.dirt_prop.clean_amount
            if new_dirt_amount <= 0:
-                self[c.DIRT].delete_entity(dirt)
+                self[c.DIRT].delete_env_object(dirt)
            else:
                dirt.set_new_amount(max(new_dirt_amount, c.FREE_CELL.value))
            return c.VALID
@@ -224,6 +215,11 @@ class DirtFactory(BaseFactory):
        done = self.dirt_prop.done_when_clean and (len(self[c.DIRT]) == 0)
        return super_done or done
    def _additional_observations(self) -> Dict[Constants, np.typing.ArrayLike]:
        additional_observations = super()._additional_observations()
        additional_observations.update({c.DIRT: self[c.DIRT].as_array()})
        return additional_observations
    def calculate_additional_reward(self, agent: Agent) -> (int, dict):
        reward, info_dict = super().calculate_additional_reward(agent)
        dirt = [dirt.amount for dirt in self[c.DIRT]]
@@ -278,41 +274,52 @@ if __name__ == '__main__':
    )
    obs_props = ObservationProperties(render_agents=ARO.COMBINED, omit_agent_self=True,
-                                      pomdp_r=2, additional_agent_placeholder=None)
+                                      pomdp_r=2, additional_agent_placeholder=None, cast_shadows=True)
    move_props = {'allow_square_movement': True,
                  'allow_diagonal_movement': False,
                  'allow_no_op': False}
    global_timings = []
    for i in range(20):
-    factory = DirtFactory(n_agents=1, done_at_collision=False,
+        factory = DirtFactory(n_agents=2, done_at_collision=False,
-                          level_name='rooms', max_steps=400,
+                              level_name='rooms', max_steps=1000,
-                          doors_have_area=False,
+                              doors_have_area=False,
-                          obs_prop=obs_props, parse_doors=True,
+                              obs_prop=obs_props, parse_doors=True,
-                          record_episodes=True, verbose=True,
+                              record_episodes=True, verbose=True,
-                          mv_prop=move_props, dirt_prop=dirt_props,
+                              mv_prop=move_props, dirt_prop=dirt_props,
-                          inject_agents=[TSPDirtAgent]
+                              # inject_agents=[TSPDirtAgent],
-                          )
+                              )
-    # noinspection DuplicatedCode
+        # noinspection DuplicatedCode
-    n_actions = factory.action_space.n - 1
+        n_actions = factory.action_space.n - 1
-    _ = factory.observation_space
+        _ = factory.observation_space
-
+        obs_space = factory.observation_space
-    for epoch in range(10):
+        obs_space_named = factory.named_observation_space
-        random_actions = [[random.randint(0, n_actions) for _
+        times = []
-                           in range(factory.n_agents)] for _
+        import time
-                          in range(factory.max_steps+1)]
+        for epoch in range(10):
-        env_state = factory.reset()
+            start_time = time.time()
-        if render:
+            random_actions = [[random.randint(0, n_actions) for _
-            factory.render()
+                               in range(factory.n_agents)] for _
-        tsp_agent = factory.get_injected_agents()[0]
+                              in range(factory.max_steps+1)]
-
+            env_state = factory.reset()
        r = 0
        for agent_i_action in random_actions:
            env_state, step_r, done_bool, info_obj = factory.step(tsp_agent.predict())
            r += step_r
            if render:
                factory.render()
-            if done_bool:
+            # tsp_agent = factory.get_injected_agents()[0]
-                break
+
-        print(f'Factory run {epoch} done, reward is:\n    {r}')
+            r = 0
            for agent_i_action in random_actions:
                env_state, step_r, done_bool, info_obj = factory.step(agent_i_action)
                r += step_r
                if render:
                    factory.render()
                if done_bool:
                    break
            times.append(time.time() - start_time)
            # print(f'Factory run {epoch} done, reward is:\n    {r}')
        print('Time Taken: ', sum(times) / 10)
        global_timings.append(sum(times) / 10)
    print('Time Taken: ', sum(global_timings[10:]) / 10)
 pass
--- a/environments/factory/factory_item.py
+++ b/environments/factory/factory_item.py
@@ -6,11 +6,11 @@ import numpy as np
 import random
 from environments.factory.base.base_factory import BaseFactory
-from environments.helpers import Constants as c
+from environments.helpers import Constants as c, Constants
 from environments import helpers as h
 from environments.factory.base.objects import Agent, Entity, Action, Tile, MoveableEntity
-from environments.factory.base.registers import Entities, EntityObjectRegister, ObjectRegister, \
+from environments.factory.base.registers import Entities, EntityRegister, EnvObjectRegister, MovingEntityObjectRegister, \
-    MovingEntityObjectRegister
+    BoundRegisterMixin
 from environments.factory.base.renderer import RenderEntity
@@ -19,7 +19,7 @@ NO_ITEM = 0
 ITEM_DROP_OFF = 1
-class Item(MoveableEntity):
+class Item(Entity):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
@@ -41,20 +41,21 @@ class Item(MoveableEntity):
    def set_auto_despawn(self, auto_despawn):
        self._auto_despawn = auto_despawn
    def despawn(self):
        # Todo: Move this to base class?
        curr_tile = self.tile
        curr_tile.leave(self)
        self._tile = None
        self._register.notify_change_to_value(self)
        return True
 class ItemRegister(MovingEntityObjectRegister):
-    def as_array(self):
+class ItemRegister(EntityRegister):
        self._array[:] = c.FREE_CELL.value
        for item in self:
            if item.pos != c.NO_POS.value:
                self._array[0, item.x, item.y] = item.encoding
        return self._array
    _accepted_objects = Item
    def spawn_items(self, tiles: List[Tile]):
-        items = [Item(tile) for tile in tiles]
+        items = [Item(tile, self) for tile in tiles]
        self.register_additional_items(items)
    def despawn_items(self, items: List[Item]):
@@ -63,7 +64,7 @@ class ItemRegister(MovingEntityObjectRegister):
            del self[item]
-class Inventory(UserList):
+class Inventory(EntityRegister, BoundRegisterMixin):
    @property
    def is_blocking_light(self):
@@ -73,19 +74,18 @@ class Inventory(UserList):
    def name(self):
        return f'{self.__class__.__name__}({self.agent.name})'
-    def __init__(self, pomdp_r: int, level_shape: (int, int), agent: Agent, capacity: int):
+    def __init__(self, obs_shape: (int, int), agent: Agent, capacity: int):
        super(Inventory, self).__init__()
        self.agent = agent
-        self.pomdp_r = pomdp_r
+        self._obs_shape = obs_shape
-        self._level_shape = level_shape
+
-        if self.pomdp_r:
+        self._array = np.zeros((1, *self._obs_shape))
-            self._array = np.zeros((1, pomdp_r * 2 + 1, pomdp_r * 2 + 1))
+
        else:
            self._array = np.zeros((1, *self._level_shape))
        self.capacity = min(capacity, self._array.size)
    def as_array(self):
        self._array[:] = c.FREE_CELL.value
        # ToDo: Make this Lazy
        for item_idx, item in enumerate(self):
            x_diff, y_diff = divmod(item_idx, self._array.shape[1])
            self._array[0, int(x_diff), int(y_diff)] = item.encoding
@@ -110,25 +110,22 @@ class Inventory(UserList):
        return attr_dict
-class Inventories(ObjectRegister):
+class Inventories(EnvObjectRegister):
    _accepted_objects = Inventory
    is_blocking_light = False
    can_be_shadowed = False
    hide_from_obs_builder = True
-    def __init__(self, *args, **kwargs):
+    def __init__(self, obs_shape, *args, **kwargs):
        super(Inventories, self).__init__(*args, is_per_agent=True, individual_slices=True, **kwargs)
-        self.is_observable = True
+        self._obs_shape = obs_shape
    def as_array(self):
-        # self._array[:] = c.FREE_CELL.value
+        return np.stack([inventory.as_array() for inv_idx, inventory in enumerate(self)])
        for inv_idx, inventory in enumerate(self):
            self._array[inv_idx] = inventory.as_array()
        return self._array
-    def spawn_inventories(self, agents, pomdp_r, capacity):
+    def spawn_inventories(self, agents, capacity):
-        inventories = [self._accepted_objects(pomdp_r, self._level_shape, agent, capacity)
+        inventories = [self._accepted_objects(self._obs_shape, agent, capacity)
                       for _, agent in enumerate(agents)]
        self.register_additional_items(inventories)
@@ -183,20 +180,20 @@ class DropOffLocation(Entity):
            return super().summarize_state(n_steps=n_steps)
-class DropOffLocations(EntityObjectRegister):
+class DropOffLocations(EntityRegister):
    _accepted_objects = DropOffLocation
-    def as_array(self):
+    @DeprecationWarning
    def Xas_array(self):
        # Todo: Which is faster?
        # indices = list(zip(range(len(self)), *zip(*[x.pos for x in self])))
        # np.put(self._array, [np.ravel_multi_index(x, self._array.shape) for x in indices], self.encodings)
        self._array[:] = c.FREE_CELL.value
-        for item in self:
+        indices = list(zip([0, ] * len(self), *zip(*[x.pos for x in self])))
-            if item.pos != c.NO_POS.value:
+        np.put(self._array, [np.ravel_multi_index(x, self._array.shape) for x in indices], self.encodings)
                self._array[0, item.x, item.y] = item.encoding
        return self._array
    def __repr__(self):
        super(DropOffLocations, self).__repr__()
 class ItemProperties(NamedTuple):
    n_items:                   int  = 5     # How many items are there at the same time
@@ -241,17 +238,23 @@ class ItemFactory(BaseFactory):
        empty_tiles = self[c.FLOOR].empty_tiles[:self.item_prop.n_items]
        item_register.spawn_items(empty_tiles)
-        inventories = Inventories(self._level_shape if not self._pomdp_r else ((self.pomdp_diameter,) * 2))
+        inventories = Inventories(self._level_shape if not self._pomdp_r else ((self.pomdp_diameter,) * 2),
-        inventories.spawn_inventories(self[c.AGENT], self._pomdp_r,
+                                  self._level_shape)
-                                      self.item_prop.max_agent_inventory_capacity)
+        inventories.spawn_inventories(self[c.AGENT], self.item_prop.max_agent_inventory_capacity)
        super_entities.update({c.DROP_OFF: drop_offs, c.ITEM: item_register, c.INVENTORY: inventories})
        return super_entities
-    def additional_per_agent_obs_build(self, agent) -> List[np.ndarray]:
+    def _additional_raw_observations(self, agent) -> Dict[Constants, np.typing.ArrayLike]:
-        additional_per_agent_obs_build = super().additional_per_agent_obs_build(agent)
+        additional_raw_observations = super()._additional_raw_observations(agent)
-        additional_per_agent_obs_build.append(self[c.INVENTORY].by_entity(agent).as_array())
+        additional_raw_observations.update({c.INVENTORY: self[c.INVENTORY].by_entity(agent).as_array()})
-        return additional_per_agent_obs_build
+        return additional_raw_observations
    def _additional_observations(self) -> Dict[Constants, np.typing.ArrayLike]:
        additional_observations = super()._additional_observations()
        additional_observations.update({c.ITEM: self[c.ITEM].as_array()})
        additional_observations.update({c.DROP_OFF: self[c.DROP_OFF].as_array()})
        return additional_observations
    def do_item_action(self, agent: Agent):
        inventory = self[c.INVENTORY].by_entity(agent)
@@ -264,7 +267,7 @@ class ItemFactory(BaseFactory):
        elif item := self[c.ITEM].by_pos(agent.pos):
            try:
                inventory.append(item)
-                item.move(self._NO_POS_TILE)
+                item.despawn()
                return c.VALID
            except RuntimeError:
                return c.NOT_VALID
@@ -308,7 +311,7 @@ class ItemFactory(BaseFactory):
            if item.auto_despawn >= 1:
                item.set_auto_despawn(item.auto_despawn-1)
            elif not item.auto_despawn:
-                self[c.ITEM].delete_entity(item)
+                self[c.ITEM].delete_env_object(item)
            else:
                pass
@@ -327,12 +330,12 @@ class ItemFactory(BaseFactory):
                    info_dict.update({f'{agent.name}_item_drop_off': 1})
                    info_dict.update(item_drop_off=1)
                    self.print(f'{agent.name} just dropped of an item at {drop_off.pos}.')
-                    reward += 0.5
+                    reward += 1
                else:
                    info_dict.update({f'{agent.name}_item_pickup': 1})
                    info_dict.update(item_pickup=1)
                    self.print(f'{agent.name} just picked up an item at {agent.pos}')
-                    reward += 0.1
+                    reward += 0.2
            else:
                if self[c.DROP_OFF].by_pos(agent.pos):
                    info_dict.update({f'{agent.name}_failed_drop_off': 1})
@@ -363,13 +366,13 @@ if __name__ == '__main__':
    item_probs = ItemProperties()
-    obs_props = ObservationProperties(render_agents=ARO.LEVEL, omit_agent_self=True, pomdp_r=2)
+    obs_props = ObservationProperties(render_agents=ARO.SEPERATE, omit_agent_self=True, pomdp_r=2)
    move_props = {'allow_square_movement': True,
-                  'allow_diagonal_movement': False,
+                  'allow_diagonal_movement': True,
                  'allow_no_op': False}
-    factory = ItemFactory(n_agents=3, done_at_collision=False,
+    factory = ItemFactory(n_agents=2, done_at_collision=False,
                          level_name='rooms', max_steps=400,
                          obs_prop=obs_props, parse_doors=True,
                          record_episodes=True, verbose=True,
@@ -378,7 +381,8 @@ if __name__ == '__main__':
    # noinspection DuplicatedCode
    n_actions = factory.action_space.n - 1
-    _ = factory.observation_space
+    obs_space = factory.observation_space
    obs_space_named = factory.named_observation_space
    for epoch in range(4):
        random_actions = [[random.randint(0, n_actions) for _
--- a/environments/helpers.py
+++ b/environments/helpers.py
@@ -1,12 +1,12 @@
 import itertools
 from collections import defaultdict
-from enum import Enum, auto
+from enum import Enum
-from typing import Tuple, Union
+from pathlib import Path
 from typing import Tuple, Union, Dict, List
 import networkx as nx
 import numpy as np
-from pathlib import Path
+from numpy.typing import ArrayLike
 from stable_baselines3 import PPO, DQN, A2C
 MODEL_MAP = dict(PPO=PPO, DQN=DQN, A2C=A2C)
@@ -29,6 +29,7 @@ class Constants(Enum):
    LEVEL               = 'Level'
    AGENT               = 'Agent'
    AGENT_PLACEHOLDER   = 'AGENT_PLACEHOLDER'
    GLOBAL_POSITION     = 'GLOBAL_POSITION'
    FREE_CELL           = 0
    OCCUPIED_CELL       = 1
    SHADOWED_CELL       = -1
@@ -109,6 +110,58 @@ ACTIONMAP = defaultdict(lambda: (0, 0), {m.NORTH: (-1, 0), m.NORTHEAST: (-1, +1)
                        )
 class ObservationTranslator:
    def __init__(self, obs_shape_2d: (int, int), this_named_observation_space: Dict[str, dict],
                 *per_agent_named_obs_space: Dict[str, dict],
                 placeholder_fill_value: Union[int, str] = 'N'):
        assert len(obs_shape_2d) == 2
        self.obs_shape = obs_shape_2d
        if isinstance(placeholder_fill_value, str):
            if placeholder_fill_value.lower() in ['normal', 'n']:
                self.random_fill = lambda: np.random.normal(size=self.obs_shape)
            elif placeholder_fill_value.lower() in ['uniform', 'u']:
                self.random_fill = lambda: np.random.uniform(size=self.obs_shape)
            else:
                raise ValueError('Please chooe between "uniform" or "normal"')
        else:
            self.random_fill = None
        self._this_named_obs_space = this_named_observation_space
        self._per_agent_named_obs_space = list(per_agent_named_obs_space)
    def translate_observation(self, agent_idx: int, obs: np.ndarray):
        target_obs_space = self._per_agent_named_obs_space[agent_idx]
        translation = [idx_space_dict['explained_idxs'] for name, idx_space_dict in target_obs_space.items()]
        flat_translation = [x for y in translation for x in y]
        return np.take(obs, flat_translation, axis=1 if obs.ndim == 4 else 0)
    def translate_observations(self, observations: List[ArrayLike]):
        return [self.translate_observation(idx, observation) for idx, observation in enumerate(observations)]
    def __call__(self, observations):
        return self.translate_observations(observations)
 class ActionTranslator:
    def __init__(self, target_named_action_space: Dict[str, int], *per_agent_named_action_space: Dict[str, int]):
        self._target_named_action_space = target_named_action_space
        self._per_agent_named_action_space = list(per_agent_named_action_space)
        self._per_agent_idx_actions = [{idx: a for a, idx in x.items()} for x in self._per_agent_named_action_space]
    def translate_action(self, agent_idx: int, action: int):
        named_action = self._per_agent_idx_actions[agent_idx][action]
        translated_action = self._target_named_action_space[named_action]
        return translated_action
    def translate_actions(self, actions: List[int]):
        return [self.translate_action(idx, action) for idx, action in enumerate(actions)]
    def __call__(self, actions):
        return self.translate_actions(actions)
 # Utility functions
 def parse_level(path):
    with path.open('r') as lvl:
@@ -128,7 +181,7 @@ def one_hot_level(level, wall_char: Union[c, str] = c.WALL):
    return binary_grid
-def check_position(slice_to_check_against: np.ndarray, position_to_check: Tuple[int, int]):
+def check_position(slice_to_check_against: ArrayLike, position_to_check: Tuple[int, int]):
    x_pos, y_pos = position_to_check
    # Check if agent colides with grid boundrys
@@ -177,6 +230,7 @@ def points_to_graph(coordiniates_or_tiles, allow_euclidean_connections=True, all
                graph.add_edge(a, b)
    return graph
 if __name__ == '__main__':
    parsed_level = parse_level(Path(__file__).parent / 'factory' / 'levels' / 'simple.txt')
    y = one_hot_level(parsed_level)
--- a/environments/utility_classes.py
+++ b/environments/utility_classes.py
@@ -20,10 +20,10 @@ class ObservationProperties(NamedTuple):
    render_agents: AgentRenderOptions = AgentRenderOptions.SEPERATE
    omit_agent_self: bool = True
    additional_agent_placeholder: Union[None, str, int] = None
-    cast_shadows = True
+    cast_shadows: bool = True
    frames_to_stack: int = 0
    pomdp_r: int = 0
-    show_global_position_info: bool = True
+    show_global_position_info: bool = False
 class MarlFrameStack(gym.ObservationWrapper):
--- a/reload_agent.py
+++ b/reload_agent.py
@@ -3,6 +3,7 @@ from pathlib import Path
 import numpy as np
 import yaml
 from stable_baselines3 import A2C
 from environments import helpers as h
 from environments.helpers import Constants as c
@@ -16,13 +17,12 @@ warnings.filterwarnings('ignore', category=UserWarning)
 if __name__ == '__main__':
-    determin = False
+    determin = True
    render = True
-    record = True
+    record = False
    seed = 67
    n_agents = 1
-    out_path = Path('study_out/e_1_new_reward/no_obs/dirt/A2C_new_reward/0_A2C_new_reward')
+    out_path = Path('study_out/single_run_with_export/dirt')
    out_path_2 = Path('study_out/e_1_obs_stack_3_gae_0.25_n_steps_16/seperate_N/dirt/A2C_obs_stack_3_gae_0.25_n_steps_16/1_A2C_obs_stack_3_gae_0.25_n_steps_16')
    model_path = out_path
    with (out_path / f'env_params.json').open('r') as f:
@@ -35,10 +35,9 @@ if __name__ == '__main__':
        env_kwargs.update(record_episodes=record, done_at_collision=True)
    this_model = out_path / 'model.zip'
    other_model = out_path / 'model.zip'
-    model_cls = next(val for key, val in h.MODEL_MAP.items() if key in out_path.parent.name)
+    model_cls = A2C  # next(val for key, val in h.MODEL_MAP.items() if key in out_path.parent.name)
-    models = [model_cls.load(this_model)]  # , model_cls.load(other_model)]
+    models = [model_cls.load(this_model)]
    # Init Env
    with DirtFactory(**env_kwargs) as env:
@@ -59,6 +58,8 @@ if __name__ == '__main__':
                rew += step_r
                if render:
                    env.render()
                if not env.unwrapped.unwrapped[c.AGENT][0].temp_valid:
                    print('Invalid ACtions')
                if done_bool:
                    break
            print(f'Factory run {episode} done, reward is:\n    {rew}')
--- a/studies/e_1.py
+++ b/studies/e_1.py
@@ -434,8 +434,8 @@ if __name__ == '__main__':
                    # Iteration
                    start_mp_baseline_run(env_map, policy_path)
-                    # for seed_path in (y for y in policy_path.iterdir() if y.is_dir()):
+                    # for policy_path in (y for y in policy_path.iterdir() if y.is_dir()):
-                    #    load_model_run_baseline(seed_path)
+                    #    load_model_run_baseline(policy_path)
        print('Baseline Tracking done')
    # Then iterate over every model and monitor "ood behavior" - "is it ood?"
@@ -448,11 +448,11 @@ if __name__ == '__main__':
                for policy_path in [x for x in env_path.iterdir() if x. is_dir()]:
                    # FIXME: Pick random seed or iterate over available seeds
                    # First seed path version
-                    # seed_path = next((y for y in policy_path.iterdir() if y.is_dir()))
+                    # policy_path = next((y for y in policy_path.iterdir() if y.is_dir()))
                    # Iteration
                    start_mp_study_run(env_map, policy_path)
-                    #for seed_path in (y for y in policy_path.iterdir() if y.is_dir()):
+                    #for policy_path in (y for y in policy_path.iterdir() if y.is_dir()):
-                    #    load_model_run_study(seed_path, env_map[env_path.name][0], observation_modes[obs_mode])
+                    #    load_model_run_study(policy_path, env_map[env_path.name][0], observation_modes[obs_mode])
        print('OOD Tracking Done')
    # Plotting
--- a/studies/single_run_with_export.py
+++ b/studies/single_run_with_export.py
@@ -0,0 +1,226 @@
 import sys
 from pathlib import Path
 try:
    # noinspection PyUnboundLocalVariable
    if __package__ is None:
        DIR = Path(__file__).resolve().parent
        sys.path.insert(0, str(DIR.parent))
        __package__ = DIR.name
    else:
        DIR = None
 except NameError:
    DIR = None
    pass
 import simplejson
 from environments.helpers import ActionTranslator, ObservationTranslator
 from environments.logging.recorder import EnvRecorder
 from environments import helpers as h
 from environments.factory.factory_dirt import DirtProperties, DirtFactory
 from environments.factory.factory_item import ItemProperties, ItemFactory
 from environments.factory.factory_dest import DestProperties, DestFactory, DestModeOptions
 from environments.factory.combined_factories import DirtDestItemFactory
 from environments.logging.envmonitor import EnvMonitor
 from environments.utility_classes import MovementProperties, ObservationProperties, AgentRenderOptions
 """
 In this studie, we want to export trained Agents for debugging purposes.
 """
 def encapsule_env_factory(env_fctry, env_kwrgs):
    def _init():
        with env_fctry(**env_kwrgs) as init_env:
            return init_env
    return _init
 def load_model_run_baseline(policy_path, env_to_run):
    # retrieve model class
    model_cls = h.MODEL_MAP['A2C']
    # Load both agents
    model = model_cls.load(policy_path / 'model.zip', device='cpu')
    # Load old env kwargs
    with next(policy_path.glob('*.json')).open('r') as f:
        env_kwargs = simplejson.load(f)
        env_kwargs.update(done_at_collision=True)
    # Init Env
    with env_to_run(**env_kwargs) as env_factory:
        monitored_env_factory = EnvMonitor(env_factory)
        recorded_env_factory = EnvRecorder(monitored_env_factory)
        # Evaluation Loop for i in range(n Episodes)
        for episode in range(5):
            env_state = recorded_env_factory.reset()
            rew, done_bool = 0, False
            while not done_bool:
                action = model.predict(env_state, deterministic=True)[0]
                env_state, step_r, done_bool, info_obj = recorded_env_factory.step(action)
                rew += step_r
                if done_bool:
                    break
            print(f'Factory run {episode} done, reward is:\n    {rew}')
        recorded_env_factory.save_run(filepath=policy_path / f'monitor.pick')
        recorded_env_factory.save_records(filepath=policy_path / f'recorder.json')
 def load_model_run_combined(root_path, env_to_run, env_kwargs):
    # retrieve model class
    model_cls = h.MODEL_MAP['A2C']
    # Load both agents
    models = [model_cls.load(model_zip, device='cpu') for model_zip in root_path.rglob('model.zip')]
    # Load old env kwargs
    env_kwargs = env_kwargs.copy()
    env_kwargs.update(
        n_agents=len(models),
        done_at_collision=False)
    # Init Env
    with env_to_run(**env_kwargs) as env_factory:
        action_translator = ActionTranslator(env_factory.named_action_space,
                                             *[x.named_action_space for x in models])
        observation_translator = ObservationTranslator(env_factory.observation_space.shape[-2:],
                                                       env_factory.named_observation_space,
                                                       *[x.named_observation_space for x in models])
        monitored_env_factory = EnvMonitor(env_factory)
        recorded_env_factory = EnvRecorder(monitored_env_factory)
        # Evaluation Loop for i in range(n Episodes)
        for episode in range(5):
            env_state = recorded_env_factory.reset()
            rew, done_bool = 0, False
            while not done_bool:
                translated_observations = observation_translator(env_state)
                actions = [model.predict(translated_observations[model_idx], deterministic=True)[0]
                           for model_idx, model in enumerate(models)]
                translated_actions = action_translator(actions)
                env_state, step_r, done_bool, info_obj = recorded_env_factory.step(translated_actions)
                rew += step_r
                if done_bool:
                    break
            print(f'Factory run {episode} done, reward is:\n    {rew}')
        recorded_env_factory.save_run(filepath=policy_path / f'monitor.pick')
        recorded_env_factory.save_records(filepath=policy_path / f'recorder.json')
 if __name__ == '__main__':
    # What to do:
    train = True
    individual_run = True
    combined_run = True
    train_steps = 2e6
    frames_to_stack = 3
    # Define a global studi save path
    study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}'
    # Define Global Env Parameters
    # Define properties object parameters
    obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT,
                                      additional_agent_placeholder=None,
                                      omit_agent_self=True,
                                      frames_to_stack=frames_to_stack,
                                      pomdp_r=2, cast_shadows=True)
    move_props = MovementProperties(allow_diagonal_movement=True,
                                    allow_square_movement=True,
                                    allow_no_op=False)
    dirt_props = DirtProperties(initial_dirt_ratio=0.35, initial_dirt_spawn_r_var=0.1,
                                clean_amount=0.34,
                                max_spawn_amount=0.1, max_global_amount=20,
                                max_local_amount=1, spawn_frequency=0, max_spawn_ratio=0.05,
                                dirt_smear_amount=0.0, agent_can_interact=True)
    item_props = ItemProperties(n_items=10, agent_can_interact=True,
                                spawn_frequency=30, n_drop_off_locations=2,
                                max_agent_inventory_capacity=15)
    dest_props = DestProperties(n_dests=4, spawn_mode=DestModeOptions.GROUPED, spawn_frequency=1)
    factory_kwargs = dict(n_agents=1, max_steps=400, parse_doors=True,
                          level_name='rooms', doors_have_area=True,
                          verbose=False,
                          mv_prop=move_props,
                          obs_prop=obs_props,
                          done_at_collision=False
                          )
    # Bundle both environments with global kwargs and parameters
    env_map = {}
    env_map.update({'dirt': (DirtFactory, dict(dirt_prop=dirt_props,
                                               **factory_kwargs.copy()))})
    env_map.update({'item': (ItemFactory, dict(item_prop=item_props,
                                               **factory_kwargs.copy()))})
    env_map.update({'dest': (DestFactory, dict(dest_prop=dest_props,
                                               **factory_kwargs.copy()))})
    env_map.update({'combined': (DirtDestItemFactory, dict(dest_prop=dest_props,
                                                           item_prop=item_props,
                                                           dirt_prop=dirt_props,
                                                           **factory_kwargs.copy()))})
    env_names = list(env_map.keys())
    # Train starts here ############################################################
    # Build Major Loop  parameters, parameter versions, Env Classes and models
    if train:
        for env_key in (env_key for env_key in env_map if 'combined' != env_key):
            model_cls = h.MODEL_MAP['A2C']
            combination_path = study_root_path / env_key
            env_class, env_kwargs = env_map[env_key]
            # Output folder
            if (combination_path / 'monitor.pick').exists():
                continue
            combination_path.mkdir(parents=True, exist_ok=True)
            with env_class(**env_kwargs) as env_factory:
                param_path = combination_path / f'env_params.json'
                env_factory.save_params(param_path)
                # EnvMonitor Init
                callbacks = [EnvMonitor(env_factory)]
                # Model Init
                model = model_cls("MlpPolicy", env_factory,
                                  verbose=1, seed=69, device='cpu')
                # Model train
                model.learn(total_timesteps=int(train_steps), callback=callbacks)
                # Model save
                model.named_action_space = env_factory.unwrapped.named_action_space
                model.named_observation_space = env_factory.unwrapped.named_observation_space
                save_path = combination_path / f'model.zip'
                model.save(save_path)
                # Monitor Save
                callbacks[0].save_run(combination_path / 'monitor.pick')
                # Better be save then sorry: Clean up!
                del env_factory, model
                import gc
                gc.collect()
    # Train ends here ############################################################
    # Evaluation starts here #####################################################
    # First Iterate over every model and monitor "as trained"
    if individual_run:
        print('Start Individual Recording')
        for env_key in (env_key for env_key in env_map if 'combined' != env_key):
            # For trained policy in study_root_path / identifier
            policy_path = study_root_path / env_key
            load_model_run_baseline(policy_path, env_map[policy_path.name][0])
            # for policy_path in (y for y in policy_path.iterdir() if y.is_dir()):
            #    load_model_run_baseline(policy_path)
        print('Start Individual Training')
    # Then iterate over every model and monitor "ood behavior" - "is it ood?"
    if combined_run:
        print('Start combined run')
        for env_key in (env_key for env_key in env_map if 'combined' == env_key):
            # For trained policy in study_root_path / identifier
            factory, kwargs = env_map[env_key]
            load_model_run_combined(study_root_path, factory, kwargs)
        print('OOD Tracking Done')