Experiments look good

2022-01-15 12:37:58 +01:00
parent d29ccbbb71
commit 823aa075b9
14 changed files with 478 additions and 297 deletions
--- a/environments/factory/base/base_factory.py
+++ b/environments/factory/base/base_factory.py
@ -15,8 +15,8 @@ from environments import helpers as h
 from environments.helpers import Constants as c
 from environments.helpers import EnvActions as a
 from environments.helpers import Rewards as r
-from environments.factory.base.objects import Agent, Tile, Action
-from environments.factory.base.registers import Actions, Entities, Agents, Doors, FloorTiles, WallTiles, PlaceHolders, \
+from environments.factory.base.objects import Agent, Floor, Action
+from environments.factory.base.registers import Actions, Entities, Agents, Doors, Floors, Walls, PlaceHolders, \
    GlobalPositions
 from environments.utility_classes import MovementProperties, ObservationProperties, MarlFrameStack
 from environments.utility_classes import AgentRenderOptions as a_obs
@ -121,7 +121,7 @@ class BaseFactory(gym.Env):
        self.doors_have_area = doors_have_area
        self.individual_rewards = individual_rewards

-        # Reset
+        # TODO: Reset ---> document this
        self.reset()

    def __getitem__(self, item):
@ -141,21 +141,21 @@ class BaseFactory(gym.Env):
        self._obs_shape = self._level_shape if not self.obs_prop.pomdp_r else (self.pomdp_diameter, ) * 2

        # Walls
-        walls = WallTiles.from_argwhere_coordinates(
+        walls = Walls.from_argwhere_coordinates(
            np.argwhere(level_array == c.OCCUPIED_CELL),
            self._level_shape
        )
        self._entities.register_additional_items({c.WALLS: walls})

        # Floor
-        floor = FloorTiles.from_argwhere_coordinates(
+        floor = Floors.from_argwhere_coordinates(
            np.argwhere(level_array == c.FREE_CELL),
            self._level_shape
        )
        self._entities.register_additional_items({c.FLOOR: floor})

        # NOPOS
-        self._NO_POS_TILE = Tile(c.NO_POS, None)
+        self._NO_POS_TILE = Floor(c.NO_POS, None)

        # Doors
        if self.parse_doors:
@ -170,7 +170,7 @@ class BaseFactory(gym.Env):

        # Actions
        self._actions = Actions(self.mv_prop, can_use_doors=self.parse_doors)
-        if additional_actions := self.additional_actions:
+        if additional_actions := self.actions_hook:
            self._actions.register_additional_items(additional_actions)

        # Agents
@ -202,7 +202,7 @@ class BaseFactory(gym.Env):
            self._entities.register_additional_items({c.AGENT_PLACEHOLDER: placeholder})

        # Additional Entities from SubEnvs
-        if additional_entities := self.additional_entities:
+        if additional_entities := self.entities_hook:
            self._entities.register_additional_items(additional_entities)

        if self.obs_prop.show_global_position_info:
@ -217,7 +217,7 @@ class BaseFactory(gym.Env):

    def reset(self) -> (np.typing.ArrayLike, int, bool, dict):
        _ = self._base_init_env()
-        self.do_additional_reset()
+        self.reset_hook()

        self._steps = 0

@ -233,7 +233,7 @@ class BaseFactory(gym.Env):
        self._steps += 1

        # Pre step Hook for later use
-        self.hook_pre_step()
+        self.pre_step_hook()

        for action, agent in zip(actions, self[c.AGENT]):
            agent.clear_temp_state()
@ -244,7 +244,7 @@ class BaseFactory(gym.Env):
                action_valid, reward = self._do_move_action(agent, action_obj)
            elif a.NOOP == action_obj:
                action_valid = c.VALID
-                reward = dict(value=r.NOOP, reason=a.NOOP, info={f'{agent.pos}_NOOP': 1})
+                reward = dict(value=r.NOOP, reason=a.NOOP, info={f'{agent.name}_NOOP': 1, 'NOOP': 1})
            elif a.USE_DOOR == action_obj:
                action_valid, reward = self._handle_door_interaction(agent)
            else:
@ -258,7 +258,7 @@ class BaseFactory(gym.Env):
            agent.step_result = step_result

        # Additional step and Reward, Info Init
-        rewards, info = self.do_additional_step()
+        rewards, info = self.step_hook()
        # Todo: Make this faster, so that only tiles of entities that can collide are searched.
        tiles_with_collisions = self.get_all_tiles_with_collisions()
        for tile in tiles_with_collisions:
@ -297,7 +297,7 @@ class BaseFactory(gym.Env):
            info.update(self._summarize_state())

        # Post step Hook for later use
-        info.update(self.hook_post_step())
+        info.update(self.post_step_hook())

        obs, _ = self._build_observations()

@ -314,11 +314,11 @@ class BaseFactory(gym.Env):
                door.use()
                valid = c.VALID
                self.print(f'{agent.name} just used a {door.name} at {door.pos}')
-                info_dict = {f'{agent.name}_door_use': 1}
+                info_dict = {f'{agent.name}_door_use': 1, f'door_use': 1}
            # When he doesn't...
            else:
                valid = c.NOT_VALID
-                info_dict = {f'{agent.name}_failed_door_use': 1}
+                info_dict = {f'{agent.name}_failed_door_use': 1, 'failed_door_use': 1}
                self.print(f'{agent.name} just tried to use a door at {agent.pos}, but there is none.')

        else:
@ -334,7 +334,7 @@ class BaseFactory(gym.Env):
        per_agent_obsn = dict()
        # General Observations
        lvl_obs = self[c.WALLS].as_array()
-        door_obs = self[c.DOORS].as_array()
+        door_obs = self[c.DOORS].as_array() if self.parse_doors else None
        if self.obs_prop.render_agents == a_obs.NOT:
            global_agent_obs = None
        elif self.obs_prop.omit_agent_self and self.n_agents == 1:
@ -342,7 +342,7 @@ class BaseFactory(gym.Env):
        else:
            global_agent_obs = self[c.AGENT].as_array().copy()
        placeholder_obs = self[c.AGENT_PLACEHOLDER].as_array() if self[c.AGENT_PLACEHOLDER] else None
-        add_obs_dict = self._additional_observations()
+        add_obs_dict = self.observations_hook()

        for agent_idx, agent in enumerate(self[c.AGENT]):
            obs_dict = dict()
@ -367,17 +367,17 @@ class BaseFactory(gym.Env):

            obs_dict[c.WALLS] = lvl_obs
            if self.obs_prop.render_agents in [a_obs.SEPERATE, a_obs.COMBINED] and agent_obs is not None:
-                obs_dict[c.AGENT] = agent_obs
+                obs_dict[c.AGENT] = agent_obs[:]
            if self[c.AGENT_PLACEHOLDER] and placeholder_obs is not None:
                obs_dict[c.AGENT_PLACEHOLDER] = placeholder_obs
            if self.parse_doors and door_obs is not None:
-                obs_dict[c.DOORS] = door_obs
+                obs_dict[c.DOORS] = door_obs[:]
            obs_dict.update(add_obs_dict)
            obsn = np.vstack(list(obs_dict.values()))
            if self.obs_prop.pomdp_r:
                obsn = self._do_pomdp_cutout(agent, obsn)

-            raw_obs = self._additional_per_agent_raw_observations(agent)
+            raw_obs = self.per_agent_raw_observations_hook(agent)
            raw_obs = {key: np.expand_dims(val, 0) if val.ndim != 3 else val for key, val in raw_obs.items()}
            obsn = np.vstack((obsn, *raw_obs.values()))

@ -387,6 +387,12 @@ class BaseFactory(gym.Env):
                                              zip(keys, idxs, list(idxs[1:]) + [idxs[-1]+1, ])}

            # Shadow Casting
+            if agent.step_result is not None:
+                pass
+            else:
+                assert self._steps == 0
+                agent.step_result = {'action_name': a.NOOP, 'action_valid': True,
+                                     'collisions': [], 'lightmap': None}
            if self.obs_prop.cast_shadows:
                try:
                    light_block_obs = [obs_idx for key, obs_idx in per_agent_expl_idx[agent.name].items()
@ -430,17 +436,15 @@ class BaseFactory(gym.Env):
                if door_shadowing:
                    # noinspection PyUnboundLocalVariable
                    light_block_map[xs, ys] = 0
-                if agent.step_result:
-                    agent.step_result['lightmap'] = light_block_map
-                    pass
-                else:
-                    assert self._steps == 0
-                    agent.step_result = {'action_name': a.NOOP, 'action_valid': True,
-                                         'collisions': [], 'lightmap': light_block_map}
+
+                agent.step_result['lightmap'] = light_block_map

                obsn[shadowed_obs] = ((obsn[shadowed_obs] * light_block_map) + 0.) - (1 - light_block_map)
            else:
-                pass
+                if self._pomdp_r:
+                    agent.step_result['lightmap'] = np.ones(self._obs_shape)
+                else:
+                    agent.step_result['lightmap'] = None

            per_agent_obsn[agent.name] = obsn

@ -484,7 +488,7 @@ class BaseFactory(gym.Env):
            oobs = np.pad(oobs, ((0, 0), (x0_pad, x1_pad), (y0_pad, y1_pad)), 'constant')
        return oobs

-    def get_all_tiles_with_collisions(self) -> List[Tile]:
+    def get_all_tiles_with_collisions(self) -> List[Floor]:
        tiles = [x for x in self[c.FLOOR] if len(x.guests_that_can_collide) > 1]
        if False:
            tiles_with_collisions = list()
@ -503,22 +507,22 @@ class BaseFactory(gym.Env):
            valid = agent.move(new_tile)
            if valid:
                # This will spam your logs, beware!
-                # self.print(f'{agent.name} just moved from {agent.last_pos} to {agent.pos}.')
-                # info_dict.update({f'{agent.pos}_move': 1})
+                self.print(f'{agent.name} just moved {action.identifier} from {agent.last_pos} to {agent.pos}.')
+                info_dict.update({f'{agent.name}_move': 1, 'move': 1})
                pass
            else:
                valid = c.NOT_VALID
-                self.print(f'{agent.name} just hit the wall at {agent.pos}.')
-                info_dict.update({f'{agent.name}_wall_collide': 1})
+                self.print(f'{agent.name} just hit the wall at {agent.pos}. ({action.identifier})')
+                info_dict.update({f'{agent.name}_wall_collide': 1, 'wall_collide': 1})
        else:
            # Agent seems to be trying to Leave the level
-            self.print(f'{agent.name} tried to leave the level {agent.pos}.')
-            info_dict.update({f'{agent.name}_wall_collide': 1})
+            self.print(f'{agent.name} tried to leave the level {agent.pos}. ({action.identifier})')
+            info_dict.update({f'{agent.name}_wall_collide': 1, 'wall_collide': 1})
        reward_value = r.MOVEMENTS_VALID if valid else r.MOVEMENTS_FAIL
        reward = {'value': reward_value, 'reason': action.identifier, 'info': info_dict}
        return valid, reward

-    def _check_agent_move(self, agent, action: Action) -> (Tile, bool):
+    def _check_agent_move(self, agent, action: Action) -> (Floor, bool):
        # Actions
        x_diff, y_diff = h.ACTIONMAP[action.identifier]
        x_new = agent.x + x_diff
@ -556,10 +560,6 @@ class BaseFactory(gym.Env):

        return new_tile, valid

-    @abc.abstractmethod
-    def additional_per_agent_rewards(self, agent) -> List[dict]:
-        return []
-
    def build_reward_result(self, global_env_rewards: list) -> (int, dict):
        # Returns: Reward, Info
        info = defaultdict(lambda: 0.0)
@ -567,7 +567,7 @@ class BaseFactory(gym.Env):
        # Gather additional sub-env rewards and calculate collisions
        for agent in self[c.AGENT]:

-            rewards = self.additional_per_agent_rewards(agent)
+            rewards = self.per_agent_reward_hook(agent)
            for reward in rewards:
                agent.step_result['rewards'].append(reward)
            if collisions := agent.step_result['collisions']:
@ -601,6 +601,12 @@ class BaseFactory(gym.Env):
            self.print(f"reward is {reward}")
        return reward, combined_info_dict

+    def start_recording(self):
+        self._record_episodes = True
+
+    def stop_recording(self):
+        self._record_episodes = False
+
    # noinspection PyGlobalUndefined
    def render(self, mode='human'):
        if not self._renderer:  # lazy init
@ -621,7 +627,7 @@ class BaseFactory(gym.Env):
            for i, door in enumerate(self[c.DOORS]):
                name, state = 'door_open' if door.is_open else 'door_closed', 'blank'
                doors.append(RenderEntity(name, door.pos, 1, 'none', state, i + 1))
-        additional_assets = self.render_additional_assets()
+        additional_assets = self.render_assets_hook()

        return self._renderer.render(walls + doors + additional_assets + agents)

@ -652,7 +658,8 @@ class BaseFactory(gym.Env):

    # Properties which are called by the base class to extend beyond attributes of the base class
    @property
-    def additional_actions(self) -> Union[Action, List[Action]]:
+    @abc.abstractmethod
+    def actions_hook(self) -> Union[Action, List[Action]]:
        """
        When inheriting from this base class, you must implement this method!

@ -662,7 +669,8 @@ class BaseFactory(gym.Env):
        return []

    @property
-    def additional_entities(self) -> Dict[(str, Entities)]:
+    @abc.abstractmethod
+    def entities_hook(self) -> Dict[(str, Entities)]:
        """
        When inheriting from this base class, you must implement this method!

@ -674,27 +682,39 @@ class BaseFactory(gym.Env):
    # Functions which provide additions to functions of the base class
    #  Always call super!!!!!!
    @abc.abstractmethod
-    def do_additional_reset(self) -> None:
+    def reset_hook(self) -> None:
        pass

    @abc.abstractmethod
-    def do_additional_step(self) -> (List[dict], dict):
-        return [], {}
+    def pre_step_hook(self) -> None:
+        pass

    @abc.abstractmethod
    def do_additional_actions(self, agent: Agent, action: Action) -> (bool, dict):
        return None

+    @abc.abstractmethod
+    def step_hook(self) -> (List[dict], dict):
+        return [], {}
+
    @abc.abstractmethod
    def check_additional_done(self) -> (bool, dict):
        return False, {}

    @abc.abstractmethod
-    def _additional_observations(self) -> Dict[str, np.typing.ArrayLike]:
+    def observations_hook(self) -> Dict[str, np.typing.ArrayLike]:
        return {}

    @abc.abstractmethod
-    def _additional_per_agent_raw_observations(self, agent) -> Dict[str, np.typing.ArrayLike]:
+    def per_agent_reward_hook(self, agent: Agent) -> Dict[str, dict]:
+        return {}
+
+    @abc.abstractmethod
+    def post_step_hook(self) -> dict:
+        return {}
+
+    @abc.abstractmethod
+    def per_agent_raw_observations_hook(self, agent) -> Dict[str, np.typing.ArrayLike]:
        additional_raw_observations = {}
        if self.obs_prop.show_global_position_info:
            global_pos_obs = np.zeros(self._obs_shape)
@ -703,19 +723,5 @@ class BaseFactory(gym.Env):
        return additional_raw_observations

    @abc.abstractmethod
-    def additional_per_agent_reward(self, agent: Agent) -> Dict[str, dict]:
-        return {}
-
-    @abc.abstractmethod
-    def render_additional_assets(self):
+    def render_assets_hook(self):
        return []
-
-    # Hooks for in between operations.
-    #  Always call super!!!!!!
-    @abc.abstractmethod
-    def hook_pre_step(self) -> None:
-        pass
-
-    @abc.abstractmethod
-    def hook_post_step(self) -> dict:
-        return {}
--- a/environments/factory/base/objects.py
+++ b/environments/factory/base/objects.py
@ -9,10 +9,11 @@ from environments.helpers import Constants as c
 import itertools

 ##########################################################################
-# ##################### Base Object Definition ######################### #
+# ##################### Base Object Building Blocks ######################### #
 ##########################################################################


+# TODO: Missing Documentation
 class Object:

    """General objects for organisation and maintenance, such as Actions etc."""
@ -53,8 +54,10 @@ class Object:

    def __eq__(self, other) -> bool:
        return other == self.identifier
+# Base


+# TODO: Missing Documentation
 class EnvObject(Object):

    """Objects that hold observable information but have no position on the env grid, e.g. inventories."""
@ -78,27 +81,10 @@ class EnvObject(Object):
        self._register.delete_env_object(self)
        self._register = register
        return self._register == register
+# With Rendering


-class BoundingMixin(Object):
-
-    @property
-    def bound_entity(self):
-        return self._bound_entity
-
-    def __init__(self,entity_to_be_bound, *args, **kwargs):
-        super(BoundingMixin, self).__init__(*args, **kwargs)
-        assert entity_to_be_bound is not None
-        self._bound_entity = entity_to_be_bound
-
-    @property
-    def name(self):
-        return f'{super(BoundingMixin, self).name}({self._bound_entity.name})'
-
-    def belongs_to_entity(self, entity):
-        return entity == self.bound_entity
-
-
+# TODO: Missing Documentation
 class Entity(EnvObject):
    """Full Env Entity that lives on the env Grid. Doors, Items, Dirt etc..."""

@ -133,8 +119,10 @@ class Entity(EnvObject):

    def __repr__(self):
        return super(Entity, self).__repr__() + f'(@{self.pos})'
+# With Position in Env


+# TODO: Missing Documentation
 class MoveableEntity(Entity):

    @property
@ -169,6 +157,27 @@ class MoveableEntity(Entity):
            return c.VALID
        else:
            return c.NOT_VALID
+# Can Move
+
+
+# TODO: Missing Documentation
+class BoundingMixin(Object):
+
+    @property
+    def bound_entity(self):
+        return self._bound_entity
+
+    def __init__(self,entity_to_be_bound, *args, **kwargs):
+        super(BoundingMixin, self).__init__(*args, **kwargs)
+        assert entity_to_be_bound is not None
+        self._bound_entity = entity_to_be_bound
+
+    @property
+    def name(self):
+        return f'{super(BoundingMixin, self).name}({self._bound_entity.name})'
+
+    def belongs_to_entity(self, entity):
+        return entity == self.bound_entity


 ##########################################################################
@ -216,7 +225,7 @@ class GlobalPosition(BoundingMixin, EnvObject):
        self._normalized = normalized


-class Tile(EnvObject):
+class Floor(EnvObject):

    @property
    def encoding(self):
@ -243,7 +252,7 @@ class Tile(EnvObject):
        return self._pos

    def __init__(self, pos, *args, **kwargs):
-        super(Tile, self).__init__(*args, **kwargs)
+        super(Floor, self).__init__(*args, **kwargs)
        self._guests = dict()
        self._pos = tuple(pos)

@ -277,7 +286,7 @@ class Tile(EnvObject):
        return dict(name=self.name, x=int(self.x), y=int(self.y))


-class Wall(Tile):
+class Wall(Floor):

    @property
    def can_collide(self):
@ -302,7 +311,7 @@ class Door(Entity):
    @property
    def encoding(self):
        # This is important, as shadowing is checked by the occupation value
-        return c.OCCUPIED_CELL if self.is_closed else 2
+        return c.OCCUPIED_CELL if self.is_closed else 0.5

    @property
    def str_state(self):
@ -396,5 +405,5 @@ class Agent(MoveableEntity):

    def summarize_state(self, **kwargs):
        state_dict = super().summarize_state(**kwargs)
-        state_dict.update(valid=bool(self.temp_action_result['valid']), action=str(self.temp_action_result['action']))
+        state_dict.update(valid=bool(self.step_result['action_valid']), action=str(self.step_result['action_name']))
        return state_dict
--- a/environments/factory/base/registers.py
+++ b/environments/factory/base/registers.py
@ -6,7 +6,7 @@ from typing import List, Union, Dict, Tuple
 import numpy as np
 import six

-from environments.factory.base.objects import Entity, Tile, Agent, Door, Action, Wall, PlaceHolder, GlobalPosition, \
+from environments.factory.base.objects import Entity, Floor, Agent, Door, Action, Wall, PlaceHolder, GlobalPosition, \
    Object, EnvObject
 from environments.utility_classes import MovementProperties
 from environments import helpers as h
@ -271,12 +271,9 @@ class GlobalPositions(EnvObjectRegister):

    _accepted_objects = GlobalPosition

-    is_blocking_light = False
-    can_be_shadowed = False
-    can_collide = False
-
    def __init__(self, *args, **kwargs):
-        super(GlobalPositions, self).__init__(*args, is_per_agent=True, individual_slices=True, **kwargs)
+        super(GlobalPositions, self).__init__(*args, is_per_agent=True, individual_slices=True, is_blocking_light = False,
+                                              can_be_shadowed = False, can_collide = False, **kwargs)

    def as_array(self):
        # FIXME DEBUG!!! make this lazy?
@ -377,7 +374,7 @@ class Entities(ObjectRegister):
        return found_entities


-class WallTiles(EntityRegister):
+class Walls(EntityRegister):
    _accepted_objects = Wall

    def as_array(self):
@ -390,9 +387,9 @@ class WallTiles(EntityRegister):
        return self._array

    def __init__(self, *args, is_blocking_light=True, **kwargs):
-        super(WallTiles, self).__init__(*args, individual_slices=False,
-                                        can_collide=True,
-                                        is_blocking_light=is_blocking_light, **kwargs)
+        super(Walls, self).__init__(*args, individual_slices=False,
+                                    can_collide=True,
+                                    is_blocking_light=is_blocking_light, **kwargs)
        self._value = c.OCCUPIED_CELL

    @classmethod
@ -411,16 +408,16 @@ class WallTiles(EntityRegister):

    def summarize_states(self, n_steps=None):
        if n_steps == h.STEPS_START:
-            return super(WallTiles, self).summarize_states(n_steps=n_steps)
+            return super(Walls, self).summarize_states(n_steps=n_steps)
        else:
            return {}


-class FloorTiles(WallTiles):
-    _accepted_objects = Tile
+class Floors(Walls):
+    _accepted_objects = Floor

    def __init__(self, *args, is_blocking_light=False, **kwargs):
-        super(FloorTiles, self).__init__(*args, is_blocking_light=is_blocking_light, **kwargs)
+        super(Floors, self).__init__(*args, is_blocking_light=is_blocking_light, **kwargs)
        self._value = c.FREE_CELL

    @property
@ -430,7 +427,7 @@ class FloorTiles(WallTiles):
        return tiles

    @property
-    def empty_tiles(self) -> List[Tile]:
+    def empty_tiles(self) -> List[Floor]:
        tiles = [tile for tile in self if tile.is_empty()]
        random.shuffle(tiles)
        return tiles