Debugging

2022-01-11 10:54:02 +01:00
parent 435056f373
commit 3150757347
6 changed files with 67 additions and 58 deletions
--- a/environments/factory/base/base_factory.py
+++ b/environments/factory/base/base_factory.py
@@ -35,7 +35,7 @@ class BaseFactory(gym.Env):

    @property
    def named_action_space(self):
-        return {x.identifier.value: idx for idx, x in enumerate(self._actions.values())}
+        return {x.identifier: idx for idx, x in enumerate(self._actions.values())}

    @property
    def observation_space(self):
@@ -287,7 +287,7 @@ class BaseFactory(gym.Env):
                doors.tick_doors()

        # Finalize
-        reward, reward_info = self.build_reward_result()
+        reward, reward_info = self.build_reward_result(rewards)

        info.update(reward_info)
        if self._steps >= self.max_steps:
@@ -313,8 +313,8 @@ class BaseFactory(gym.Env):
            if door is not None:
                door.use()
                valid = c.VALID
-                self.print(f'{agent.name} just used a door {door.name}')
-                info_dict = {f'{agent.name}_door_use_{door.name}': 1}
+                self.print(f'{agent.name} just used a {door.name} at {door.pos}')
+                info_dict = {f'{agent.name}_door_use': 1}
            # When he doesn't...
            else:
                valid = c.NOT_VALID
@@ -478,8 +478,7 @@ class BaseFactory(gym.Env):
        return oobs

    def get_all_tiles_with_collisions(self) -> List[Tile]:
-        tiles = [x.tile for y in self._entities for x in y if
-                 y.can_collide and not isinstance(y, WallTiles) and x.can_collide and len(x.tile.guests) > 1]
+        tiles = [x for x in self[c.FLOOR] if len(x.guests_that_can_collide) > 1]
        if False:
            tiles_with_collisions = list()
            for tile in self[c.FLOOR]:
@@ -503,11 +502,11 @@ class BaseFactory(gym.Env):
            else:
                valid = c.NOT_VALID
                self.print(f'{agent.name} just hit the wall at {agent.pos}.')
-                info_dict.update({f'{agent.pos}_wall_collide': 1})
+                info_dict.update({f'{agent.name}_wall_collide': 1})
        else:
            # Agent seems to be trying to Leave the level
            self.print(f'{agent.name} tried to leave the level {agent.pos}.')
-            info_dict.update({f'{agent.pos}_wall_collide': 1})
+            info_dict.update({f'{agent.name}_wall_collide': 1})
        reward_value = r.MOVEMENTS_VALID if valid else r.MOVEMENTS_FAIL
        reward = {'value': reward_value, 'reason': action.identifier, 'info': info_dict}
        return valid, reward
@@ -554,7 +553,7 @@ class BaseFactory(gym.Env):
    def additional_per_agent_rewards(self, agent) -> List[dict]:
        return []

-    def build_reward_result(self) -> (int, dict):
+    def build_reward_result(self, global_env_rewards: list) -> (int, dict):
        # Returns: Reward, Info
        info = defaultdict(lambda: 0.0)

@@ -584,12 +583,14 @@ class BaseFactory(gym.Env):
        combined_info_dict = dict(combined_info_dict)
        combined_info_dict.update(info)

+        global_reward_sum = sum(global_env_rewards)
        if self.individual_rewards:
            self.print(f"rewards are {comb_rewards}")
            reward = list(comb_rewards.values())
+            reward = [x + global_reward_sum for x in reward]
            return reward, combined_info_dict
        else:
-            reward = sum(comb_rewards.values())
+            reward = sum(comb_rewards.values()) + global_reward_sum
            self.print(f"reward is {reward}")
        return reward, combined_info_dict

--- a/environments/factory/factory_dirt.py
+++ b/environments/factory/factory_dirt.py
@@ -268,7 +268,7 @@ class DirtFactory(BaseFactory):

 if __name__ == '__main__':
    from environments.utility_classes import AgentRenderOptions as aro
-    render = False
+    render = True

    dirt_props = DirtProperties(
        initial_dirt_ratio=0.35,
@@ -293,11 +293,11 @@ if __name__ == '__main__':
    global_timings = []
    for i in range(10):

-        factory = DirtFactory(n_agents=2, done_at_collision=False,
+        factory = DirtFactory(n_agents=4, done_at_collision=False,
                              level_name='rooms', max_steps=1000,
                              doors_have_area=False,
                              obs_prop=obs_props, parse_doors=True,
-                              verbose=False,
+                              verbose=True,
                              mv_prop=move_props, dirt_prop=dirt_props,
                              # inject_agents=[TSPDirtAgent],
                              )
@@ -307,6 +307,7 @@ if __name__ == '__main__':
        _ = factory.observation_space
        obs_space = factory.observation_space
        obs_space_named = factory.named_observation_space
+        action_space_named = factory.named_action_space
        times = []
        for epoch in range(10):
            start_time = time.time()
--- a/environments/helpers.py
+++ b/environments/helpers.py
@@ -78,12 +78,12 @@ class EnvActions:

 class Rewards:

-    MOVEMENTS_VALID = -0.001
-    MOVEMENTS_FAIL  = -0.001
-    NOOP = -0.1
-    USE_DOOR_VALID = -0.001
-    USE_DOOR_FAIL  = -0.001
-    COLLISION      = -1
+    MOVEMENTS_VALID = -0.01
+    MOVEMENTS_FAIL  = -0.1
+    NOOP = -0.01
+    USE_DOOR_VALID = -0.01
+    USE_DOOR_FAIL  = -0.1
+    COLLISION      = -0.5


 m = EnvActions
@@ -120,7 +120,7 @@ class ObservationTranslator:

    def translate_observation(self, agent_idx: int, obs: np.ndarray):
        target_obs_space = self._per_agent_named_obs_space[agent_idx]
-        translation = [idx_space_dict['explained_idxs'] for name, idx_space_dict in target_obs_space.items()]
+        translation = [idx_space_dict for name, idx_space_dict in target_obs_space.items()]
        flat_translation = [x for y in translation for x in y]
        return np.take(obs, flat_translation, axis=1 if obs.ndim == 4 else 0)