Debugging done, but he does not learn

2025-06-19 10:52:54 +02:00 · 2021-08-27 16:36:19 +02:00
parent 4731f63ba6
commit 2bf9aaed15
6 changed files with 20 additions and 25 deletions
--- a/environments/factory/base/base_factory.py
+++ b/environments/factory/base/base_factory.py
@ -181,7 +181,7 @@ class BaseFactory(gym.Env):
        # Move this into a separate function?
        for action, agent in zip(actions, self[c.AGENT]):
            agent.clear_temp_sate()
-            action_obj = self._actions[action]
+            action_obj = self._actions[int(action)]
            if self._actions.is_moving_action(action_obj):
                valid = self._move_or_colide(agent, action_obj)
            elif h.EnvActions.NOOP == agent.temp_action:
@ -285,6 +285,7 @@ class BaseFactory(gym.Env):

        if r := self.pomdp_r:
            x, y = self._level_shape
+            self._padded_obs_cube[:] = c.SHADOWED_CELL.value
            self._padded_obs_cube[:, r:r + x, r:r + y] = self._obs_cube
            global_x, global_y = map(sum, zip(agent.pos, (r, r)))
            x0, x1 = max(0, global_x - self.pomdp_r), global_x + self.pomdp_r + 1
@ -321,9 +322,7 @@ class BaseFactory(gym.Env):
                light_block_map[xs, ys] = 0
            agent.temp_light_map = light_block_map
            for obs_idx in can_be_shadowed_idxs:
-                obs[obs_idx] = (obs[obs_idx] * light_block_map) - (
-                        (1 - light_block_map) * obs[0]
-                )
+                obs[obs_idx] = ((obs[obs_idx] * light_block_map) + 0.) - (1 - light_block_map)  # * obs[0])

            return obs
        else:
@ -404,7 +403,7 @@ class BaseFactory(gym.Env):
                    self.print(f'{agent.name} did just use the door at {agent.pos}.')
                    info_dict.update(door_used=1)
                else:
-                    reward -= 0.01
+                    reward -= 0.00
                    self.print(f'{agent.name} just tried to use a door at {agent.pos}, but failed.')
                    info_dict.update({f'{agent.name}_failed_action': 1})
                    info_dict.update({f'{agent.name}_failed_door_open': 1})
@ -416,6 +415,9 @@ class BaseFactory(gym.Env):
            reward += additional_reward
            info_dict.update(additional_info_dict)

+            if agent.temp_collisions:
+                self.print(f't = {self._steps}\t{agent.name} has collisions with {agent.temp_collisions}')
+
            for other_agent in agent.temp_collisions:
                info_dict.update({f'{agent.name}_vs_{other_agent.name}': 1})

--- a/environments/factory/base/registers.py
+++ b/environments/factory/base/registers.py
@ -48,7 +48,7 @@ class Register:
        return self._register.items()

    def __getitem__(self, item):
-        if isinstance(item, int):
+        if isinstance(item, (int, np.int64, np.int32)):
            try:
                return next(v for i, v in enumerate(self._register.values()) if i == item)
            except StopIteration:
--- a/environments/factory/double_task_factory.py
+++ b/environments/factory/double_task_factory.py
@ -291,7 +291,7 @@ if __name__ == '__main__':

    factory = DoubleTaskFactory(item_props, n_agents=3, done_at_collision=False, frames_to_stack=0,
                                level_name='rooms', max_steps=4000,
-                                omit_agent_slice_in_obs=True, parse_doors=True, pomdp_r=3,
+                                omit_agent_in_obs=True, parse_doors=True, pomdp_r=3,
                                record_episodes=False, verbose=False
                                )

--- a/environments/factory/simple_factory.py
+++ b/environments/factory/simple_factory.py
@ -18,12 +18,6 @@ from environments.utility_classes import MovementProperties
 CLEAN_UP_ACTION = h.EnvActions.CLEAN_UP


-class ObsSlice(Enum):
-    OWN = -1
-    LEVEL = c.LEVEL.value
-    AGENT = c.AGENT.value
-
-
 class DirtProperties(NamedTuple):
    clean_amount: int = 1               # How much does the robot clean with one actions.
    max_spawn_ratio: float = 0.2        # On max how much tiles does the dirt spawn in percent.
@ -33,7 +27,6 @@ class DirtProperties(NamedTuple):
    max_global_amount: int = 20         # Max dirt amount in the whole environment.
    dirt_smear_amount: float = 0.2      # Agents smear dirt, when not cleaning up in place.
    agent_can_interact: bool = True     # Whether the agents can interact with the dirt in this environment.
-    on_obs_slice: Enum = ObsSlice.LEVEL


 class Dirt(Entity):
@ -217,16 +210,13 @@ class SimpleFactory(BaseFactory):
        info_dict.update(dirty_tile_count=dirty_tile_count)
        info_dict.update(dirt_distribution_score=dirt_distribution_score)

-        if agent.temp_collisions:
-            self.print(f't = {self._steps}\t{agent.name} has collisions with {agent.temp_collisions}')
-
        if agent.temp_action == CLEAN_UP_ACTION:
            if agent.temp_valid:
                reward += 0.5
                self.print(f'{agent.name} did just clean up some dirt at {agent.pos}.')
                info_dict.update(dirt_cleaned=1)
            else:
-                reward -= 0.01
+                reward -= 0.00
                self.print(f'{agent.name} just tried to clean up some dirt at {agent.pos}, but failed.')
                info_dict.update({f'{agent.name}_failed_action': 1})
                info_dict.update({f'{agent.name}_failed_action': 1})
@ -244,9 +234,9 @@ if __name__ == '__main__':
    move_props = MovementProperties(True, True, False)

    factory = SimpleFactory(n_agents=1, done_at_collision=False, frames_to_stack=0,
-                            level_name='rooms', max_steps=400,
-                            omit_agent_slice_in_obs=True, parse_doors=True, pomdp_r=2,
-                            record_episodes=False, verbose=False
+                            level_name='rooms', max_steps=400, combin_agent_obs=True,
+                            omit_agent_in_obs=True, parse_doors=True, pomdp_r=2,
+                            record_episodes=False, verbose=True
                            )

    # noinspection DuplicatedCode
--- a/main.py
+++ b/main.py
@ -115,8 +115,8 @@ if __name__ == '__main__':
                               movement_properties=move_props,
                               pomdp_radius=2, max_steps=500, parse_doors=True,
                               level_name='rooms', frames_to_stack=3,
-                               omit_agent_slice_in_obs=True, combin_agent_slices_in_obs=True, record_episodes=False,
-                               cast_shadows=True, doors_have_area=False, seed=seed, verbose=True,
+                               omit_agent_in_obs=True, combin_agent_slices_in_obs=True, record_episodes=False,
+                               cast_shadows=True, doors_have_area=False, seed=seed, verbose=False,
                               ) as env:

                if modeL_type.__name__ in ["PPO", "A2C"]:
@ -151,8 +151,11 @@ if __name__ == '__main__':
                save_path.parent.mkdir(parents=True, exist_ok=True)
                model.save(save_path)
                env.save_params(out_path.parent / f'env_{model.__class__.__name__}_{time_stamp}.yaml')
+                print("Model Trained and saved")
+        print("Model Group Done.. Plotting...")

        if out_path:
            combine_runs(out_path.parent)
+    print("All Models Done... Evaluating")
    if out_path:
        compare_runs(Path('debug_out'), time_stamp, 'step_reward')
--- a/reload_agent.py
+++ b/reload_agent.py
@ -15,7 +15,7 @@ warnings.filterwarnings('ignore', category=UserWarning)

 if __name__ == '__main__':

-    model_name = 'A2C_1629467677'
+    model_name = 'A2C_1630073286'
    run_id = 0
    out_path = Path(__file__).parent / 'debug_out'
    model_path = out_path / model_name
@ -27,7 +27,7 @@ if __name__ == '__main__':
                                                         max_local_amount=1, spawn_frequency=5, max_spawn_ratio=0.05,
                                                         dirt_smear_amount=0.5),
                          combin_agent_slices_in_obs=True, omit_agent_slice_in_obs=True)
-    with DoubleTaskFactory(**env_kwargs) as env:
+    with SimpleFactory(**env_kwargs) as env:

        # Edit THIS:
        model_files = list(natsorted((model_path / f'{run_id}_{model_name}').rglob('model_*.zip')))