diff --git a/environments/factory/base/base_factory.py b/environments/factory/base/base_factory.py index f863d0b..3e96e90 100644 --- a/environments/factory/base/base_factory.py +++ b/environments/factory/base/base_factory.py @@ -181,7 +181,7 @@ class BaseFactory(gym.Env): # Move this in a seperate function? for action, agent in zip(actions, self[c.AGENT]): agent.clear_temp_sate() - action_obj = self._actions[action] + action_obj = self._actions[int(action)] if self._actions.is_moving_action(action_obj): valid = self._move_or_colide(agent, action_obj) elif h.EnvActions.NOOP == agent.temp_action: @@ -285,6 +285,7 @@ class BaseFactory(gym.Env): if r := self.pomdp_r: x, y = self._level_shape + self._padded_obs_cube[:] = c.SHADOWED_CELL.value self._padded_obs_cube[:, r:r + x, r:r + y] = self._obs_cube global_x, global_y = map(sum, zip(agent.pos, (r, r))) x0, x1 = max(0, global_x - self.pomdp_r), global_x + self.pomdp_r + 1 @@ -321,9 +322,7 @@ class BaseFactory(gym.Env): light_block_map[xs, ys] = 0 agent.temp_light_map = light_block_map for obs_idx in can_be_shadowed_idxs: - obs[obs_idx] = (obs[obs_idx] * light_block_map) - ( - (1 - light_block_map) * obs[0] - ) + obs[obs_idx] = ((obs[obs_idx] * light_block_map) + 0.) - (1 - light_block_map) # * obs[0]) return obs else: @@ -404,7 +403,7 @@ class BaseFactory(gym.Env): self.print(f'{agent.name} did just use the door at {agent.pos}.') info_dict.update(door_used=1) else: - reward -= 0.01 + reward -= 0.00 self.print(f'{agent.name} just tried to use a door at {agent.pos}, but failed.') info_dict.update({f'{agent.name}_failed_action': 1}) info_dict.update({f'{agent.name}_failed_door_open': 1}) @@ -416,6 +415,9 @@ class BaseFactory(gym.Env): reward += additional_reward info_dict.update(additional_info_dict) + if agent.temp_collisions: + self.print(f't = {self._steps}\t{agent.name} has collisions with {agent.temp_collisions}') + for other_agent in agent.temp_collisions: info_dict.update({f'{agent.name}_vs_{other_agent.name}': 1}) diff --git a/environments/factory/base/registers.py b/environments/factory/base/registers.py index 600aa52..6cb9b21 100644 --- a/environments/factory/base/registers.py +++ b/environments/factory/base/registers.py @@ -48,7 +48,7 @@ class Register: return self._register.items() def __getitem__(self, item): - if isinstance(item, int): + if isinstance(item, (int, np.int64, np.int32)): try: return next(v for i, v in enumerate(self._register.values()) if i == item) except StopIteration: diff --git a/environments/factory/double_task_factory.py b/environments/factory/double_task_factory.py index 8d370bf..1f9d3e1 100644 --- a/environments/factory/double_task_factory.py +++ b/environments/factory/double_task_factory.py @@ -291,7 +291,7 @@ if __name__ == '__main__': factory = DoubleTaskFactory(item_props, n_agents=3, done_at_collision=False, frames_to_stack=0, level_name='rooms', max_steps=4000, - omit_agent_slice_in_obs=True, parse_doors=True, pomdp_r=3, + omit_agent_in_obs=True, parse_doors=True, pomdp_r=3, record_episodes=False, verbose=False ) diff --git a/environments/factory/simple_factory.py b/environments/factory/simple_factory.py index 23a07c4..667e145 100644 --- a/environments/factory/simple_factory.py +++ b/environments/factory/simple_factory.py @@ -18,12 +18,6 @@ from environments.utility_classes import MovementProperties CLEAN_UP_ACTION = h.EnvActions.CLEAN_UP -class ObsSlice(Enum): - OWN = -1 - LEVEL = c.LEVEL.value - AGENT = c.AGENT.value - - class DirtProperties(NamedTuple): clean_amount: int = 1 # How much does the robot clean with one actions. max_spawn_ratio: float = 0.2 # On max how much tiles does the dirt spawn in percent. @@ -33,7 +27,6 @@ class DirtProperties(NamedTuple): max_global_amount: int = 20 # Max dirt amount in the whole environment. dirt_smear_amount: float = 0.2 # Agents smear dirt, when not cleaning up in place. agent_can_interact: bool = True # Whether the agents can interact with the dirt in this environment. - on_obs_slice: Enum = ObsSlice.LEVEL class Dirt(Entity): @@ -217,16 +210,13 @@ class SimpleFactory(BaseFactory): info_dict.update(dirty_tile_count=dirty_tile_count) info_dict.update(dirt_distribution_score=dirt_distribution_score) - if agent.temp_collisions: - self.print(f't = {self._steps}\t{agent.name} has collisions with {agent.temp_collisions}') - if agent.temp_action == CLEAN_UP_ACTION: if agent.temp_valid: reward += 0.5 self.print(f'{agent.name} did just clean up some dirt at {agent.pos}.') info_dict.update(dirt_cleaned=1) else: - reward -= 0.01 + reward -= 0.00 self.print(f'{agent.name} just tried to clean up some dirt at {agent.pos}, but failed.') info_dict.update({f'{agent.name}_failed_action': 1}) info_dict.update({f'{agent.name}_failed_action': 1}) @@ -244,9 +234,9 @@ if __name__ == '__main__': move_props = MovementProperties(True, True, False) factory = SimpleFactory(n_agents=1, done_at_collision=False, frames_to_stack=0, - level_name='rooms', max_steps=400, - omit_agent_slice_in_obs=True, parse_doors=True, pomdp_r=2, - record_episodes=False, verbose=False + level_name='rooms', max_steps=400, combin_agent_obs=True, + omit_agent_in_obs=True, parse_doors=True, pomdp_r=2, + record_episodes=False, verbose=True ) # noinspection DuplicatedCode diff --git a/main.py b/main.py index 830523b..5967248 100644 --- a/main.py +++ b/main.py @@ -115,8 +115,8 @@ if __name__ == '__main__': movement_properties=move_props, pomdp_radius=2, max_steps=500, parse_doors=True, level_name='rooms', frames_to_stack=3, - omit_agent_slice_in_obs=True, combin_agent_slices_in_obs=True, record_episodes=False, - cast_shadows=True, doors_have_area=False, seed=seed, verbose=True, + omit_agent_in_obs=True, combin_agent_slices_in_obs=True, record_episodes=False, + cast_shadows=True, doors_have_area=False, seed=seed, verbose=False, ) as env: if modeL_type.__name__ in ["PPO", "A2C"]: @@ -151,8 +151,11 @@ if __name__ == '__main__': save_path.parent.mkdir(parents=True, exist_ok=True) model.save(save_path) env.save_params(out_path.parent / f'env_{model.__class__.__name__}_{time_stamp}.yaml') + print("Model Trained and saved") + print("Model Group Done.. Plotting...") if out_path: combine_runs(out_path.parent) + print("All Models Done... Evaluating") if out_path: compare_runs(Path('debug_out'), time_stamp, 'step_reward') diff --git a/reload_agent.py b/reload_agent.py index 74f68d5..f018df7 100644 --- a/reload_agent.py +++ b/reload_agent.py @@ -15,7 +15,7 @@ warnings.filterwarnings('ignore', category=UserWarning) if __name__ == '__main__': - model_name = 'A2C_1629467677' + model_name = 'A2C_1630073286' run_id = 0 out_path = Path(__file__).parent / 'debug_out' model_path = out_path / model_name @@ -27,7 +27,7 @@ if __name__ == '__main__': max_local_amount=1, spawn_frequency=5, max_spawn_ratio=0.05, dirt_smear_amount=0.5), combin_agent_slices_in_obs=True, omit_agent_slice_in_obs=True) - with DoubleTaskFactory(**env_kwargs) as env: + with SimpleFactory(**env_kwargs) as env: # Edit THIS: model_files = list(natsorted((model_path / f'{run_id}_{model_name}').rglob('model_*.zip')))