From de821ebc0ce74813e206c4461037247a53f52708 Mon Sep 17 00:00:00 2001 From: Steffen Illium Date: Fri, 16 Jul 2021 08:07:02 +0200 Subject: [PATCH] Major Bug resolved --- environments/factory/base/base_factory.py | 46 +++++++++++++++-------- environments/factory/base/objects.py | 12 +++--- environments/factory/simple_factory.py | 11 ++---- environments/helpers.py | 4 +- main.py | 10 ++--- reload_agent.py | 13 ++++--- 6 files changed, 54 insertions(+), 42 deletions(-) diff --git a/environments/factory/base/base_factory.py b/environments/factory/base/base_factory.py index 0768ce3..f48fccc 100644 --- a/environments/factory/base/base_factory.py +++ b/environments/factory/base/base_factory.py @@ -27,19 +27,19 @@ class BaseFactory(gym.Env): @property def observation_space(self): if self.combin_agent_slices_in_obs: - agent_slice = 1 + n_agent_slices = 1 else: # not self.combin_agent_slices_in_obs: if self.omit_agent_slice_in_obs: - agent_slice = self.n_agents - 1 + n_agent_slices = self.n_agents - 1 else: # not self.omit_agent_slice_in_obs: - agent_slice = self.n_agents + n_agent_slices = self.n_agents if self.pomdp_radius: - shape = (self._obs_cube.shape[0] - agent_slice, self.pomdp_radius * 2 + 1, self.pomdp_radius * 2 + 1) + shape = (self._slices.n - n_agent_slices, self.pomdp_radius * 2 + 1, self.pomdp_radius * 2 + 1) space = spaces.Box(low=0, high=1, shape=shape, dtype=np.float32) return space else: - shape = [x-agent_slice if idx == 0 else x for idx, x in enumerate(self._obs_cube.shape)] + shape = [x-n_agent_slices if idx == 0 else x for idx, x in enumerate(self._level_shape)] space = spaces.Box(low=0, high=1, shape=shape, dtype=np.float32) return space @@ -133,7 +133,7 @@ class BaseFactory(gym.Env): # Agents agents = [] for i in range(self.n_agents): - agents.append(Slice(f'{c.AGENT.name}#{i}', np.zeros_like(level[0].slice))) + agents.append(Slice(f'{c.AGENT.name}#{i}', np.zeros_like(level[0].slice, dtype=np.float32))) state_slices.register_additional_items(level+doors+agents) # Additional Slices from SubDomains @@ -143,12 +143,14 @@ class BaseFactory(gym.Env): def _init_obs_cube(self) -> np.ndarray: x, y = self._slices.by_enum(c.LEVEL).shape - state = np.zeros((len(self._slices), x, y)) + state = np.zeros((len(self._slices), x, y), dtype=np.float32) state[0] = self._slices.by_enum(c.LEVEL).slice if r := self.pomdp_radius: - self._padded_obs_cube = np.full((len(self._slices), x + r*2, y + r*2), c.FREE_CELL.value) + self._padded_obs_cube = np.full((len(self._slices), x + r*2, y + r*2), c.FREE_CELL.value, dtype=np.float32) self._padded_obs_cube[0] = c.OCCUPIED_CELL.value self._padded_obs_cube[:, r:r+x, r:r+y] = state + if self.combin_agent_slices_in_obs and self.n_agents > 1: + self._combined_obs_cube = np.zeros(self.observation_space.shape, dtype=np.float32) return state def _init_entities(self): @@ -177,18 +179,25 @@ class BaseFactory(gym.Env): self._slices = self._init_state_slices() self._obs_cube = self._init_obs_cube() self._entitites = self._init_entities() + self.do_additional_reset() self._flush_state() self._steps = 0 - info = self._summarize_state() if self.record_episodes else {} - return None, None, None, info + obs = self._get_observations() + return obs def pre_step(self) -> None: pass - def post_step(self) -> dict: + def do_additional_reset(self) -> None: pass + def do_additional_step(self) -> dict: + return {} + + def post_step(self) -> dict: + return {} + def step(self, actions): actions = [actions] if isinstance(actions, int) or np.isscalar(actions) else actions assert isinstance(actions, Iterable), f'"actions" has to be in [{int, list}]' @@ -219,6 +228,10 @@ class BaseFactory(gym.Env): agent.temp_action = action agent.temp_valid = valid + # In-between step Hook for later use + info = self.do_additional_step() + + # Write to observation cube self._flush_state() tiles_with_collisions = self.get_all_tiles_with_collisions() @@ -237,7 +250,8 @@ class BaseFactory(gym.Env): self._doors.tick_doors() # Finalize - reward, info = self.calculate_reward() + reward, reward_info = self.calculate_reward() + info.update(reward_info) if self._steps >= self.max_steps: done = True info.update(step_reward=reward, step=self._steps) @@ -255,10 +269,10 @@ class BaseFactory(gym.Env): self._obs_cube[np.arange(len(self._slices)) != self._slices.get_idx(c.LEVEL)] = c.FREE_CELL.value if self.parse_doors: for door in self._doors: - if door.is_open: - self._obs_cube[self._slices.get_idx(c.DOORS)][door.pos] = c.IS_OPEN_DOOR.value - else: - self._obs_cube[self._slices.get_idx(c.DOORS)][door.pos] = c.IS_CLOSED_DOOR.value + if door.is_open and self._obs_cube[self._slices.get_idx(c.DOORS)][door.pos] != c.OPEN_DOOR.value: + self._obs_cube[self._slices.get_idx(c.DOORS)][door.pos] = c.OPEN_DOOR.value + elif door.is_closed and self._obs_cube[self._slices.get_idx(c.DOORS)][door.pos] != c.CLOSED_DOOR.value: + self._obs_cube[self._slices.get_idx(c.DOORS)][door.pos] = c.CLOSED_DOOR.value for agent in self._agents: self._obs_cube[self._slices.get_idx_by_name(agent.name)][agent.pos] = c.OCCUPIED_CELL.value if agent.last_pos != h.NO_POS: diff --git a/environments/factory/base/objects.py b/environments/factory/base/objects.py index 7e61524..7954841 100644 --- a/environments/factory/base/objects.py +++ b/environments/factory/base/objects.py @@ -199,7 +199,7 @@ class Door(Entity): def __init__(self, *args, context, closed_on_init=True, auto_close_interval=10): super(Door, self).__init__(*args) - self._state = c.IS_CLOSED_DOOR + self._state = c.CLOSED_DOOR self.auto_close_interval = auto_close_interval self.time_to_close = -1 neighbor_pos = list(itertools.product([-1, 1, 0], repeat=2))[:-1] @@ -215,18 +215,18 @@ class Door(Entity): @property def is_closed(self): - return self._state == c.IS_CLOSED_DOOR + return self._state == c.CLOSED_DOOR @property def is_open(self): - return self._state == c.IS_OPEN_DOOR + return self._state == c.OPEN_DOOR @property def status(self): return self._state def use(self): - if self._state == c.IS_OPEN_DOOR: + if self._state == c.OPEN_DOOR: self._close() else: self._open() @@ -239,12 +239,12 @@ class Door(Entity): def _open(self): self.connectivity.add_edges_from([(self.pos, x) for x in self.connectivity.nodes]) - self._state = c.IS_OPEN_DOOR + self._state = c.OPEN_DOOR self.time_to_close = self.auto_close_interval def _close(self): self.connectivity.remove_node(self.pos) - self._state = c.IS_CLOSED_DOOR + self._state = c.CLOSED_DOOR def is_linked(self, old_pos, new_pos): try: diff --git a/environments/factory/simple_factory.py b/environments/factory/simple_factory.py index cb05e27..5ae482e 100644 --- a/environments/factory/simple_factory.py +++ b/environments/factory/simple_factory.py @@ -99,7 +99,7 @@ class SimpleFactory(BaseFactory): free_for_dirt = self._tiles.empty_tiles # randomly distribute dirt across the grid - n_dirt_tiles = int(random.uniform(0, self.dirt_properties.max_spawn_ratio) * len(free_for_dirt)) + n_dirt_tiles = max(0, int(random.uniform(0, self.dirt_properties.max_spawn_ratio) * len(free_for_dirt))) for tile in free_for_dirt[:n_dirt_tiles]: new_value = dirt_slice[tile.pos] + self.dirt_properties.gain_amount dirt_slice[tile.pos] = min(new_value, self.dirt_properties.max_local_amount) @@ -115,7 +115,7 @@ class SimpleFactory(BaseFactory): else: return False - def post_step(self) -> dict: + def do_additional_step(self) -> dict: if smear_amount := self.dirt_properties.dirt_smear_amount: dirt_slice = self._slices.by_name(DIRT).slice for agent in self._agents: @@ -144,12 +144,9 @@ class SimpleFactory(BaseFactory): else: raise RuntimeError('This should not happen!!!') - def reset(self) -> (np.ndarray, int, bool, dict): - _ = super().reset() # state, reward, done, info ... = + def do_additional_reset(self) -> None: self.spawn_dirt() self._next_dirt_spawn = self.dirt_properties.spawn_frequency - obs = self._get_observations() - return obs def calculate_reward(self) -> (int, dict): info_dict = dict() @@ -174,7 +171,7 @@ class SimpleFactory(BaseFactory): if self._is_clean_up_action(agent.temp_action): if agent.temp_valid: - reward += 1 + reward += 0.5 self.print(f'{agent.name} did just clean up some dirt at {agent.pos}.') info_dict.update(dirt_cleaned=1) else: diff --git a/environments/helpers.py b/environments/helpers.py index fc6a514..6b058ac 100644 --- a/environments/helpers.py +++ b/environments/helpers.py @@ -17,8 +17,8 @@ class Constants(Enum): OCCUPIED_CELL = 1 DOORS = 'doors' - IS_CLOSED_DOOR = 1 - IS_OPEN_DOOR = -1 + CLOSED_DOOR = 1 + OPEN_DOOR = -1 LEVEL_IDX = 0 diff --git a/main.py b/main.py index 6fb263b..bd700fd 100644 --- a/main.py +++ b/main.py @@ -92,8 +92,8 @@ if __name__ == '__main__': from algorithms.reg_dqn import RegDQN # from sb3_contrib import QRDQN - dirt_props = DirtProperties(clean_amount=6, gain_amount=1, max_global_amount=30, - max_local_amount=5, spawn_frequency=5, max_spawn_ratio=0.05, + dirt_props = DirtProperties(clean_amount=1, gain_amount=0.1, max_global_amount=20, + max_local_amount=1, spawn_frequency=5, max_spawn_ratio=0.05, dirt_smear_amount=0.0) move_props = MovementProperties(allow_diagonal_movement=True, allow_square_movement=True, @@ -102,11 +102,11 @@ if __name__ == '__main__': out_path = None - for modeL_type in [A2C, PPO, RegDQN, DQN]: # , QRDQN]: + for modeL_type in [A2C]: # , PPO, RegDQN, DQN]: # , QRDQN]: for seed in range(3): - with SimpleFactory(n_agents=1, dirt_properties=dirt_props, pomdp_radius=2, max_steps=400, parse_doors=False, - movement_properties=move_props, level_name='rooms', frames_to_stack=4, + with SimpleFactory(n_agents=1, dirt_properties=dirt_props, pomdp_radius=2, max_steps=400, parse_doors=True, + movement_properties=move_props, level_name='rooms', frames_to_stack=0, omit_agent_slice_in_obs=True, combin_agent_slices_in_obs=True, record_episodes=False ) as env: diff --git a/reload_agent.py b/reload_agent.py index 861b3f2..a0c5283 100644 --- a/reload_agent.py +++ b/reload_agent.py @@ -14,17 +14,18 @@ warnings.filterwarnings('ignore', category=UserWarning) if __name__ == '__main__': - model_name = 'A2C_1626103200' + model_name = 'PPO_1626384768' run_id = 0 out_path = Path(__file__).parent / 'debug_out' model_path = out_path / model_name with (model_path / f'env_{model_name}.yaml').open('r') as f: env_kwargs = yaml.load(f, Loader=yaml.FullLoader) - env_kwargs.update(dirt_properties=DirtProperties(clean_amount=1, gain_amount=0.3, max_global_amount=20, - max_local_amount=2, spawn_frequency=5, max_spawn_ratio=0.05, - dirt_smear_amount=0.2), - combin_agent_slices_in_obs=True, omit_agent_slice_in_obs=True) + if False: + env_kwargs.update(dirt_properties=DirtProperties(clean_amount=1, gain_amount=0.1, max_global_amount=20, + max_local_amount=1, spawn_frequency=5, max_spawn_ratio=0.05, + dirt_smear_amount=0.5), + combin_agent_slices_in_obs=True, omit_agent_slice_in_obs=True) with SimpleFactory(**env_kwargs) as env: # Edit THIS: @@ -32,5 +33,5 @@ if __name__ == '__main__': this_model = model_files[0] model = PPO.load(this_model) - evaluation_result = evaluate_policy(model, env, n_eval_episodes=100, deterministic=True, render=True) + evaluation_result = evaluate_policy(model, env, n_eval_episodes=100, deterministic=False, render=True) print(evaluation_result)