From 50c0d90c777ad87ee96650ce3431561f1ecf708b Mon Sep 17 00:00:00 2001 From: Steffen Illium Date: Tue, 7 Sep 2021 17:41:15 +0200 Subject: [PATCH] Error Resolvement --- environments/factory/base/base_factory.py | 50 ++++++++++++++--------- environments/factory/base/registers.py | 5 ++- environments/factory/factory_item.py | 31 +++++++------- main.py | 9 ++-- 4 files changed, 55 insertions(+), 40 deletions(-) diff --git a/environments/factory/base/base_factory.py b/environments/factory/base/base_factory.py index 8930f79..b1b70b1 100644 --- a/environments/factory/base/base_factory.py +++ b/environments/factory/base/base_factory.py @@ -278,22 +278,23 @@ class BaseFactory(gym.Env): for key, array in state_array_dict.items(): # Flush state array object representation to obs cube - if self[key].is_per_agent: - per_agent_idx = self[key].idx_by_entity(agent) - z = 1 - self._obs_cube[running_idx: running_idx+z] = array[per_agent_idx] - else: - z = array.shape[0] - self._obs_cube[running_idx: running_idx+z] = array - # Define which OBS SLices cast a Shadow - if self[key].is_blocking_light: - for i in range(z): - shadowing_idxs.append(running_idx + i) - # Define which OBS SLices are effected by shadows - if self[key].can_be_shadowed: - for i in range(z): - can_be_shadowed_idxs.append(running_idx + i) - running_idx += z + if not self[key].hide_from_obs_builder: + if self[key].is_per_agent: + per_agent_idx = self[key].idx_by_entity(agent) + z = 1 + self._obs_cube[running_idx: running_idx+z] = array[per_agent_idx] + else: + z = array.shape[0] + self._obs_cube[running_idx: running_idx+z] = array + # Define which OBS SLices cast a Shadow + if self[key].is_blocking_light: + for i in range(z): + shadowing_idxs.append(running_idx + i) + # Define which OBS SLices are effected by shadows + if self[key].can_be_shadowed: + for i in range(z): + can_be_shadowed_idxs.append(running_idx + i) + running_idx += z if agent_pos_is_omitted: state_array_dict[c.AGENT][0, agent.x, agent.y] += agent.encoding @@ -341,10 +342,14 @@ class BaseFactory(gym.Env): agent.temp_light_map = light_block_map for obs_idx in can_be_shadowed_idxs: obs[obs_idx] = ((obs[obs_idx] * light_block_map) + 0.) - (1 - light_block_map) # * obs[0]) - - return obs else: - return obs + pass + + for additional_obs in self.additional_obs_build(): + obs[running_idx:running_idx+additional_obs.shape[0]] = additional_obs + running_idx += additional_obs.shape[0] + + return obs def get_all_tiles_with_collisions(self) -> List[Tile]: tiles_with_collisions = list() @@ -479,7 +484,8 @@ class BaseFactory(gym.Env): summary = {f'{REC_TAC}step': self._steps} if self._steps == 0: - summary.update({f'{REC_TAC}{self[c.WALLS].name}': {self[c.WALLS].summarize_states()}}) + summary.update({f'{REC_TAC}{self[c.WALLS].name}': {self[c.WALLS].summarize_states()}, + 'FactoryName': self.__class__.__name__}) for entity_group in self._entities: if not isinstance(entity_group, WallTiles): summary.update({f'{REC_TAC}{entity_group.name}': entity_group.summarize_states()}) @@ -512,6 +518,10 @@ class BaseFactory(gym.Env): # Functions which provide additions to functions of the base class # Always call super!!!!!! + @abc.abstractmethod + def additional_obs_build(self) -> List[np.ndarray]: + return [] + @abc.abstractmethod def do_additional_reset(self) -> None: pass diff --git a/environments/factory/base/registers.py b/environments/factory/base/registers.py index 52ebd60..ed4fa13 100644 --- a/environments/factory/base/registers.py +++ b/environments/factory/base/registers.py @@ -63,6 +63,9 @@ class Register: class ObjectRegister(Register): + + hide_from_obs_builder = False + def __init__(self, level_shape: (int, int), *args, individual_slices=False, is_per_agent=False, **kwargs): super(ObjectRegister, self).__init__(*args, **kwargs) self.is_per_agent = is_per_agent @@ -76,7 +79,7 @@ class ObjectRegister(Register): self._array = np.zeros((1, *self._level_shape)) else: if self.individual_slices: - self._array = np.concatenate((self._array, np.zeros((1, *self._level_shape)))) + self._array = np.concatenate((self._array, np.zeros((1, *self._array.shape[1:])))) def summarize_states(self): return [val.summarize_state() for val in self.values()] diff --git a/environments/factory/factory_item.py b/environments/factory/factory_item.py index e97b8e7..3b7021a 100644 --- a/environments/factory/factory_item.py +++ b/environments/factory/factory_item.py @@ -9,7 +9,7 @@ from environments.helpers import Constants as c from environments import helpers as h from environments.factory.base.objects import Agent, Entity, Action, Tile, MoveableEntity from environments.factory.base.registers import Entities, EntityObjectRegister, ObjectRegister, \ - MovingEntityObjectRegister + MovingEntityObjectRegister, Register from environments.factory.renderer import RenderEntity @@ -66,22 +66,19 @@ class Inventory(UserList): def __init__(self, pomdp_r: int, level_shape: (int, int), agent: Agent, capacity: int): super(Inventory, self).__init__() self.agent = agent - self.capacity = capacity self.pomdp_r = pomdp_r self._level_shape = level_shape - self._array = np.zeros((1, *self._level_shape)) + if self.pomdp_r: + self._array = np.zeros((1, pomdp_r * 2 + 1, pomdp_r * 2 + 1)) + else: + self._array = np.zeros((1, *self._level_shape)) + self.capacity = min(capacity, self._array.size) def as_array(self): self._array[:] = c.FREE_CELL.value - max_x = self.pomdp_r * 2 + 1 if self.pomdp_r else self._level_shape[0] - if self.pomdp_r: - x, y = max(self.agent.x - self.pomdp_r, 0), max(self.agent.y - self.pomdp_r, 0) - else: - x, y = (0, 0) - for item_idx, item in enumerate(self): - x_diff, y_diff = divmod(item_idx, max_x) - self._array[0, int(x + x_diff), int(y + y_diff)] = item.encoding + x_diff, y_diff = divmod(item_idx, self._array.shape[1]) + self._array[0, int(x_diff), int(y_diff)] = item.encoding return self._array def __repr__(self): @@ -105,8 +102,9 @@ class Inventories(ObjectRegister): _accepted_objects = Inventory is_blocking_light = False can_be_shadowed = False + hide_from_obs_builder = True - def __init__(self, *args, **kwargs): + def __init__(self, *args, pomdp_r=0, **kwargs): super(Inventories, self).__init__(*args, is_per_agent=True, individual_slices=True, **kwargs) self.is_observable = True @@ -213,13 +211,18 @@ class ItemFactory(BaseFactory): empty_tiles = self[c.FLOOR].empty_tiles[:self.item_properties.n_items] item_register.spawn_items(empty_tiles) - inventories = Inventories(self._level_shape) + inventories = Inventories(self._level_shape if not self.pomdp_r else ((self.pomdp_diameter,) * 2)) inventories.spawn_inventories(self[c.AGENT], self.pomdp_r, self.item_properties.max_agent_inventory_capacity) super_entities.update({c.DROP_OFF: drop_offs, c.ITEM: item_register, c.INVENTORY: inventories}) return super_entities + def additional_obs_build(self) -> List[np.ndarray]: + super_additional_obs_build = super().additional_obs_build() + super_additional_obs_build.append(self[c.INVENTORY].as_array()) + return super_additional_obs_build + def do_item_action(self, agent: Agent): inventory = self[c.INVENTORY].by_entity(agent) if drop_off := self[c.DROP_OFF].by_pos(agent.pos): @@ -285,7 +288,7 @@ class ItemFactory(BaseFactory): if self[c.DROP_OFF].by_pos(agent.pos): info_dict.update({f'{agent.name}_item_dropoff': 1}) - reward += 1 + reward += 0.5 else: info_dict.update({f'{agent.name}_item_pickup': 1}) reward += 0.1 diff --git a/main.py b/main.py index 07d1212..722d0fb 100644 --- a/main.py +++ b/main.py @@ -95,7 +95,7 @@ def compare_runs(run_path: Path, run_identifier: int, parameter: Union[str, List def make_env(env_kwargs_dict): def _init(): - with DirtItemFactory(**env_kwargs_dict) as init_env: + with ItemFactory(**env_kwargs_dict) as init_env: return init_env return _init @@ -128,11 +128,10 @@ if __name__ == '__main__': for modeL_type in [A2C, PPO, DQN]: # ,RegDQN, QRDQN]: for seed in range(3): env_kwargs = dict(n_agents=1, - # with_dirt=True, item_properties=item_props, - dirt_properties=dirt_props, + # dirt_properties=dirt_props, movement_properties=move_props, - pomdp_r=2, max_steps=400, parse_doors=True, + pomdp_r=2, max_steps=400, parse_doors=False, level_name='rooms', frames_to_stack=3, omit_agent_in_obs=True, combin_agent_obs=True, record_episodes=False, cast_shadows=True, doors_have_area=False, env_seed=seed, verbose=False, @@ -140,7 +139,7 @@ if __name__ == '__main__': if modeL_type.__name__ in ["PPO", "A2C"]: kwargs = dict(ent_coef=0.01) - env = SubprocVecEnv([make_env(env_kwargs) for _ in range(1)], start_method="spawn") + env = SubprocVecEnv([make_env(env_kwargs) for _ in range(10)], start_method="spawn") elif modeL_type.__name__ in ["RegDQN", "DQN", "QRDQN"]: env = make_env(env_kwargs)() kwargs = dict(buffer_size=50000,