From 02523fc05e0eed672fe3df836ce7cae30cfd4a75 Mon Sep 17 00:00:00 2001 From: steffen-illium Date: Wed, 16 Jun 2021 17:28:49 +0200 Subject: [PATCH 1/2] =?UTF-8?q?Fick=20die=20T=C3=BCren?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- environments/factory/base_factory.py | 123 +++++++++++++----- environments/factory/levels/rooms.txt | 2 +- environments/factory/simple_factory.py | 14 +- environments/helpers.py | 23 +++- environments/utility_classes.py | 173 +++++++++++++++++++++---- main.py | 9 +- 6 files changed, 271 insertions(+), 73 deletions(-) diff --git a/environments/factory/base_factory.py b/environments/factory/base_factory.py index b8d5da6..47535c1 100644 --- a/environments/factory/base_factory.py +++ b/environments/factory/base_factory.py @@ -1,4 +1,3 @@ -from argparse import Namespace from pathlib import Path from typing import List, Union, Iterable @@ -10,7 +9,7 @@ import yaml from gym.wrappers import FrameStack from environments import helpers as h -from environments.utility_classes import Actions, StateSlice, AgentState, MovementProperties, Zones +from environments.utility_classes import Actions, StateSlices, AgentState, MovementProperties, Zones, DoorState # noinspection PyAttributeOutsideInit @@ -23,6 +22,7 @@ class BaseFactory(gym.Env): @property def observation_space(self): agent_slice = self.n_agents if self.omit_agent_slice_in_obs else 0 + agent_slice = 1 if self.combin_agent_slices_in_obs else agent_slice if self.pomdp_radius: return spaces.Box(low=0, high=1, shape=(self._state.shape[0] - agent_slice, self.pomdp_radius * 2 + 1, self.pomdp_radius * 2 + 1), dtype=np.float32) @@ -47,7 +47,7 @@ class BaseFactory(gym.Env): omit_agent_slice_in_obs=False, **kwargs): assert (combin_agent_slices_in_obs != omit_agent_slice_in_obs) or \ (not combin_agent_slices_in_obs and not omit_agent_slice_in_obs), \ - 'Both options are exclusive' + 'Both options are exclusive' assert frames_to_stack != 1 and frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1." self.movement_properties = movement_properties @@ -61,13 +61,23 @@ class BaseFactory(gym.Env): self.frames_to_stack = frames_to_stack self.done_at_collision = False - _actions = Actions(self.movement_properties) - self._actions = _actions + self.additional_actions + self._actions = Actions(self.movement_properties) + self._actions.register_additional_items(self.additional_actions) + + self._state_slices = StateSlices() level_filepath = Path(__file__).parent / h.LEVELS_DIR / f'{self.level_name}.txt' parsed_level = h.parse_level(level_filepath) self._level = h.one_hot_level(parsed_level) - self._state_slices = StateSlice(n_agents) + parsed_doors = h.one_hot_level(parsed_level, h.DOOR) + if parsed_doors.any(): + self._doors = parsed_doors + level_slices = ['level', 'doors'] + else: + level_slices = ['level'] + offset = len(level_slices) + self._state_slices.register_additional_items([*level_slices, + *[f'agent#{i}' for i in range(offset, n_agents + offset)]]) if 'additional_slices' in kwargs: self._state_slices.register_additional_items(kwargs.get('additional_slices')) self._zones = Zones(parsed_level) @@ -87,7 +97,8 @@ class BaseFactory(gym.Env): def reset(self) -> (np.ndarray, int, bool, dict): self._steps = 0 - self._agent_states = [] + self._agent_states = list() + # Agent placement ... agents = np.zeros((self.n_agents, *self._level.shape), dtype=np.int8) floor_tiles = np.argwhere(self._level == h.IS_FREE_CELL) @@ -96,10 +107,18 @@ class BaseFactory(gym.Env): for i, (x, y) in enumerate(floor_tiles[:self.n_agents]): agents[i, x, y] = h.IS_OCCUPIED_CELL agent_state = AgentState(i, -1) - agent_state.update(pos=[x, y]) + agent_state.update(pos=(x, y)) self._agent_states.append(agent_state) # state.shape = level, agent 1,..., agent n, - self._state = np.concatenate((np.expand_dims(self._level, axis=0), agents), axis=0) + if 'doors' in self._state_slices.values(): + self._door_states = [DoorState(i, tuple(pos)) for i, pos + in enumerate(np.argwhere(self._doors == h.IS_OCCUPIED_CELL))] + self._state = np.concatenate((np.expand_dims(self._level, axis=0), + np.expand_dims(self._doors, axis=0), + agents), axis=0) + + else: + self._state = np.concatenate((np.expand_dims(self._level, axis=0), agents), axis=0) # Returns State return None @@ -108,9 +127,13 @@ class BaseFactory(gym.Env): obs = self._build_per_agent_obs(0) elif self.n_agents >= 2: obs = np.stack([self._build_per_agent_obs(agent_i) for agent_i in range(self.n_agents)]) + else: + raise ValueError('n_agents cannot be smaller than 1!!') return obs def _build_per_agent_obs(self, agent_i: int) -> np.ndarray: + first_agent_slice = self._state_slices.AGENTSTARTIDX + # Todo: make this more efficient! if self.pomdp_radius: global_pos = self._agent_states[agent_i].pos x0, x1 = max(0, global_pos[0] - self.pomdp_radius), global_pos[0] + self.pomdp_radius + 1 @@ -118,13 +141,10 @@ class BaseFactory(gym.Env): obs = self._state[:, x0:x1, y0:y1] if obs.shape[1] != self.pomdp_radius * 2 + 1 or obs.shape[2] != self.pomdp_radius * 2 + 1: obs_padded = np.full((obs.shape[0], self.pomdp_radius * 2 + 1, self.pomdp_radius * 2 + 1), 1) - try: - a_pos = np.argwhere(obs[h.AGENT_START_IDX + agent_i] == h.IS_OCCUPIED_CELL)[0] - except IndexError: - print('NO') + a_pos = np.argwhere(obs[first_agent_slice + agent_i] == h.IS_OCCUPIED_CELL)[0] obs_padded[:, - abs(a_pos[0]-self.pomdp_radius):abs(a_pos[0]-self.pomdp_radius)+obs.shape[1], - abs(a_pos[1]-self.pomdp_radius):abs(a_pos[1]-self.pomdp_radius)+obs.shape[2]] = obs + abs(a_pos[0]-self.pomdp_radius):abs(a_pos[0]-self.pomdp_radius)+obs.shape[1], + abs(a_pos[1]-self.pomdp_radius):abs(a_pos[1]-self.pomdp_radius)+obs.shape[2]] = obs obs = obs_padded else: obs = self._state @@ -135,7 +155,7 @@ class BaseFactory(gym.Env): if self.combin_agent_slices_in_obs: agent_obs = np.sum(obs[[key for key, val in self._state_slices.items() if 'agent' in val]], axis=0, keepdims=True) - obs = np.concatenate((obs[:h.AGENT_START_IDX], agent_obs, obs[h.AGENT_START_IDX+self.n_agents:])) + obs = np.concatenate((obs[:first_agent_slice], agent_obs, obs[first_agent_slice+self.n_agents:])) return obs else: return obs @@ -150,9 +170,7 @@ class BaseFactory(gym.Env): done = False # Move this in a seperate function? - agent_states = list() for agent_i, action in enumerate(actions): - agent_i_state = AgentState(agent_i, action) if self._actions.is_moving_action(action): pos, valid = self.move_or_colide(agent_i, action) elif self._actions.is_no_op(action): @@ -160,16 +178,14 @@ class BaseFactory(gym.Env): else: pos, valid = self.do_additional_actions(agent_i, action) # Update state accordingly - agent_i_state.update(pos=pos, action_valid=valid) - agent_states.append(agent_i_state) + self._agent_states[agent_i].update(pos=pos, action_valid=valid, action=action) - for i, collision_vec in enumerate(self.check_all_collisions(agent_states, self._state.shape[0])): - agent_states[i].update(collision_vector=collision_vec) + for i, collision_vec in enumerate(self.check_all_collisions(self._agent_states, self._state.shape[0])): + self._agent_states[i].update(collision_vector=collision_vec) if self.done_at_collision and collision_vec.any(): done = True - self._agent_states = agent_states - reward, info = self.calculate_reward(agent_states) + reward, info = self.calculate_reward(self._agent_states) if self._steps >= self.max_steps: done = True @@ -189,8 +205,12 @@ class BaseFactory(gym.Env): def check_collisions(self, agent_state: AgentState) -> np.ndarray: pos_x, pos_y = agent_state.pos # FixMe: We need to find a way to spare out some dimensions, eg. an info dimension etc... a[?,] + # https://numpy.org/doc/stable/reference/arrays.indexing.html#boolean-array-indexing collisions_vec = self._state[:, pos_x, pos_y].copy() # "vertical fiber" at position of agent i - collisions_vec[h.AGENT_START_IDX + agent_state.i] = h.IS_FREE_CELL # no self-collisions + collisions_vec[self._state_slices.AGENTSTARTIDX + agent_state.i] = h.IS_FREE_CELL # no self-collisions + if 'door' in self._state_slices.values(): + collisions_vec[self._state_slices.by_name('doors')] = h.IS_FREE_CELL # no door-collisions + if agent_state.action_valid: # ToDo: Place a function hook here pass @@ -201,8 +221,8 @@ class BaseFactory(gym.Env): def do_move(self, agent_i: int, old_pos: (int, int), new_pos: (int, int)) -> None: (x, y), (x_new, y_new) = old_pos, new_pos - self._state[agent_i + h.AGENT_START_IDX, x, y] = h.IS_FREE_CELL - self._state[agent_i + h.AGENT_START_IDX, x_new, y_new] = h.IS_OCCUPIED_CELL + self._state[agent_i + self._state_slices.AGENTSTARTIDX, x, y] = h.IS_FREE_CELL + self._state[agent_i + self._state_slices.AGENTSTARTIDX, x_new, y_new] = h.IS_OCCUPIED_CELL def move_or_colide(self, agent_i: int, action: int) -> ((int, int), bool): old_pos, new_pos, valid = self._check_agent_move(agent_i=agent_i, action=self._actions[action]) @@ -215,7 +235,8 @@ class BaseFactory(gym.Env): return old_pos, valid def _check_agent_move(self, agent_i, action: str): - agent_slice = self._state[h.AGENT_START_IDX + agent_i] # horizontal slice from state tensor + agent_slice_idx = self._state_slices.AGENTSTARTIDX + agent_i + agent_slice = self._state[agent_slice_idx] # horizontal slice from state tensor agent_pos = np.argwhere(agent_slice == 1) if len(agent_pos) > 1: raise AssertionError('Only one agent per slice is allowed.') @@ -226,17 +247,50 @@ class BaseFactory(gym.Env): x_new = x + x_diff y_new = y + y_diff + if h.DOORS in self._state_slices.values(): + door = [door for door in self._door_states if door.pos == (x, y)] + if door: + door = door[0] + if door.is_open: + pass + else: # door.is_closed: + local_door_map = self._state[self._state_slices.by_name(h.LEVEL)][door.pos[0]-1:door.pos[0]+2, + door.pos[1]-1:door.pos[1]+2] + local_agent_map = np.zeros_like(local_door_map) + local_agent_map[tuple(np.subtract(door.pos, self._agent_states[agent_i]._last_pos))] += 1 + local_agent_map[tuple(np.subtract(door.pos, (x_new, y_new)))] += 1 + if np.all(local_door_map == h.HORIZONTAL_DOOR_MAP): + # This is a horizontal Door Configuration + if np.sum(local_agent_map[0]) >= 2 or np.sum(local_agent_map[-1]) >= 2: + # The Agent goes back to where he came from + pass + else: + # The Agent tries to go through a closed door + return (x, y), (x, y), h.NOT_VALID + else: + # This is a vertical Door Configuration + if np.sum(local_agent_map[:, 0]) >= 2 or np.sum(local_agent_map[:, -1]) >= 2: + # The Agent goes back to where he came from + pass + else: + return (x, y), (x, y), h.NOT_VALID + else: + pass + else: + pass + valid = h.check_position(self._state[h.LEVEL_IDX], (x_new, y_new)) return (x, y), (x_new, y_new), valid def agent_i_position(self, agent_i: int) -> (int, int): - positions = np.argwhere(self._state[h.AGENT_START_IDX + agent_i] == h.IS_OCCUPIED_CELL) + positions = np.argwhere(self._state[self._state_slices.AGENTSTARTIDX + agent_i] == h.IS_OCCUPIED_CELL) assert positions.shape[0] == 1 pos_x, pos_y = positions[0] # a.flatten() return pos_x, pos_y def free_cells(self, excluded_slices: Union[None, List[int], int] = None) -> np.array: + excluded_slices = excluded_slices or [] assert isinstance(excluded_slices, (int, list)) excluded_slices = excluded_slices if isinstance(excluded_slices, list) else [excluded_slices] @@ -245,9 +299,14 @@ class BaseFactory(gym.Env): if excluded_slices: # Todo: Is there a cleaner way? - inds = list(range(self._state.shape[0])) - excluded_slices = [inds[x] if x < 0 else x for x in excluded_slices] - state = self._state[[x for x in inds if x not in excluded_slices]] + # inds = list(range(self._state.shape[0])) + # excluded_slices = [inds[x] if x < 0 else x for x in excluded_slices] + # state = self._state[[x for x in inds if x not in excluded_slices]] + + # Yes there is! + bool_array = np.full(self._state.shape[0], True) + bool_array[excluded_slices] = False + state = self._state[bool_array] free_cells = np.argwhere(state.sum(0) == h.IS_FREE_CELL) np.random.shuffle(free_cells) diff --git a/environments/factory/levels/rooms.txt b/environments/factory/levels/rooms.txt index 43e8193..781de13 100644 --- a/environments/factory/levels/rooms.txt +++ b/environments/factory/levels/rooms.txt @@ -7,7 +7,7 @@ ###x#######x### #1111##2222222# #11111#2222#22# -#11111x2222222# +#11111D2222222# #11111#2222222# #11111#2222222# ############### \ No newline at end of file diff --git a/environments/factory/simple_factory.py b/environments/factory/simple_factory.py index 15f9d6d..30de773 100644 --- a/environments/factory/simple_factory.py +++ b/environments/factory/simple_factory.py @@ -1,4 +1,3 @@ -from collections import OrderedDict from typing import List, Union, NamedTuple import random @@ -8,7 +7,7 @@ from environments.factory.base_factory import BaseFactory from environments import helpers as h from environments.factory.renderer import Renderer, Entity -from environments.utility_classes import AgentState, MovementProperties, Register +from environments.utility_classes import AgentState, MovementProperties DIRT_INDEX = -1 CLEAN_UP_ACTION = 'clean_up' @@ -27,20 +26,20 @@ class DirtProperties(NamedTuple): class SimpleFactory(BaseFactory): @property - def additional_actions(self) -> Union[str, List[str]]: - return CLEAN_UP_ACTION + def additional_actions(self) -> List[str]: + return [CLEAN_UP_ACTION] def _is_clean_up_action(self, action: Union[str, int]): if isinstance(action, str): action = self._actions.by_name(action) return self._actions[action] == CLEAN_UP_ACTION - def __init__(self, *args, dirt_properties: DirtProperties, verbose=False, **kwargs): + def __init__(self, *args, dirt_properties: DirtProperties = DirtProperties(), verbose=False, **kwargs): self.dirt_properties = dirt_properties self.verbose = verbose self.max_dirt = 20 self._renderer = None # expensive - don't use it when not required ! - super(SimpleFactory, self).__init__(*args, additional_slices='dirt', **kwargs) + super(SimpleFactory, self).__init__(*args, additional_slices=['dirt'], **kwargs) def render(self): @@ -190,7 +189,8 @@ if __name__ == '__main__': move_props = MovementProperties(allow_diagonal_movement=True, allow_square_movement=True) dirt_props = DirtProperties() factory = SimpleFactory(movement_properties=move_props, dirt_properties=dirt_props, n_agents=10, - combin_agent_slices_in_obs=True, omit_agent_slice_in_obs=False, level_name='rooms') + combin_agent_slices_in_obs=True, level_name='rooms', + pomdp_radius=3) n_actions = factory.action_space.n - 1 diff --git a/environments/helpers.py b/environments/helpers.py index 3f0b2a3..6357739 100644 --- a/environments/helpers.py +++ b/environments/helpers.py @@ -6,22 +6,35 @@ from pathlib import Path # Constants WALL = '#' +DOOR = 'D' DANGER_ZONE = 'x' LEVELS_DIR = 'levels' +LEVEL = 'level' +DOORS = 'doors' LEVEL_IDX = 0 -AGENT_START_IDX = 1 IS_FREE_CELL = 0 IS_OCCUPIED_CELL = 1 TO_BE_AVERAGED = ['dirt_amount', 'dirty_tiles'] IGNORED_DF_COLUMNS = ['Episode', 'Run', 'train_step', 'step', 'index', 'dirt_amount', 'dirty_tile_count'] ACTIONMAP = defaultdict(lambda: (0, 0), dict(north=(-1, 0), east=(0, 1), - south=(1, 0), west=(0, -1), - north_east=(-1, +1), south_east=(1, 1), - south_west=(+1, -1), north_west=(-1, -1) - ) + south=(1, 0), west=(0, -1), + north_east=(-1, +1), south_east=(1, 1), + south_west=(+1, -1), north_west=(-1, -1) + ) ) +HORIZONTAL_DOOR_MAP = np.asarray([[0, 0, 0], [1, 0, 1], [0, 0, 0]]) +VERTICAL_DOOR_MAP = np.asarray([[0, 1, 0], [0, 0, 0], [0, 1, 0]]) + +HORIZONTAL_DOOR_ZONE_1 = np.asarray([[1, 1, 1], [0, 0, 0], [0, 0, 0]]) +HORIZONTAL_DOOR_ZONE_2 = np.asarray([[0, 0, 0], [0, 0, 0], [1, 1, 1]]) +VERTICAL_DOOR_ZONE_1 = np.asarray([[1, 0, 0], [0, 0, 0], [0, 0, 1]]) +VERTICAL_DOOR_ZONE_2 = np.asarray([[1, 0, 0], [0, 0, 0], [0, 0, 1]]) + +NOT_VALID = False +VALID = True + # Utility functions def parse_level(path): diff --git a/environments/utility_classes.py b/environments/utility_classes.py index bc7be60..4b5a00b 100644 --- a/environments/utility_classes.py +++ b/environments/utility_classes.py @@ -1,14 +1,19 @@ -from typing import Union, List, NamedTuple +from typing import Union, List, NamedTuple, Tuple import numpy as np from environments import helpers as h +IS_CLOSED = 'CLOSED' +IS_OPEN = 'OPEN' + + class MovementProperties(NamedTuple): allow_square_movement: bool = True allow_diagonal_movement: bool = False allow_no_op: bool = False + # Preperations for Entities (not used yet) class Entity: @@ -25,8 +30,59 @@ class Entity: self._identifier = identifier +class Door(Entity): + + @property + def is_closed(self): + return self._state == IS_CLOSED + + @property + def is_open(self): + return self._state == IS_OPEN + + @property + def status(self): + return self._state + + def __init__(self, *args, closed_on_init=True, **kwargs): + super(Door, self).__init__(*args, **kwargs) + self._state = IS_CLOSED if closed_on_init else IS_OPEN + + def use(self): + self._state: str = IS_CLOSED if self._state == IS_OPEN else IS_OPEN + pass + + +class Agent(Entity): + + @property + def direction_of_vision(self): + return self._direction_of_vision + + def __init__(self, *args, **kwargs): + super(Agent, self).__init__(*args, **kwargs) + self._direction_of_vision = (None, None) + + def move(self, new_pos: Tuple[int, int]): + x_old, y_old = self.pos + self._pos = new_pos + x_new, y_new = new_pos + self._direction_of_vision = (x_old-x_new, y_old-y_new) + return self.pos + + class AgentState: + @property + def collisions(self): + return np.argwhere(self.collision_vector != 0).flatten() + + @property + def direction_of_view(self): + last_x, last_y = self._last_pos + curr_x, curr_y = self.pos + return last_x-curr_x, last_y-curr_y + def __init__(self, i: int, action: int): self.i = i self.action = action @@ -34,18 +90,43 @@ class AgentState: self.collision_vector = None self.action_valid = None self.pos = None - self.info = {} - - @property - def collisions(self): - return np.argwhere(self.collision_vector != 0).flatten() + self._last_pos = (-1, -1) def update(self, **kwargs): # is this hacky?? o.0 + last_pos = self.pos for key, value in kwargs.items(): if hasattr(self, key): self.__setattr__(key, value) else: - raise AttributeError(f'"{key}" cannot be updated, this attr is not a part of {self.__class__.__name__}') + raise AttributeError(f'"{key}" cannot be updated, this attr is not a part of {self.__name__}') + if self.action_valid and last_pos != self.pos: + self._last_pos = last_pos + + def reset(self): + self.__init__(self.i, self.action) + + +class DoorState: + + def __init__(self, i: int, pos: Tuple[int, int], closed_on_init=True): + self.i = i + self.pos = pos + self._state = self._state = IS_CLOSED if closed_on_init else IS_OPEN + + @property + def is_closed(self): + return self._state == IS_CLOSED + + @property + def is_open(self): + return self._state == IS_OPEN + + @property + def status(self): + return self._state + + def use(self): + self._state: str = IS_CLOSED if self._state == IS_OPEN else IS_OPEN class Register: @@ -60,24 +141,31 @@ class Register: def __len__(self): return len(self._register) - def __add__(self, other: Union[str, List[str]]): - other = other if isinstance(other, list) else [other] - assert all([isinstance(x, str) for x in other]), f'All item names have to be of type {str}.' - self._register.update({key+len(self._register): value for key, value in enumerate(other)}) + def __add__(self, other: str): + assert isinstance(other, str), f'All item names have to be of type {str}' + self._register.update({len(self._register): other}) return self - def register_additional_items(self, other: Union[str, List[str]]): - self_with_additional_items = self + other - return self_with_additional_items + def register_additional_items(self, others: List[str]): + for other in others: + self + other + return self def keys(self): return self._register.keys() + def values(self): + return self._register.values() + def items(self): return self._register.items() def __getitem__(self, item): - return self._register[item] + try: + return self._register[item] + except KeyError: + print('NO') + raise def by_name(self, item): return list(self._register.keys())[list(self._register.values()).index(item)] @@ -86,6 +174,28 @@ class Register: return f'{self.__class__.__name__}({self._register})' +class Agents(Register): + + def __init__(self, n_agents): + super(Agents, self).__init__() + self.register_additional_items([f'agent#{i}' for i in range(n_agents)]) + self._agents = [Agent(x, (-1, -1)) for x in self.keys()] + pass + + def __getitem__(self, item): + return self._agents[item] + + def get_name(self, item): + return self._register[item] + + def by_name(self, item): + return self[super(Agents, self).by_name(item)] + + def __add__(self, other): + super(Agents, self).__add__(other) + self._agents.append(Agent(len(self)+1, (-1, -1))) + + class Actions(Register): @property @@ -96,15 +206,12 @@ class Actions(Register): self.allow_no_op = movement_properties.allow_no_op self.allow_diagonal_movement = movement_properties.allow_diagonal_movement self.allow_square_movement = movement_properties.allow_square_movement - # FIXME: There is a bug in helpers because there actions are ints. and the order matters. - # assert not(self.allow_square_movement is False and self.allow_diagonal_movement is True), \ - # "There is a bug in helpers!!!" super(Actions, self).__init__() if self.allow_square_movement: - self + ['north', 'east', 'south', 'west'] + self.register_additional_items(['north', 'east', 'south', 'west']) if self.allow_diagonal_movement: - self + ['north_east', 'south_east', 'south_west', 'north_west'] + self.register_additional_items(['north_east', 'south_east', 'south_west', 'north_west']) self._movement_actions = self._register.copy() if self.allow_no_op: self + 'no-op' @@ -121,12 +228,19 @@ class Actions(Register): return self[action] == 'no-op' -class StateSlice(Register): +class StateSlices(Register): - def __init__(self, n_agents: int): - super(StateSlice, self).__init__() - offset = 1 # AGENT_START_IDX - self.register_additional_items(['level', *[f'agent#{i}' for i in range(offset, n_agents+offset)]]) + @property + def AGENTSTARTIDX(self): + if self._agent_start_idx: + return self._agent_start_idx + else: + self._agent_start_idx = min([idx for idx, x in self.items() if 'agent' in x]) + return self._agent_start_idx + + def __init__(self): + super(StateSlices, self).__init__() + self._agent_start_idx = None class Zones(Register): @@ -160,3 +274,12 @@ class Zones(Register): def __getitem__(self, item): return self._zone_slices[item] + + def get_name(self, item): + return self._register[item] + + def by_name(self, item): + return self[super(Zones, self).by_name(item)] + + def register_additional_items(self, other: Union[str, List[str]]): + raise AttributeError('You are not allowed to add additional Zones in runtime.') diff --git a/main.py b/main.py index e844f4e..ee04369 100644 --- a/main.py +++ b/main.py @@ -6,7 +6,6 @@ from pathlib import Path import time import pandas as pd -from gym.wrappers import FrameStack from stable_baselines3.common.callbacks import CallbackList @@ -111,8 +110,12 @@ if __name__ == '__main__': if modeL_type.__name__ in ["PPO", "A2C"]: kwargs = dict(ent_coef=0.01) elif modeL_type.__name__ in ["RegDQN", "DQN", "QRDQN"]: - kwargs = dict(target_update_interval=500, buffer_size=30000, learning_starts=5000, - exploration_final_eps=0.01, batch_size=96) + kwargs = dict(buffer_size=50000, + learning_starts=25000, + batch_size=64, + target_update_interval=5000, + exploration_fraction=0.25, + exploration_final_eps=0.025) else: raise NameError(f'The model "{model.__name__}" has the wrong name.') model = modeL_type("MlpPolicy", env, verbose=1, seed=seed, device='cpu', **kwargs) From 1777ee9d5ffc718826110fcd69d802af1db0c7f0 Mon Sep 17 00:00:00 2001 From: steffen-illium Date: Wed, 16 Jun 2021 17:48:35 +0200 Subject: [PATCH 2/2] =?UTF-8?q?Fick=20die=20T=C3=BCren=20II?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- environments/factory/base_factory.py | 18 ++++++++++++++---- environments/utility_classes.py | 12 ++++++++++-- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/environments/factory/base_factory.py b/environments/factory/base_factory.py index 47535c1..539d60d 100644 --- a/environments/factory/base_factory.py +++ b/environments/factory/base_factory.py @@ -62,9 +62,6 @@ class BaseFactory(gym.Env): self.done_at_collision = False - self._actions = Actions(self.movement_properties) - self._actions.register_additional_items(self.additional_actions) - self._state_slices = StateSlices() level_filepath = Path(__file__).parent / h.LEVELS_DIR / f'{self.level_name}.txt' parsed_level = h.parse_level(level_filepath) @@ -73,14 +70,19 @@ class BaseFactory(gym.Env): if parsed_doors.any(): self._doors = parsed_doors level_slices = ['level', 'doors'] + can_use_doors = True else: level_slices = ['level'] + can_use_doors = False offset = len(level_slices) self._state_slices.register_additional_items([*level_slices, *[f'agent#{i}' for i in range(offset, n_agents + offset)]]) if 'additional_slices' in kwargs: self._state_slices.register_additional_items(kwargs.get('additional_slices')) self._zones = Zones(parsed_level) + + self._actions = Actions(self.movement_properties, can_use_doors=can_use_doors) + self._actions.register_additional_items(self.additional_actions) self.reset() @property @@ -174,7 +176,14 @@ class BaseFactory(gym.Env): if self._actions.is_moving_action(action): pos, valid = self.move_or_colide(agent_i, action) elif self._actions.is_no_op(action): - pos, valid = self.agent_i_position(agent_i), True + pos, valid = self._agent_states[agent_i].pos, h.VALID + elif self._actions.is_door_usage(action): + try: + door = [door for door in self._door_states if door.pos == self._agent_states[agent_i].pos][0] + door.use() + pos, valid = self._agent_states[agent_i].pos, h.VALID + except IndexError: + pos, valid = self._agent_states[agent_i].pos, h.NOT_VALID else: pos, valid = self.do_additional_actions(agent_i, action) # Update state accordingly @@ -273,6 +282,7 @@ class BaseFactory(gym.Env): # The Agent goes back to where he came from pass else: + # The Agent tries to go through a closed door return (x, y), (x, y), h.NOT_VALID else: pass diff --git a/environments/utility_classes.py b/environments/utility_classes.py index 4b5a00b..0449946 100644 --- a/environments/utility_classes.py +++ b/environments/utility_classes.py @@ -202,10 +202,11 @@ class Actions(Register): def movement_actions(self): return self._movement_actions - def __init__(self, movement_properties: MovementProperties): + def __init__(self, movement_properties: MovementProperties, can_use_doors=False): self.allow_no_op = movement_properties.allow_no_op self.allow_diagonal_movement = movement_properties.allow_diagonal_movement self.allow_square_movement = movement_properties.allow_square_movement + self.can_use_doors = can_use_doors super(Actions, self).__init__() if self.allow_square_movement: @@ -213,8 +214,10 @@ class Actions(Register): if self.allow_diagonal_movement: self.register_additional_items(['north_east', 'south_east', 'south_west', 'north_west']) self._movement_actions = self._register.copy() + if self.can_use_doors: + self.register_additional_items(['use_door']) if self.allow_no_op: - self + 'no-op' + self.register_additional_items(['no-op']) def is_moving_action(self, action: Union[str, int]): if isinstance(action, str): @@ -227,6 +230,11 @@ class Actions(Register): action = self.by_name(action) return self[action] == 'no-op' + def is_door_usage(self, action: Union[str, int]): + if isinstance(action, str): + action = self.by_name(action) + return self[action] == 'use_door' + class StateSlices(Register):