diff --git a/environments/factory/assets/drop_off.png b/environments/factory/assets/drop_off.png new file mode 100644 index 0000000..6ca1ece Binary files /dev/null and b/environments/factory/assets/drop_off.png differ diff --git a/environments/factory/assets/item.png b/environments/factory/assets/item.png new file mode 100644 index 0000000..9f7a591 Binary files /dev/null and b/environments/factory/assets/item.png differ diff --git a/environments/factory/base/base_factory.py b/environments/factory/base/base_factory.py index c21fc56..c2b92cd 100644 --- a/environments/factory/base/base_factory.py +++ b/environments/factory/base/base_factory.py @@ -1,3 +1,4 @@ +import abc import time from pathlib import Path from typing import List, Union, Iterable @@ -10,6 +11,7 @@ import yaml from gym.wrappers import FrameStack from environments.factory.base.shadow_casting import Map +from environments.factory.renderer import Renderer, RenderEntity from environments.helpers import Constants as c, Constants from environments import helpers as h from environments.factory.base.objects import Slice, Agent, Tile, Action @@ -28,20 +30,7 @@ class BaseFactory(gym.Env): @property def observation_space(self): - if self.combin_agent_slices_in_obs and self.omit_agent_slice_in_obs: - if self.n_agents > 1: - slices = self._slices.n - (self._agents.n - 1) - else: - slices = self._slices.n - 1 - elif self.combin_agent_slices_in_obs and not self.omit_agent_slice_in_obs: - slices = self._slices.n - (self._agents.n - 1) - elif not self.combin_agent_slices_in_obs and self.omit_agent_slice_in_obs: - slices = self._slices.n - self._agents.n - elif not self.combin_agent_slices_in_obs and not self.omit_agent_slice_in_obs: - slices = self._slices.n - else: - raise RuntimeError('This should not happen!') - + slices = self._slices.n_observable_slices level_shape = (self.pomdp_r * 2 + 1, self.pomdp_r * 2 + 1) if self.pomdp_r else self._level_shape space = spaces.Box(low=0, high=1, shape=(slices, *level_shape), dtype=np.float32) return space @@ -54,36 +43,6 @@ class BaseFactory(gym.Env): def movement_actions(self): return self._actions.movement_actions - @property - def additional_actions(self) -> Union[str, List[str]]: - """ - When heriting from this Base Class, you musst implement this methode!!! - - :return: A list of Actions-object holding all additional actions. - :rtype: List[Action] - """ - raise NotImplementedError('Please register additional actions ') - - @property - def additional_entities(self) -> Union[Entities, List[Entities]]: - """ - When heriting from this Base Class, you musst implement this methode!!! - - :return: A single Entites collection or a list of such. - :rtype: Union[Entities, List[Entities]] - """ - raise NotImplementedError('Please register additional entities.') - - @property - def additional_slices(self) -> Union[Slice, List[Slice]]: - """ - When heriting from this Base Class, you musst implement this methode!!! - - :return: A list of Slice-objects. - :rtype: List[Slice] - """ - raise NotImplementedError('Please register additional slices.') - def __enter__(self): return self if self.frames_to_stack == 0 else FrameStack(self, self.frames_to_stack) @@ -94,17 +53,20 @@ class BaseFactory(gym.Env): movement_properties: MovementProperties = MovementProperties(), parse_doors=False, combin_agent_slices_in_obs: bool = False, frames_to_stack=0, record_episodes=False, omit_agent_slice_in_obs=False, done_at_collision=False, cast_shadows=True, - verbose=False, doors_have_area=True, **kwargs): + verbose=False, doors_have_area=True, env_seed=time.time_ns(), **kwargs): assert frames_to_stack != 1 and frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1." # Attribute Assignment - self._base_rng = np.random.default_rng(kwargs.get('seed', default=time.time_ns())) + self.env_seed = env_seed + self._base_rng = np.random.default_rng(self.env_seed) self.movement_properties = movement_properties self.level_name = level_name self._level_shape = None self.verbose = verbose + self._renderer = None # expensive - don't use it when not required ! self.n_agents = n_agents + self.max_steps = max_steps self.pomdp_r = pomdp_r self.combin_agent_slices_in_obs = combin_agent_slices_in_obs @@ -132,25 +94,37 @@ class BaseFactory(gym.Env): # Level level_filepath = Path(__file__).parent.parent / h.LEVELS_DIR / f'{self.level_name}.txt' parsed_level = h.parse_level(level_filepath) - level = [Slice(c.LEVEL.name, h.one_hot_level(parsed_level), is_blocking_light=True)] + level = [Slice(c.LEVEL, h.one_hot_level(parsed_level), is_blocking_light=True)] self._level_shape = level[0].shape # Doors parsed_doors = h.one_hot_level(parsed_level, c.DOOR) if parsed_doors.any(): - doors = [Slice(c.DOORS.name, parsed_doors, is_blocking_light=True)] + doors = [Slice(c.DOORS, parsed_doors, is_blocking_light=True)] else: doors = [] # Agents agents = [] - for i in range(self.n_agents): - agents.append(Slice(f'{c.AGENT.name}#{i}', np.zeros_like(level[0].slice, dtype=np.float32))) - state_slices.register_additional_items(level+doors+agents) + agent_names = [f'{c.AGENT.value}#{i}' for i in range(self.n_agents)] - # Additional Slices from SubDomains - if additional_slices := self.additional_slices: - state_slices.register_additional_items(additional_slices) + if self.combin_agent_slices_in_obs and self.omit_agent_slice_in_obs: + if self.n_agents == 1: + observables = [False] + else: + observables = [True] + ([False] * (self.n_agents - 1)) + elif self.combin_agent_slices_in_obs and not self.omit_agent_slice_in_obs: + observables = [True] + ([False] * (self.n_agents - 1)) + elif not self.combin_agent_slices_in_obs and self.omit_agent_slice_in_obs: + observables = [False] + ([True] * (self.n_agents - 1)) + elif not self.combin_agent_slices_in_obs and not self.omit_agent_slice_in_obs: + observables = [True] * self.n_agents + else: + raise RuntimeError('This should not happen!') + + for observable, agent_name in zip(observables, agent_names): + agents.append(Slice(agent_name, np.zeros_like(level[0].slice, dtype=np.float32), is_observable=observable)) + state_slices.register_additional_items(level+doors+agents+self.additional_slices) return state_slices def _init_obs_cube(self) -> np.ndarray: @@ -198,18 +172,6 @@ class BaseFactory(gym.Env): obs = self._get_observations() return obs - def pre_step(self) -> None: - pass - - def do_additional_reset(self) -> None: - pass - - def do_additional_step(self) -> dict: - return {} - - def post_step(self) -> dict: - return {} - def step(self, actions): actions = [actions] if isinstance(actions, int) or np.isscalar(actions) else actions assert isinstance(actions, Iterable), f'"actions" has to be in [{int, list}]' @@ -217,31 +179,22 @@ class BaseFactory(gym.Env): done = False # Pre step Hook for later use - self.pre_step() + self.hook_pre_step() # Move this in a seperate function? for action, agent in zip(actions, self._agents): agent.clear_temp_sate() - action_name = self._actions[action] - if self._actions.is_moving_action(action): - valid = self._move_or_colide(agent, action_name) - elif self._actions.is_no_op(action): + action_obj = self._actions[action] + if self._actions.is_moving_action(action_obj): + valid = self._move_or_colide(agent, action_obj) + elif self._actions.is_no_op(action_obj): valid = c.VALID.value - elif self._actions.is_door_usage(action): - # Check if agent really is standing on a door: - if self.doors_have_area: - door = self._doors.get_near_position(agent.pos) - else: - door = self._doors.by_pos(agent.pos) - if door is not None: - door.use() - valid = c.VALID.value - # When he doesn't... - else: - valid = c.NOT_VALID.value + elif self._actions.is_door_usage(action_obj): + valid = self._handle_door_interaction(agent) else: - valid = self.do_additional_actions(agent, action) - agent.temp_action = action + valid = self.do_additional_actions(agent, action_obj) + assert valid is not None, 'This should not happen, every Action musst be detected correctly!' + agent.temp_action = action_obj agent.temp_valid = valid # In-between step Hook for later use @@ -275,12 +228,25 @@ class BaseFactory(gym.Env): info.update(self._summarize_state()) # Post step Hook for later use - info.update(self.post_step()) + info.update(self.hook_post_step()) obs = self._get_observations() return obs, reward, done, info + def _handle_door_interaction(self, agent): + # Check if agent really is standing on a door: + if self.doors_have_area: + door = self._doors.get_near_position(agent.pos) + else: + door = self._doors.by_pos(agent.pos) + if door is not None: + door.use() + return c.VALID.value + # When he doesn't... + else: + return c.NOT_VALID.value + def _flush_state(self): self._obs_cube[np.arange(len(self._slices)) != self._slices.get_idx(c.LEVEL)] = c.FREE_CELL.value if self.parse_doors: @@ -291,7 +257,7 @@ class BaseFactory(gym.Env): self._obs_cube[self._slices.get_idx(c.DOORS)][door.pos] = c.CLOSED_DOOR.value for agent in self._agents: self._obs_cube[self._slices.get_idx_by_name(agent.name)][agent.pos] = c.OCCUPIED_CELL.value - if agent.last_pos != h.NO_POS: + if agent.last_pos != c.NO_POS: self._obs_cube[self._slices.get_idx_by_name(agent.name)][agent.last_pos] = c.FREE_CELL.value def _get_observations(self) -> np.ndarray: @@ -318,8 +284,8 @@ class BaseFactory(gym.Env): obs = self._obs_cube if self.cast_shadows: - obs_block_light = [obs[idx] != c.OCCUPIED_CELL.value for idx, slice - in enumerate(self._slices) if slice.is_blocking_light] + obs_block_light = [obs[idx] != c.OCCUPIED_CELL.value for idx, obs_slice + in enumerate(self._slices) if obs_slice.is_blocking_light] door_shadowing = False if door := self._doors.by_pos(agent.pos): if door.is_closed: @@ -332,6 +298,7 @@ class BaseFactory(gym.Env): xs, ys = zip(*blocking) else: xs, ys = zip(*group) + # noinspection PyTypeChecker obs_block_light[self._slices.get_idx(c.LEVEL)][xs, ys] = False light_block_map = Map((np.prod(obs_block_light, axis=0) != True).astype(int)) @@ -340,9 +307,14 @@ class BaseFactory(gym.Env): else: light_block_map = light_block_map.do_fov(*agent.pos, max(self._level_shape)) if door_shadowing: + # noinspection PyUnboundLocalVariable light_block_map[xs, ys] = 0 agent.temp_light_map = light_block_map - obs = (obs * light_block_map) - ((1 - light_block_map) * obs[self._slices.get_idx(c.LEVEL)]) + for obs_idx in range(obs.shape[0]): + if self._slices[obs_idx].can_be_shadowed: + obs[obs_idx] = (obs[obs_idx] * light_block_map) - ( + (1 - light_block_map) * obs[self._slices.get_idx(c.LEVEL)] + ) if self.combin_agent_slices_in_obs and self.n_agents > 1: agent_obs = np.sum(obs[[key for key, l_slice in self._slices.items() if c.AGENT.name in l_slice.name and @@ -357,9 +329,6 @@ class BaseFactory(gym.Env): else: return obs - def do_additional_actions(self, agent: Agent, action: int) -> bool: - raise NotImplementedError - def get_all_tiles_with_collisions(self) -> List[Tile]: tiles_with_collisions = list() for tile in self._tiles: @@ -392,7 +361,7 @@ class BaseFactory(gym.Env): valid = c.VALID return tile, valid - if self.parse_doors and agent.last_pos != h.NO_POS: + if self.parse_doors and agent.last_pos != c.NO_POS: if door := self._doors.by_pos(new_tile.pos): if door.can_collide: return agent.tile, c.NOT_VALID @@ -416,10 +385,63 @@ class BaseFactory(gym.Env): def calculate_reward(self) -> (int, dict): # Returns: Reward, Info - raise NotImplementedError + info_dict = dict() + reward = 0 + + for agent in self._agents: + if self._actions.is_moving_action(agent.temp_action): + if agent.temp_valid: + # info_dict.update(movement=1) + reward -= 0.00 + else: + # self.print('collision') + reward -= 0.01 + self.print(f'{agent.name} just hit the wall at {agent.pos}.') + info_dict.update({f'{agent.name}_vs_LEVEL': 1}) + + elif self._actions.is_door_usage(agent.temp_action): + if agent.temp_valid: + self.print(f'{agent.name} did just use the door at {agent.pos}.') + info_dict.update(door_used=1) + else: + reward -= 0.01 + self.print(f'{agent.name} just tried to use a door at {agent.pos}, but failed.') + info_dict.update({f'{agent.name}_failed_action': 1}) + info_dict.update({f'{agent.name}_failed_door_open': 1}) + elif self._actions.is_no_op(agent.temp_action): + info_dict.update(no_op=1) + reward -= 0.00 + + additional_reward, additional_info_dict = self.calculate_additional_reward(agent) + reward += additional_reward + info_dict.update(additional_info_dict) + + for other_agent in agent.temp_collisions: + info_dict.update({f'{agent.name}_vs_{other_agent.name}': 1}) + + self.print(f"reward is {reward}") + return reward, info_dict def render(self, mode='human'): - raise NotImplementedError + if not self._renderer: # lazy init + height, width = self._obs_cube.shape[1:] + self._renderer = Renderer(width, height, view_radius=self.pomdp_r, fps=5) + + walls = [RenderEntity('wall', pos) + for pos in np.argwhere(self._slices.by_enum(c.LEVEL).slice == c.OCCUPIED_CELL.value)] + + agents = [] + for i, agent in enumerate(self._agents): + name, state = h.asset_str(agent) + agents.append(RenderEntity(name, agent.pos, 1, 'none', state, i + 1, agent.temp_light_map)) + doors = [] + if self.parse_doors: + for i, door in enumerate(self._doors): + name, state = 'door_open' if door.is_open else 'door_closed', 'blank' + doors.append(RenderEntity(name, door.pos, 1, 'none', state, i + 1)) + additional_assets = self.render_additional_assets() + + self._renderer.render(walls + doors + additional_assets + agents) def save_params(self, filepath: Path): # noinspection PyProtectedMember @@ -440,3 +462,66 @@ class BaseFactory(gym.Env): def print(self, string): if self.verbose: print(string) + + # Properties which are called by the base class to extend beyond attributes of the base class + @property + def additional_actions(self) -> Union[Action, List[Action]]: + """ + When heriting from this Base Class, you musst implement this methode!!! + + :return: A list of Actions-object holding all additional actions. + :rtype: List[Action] + """ + return [] + + @property + def additional_entities(self) -> Union[Entities, List[Entities]]: + """ + When heriting from this Base Class, you musst implement this methode!!! + + :return: A single Entites collection or a list of such. + :rtype: Union[Entities, List[Entities]] + """ + return [] + + @property + def additional_slices(self) -> Union[Slice, List[Slice]]: + """ + When heriting from this Base Class, you musst implement this methode!!! + + :return: A list of Slice-objects. + :rtype: List[Slice] + """ + return [] + + # Functions which provide additions to functions of the base class + # Always call super!!!!!! + @abc.abstractmethod + def do_additional_reset(self) -> None: + pass + + @abc.abstractmethod + def do_additional_step(self) -> dict: + return {} + + @abc.abstractmethod + def do_additional_actions(self, agent: Agent, action: int) -> Union[None, bool]: + return None + + @abc.abstractmethod + def calculate_additional_reward(self, agent: Agent) -> (int, dict): + return 0, {} + + @abc.abstractmethod + def render_additional_assets(self): + return [] + + # Hooks for in between operations. + # Always call super!!!!!! + @abc.abstractmethod + def hook_pre_step(self) -> None: + pass + + @abc.abstractmethod + def hook_post_step(self) -> dict: + return {} diff --git a/environments/factory/base/objects.py b/environments/factory/base/objects.py index 62767ec..757579d 100644 --- a/environments/factory/base/objects.py +++ b/environments/factory/base/objects.py @@ -1,8 +1,5 @@ -import itertools - import networkx as nx import numpy as np -from environments import helpers as h from environments.helpers import Constants as c import itertools @@ -16,35 +13,32 @@ class Object: def __bool__(self): return True - @property - def i(self): - return self._identifier - @property def name(self): - return self._identifier + return self._name - def __init__(self, identifier, **kwargs): - self._identifier = identifier + def __init__(self, name, name_is_identifier=False, **kwargs): + name = name.name if hasattr(name, 'name') else name + self._name = f'{self.__class__.__name__}#{name}' if name_is_identifier else name if kwargs: print(f'Following kwargs were passed, but ignored: {kwargs}') def __repr__(self): - return f'{self.__class__.__name__}({self._identifier})' + return f'{self.__class__.__name__}({self.name})' class Action(Object): - @property - def name(self): - return self.i - def __init__(self, *args): super(Action, self).__init__(*args) class Slice(Object): + @property + def is_observable(self): + return self._is_observable + @property def shape(self): return self.slice.shape @@ -57,10 +51,16 @@ class Slice(Object): def free_tiles(self): return np.argwhere(self.slice == c.FREE_CELL.value) - def __init__(self, identifier, arrayslice, is_blocking_light=False): + def __init__(self, identifier, arrayslice, is_blocking_light=False, can_be_shadowed=True, is_observable=True): super(Slice, self).__init__(identifier) self.slice = arrayslice self.is_blocking_light = is_blocking_light + self.can_be_shadowed = can_be_shadowed + self._is_observable = is_observable + + def set_slice(self, new_slice: np.ndarray): + assert self.slice.shape == new_slice.shape + self.slice = new_slice class Wall(Object): @@ -89,8 +89,8 @@ class Tile(Object): def pos(self): return self._pos - def __init__(self, i, pos): - super(Tile, self).__init__(i) + def __init__(self, i, pos, **kwargs): + super(Tile, self).__init__(i, **kwargs) self._guests = dict() self._pos = tuple(pos) @@ -164,7 +164,7 @@ class MoveableEntity(Entity): if self._last_tile: return self._last_tile.pos else: - return h.NO_POS + return c.NO_POS @property def direction_of_view(self): @@ -206,8 +206,8 @@ class Door(Entity): return [node for node in self.connectivity.nodes if node not in range(len(self.connectivity_subgroups)) and node != self.pos] - def __init__(self, *args, context, closed_on_init=True, auto_close_interval=10, has_area=False): - super(Door, self).__init__(*args) + def __init__(self, *args, context, closed_on_init=True, auto_close_interval=10, has_area=False, **kwargs): + super(Door, self).__init__(*args, **kwargs) self._state = c.CLOSED_DOOR self.has_area = has_area self.auto_close_interval = auto_close_interval @@ -270,8 +270,8 @@ class Door(Entity): class Agent(MoveableEntity): - def __init__(self, *args): - super(Agent, self).__init__(*args) + def __init__(self, *args, **kwargs): + super(Agent, self).__init__(*args, **kwargs) self.clear_temp_sate() # noinspection PyAttributeOutsideInit @@ -280,5 +280,5 @@ class Agent(MoveableEntity): # if attr.startswith('temp'): self.temp_collisions = [] self.temp_valid = None - self.temp_action = -1 + self.temp_action = None self.temp_light_map = None diff --git a/environments/factory/base/registers.py b/environments/factory/base/registers.py index 26ba575..6167dbe 100644 --- a/environments/factory/base/registers.py +++ b/environments/factory/base/registers.py @@ -1,9 +1,7 @@ -import itertools import random from enum import Enum from typing import List, Union -import networkx as nx import numpy as np from environments.factory.base.objects import Entity, Tile, Agent, Door, Slice, Action @@ -16,11 +14,8 @@ class Register: _accepted_objects = Entity @classmethod - def from_argwhere_coordinates(cls, positions: (int, int), tiles): - entities = [cls._accepted_objects(i, tiles.by_pos(position)) for i, position in enumerate(positions)] - registered_obj = cls() - registered_obj.register_additional_items(entities) - return registered_obj + def from_argwhere_coordinates(cls, positions: [(int, int)], tiles): + return cls.from_tiles([tiles.by_pos(position) for position in positions]) @property def name(self): @@ -72,8 +67,8 @@ class Register: def by_name(self, item): return self[self._names[item]] - def by_enum(self, enum: Enum): - return self[self._names[enum.name]] + def by_enum(self, enum_obj: Enum): + return self[self._names[enum_obj.name]] def __repr__(self): return f'{self.__class__.__name__}({self._register})' @@ -84,13 +79,13 @@ class Register: def get_idx_by_name(self, item): return self._names[item] - def get_idx(self, enum: Enum): - return self._names[enum.name] + def get_idx(self, enum_obj: Enum): + return self._names[enum_obj.name] @classmethod def from_tiles(cls, tiles, **kwargs): - entities = [cls._accepted_objects(f'{cls._accepted_objects.__name__.upper()}#{i}', tile, **kwargs) - for i, tile in enumerate(tiles)] + # objects_name = cls._accepted_objects.__name__ + entities = [cls._accepted_objects(i, tile, name_is_identifier=True, **kwargs) for i, tile in enumerate(tiles)] registered_obj = cls() registered_obj.register_additional_items(entities) return registered_obj @@ -98,14 +93,6 @@ class Register: class EntityRegister(Register): - @classmethod - def from_argwhere_coordinates(cls, argwhere_coordinates, **kwargs): - tiles = cls() - tiles.register_additional_items( - [cls._accepted_objects(i, pos, **kwargs) for i, pos in enumerate(argwhere_coordinates)] - ) - return tiles - def __init__(self): super(EntityRegister, self).__init__() self._tiles = dict() @@ -141,6 +128,15 @@ class Entities(Register): class FloorTiles(EntityRegister): _accepted_objects = Tile + @classmethod + def from_argwhere_coordinates(cls, argwhere_coordinates): + tiles = cls() + # noinspection PyTypeChecker + tiles.register_additional_items( + [cls._accepted_objects(i, pos, name_is_identifier=True) for i, pos in enumerate(argwhere_coordinates)] + ) + return tiles + @property def occupied_tiles(self): tiles = [tile for tile in self if tile.is_occupied()] @@ -148,7 +144,7 @@ class FloorTiles(EntityRegister): return tiles @property - def empty_tiles(self): + def empty_tiles(self) -> List[Tile]: tiles = [tile for tile in self if tile.is_empty()] random.shuffle(tiles) return tiles @@ -185,6 +181,7 @@ class Actions(Register): def movement_actions(self): return self._movement_actions + # noinspection PyTypeChecker def __init__(self, movement_properties: MovementProperties, can_use_doors=False): self.allow_no_op = movement_properties.allow_no_op self.allow_diagonal_movement = movement_properties.allow_diagonal_movement @@ -193,43 +190,47 @@ class Actions(Register): super(Actions, self).__init__() if self.allow_square_movement: - self.register_additional_items([self._accepted_objects(direction) for direction in h.MANHATTAN_MOVES]) + self.register_additional_items([self._accepted_objects(direction) for direction in h.ManhattanMoves]) if self.allow_diagonal_movement: - self.register_additional_items([self._accepted_objects(direction) for direction in h.DIAGONAL_MOVES]) + self.register_additional_items([self._accepted_objects(direction) for direction in h.DiagonalMoves]) self._movement_actions = self._register.copy() if self.can_use_doors: - self.register_additional_items([self._accepted_objects('use_door')]) + self.register_additional_items([self._accepted_objects(h.EnvActions.USE_DOOR)]) if self.allow_no_op: - self.register_additional_items([self._accepted_objects('no-op')]) + self.register_additional_items([self._accepted_objects(h.EnvActions.NOOP)]) def is_moving_action(self, action: Union[int]): - #if isinstance(action, Action): - # return (action.name in h.MANHATTAN_MOVES and self.allow_square_movement) or \ - # (action.name in h.DIAGONAL_MOVES and self.allow_diagonal_movement) - #else: - return action in self.movement_actions.keys() + return action in self.movement_actions.values() - def is_no_op(self, action: Union[str, int]): - if isinstance(action, str): - action = self.by_name(action) - return self[action].name == 'no-op' + def is_no_op(self, action: Union[str, Action, int]): + if isinstance(action, int): + action = self[action] + if isinstance(action, Action): + action = action.name + return action == h.EnvActions.NOOP.name def is_door_usage(self, action: Union[str, int]): - if isinstance(action, str): - action = self.by_name(action) - return self[action].name == 'use_door' + if isinstance(action, int): + action = self[action] + if isinstance(action, Action): + action = action.name + return action == h.EnvActions.USE_DOOR.name class StateSlices(Register): _accepted_objects = Slice + @property + def n_observable_slices(self): + return len([x for x in self if x.is_observable]) + @property def AGENTSTARTIDX(self): if self._agent_start_idx: return self._agent_start_idx else: - self._agent_start_idx = min([idx for idx, x in self.items() if c.AGENT.name in x.name]) + self._agent_start_idx = min([idx for idx, x in self.items() if c.AGENT.value in x.name]) return self._agent_start_idx def __init__(self): diff --git a/environments/factory/double_task_factory.py b/environments/factory/double_task_factory.py new file mode 100644 index 0000000..d5d8dc6 --- /dev/null +++ b/environments/factory/double_task_factory.py @@ -0,0 +1,229 @@ +import time +from collections import deque +from enum import Enum +from typing import List, Union, NamedTuple +import numpy as np + +from environments.factory.simple_factory import SimpleFactory +from environments.helpers import Constants as c +from environments import helpers as h +from environments.factory.base.objects import Agent, Slice, Entity, Action +from environments.factory.base.registers import Entities + +from environments.factory.renderer import RenderEntity + + +PICK_UP = 'pick_up' +DROP_OFF = 'drop_off' +NO_ITEM = 0 +ITEM_DROP_OFF = -1 + + +def inventory_slice_name(agent_i): + if isinstance(agent_i, int): + return f'{c.INVENTORY.name}_{c.AGENT.value}#{agent_i}' + else: + return f'{c.INVENTORY.name}_{agent_i}' + + +class DropOffLocation(Entity): + + def __init__(self, *args, storage_size_until_full: int = 5, **kwargs): + super(DropOffLocation, self).__init__(DROP_OFF, *args, **kwargs) + self.storage = deque(maxlen=storage_size_until_full) + + def place_item(self, item): + self.storage.append(item) + return True + + @property + def is_full(self): + return self.storage.maxlen == len(self.storage) + + +class ItemProperties(NamedTuple): + n_items: int = 1 # How many items are there at the same time + spawn_frequency: int = 5 # Spawn Frequency in Steps + max_dropoff_storage_size: int = 5 # How many items are needed until the drop off is full + max_agent_storage_size: int = 5 # How many items are needed until the agent inventory is full + agent_can_interact: bool = True # Whether agents have the possibility to interact with the domain items + + +# noinspection PyAttributeOutsideInit,PyUnresolvedReferences +class DoubleTaskFactory(SimpleFactory): + # noinspection PyMissingConstructor + def __init__(self, item_properties: ItemProperties, *args, with_dirt=False, env_seed=time.time_ns(), **kwargs): + self.item_properties = item_properties + kwargs.update(env_seed=env_seed) + self._item_rng = np.random.default_rng(env_seed) + assert item_properties.n_items < kwargs.get('pomdp_r', 0) ** 2 or not kwargs.get('pomdp_r', 0) + self._super = self.__class__ if with_dirt else SimpleFactory + super(self._super, self).__init__(*args, **kwargs) + + @property + def additional_actions(self) -> Union[Action, List[Action]]: + super_actions = super(self._super, self).additional_actions + super_actions.append(Action(h.EnvActions.ITEM_ACTION)) + return super_actions + + @property + def additional_entities(self) -> Union[Entities, List[Entities]]: + super_entities = super(self._super, self).additional_entities + return super_entities + + @property + def additional_slices(self) -> Union[Slice, List[Slice]]: + super_slices = super(self._super, self).additional_slices + super_slices.append(Slice(c.ITEM, np.zeros(self._level_shape))) + super_slices.extend([Slice(inventory_slice_name(agent_i), np.zeros(self._level_shape), can_be_shadowed=False) + for agent_i in range(self.n_agents)]) + return super_slices + + def _flush_state(self): + super(self._super, self)._flush_state() + + # Flush environmental item state + slice_idx = self._slices.get_idx(c.ITEM) + self._obs_cube[slice_idx] = self._slices[slice_idx].slice + + # Flush per agent inventory state + for agent in self._agents: + agent_slice_idx = self._slices.get_idx_by_name(inventory_slice_name(agent.name)) + self._slices[agent_slice_idx].slice[:] = 0 + if len(agent.inventory) > 0: + max_x = self.pomdp_r if self.pomdp_r else self._level_shape[0] + x, y = (0, 0) if not self.pomdp_r else (max(agent.x - max_x, 0), max(agent.y - max_x, 0)) + for item in agent.inventory: + x_diff, y_diff = divmod(item, max_x) + self._slices[agent_slice_idx].slice[int(x+x_diff), int(y+y_diff)] = item + self._obs_cube[agent_slice_idx] = self._slices[agent_slice_idx].slice + + def _is_item_action(self, action): + if isinstance(action, int): + action = self._actions[action] + if isinstance(action, Action): + action = action.name + return action == h.EnvActions.ITEM_ACTION.name + + def do_item_action(self, agent: Agent): + item_slice = self._slices.by_enum(c.ITEM).slice + + if item := item_slice[agent.pos]: + if item == ITEM_DROP_OFF: + if agent.inventory: + valid = self._item_drop_off.place_item(agent.inventory.pop(0)) + return valid + else: + return c.NOT_VALID + + elif item != NO_ITEM: + if len(agent.inventory) < self.item_properties.max_agent_storage_size: + agent.inventory.append(item_slice[agent.pos]) + item_slice[agent.pos] = NO_ITEM + else: + return c.NOT_VALID + return c.VALID + else: + return c.NOT_VALID + + def do_additional_actions(self, agent: Agent, action: int) -> Union[None, bool]: + valid = super(self._super, self).do_additional_actions(agent, action) + if valid is None: + if self._is_item_action(action): + if self.item_properties.agent_can_interact: + valid = self.do_item_action(agent) + return bool(valid) + else: + return False + else: + return None + else: + return valid + + def do_additional_reset(self) -> None: + super(self._super, self).do_additional_reset() + self.spawn_drop_off_location() + self.spawn_items(self.item_properties.n_items) + self._next_item_spawn = self.item_properties.spawn_frequency + for agent in self._agents: + agent.inventory = list() + + def do_additional_step(self) -> dict: + info_dict = super(self._super, self).do_additional_step() + if not self._next_item_spawn: + if item_to_spawn := (self.item_properties.n_items - + (np.sum(self._slices.by_enum(c.ITEM).slice.astype(bool)) - 1)): + self.spawn_items(item_to_spawn) + self._next_item_spawn = self.item_properties.spawn_frequency + else: + self.print('No Items are spawning, limit is reached.') + else: + self._next_item_spawn -= 1 + return info_dict + + def spawn_drop_off_location(self): + single_empty_tile = self._tiles.empty_tiles[0] + self._item_drop_off = DropOffLocation(single_empty_tile, + storage_size_until_full=self.item_properties.max_dropoff_storage_size) + single_empty_tile.enter(self._item_drop_off) + self._slices.by_enum(c.ITEM).slice[single_empty_tile.pos] = ITEM_DROP_OFF + + def calculate_additional_reward(self, agent: Agent) -> (int, dict): + reward, info_dict = super(self._super, self).calculate_additional_reward(agent) + if self._is_item_action(agent.temp_action): + if agent.temp_valid: + if agent.pos == self._item_drop_off.pos: + info_dict.update({f'{agent.name}_item_dropoff': 1}) + + reward += 1 + else: + info_dict.update({f'{agent.name}_item_pickup': 1}) + reward += 0.1 + else: + info_dict.update({f'{agent.name}_failed_item_action': 1}) + reward -= 0.1 + return reward, info_dict + + def render_additional_assets(self, mode='human'): + additional_assets = super(self._super, self).render_additional_assets() + item_slice = self._slices.by_enum(c.ITEM).slice + items = [RenderEntity(DROP_OFF if item_slice[tile.pos] == ITEM_DROP_OFF else c.ITEM.value, tile.pos) + for tile in [tile for tile in self._tiles if item_slice[tile.pos] != NO_ITEM]] + additional_assets.extend(items) + return additional_assets + + def spawn_items(self, n_items): + tiles = self._tiles.empty_tiles[:n_items] + item_slice = self._slices.by_enum(c.ITEM).slice + for idx, tile in enumerate(tiles, start=1): + item_slice[tile.pos] = idx + pass + + +if __name__ == '__main__': + import random + render = True + + item_props = ItemProperties() + + factory = DoubleTaskFactory(item_props, n_agents=1, done_at_collision=False, frames_to_stack=0, + level_name='rooms', max_steps=400, + omit_agent_slice_in_obs=True, parse_doors=True, pomdp_r=3, + record_episodes=False, verbose=False + ) + + n_actions = factory.action_space.n - 1 + _ = factory.observation_space + + for epoch in range(100): + random_actions = [[random.randint(0, n_actions) for _ in range(factory.n_agents)] for _ in range(200)] + env_state = factory.reset() + rew = 0 + for agent_i_action in random_actions: + env_state, step_r, done_bool, info_obj = factory.step(agent_i_action) + rew += step_r + if render: + factory.render() + if done_bool: + break + print(f'Factory run {epoch} done, reward is:\n {rew}') diff --git a/environments/factory/item_pickup.py b/environments/factory/item_pickup.py deleted file mode 100644 index 02b197a..0000000 --- a/environments/factory/item_pickup.py +++ /dev/null @@ -1,115 +0,0 @@ -import time -from collections import deque -from typing import List, Union, NamedTuple -import numpy as np - -from environments.helpers import Constants as c -from environments import helpers as h -from environments.factory.base.base_factory import BaseFactory -from environments.factory.base.objects import Agent, Action, Object, Slice, Entity -from environments.factory.base.registers import Entities - -from environments.factory.renderer import Renderer -from environments.utility_classes import MovementProperties - - - -ITEM = 'item' -INVENTORY = 'inventory' -PICK_UP = 'pick_up' -DROP_DOWN = 'drop_down' -ITEM_ACTION = 'item_action' -NO_ITEM = 0 -ITEM_DROP_OFF = -1 - - -def inventory_slice_name(agent): - return f'{agent.name}_{INVENTORY}' - - -class DropOffLocation(Entity): - - def __init__(self, *args, storage_size_until_full: int = 5, **kwargs): - super(DropOffLocation, self).__init__(*args, **kwargs) - self.storage = deque(maxlen=storage_size_until_full) - - def place_item(self, item): - self.storage.append(item) - return True - - @property - def is_full(self): - return self.storage.maxlen == len(self.storage) - - -class ItemProperties(NamedTuple): - n_items: int = 1 # How many items are there at the same time - spawn_frequency: int = 5 # Spawn Frequency in Steps - max_dropoff_storage_size: int = 5 # How many items are needed until the drop off is full - max_agent_storage_size: int = 5 # How many items are needed until the agent inventory is full - - -# noinspection PyAttributeOutsideInit -class ItemFactory(BaseFactory): - def __init__(self, item_properties: ItemProperties, *args, **kwargs): - self.item_properties = item_properties - self._item_rng = np.random.default_rng(kwargs.get('seed', default=time.time_ns())) - super(ItemFactory, self).__init__(*args, **kwargs) - - @property - def additional_actions(self) -> Union[str, List[str]]: - return [ITEM_ACTION] - - @property - def additional_entities(self) -> Union[Entities, List[Entities]]: - return [] - - @property - def additional_slices(self) -> Union[Slice, List[Slice]]: - return [Slice(ITEM, np.zeros(self._level_shape))] + [ - Slice(inventory_slice_name(agent), np.zeros(self._level_shape)) for agent in self._agents] - - def _is_item_action(self, action): - if isinstance(action, str): - action = self._actions.by_name(action) - return self._actions[action].name == ITEM_ACTION - - def do_item_action(self, agent): - item_slice = self._slices.by_name(ITEM).slice - inventory_slice = self._slices.by_name(inventory_slice_name(agent)).slice - - if item := item_slice[agent.pos]: - if item == ITEM_DROP_OFF: - - valid = self._item_drop_off.place_item(inventory_slice.sum()) - - - item_slice[agent.pos] = NO_ITEM - return True - else: - return False - - def do_additional_actions(self, agent: Agent, action: int) -> bool: - if self._is_item_action(action): - valid = self.do_item_action(agent) - return valid - else: - raise RuntimeError('This should not happen!!!') - - def do_additional_reset(self) -> None: - self.spawn_drop_off_location() - self.spawn_items(self.n_items) - if self.n_items > 1: - self._next_item_spawn = self.item_properties.spawn_frequency - - def spawn_drop_off_location(self): - single_empty_tile = self._tiles.empty_tiles[0] - self._item_drop_off = DropOffLocation(storage_size_until_full=self.item_properties.max_dropoff_storage_size) - - def calculate_reward(self) -> (int, dict): - pass - - def render(self, mode='human'): - pass - - diff --git a/environments/factory/renderer.py b/environments/factory/renderer.py index bf25f77..42491db 100644 --- a/environments/factory/renderer.py +++ b/environments/factory/renderer.py @@ -8,7 +8,7 @@ from typing import NamedTuple, Any import time -class Entity(NamedTuple): +class RenderEntity(NamedTuple): name: str pos: np.array value: float = 1 @@ -108,7 +108,7 @@ class Renderer: blits.extendleft(vis_rects) if entity.state != 'blank': agent_state_blits = self.blit_params( - Entity(entity.state, (entity.pos[0]+0.12, entity.pos[1]), 0.48, 'scale') + RenderEntity(entity.state, (entity.pos[0] + 0.12, entity.pos[1]), 0.48, 'scale') ) textsurface = self.font.render(str(entity.id), False, (0, 0, 0)) text_blit = dict(source=textsurface, dest=(bp['dest'].center[0]-.07*self.cell_size, @@ -125,6 +125,6 @@ class Renderer: if __name__ == '__main__': renderer = Renderer(fps=2, cell_size=40) for i in range(15): - entity_1 = Entity('agent', [5, i], 1, 'idle', 'idle') + entity_1 = RenderEntity('agent', [5, i], 1, 'idle', 'idle') renderer.render([entity_1]) diff --git a/environments/factory/simple_factory.py b/environments/factory/simple_factory.py index 694e69e..20b340c 100644 --- a/environments/factory/simple_factory.py +++ b/environments/factory/simple_factory.py @@ -1,4 +1,5 @@ import time +from enum import Enum from typing import List, Union, NamedTuple import random @@ -7,24 +8,32 @@ import numpy as np from environments.helpers import Constants as c from environments import helpers as h from environments.factory.base.base_factory import BaseFactory -from environments.factory.base.objects import Agent, Action, Object, Slice +from environments.factory.base.objects import Agent, Action, Slice from environments.factory.base.registers import Entities -from environments.factory.renderer import Renderer, Entity +from environments.factory.renderer import RenderEntity from environments.utility_classes import MovementProperties -DIRT = "dirt" -CLEAN_UP_ACTION = 'clean_up' + +CLEAN_UP_ACTION = h.EnvActions.CLEAN_UP + + +class ObsSlice(Enum): + OWN = -1 + LEVEL = c.LEVEL.value + AGENT = c.AGENT.value class DirtProperties(NamedTuple): clean_amount: int = 1 # How much does the robot clean with one actions. max_spawn_ratio: float = 0.2 # On max how much tiles does the dirt spawn in percent. - gain_amount: float = 0.3 # How much dirt does spawn per tile - spawn_frequency: int = 5 # Spawn Frequency in Steps + gain_amount: float = 0.3 # How much dirt does spawn per tile. + spawn_frequency: int = 5 # Spawn Frequency in Steps. max_local_amount: int = 2 # Max dirt amount per tile. max_global_amount: int = 20 # Max dirt amount in the whole environment. - dirt_smear_amount: float = 0.2 # Agents smear dirt, when not cleaning up in place + dirt_smear_amount: float = 0.2 # Agents smear dirt, when not cleaning up in place. + agent_can_interact: bool = True # Whether the agents can interact with the dirt in this environment. + on_obs_slice: Enum = ObsSlice.LEVEL def softmax(x): @@ -41,69 +50,50 @@ def entropy(x): class SimpleFactory(BaseFactory): @property - def additional_actions(self) -> List[Object]: - return [Action(CLEAN_UP_ACTION)] + def additional_actions(self) -> Union[Action, List[Action]]: + super_actions = super(SimpleFactory, self).additional_actions + if self.dirt_properties.agent_can_interact: + super_actions.append(Action(CLEAN_UP_ACTION)) + return super_actions @property def additional_entities(self) -> Union[Entities, List[Entities]]: - return [] + super_entities = super(SimpleFactory, self).additional_entities + return super_entities @property def additional_slices(self) -> List[Slice]: - return [Slice('dirt', np.zeros(self._level_shape))] + super_slices = super(SimpleFactory, self).additional_slices + super_slices.extend([Slice(c.DIRT, np.zeros(self._level_shape))]) + return super_slices - def _is_clean_up_action(self, action: Union[str, int]): - if isinstance(action, str): - action = self._actions.by_name(action) - return self._actions[action].name == CLEAN_UP_ACTION + def _is_clean_up_action(self, action: Union[str, Action, int]): + if isinstance(action, int): + action = self._actions[action] + if isinstance(action, Action): + action = action.name + return action == CLEAN_UP_ACTION.name - def __init__(self, *args, dirt_properties: DirtProperties = DirtProperties(), **kwargs): + def __init__(self, *args, dirt_properties: DirtProperties = DirtProperties(), env_seed=time.time_ns(), **kwargs): self.dirt_properties = dirt_properties - self._renderer = None # expensive - don't use it when not required ! - self._dirt_rng = np.random.default_rng(kwargs.get('seed', default=time.time_ns())) + self._dirt_rng = np.random.default_rng(env_seed) + kwargs.update(env_seed=env_seed) super(SimpleFactory, self).__init__(*args, **kwargs) def _flush_state(self): super(SimpleFactory, self)._flush_state() - self._obs_cube[self._slices.get_idx_by_name(DIRT)] = self._slices.by_name(DIRT).slice + self._obs_cube[self._slices.get_idx(c.DIRT)] = self._slices.by_enum(c.DIRT).slice - def render(self, mode='human'): - - if not self._renderer: # lazy init - height, width = self._obs_cube.shape[1:] - self._renderer = Renderer(width, height, view_radius=self.pomdp_r, fps=5) - dirt_slice = self._slices.by_name(DIRT).slice - dirt = [Entity('dirt', tile.pos, min(0.15 + dirt_slice[tile.pos], 1.5), 'scale') + def render_additional_assets(self, mode='human'): + additional_assets = super(SimpleFactory, self).render_additional_assets() + dirt_slice = self._slices.by_enum(c.DIRT).slice + dirt = [RenderEntity('dirt', tile.pos, min(0.15 + dirt_slice[tile.pos], 1.5), 'scale') for tile in [tile for tile in self._tiles if dirt_slice[tile.pos]]] - walls = [Entity('wall', pos) - for pos in np.argwhere(self._slices.by_enum(c.LEVEL).slice == c.OCCUPIED_CELL.value)] - - def asset_str(agent): - # What does this abonimation do? - # if any([x is None for x in [self._slices[j] for j in agent.collisions]]): - # print('error') - col_names = [x.name for x in agent.temp_collisions] - if c.AGENT.value in col_names: - return 'agent_collision', 'blank' - elif not agent.temp_valid or c.LEVEL.name in col_names or c.AGENT.name in col_names: - return c.AGENT.value, 'invalid' - elif self._is_clean_up_action(agent.temp_action): - return c.AGENT.value, 'valid' - else: - return c.AGENT.value, 'idle' - agents = [] - for i, agent in enumerate(self._agents): - name, state = asset_str(agent) - agents.append(Entity(name, agent.pos, 1, 'none', state, i+1, agent.temp_light_map)) - doors = [] - if self.parse_doors: - for i, door in enumerate(self._doors): - name, state = 'door_open' if door.is_open else 'door_closed', 'blank' - agents.append(Entity(name, door.pos, 1, 'none', state, i+1)) - self._renderer.render(dirt+walls+agents+doors) + additional_assets.extend(dirt) + return additional_assets def spawn_dirt(self) -> None: - dirt_slice = self._slices.by_name(DIRT).slice + dirt_slice = self._slices.by_enum(c.DIRT).slice # dirty_tiles = [tile for tile in self._tiles if dirt_slice[tile.pos]] curr_dirt_amount = dirt_slice.sum() if not curr_dirt_amount > self.dirt_properties.max_global_amount: @@ -119,7 +109,7 @@ class SimpleFactory(BaseFactory): pass def clean_up(self, agent: Agent) -> bool: - dirt_slice = self._slices.by_name(DIRT).slice + dirt_slice = self._slices.by_enum(c.DIRT).slice if old_dirt_amount := dirt_slice[agent.pos]: new_dirt_amount = old_dirt_amount - self.dirt_properties.clean_amount dirt_slice[agent.pos] = max(new_dirt_amount, c.FREE_CELL.value) @@ -128,10 +118,11 @@ class SimpleFactory(BaseFactory): return False def do_additional_step(self) -> dict: + info_dict = super(SimpleFactory, self).do_additional_step() if smear_amount := self.dirt_properties.dirt_smear_amount: - dirt_slice = self._slices.by_name(DIRT).slice + dirt_slice = self._slices.by_enum(c.DIRT).slice for agent in self._agents: - if agent.temp_valid and agent.last_pos != h.NO_POS: + if agent.temp_valid and agent.last_pos != c.NO_POS: if dirt := dirt_slice[agent.last_pos]: if smeared_dirt := round(dirt * smear_amount, 2): dirt_slice[agent.last_pos] = max(0, dirt_slice[agent.last_pos]-smeared_dirt) @@ -144,23 +135,30 @@ class SimpleFactory(BaseFactory): self._next_dirt_spawn = self.dirt_properties.spawn_frequency else: self._next_dirt_spawn -= 1 - return {} + return info_dict - def do_additional_actions(self, agent: Agent, action: int) -> bool: - if self._is_clean_up_action(action): - valid = self.clean_up(agent) - return valid + def do_additional_actions(self, agent: Agent, action: int) -> Union[None, bool]: + valid = super(SimpleFactory, self).do_additional_actions(agent, action) + if valid is None: + if self._is_clean_up_action(action): + if self.dirt_properties.agent_can_interact: + valid = self.clean_up(agent) + return valid + else: + return False + else: + return None else: - return c.NOT_VALID.value + return valid def do_additional_reset(self) -> None: + super(SimpleFactory, self).do_additional_reset() self.spawn_dirt() self._next_dirt_spawn = self.dirt_properties.spawn_frequency - def calculate_reward(self) -> (int, dict): - info_dict = dict() - - dirt_slice = self._slices.by_name(DIRT).slice + def calculate_additional_reward(self, agent: Agent) -> (int, dict): + reward, info_dict = super(SimpleFactory, self).calculate_additional_reward(agent) + dirt_slice = self._slices.by_enum(c.DIRT).slice dirty_tiles = [dirt_slice[tile.pos] for tile in self._tiles if dirt_slice[tile.pos]] current_dirt_amount = sum(dirty_tiles) dirty_tile_count = len(dirty_tiles) @@ -173,56 +171,21 @@ class SimpleFactory(BaseFactory): info_dict.update(dirty_tile_count=dirty_tile_count) info_dict.update(dirt_distribution_score=dirt_distribution_score) - try: - # penalty = current_dirt_amount - reward = 0 - except (ZeroDivisionError, RuntimeWarning): - reward = 0 - - for agent in self._agents: - if agent.temp_collisions: - self.print(f't = {self._steps}\t{agent.name} has collisions with {agent.temp_collisions}') - - if self._is_clean_up_action(agent.temp_action): - if agent.temp_valid: - reward += 0.5 - self.print(f'{agent.name} did just clean up some dirt at {agent.pos}.') - info_dict.update(dirt_cleaned=1) - else: - reward -= 0.01 - self.print(f'{agent.name} just tried to clean up some dirt at {agent.pos}, but failed.') - info_dict.update({f'{agent.name}_failed_action': 1}) - info_dict.update({f'{agent.name}_failed_action': 1}) - info_dict.update({f'{agent.name}_failed_dirt_cleanup': 1}) - - elif self._actions.is_moving_action(agent.temp_action): - if agent.temp_valid: - # info_dict.update(movement=1) - reward -= 0.00 - else: - # self.print('collision') - reward -= 0.01 - self.print(f'{agent.name} just hit the wall at {agent.pos}.') - info_dict.update({f'{agent.name}_vs_LEVEL': 1}) - - elif self._actions.is_door_usage(agent.temp_action): - if agent.temp_valid: - self.print(f'{agent.name} did just use the door at {agent.pos}.') - info_dict.update(door_used=1) - else: - reward -= 0.01 - self.print(f'{agent.name} just tried to use a door at {agent.pos}, but failed.') - info_dict.update({f'{agent.name}_failed_action': 1}) - info_dict.update({f'{agent.name}_failed_door_open': 1}) + if agent.temp_collisions: + self.print(f't = {self._steps}\t{agent.name} has collisions with {agent.temp_collisions}') + if self._is_clean_up_action(agent.temp_action): + if agent.temp_valid: + reward += 0.5 + self.print(f'{agent.name} did just clean up some dirt at {agent.pos}.') + info_dict.update(dirt_cleaned=1) else: - info_dict.update(no_op=1) - reward -= 0.00 + reward -= 0.01 + self.print(f'{agent.name} just tried to clean up some dirt at {agent.pos}, but failed.') + info_dict.update({f'{agent.name}_failed_action': 1}) + info_dict.update({f'{agent.name}_failed_action': 1}) + info_dict.update({f'{agent.name}_failed_dirt_cleanup': 1}) - for other_agent in agent.temp_collisions: - info_dict.update({f'{agent.name}_vs_{other_agent.name}': 1}) - - self.print(f"reward is {reward}") # Potential based rewards -> # track the last reward , minus the current reward = potential return reward, info_dict diff --git a/environments/helpers.py b/environments/helpers.py index 5d340f6..2ec7ee1 100644 --- a/environments/helpers.py +++ b/environments/helpers.py @@ -5,58 +5,76 @@ from typing import Tuple, Union import numpy as np from pathlib import Path - -# Constants -class Constants(Enum): - WALL = '#' - DOOR = 'D' - DANGER_ZONE = 'x' - LEVEL = 'level' - AGENT = 'Agent' - FREE_CELL = 0 - OCCUPIED_CELL = 1 - - DOORS = 'doors' - CLOSED_DOOR = 1 - OPEN_DOOR = -1 - - ACTION = auto() - COLLISIONS = auto() - VALID = True - NOT_VALID = False - - def __bool__(self): - return bool(self.value) - - LEVELS_DIR = 'levels' TO_BE_AVERAGED = ['dirt_amount', 'dirty_tiles'] IGNORED_DF_COLUMNS = ['Episode', 'Run', 'train_step', 'step', 'index', 'dirt_amount', 'dirty_tile_count', 'terminal_observation', 'episode'] -MANHATTAN_MOVES = ['north', 'east', 'south', 'west'] -DIAGONAL_MOVES = ['north_east', 'south_east', 'south_west', 'north_west'] -NO_POS = (-9999, -9999) +# Constants +class Constants(Enum): + WALL = '#' + DOOR = 'D' + DANGER_ZONE = 'x' + LEVEL = 'level' + AGENT = 'Agent' + FREE_CELL = 0 + OCCUPIED_CELL = 1 + NO_POS = (-9999, -9999) -ACTIONMAP = defaultdict(lambda: (0, 0), dict(north=(-1, 0), east=(0, 1), - south=(1, 0), west=(0, -1), - north_east=(-1, +1), south_east=(1, 1), - south_west=(+1, -1), north_west=(-1, -1) - ) + DOORS = 'doors' + CLOSED_DOOR = 1 + OPEN_DOOR = -1 + + ACTION = auto() + COLLISIONS = auto() + VALID = True + NOT_VALID = False + + # Dirt Env + DIRT = 'dirt' + + # Item Env + ITEM = 'item' + INVENTORY = 'inventory' + + def __bool__(self): + return bool(self.value) + + +class ManhattanMoves(Enum): + NORTH = 'north' + EAST = 'east' + SOUTH = 'south' + WEST = 'west' + + +class DiagonalMoves(Enum): + NORTHEAST = 'north_east' + SOUTHEAST = 'south_east' + SOUTHWEST = 'south_west' + NORTHWEST = 'north_west' + + +class EnvActions(Enum): + NOOP = 'no_op' + USE_DOOR = 'use_door' + CLEAN_UP = 'clean_up' + ITEM_ACTION = 'item_action' + + +d = DiagonalMoves +m = ManhattanMoves +c = Constants + +ACTIONMAP = defaultdict(lambda: (0, 0), {m.NORTH.name: (-1, 0), d.NORTHEAST.name: (-1, +1), + m.EAST.name: (0, 1), d.SOUTHEAST.name: (1, 1), + m.SOUTH.name: (1, 0), d.SOUTHWEST.name: (+1, -1), + m.WEST.name: (0, -1), d.NORTHWEST.name: (-1, -1) + } ) -HORIZONTAL_DOOR_MAP = np.asarray([[0, 0, 0], [1, 0, 1], [0, 0, 0]]) -VERTICAL_DOOR_MAP = np.asarray([[0, 1, 0], [0, 0, 0], [0, 1, 0]]) - -HORIZONTAL_DOOR_ZONE_1 = np.asarray([[1, 1, 1], [0, 0, 0], [0, 0, 0]]) -HORIZONTAL_DOOR_ZONE_2 = np.asarray([[0, 0, 0], [0, 0, 0], [1, 1, 1]]) -VERTICAL_DOOR_ZONE_1 = np.asarray([[1, 0, 0], [0, 0, 0], [0, 0, 1]]) -VERTICAL_DOOR_ZONE_2 = np.asarray([[1, 0, 0], [0, 0, 0], [0, 0, 1]]) - - - # Utility functions def parse_level(path): @@ -67,13 +85,13 @@ def parse_level(path): return level -def one_hot_level(level, wall_char: Union[Constants, str] = Constants.WALL): +def one_hot_level(level, wall_char: Union[c, str] = c.WALL): grid = np.array(level) binary_grid = np.zeros(grid.shape, dtype=np.int8) - if wall_char in Constants: - binary_grid[grid == wall_char.value] = Constants.OCCUPIED_CELL.value + if wall_char in c: + binary_grid[grid == wall_char.value] = c.OCCUPIED_CELL.value else: - binary_grid[grid == wall_char] = Constants.OCCUPIED_CELL.value + binary_grid[grid == wall_char] = c.OCCUPIED_CELL.value return binary_grid @@ -89,7 +107,22 @@ def check_position(slice_to_check_against: np.ndarray, position_to_check: Tuple[ # Check for collision with level walls valid = valid and not slice_to_check_against[x_pos, y_pos] - return Constants.VALID if valid else Constants.NOT_VALID + return c.VALID if valid else c.NOT_VALID + + +def asset_str(agent): + # What does this abonimation do? + # if any([x is None for x in [self._slices[j] for j in agent.collisions]]): + # print('error') + col_names = [x.name for x in agent.temp_collisions] + if c.AGENT.value in col_names: + return 'agent_collision', 'blank' + elif not agent.temp_valid or c.LEVEL.name in col_names or c.AGENT.name in col_names: + return c.AGENT.value, 'invalid' + elif agent.temp_valid: + return c.AGENT.value, 'valid' + else: + return c.AGENT.value, 'idle' if __name__ == '__main__': diff --git a/main.py b/main.py index 3bf7d8a..3a7dcc2 100644 --- a/main.py +++ b/main.py @@ -9,6 +9,7 @@ import pandas as pd from stable_baselines3.common.callbacks import CallbackList +from environments.factory.double_task_factory import DoubleTaskFactory, ItemProperties from environments.factory.simple_factory import DirtProperties, SimpleFactory from environments.helpers import IGNORED_DF_COLUMNS from environments.logging.monitor import MonitorCallback @@ -94,11 +95,12 @@ if __name__ == '__main__': dirt_props = DirtProperties(clean_amount=1, gain_amount=0.1, max_global_amount=20, max_local_amount=1, spawn_frequency=5, max_spawn_ratio=0.05, - dirt_smear_amount=0.0) + dirt_smear_amount=0.0, agent_can_interact=False) + item_props = ItemProperties(n_items=5, agent_can_interact=True) move_props = MovementProperties(allow_diagonal_movement=True, allow_square_movement=True, allow_no_op=False) - train_steps = 2.5e6 + train_steps = 6e5 time_stamp = int(time.time()) out_path = None @@ -106,11 +108,13 @@ if __name__ == '__main__': for modeL_type in [A2C, PPO, DQN]: # ,RegDQN, QRDQN]: for seed in range(3): - with SimpleFactory(n_agents=1, dirt_properties=dirt_props, pomdp_radius=2, max_steps=400, parse_doors=True, - movement_properties=move_props, level_name='rooms', frames_to_stack=3, - omit_agent_slice_in_obs=True, combin_agent_slices_in_obs=True, record_episodes=False, - cast_shadows=True, doors_have_area=False, seed=seed - ) as env: + with DoubleTaskFactory(n_agents=1, with_dirt=False, + item_properties=item_props, dirt_properties=None, movement_properties=move_props, + pomdp_radius=2, max_steps=500, parse_doors=True, + level_name='rooms', frames_to_stack=3, + omit_agent_slice_in_obs=True, combin_agent_slices_in_obs=True, record_episodes=False, + cast_shadows=True, doors_have_area=False, seed=seed + ) as env: if modeL_type.__name__ in ["PPO", "A2C"]: kwargs = dict(ent_coef=0.01) diff --git a/requirements.txt b/requirements.txt index 96f4391..1d1a27c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -27,4 +27,6 @@ gym~=0.18.0 PyYAML~=5.3.1 pyglet~=1.5.0 optuna~=2.7.0 -natsort~=7.1.1 \ No newline at end of file +natsort~=7.1.1 +tqdm~=4.60.0 +networkx~=2.6.1 \ No newline at end of file