From a08ae73656510df6d7260573964d16a85e5816af Mon Sep 17 00:00:00 2001 From: Steffen Illium Date: Mon, 8 May 2023 10:26:05 +0200 Subject: [PATCH] Doors are now seperate --- .../factory/additional/_template/__init__.py | 0 .../additional/_template/_collections.py | 38 ++++ .../factory/additional/_template/_entities.py | 71 +++++++ .../factory/additional/_template/_util.py | 31 +++ .../additional/_template/factory_template.py | 196 ++++++++++++++++++ .../factory/additional/combined_factories.py | 18 +- .../factory/additional/dirt/factory_dirt.py | 2 +- .../factory/additional/doors/__init__.py | 0 .../additional/doors/doors_collections.py | 38 ++++ .../additional/doors/doors_entities.py | 69 ++++++ .../factory/additional/doors/doors_util.py | 31 +++ .../factory/additional/doors/factory_doors.py | 196 ++++++++++++++++++ environments/factory/base/base_factory.py | 122 +---------- environments/factory/base/objects.py | 112 ++-------- environments/factory/base/registers.py | 39 +--- environments/helpers.py | 18 +- quickstart/single_agent_train_dirt_env.py | 6 +- 17 files changed, 725 insertions(+), 262 deletions(-) create mode 100644 environments/factory/additional/_template/__init__.py create mode 100644 environments/factory/additional/_template/_collections.py create mode 100644 environments/factory/additional/_template/_entities.py create mode 100644 environments/factory/additional/_template/_util.py create mode 100644 environments/factory/additional/_template/factory_template.py create mode 100644 environments/factory/additional/doors/__init__.py create mode 100644 environments/factory/additional/doors/doors_collections.py create mode 100644 environments/factory/additional/doors/doors_entities.py create mode 100644 environments/factory/additional/doors/doors_util.py create mode 100644 environments/factory/additional/doors/factory_doors.py diff --git a/environments/factory/additional/_template/__init__.py b/environments/factory/additional/_template/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/environments/factory/additional/_template/_collections.py b/environments/factory/additional/_template/_collections.py new file mode 100644 index 0000000..029a255 --- /dev/null +++ b/environments/factory/additional/_template/_collections.py @@ -0,0 +1,38 @@ +from typing import Union + +from environments.factory.additional.doors.doors_entities import Door +from environments.factory.base.registers import EntityCollection + +from environments.factory.additional.doors.doors_util import Constants as c + + +class Doors(EntityCollection): + + def __init__(self, *args, indicate_area=False, **kwargs): + self.indicate_area = indicate_area + self._area_marked = False + super(Doors, self).__init__(*args, is_blocking_light=True, can_collide=True, **kwargs) + + _accepted_objects = Door + + def get_near_position(self, position: (int, int)) -> Union[None, Door]: + try: + return next(door for door in self if position in door.tile.neighboring_floor_pos) + except StopIteration: + return None + + def tick_doors(self): + for door in self: + door.tick() + + def as_array(self): + if not self._area_marked and self.indicate_area: + for door in self: + for tile in door.tile.neighboring_floor: + if self._individual_slices: + pass + else: + pos = (0, *tile.pos) + self._lazy_eval_transforms.append((pos, c.ACCESS_DOOR_CELL)) + self._area_marked = True + return super(Doors, self).as_array() diff --git a/environments/factory/additional/_template/_entities.py b/environments/factory/additional/_template/_entities.py new file mode 100644 index 0000000..80fabb8 --- /dev/null +++ b/environments/factory/additional/_template/_entities.py @@ -0,0 +1,71 @@ +from environments.factory.base.objects import Entity +from environments.factory.additional.doors.doors_util import Constants as c + + +class Template(Entity): + """Template for new Entity""" + + # How to define / override properties + @property + def is_blocking(self): + return False + + @property + def can_collide(self): + return False if self.template_attr else True + + @property + def encoding(self): + # This is important as it shadow is checked by occupation value + return c.CLOSED_DOOR_CELL if self.is_closed else c.OPEN_DOOR_CELL + + @property + def str_state(self): + return 'open' if self.is_open else 'closed' + + def __init__(self, *args, closed_on_init=True, auto_close_interval=10, indicate_area=False, **kwargs): + super(Template, self).__init__(*args, **kwargs) + self._state = c.CLOSED_DOOR + self.indicate_area = indicate_area + self.auto_close_interval = auto_close_interval + self.time_to_close = -1 + if not closed_on_init: + self._open() + + def summarize_state(self): + state_dict = super().summarize_state() + state_dict.update(state=str(self.str_state), time_to_close=int(self.time_to_close)) + return state_dict + + @property + def is_closed(self): + return self._state == c.CLOSED_DOOR + + @property + def is_open(self): + return self._state == c.OPEN_DOOR + + @property + def status(self): + return self._state + + def use(self): + if self._state == c.OPEN_DOOR: + self._close() + else: + self._open() + + def tick(self): + if self.is_open and len(self.tile) == 1 and self.time_to_close: + self.time_to_close -= 1 + elif self.is_open and not self.time_to_close and len(self.tile) == 1: + self.use() + + def _open(self): + self._state = c.OPEN_DOOR + self._collection.notify_change_to_value(self) + self.time_to_close = self.auto_close_interval + + def _close(self): + self._state = c.CLOSED_DOOR + self._collection.notify_change_to_value(self) diff --git a/environments/factory/additional/_template/_util.py b/environments/factory/additional/_template/_util.py new file mode 100644 index 0000000..6e32e09 --- /dev/null +++ b/environments/factory/additional/_template/_util.py @@ -0,0 +1,31 @@ + +from typing import NamedTuple + +from environments.helpers import Constants as BaseConstants, EnvActions as BaseActions + + +class Constants(BaseConstants): + DOOR = 'Door' # Identifier of Single-Door Entities. + DOORS = 'Doors' # Identifier of Door-objects and sets (collections). + DOOR_SYMBOL = 'D' # Door identifier for resolving the string based map files. + + ACCESS_DOOR_CELL = 1 / 3 # Access-door-Cell value used in observation + OPEN_DOOR_CELL = 2 / 3 # Open-door-Cell value used in observation + CLOSED_DOOR_CELL = 3 / 3 # Closed-door-Cell value used in observation + + CLOSED_DOOR = 'closed' # Identifier to compare door-is-closed state + OPEN_DOOR = 'open' # Identifier to compare door-is-open state + # ACCESS_DOOR = 'access' # Identifier to compare access positions + + +class Actions(BaseActions): + USE_DOOR = 'use_door' + + +class RewardsDoor(NamedTuple): + USE_DOOR_VALID: float = -0.00 + USE_DOOR_FAIL: float = -0.01 + + +class DoorProperties(NamedTuple): + indicate_door_area: bool = True # Wether the door area should be indicated in the agents' observation. diff --git a/environments/factory/additional/_template/factory_template.py b/environments/factory/additional/_template/factory_template.py new file mode 100644 index 0000000..2268cc9 --- /dev/null +++ b/environments/factory/additional/_template/factory_template.py @@ -0,0 +1,196 @@ +import time +from typing import List, Union, Dict +import random + +import numpy as np + +from environments.factory.additional.doors.doors_collections import Doors +from environments.factory.additional.doors.doors_util import DoorProperties, RewardsDoor, Constants, Actions +from environments.factory.base.base_factory import BaseFactory +from environments.factory.base.objects import Agent, Action +from environments.factory.base.registers import Entities + +from environments import helpers as h + +from environments.factory.base.renderer import RenderEntity +from environments.utility_classes import ObservationProperties + + +def softmax(x): + """Compute softmax values for each sets of scores in x.""" + e_x = np.exp(x - np.max(x)) + return e_x / e_x.sum() + + +def entropy(x): + return -(x * np.log(x + 1e-8)).sum() + + +c = Constants +a = Actions + + +# noinspection PyAttributeOutsideInit, PyAbstractClass +class DoorFactory(BaseFactory): + + @property + def actions_hook(self) -> Union[Action, List[Action]]: + super_actions = super().actions_hook + super_actions.append(Action(str_ident=a.USE_DOOR)) + return super_actions + + @property + def entities_hook(self) -> Dict[(str, Entities)]: + super_entities = super().entities_hook + + parsed_doors = h.one_hot_level(self._parsed_level, c.DOOR_SYMBOL) + parsed_doors = np.pad(parsed_doors, self.obs_prop.pomdp_r, 'constant', constant_values=0) + if np.any(parsed_doors): + door_tiles = [self[c.FLOOR].by_pos(tuple(pos)) for pos in np.argwhere(parsed_doors == c.OCCUPIED_CELL)] + doors = Doors.from_tiles(door_tiles, self._level_shape, indicate_area=self.obs_prop.indicate_door_area, + entity_kwargs=dict() + ) + super_entities.update(({c.DOORS: doors})) + return super_entities + + def __init__(self, *args, + door_properties: DoorProperties = DoorProperties(), rewards_door: RewardsDoor = RewardsDoor(), + env_seed=time.time_ns(), **kwargs): + if isinstance(door_properties, dict): + door_properties = DoorProperties(**door_properties) + if isinstance(rewards_door, dict): + rewards_door = RewardsDoor(**rewards_door) + self.door_properties = door_properties + self.rewards_door = rewards_door + self._door_rng = np.random.default_rng(env_seed) + self._doors: Doors + kwargs.update(env_seed=env_seed) + # TODO: Reset ---> document this + super().__init__(*args, **kwargs) + + def render_assets_hook(self, mode='human'): + additional_assets = super().render_assets_hook() + doors = [] + for i, door in enumerate(self[c.DOORS]): + name, state = 'door_open' if door.is_open else 'door_closed', 'blank' + doors.append(RenderEntity(name, door.pos, 1, 'none', state, i + 1)) + additional_assets.extend(doors) + return additional_assets + + + def step_hook(self) -> (List[dict], dict): + super_reward_info = super().step_hook() + # Step the door close intervall + # TODO: Maybe move this to self.post_step_hook? May collide with reward calculation. + if doors := self[c.DOORS]: + doors.tick_doors() + return super_reward_info + + def do_additional_actions(self, agent: Agent, action: Action) -> (dict, dict): + action_result = super().do_additional_actions(agent, action) + if action_result is None: + if action == a.USE_DOOR: + return self.use_door_action(agent) + else: + return None + else: + return action_result + + def use_door_action(self, agent: Agent): + + # Check if agent really is standing on a door: + door = self[c.DOORS].get_near_position(agent.pos) + if door is not None: + door.use() + valid = c.VALID + self.print(f'{agent.name} just used a {door.name} at {door.pos}') + info_dict = {f'{agent.name}_door_use': 1, f'door_use': 1} + # When he doesn't... + else: + valid = c.NOT_VALID + info_dict = {f'{agent.name}_failed_door_use': 1, 'failed_door_use': 1} + self.print(f'{agent.name} just tried to use a door at {agent.pos}, but there is none.') + + reward = dict(value=self.rewards_door.USE_DOOR_VALID if valid else self.rewards_door.USE_DOOR_FAIL, + reason=a.USE_DOOR, info=info_dict) + + return valid, reward + + def reset_hook(self) -> None: + super().reset_hook() + # There is nothing to reset. + + def check_additional_done(self) -> (bool, dict): + super_done, super_dict = super().check_additional_done() + return super_done, super_dict + + def observations_hook(self) -> Dict[str, np.typing.ArrayLike]: + additional_observations = super().observations_hook() + + additional_observations.update({c.DOORS: self[c.DOORS].as_array()}) + return additional_observations + + def post_step_hook(self) -> List[Dict[str, int]]: + super_post_step = super(DoorFactory, self).post_step_hook() + return super_post_step + + +if __name__ == '__main__': + from environments.utility_classes import AgentRenderOptions as aro + render = True + + door_props = DoorProperties( + indicate_door_area=True + ) + + obs_props = ObservationProperties(render_agents=aro.COMBINED, omit_agent_self=True, + pomdp_r=2, additional_agent_placeholder=None, cast_shadows=True + ) + + move_props = {'allow_square_movement': True, + 'allow_diagonal_movement': False, + 'allow_no_op': False} + import time + global_timings = [] + for i in range(10): + + factory = DoorFactory(n_agents=10, done_at_collision=False, + level_name='rooms', max_steps=1000, + obs_prop=obs_props, parse_doors=True, + verbose=True, + mv_prop=move_props, dirt_prop=door_props, + # inject_agents=[TSPDirtAgent], + ) + + # noinspection DuplicatedCode + n_actions = factory.action_space.n - 1 + _ = factory.observation_space + obs_space = factory.observation_space + obs_space_named = factory.named_observation_space + action_space_named = factory.named_action_space + times = [] + for epoch in range(10): + start_time = time.time() + random_actions = [[random.randint(0, n_actions) for _ + in range(factory.n_agents)] for _ + in range(factory.max_steps+1)] + env_state = factory.reset() + if render: + factory.render() + # tsp_agent = factory.get_injected_agents()[0] + + rwrd = 0 + for agent_i_action in random_actions: + # agent_i_action = tsp_agent.predict() + env_state, step_rwrd, done_bool, info_obj = factory.step(agent_i_action) + rwrd += step_rwrd + if render: + factory.render() + if done_bool: + break + times.append(time.time() - start_time) + # print(f'Factory run {epoch} done, reward is:\n {r}') + print('Mean Time Taken: ', sum(times) / 10) + global_timings.extend(times) + print('Mean Time Taken: ', sum(global_timings) / len(global_timings)) + print('Median Time Taken: ', global_timings[len(global_timings)//2]) diff --git a/environments/factory/additional/combined_factories.py b/environments/factory/additional/combined_factories.py index 1c43488..bab9af4 100644 --- a/environments/factory/additional/combined_factories.py +++ b/environments/factory/additional/combined_factories.py @@ -7,9 +7,17 @@ from environments.factory.additional.btry.factory_battery import BatteryFactory from environments.factory.additional.dest.factory_dest import DestFactory from environments.factory.additional.dirt.dirt_util import DirtProperties from environments.factory.additional.dirt.factory_dirt import DirtFactory +from environments.factory.additional.doors.factory_doors import DoorFactory from environments.factory.additional.item.factory_item import ItemFactory +# noinspection PyAbstractClass +class DoorDirtFactory(DoorFactory, DirtFactory): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + +# noinspection PyAbstractClass class DirtItemFactory(ItemFactory, DirtFactory): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -38,8 +46,6 @@ if __name__ == '__main__': render = True - dirt_props = DirtProperties(1, 0.05, 0.1, 3, 1, 20, 0) - obs_props = ObservationProperties(render_agents=ARO.COMBINED, omit_agent_self=True, pomdp_r=2, additional_agent_placeholder=None) @@ -47,13 +53,13 @@ if __name__ == '__main__': 'allow_diagonal_movement': False, 'allow_no_op': False} - factory = DirtBatteryFactory(n_agents=5, done_at_collision=False, + factory = DoorDirtFactory(n_agents=10, done_at_collision=False, level_name='rooms', max_steps=400, obs_prop=obs_props, parse_doors=True, record_episodes=True, verbose=True, - btry_prop=BatteryProperties(), - mv_prop=move_props, dirt_prop=dirt_props - ) + dirt_prop=DirtProperties(), + mv_prop=move_props) + # noinspection DuplicatedCode n_actions = factory.action_space.n - 1 diff --git a/environments/factory/additional/dirt/factory_dirt.py b/environments/factory/additional/dirt/factory_dirt.py index 6a06a79..50e484c 100644 --- a/environments/factory/additional/dirt/factory_dirt.py +++ b/environments/factory/additional/dirt/factory_dirt.py @@ -44,7 +44,7 @@ class DirtFactory(BaseFactory): def entities_hook(self) -> Dict[(str, Entities)]: super_entities = super().entities_hook dirt_register = DirtPiles(self.dirt_prop, self._level_shape) - super_entities.update(({c.DIRT: dirt_register})) + super_entities.update({c.DIRT: dirt_register}) return super_entities def __init__(self, *args, diff --git a/environments/factory/additional/doors/__init__.py b/environments/factory/additional/doors/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/environments/factory/additional/doors/doors_collections.py b/environments/factory/additional/doors/doors_collections.py new file mode 100644 index 0000000..029a255 --- /dev/null +++ b/environments/factory/additional/doors/doors_collections.py @@ -0,0 +1,38 @@ +from typing import Union + +from environments.factory.additional.doors.doors_entities import Door +from environments.factory.base.registers import EntityCollection + +from environments.factory.additional.doors.doors_util import Constants as c + + +class Doors(EntityCollection): + + def __init__(self, *args, indicate_area=False, **kwargs): + self.indicate_area = indicate_area + self._area_marked = False + super(Doors, self).__init__(*args, is_blocking_light=True, can_collide=True, **kwargs) + + _accepted_objects = Door + + def get_near_position(self, position: (int, int)) -> Union[None, Door]: + try: + return next(door for door in self if position in door.tile.neighboring_floor_pos) + except StopIteration: + return None + + def tick_doors(self): + for door in self: + door.tick() + + def as_array(self): + if not self._area_marked and self.indicate_area: + for door in self: + for tile in door.tile.neighboring_floor: + if self._individual_slices: + pass + else: + pos = (0, *tile.pos) + self._lazy_eval_transforms.append((pos, c.ACCESS_DOOR_CELL)) + self._area_marked = True + return super(Doors, self).as_array() diff --git a/environments/factory/additional/doors/doors_entities.py b/environments/factory/additional/doors/doors_entities.py new file mode 100644 index 0000000..5598a5b --- /dev/null +++ b/environments/factory/additional/doors/doors_entities.py @@ -0,0 +1,69 @@ +from environments.factory.base.objects import Entity +from environments.factory.additional.doors.doors_util import Constants as c + + +class Door(Entity): + + @property + def is_blocking(self): + return False if self.is_open else True + + @property + def can_collide(self): + return False if self.is_open else True + + @property + def encoding(self): + # This is important as it shadow is checked by occupation value + return c.CLOSED_DOOR_CELL if self.is_closed else c.OPEN_DOOR_CELL + + @property + def str_state(self): + return 'open' if self.is_open else 'closed' + + def __init__(self, *args, closed_on_init=True, auto_close_interval=10, indicate_area=False, **kwargs): + super(Door, self).__init__(*args, **kwargs) + self._state = c.CLOSED_DOOR + self.indicate_area = indicate_area + self.auto_close_interval = auto_close_interval + self.time_to_close = -1 + if not closed_on_init: + self._open() + + def summarize_state(self): + state_dict = super().summarize_state() + state_dict.update(state=str(self.str_state), time_to_close=int(self.time_to_close)) + return state_dict + + @property + def is_closed(self): + return self._state == c.CLOSED_DOOR + + @property + def is_open(self): + return self._state == c.OPEN_DOOR + + @property + def status(self): + return self._state + + def use(self): + if self._state == c.OPEN_DOOR: + self._close() + else: + self._open() + + def tick(self): + if self.is_open and len(self.tile) == 1 and self.time_to_close: + self.time_to_close -= 1 + elif self.is_open and not self.time_to_close and len(self.tile) == 1: + self.use() + + def _open(self): + self._state = c.OPEN_DOOR + self._collection.notify_change_to_value(self) + self.time_to_close = self.auto_close_interval + + def _close(self): + self._state = c.CLOSED_DOOR + self._collection.notify_change_to_value(self) diff --git a/environments/factory/additional/doors/doors_util.py b/environments/factory/additional/doors/doors_util.py new file mode 100644 index 0000000..6e32e09 --- /dev/null +++ b/environments/factory/additional/doors/doors_util.py @@ -0,0 +1,31 @@ + +from typing import NamedTuple + +from environments.helpers import Constants as BaseConstants, EnvActions as BaseActions + + +class Constants(BaseConstants): + DOOR = 'Door' # Identifier of Single-Door Entities. + DOORS = 'Doors' # Identifier of Door-objects and sets (collections). + DOOR_SYMBOL = 'D' # Door identifier for resolving the string based map files. + + ACCESS_DOOR_CELL = 1 / 3 # Access-door-Cell value used in observation + OPEN_DOOR_CELL = 2 / 3 # Open-door-Cell value used in observation + CLOSED_DOOR_CELL = 3 / 3 # Closed-door-Cell value used in observation + + CLOSED_DOOR = 'closed' # Identifier to compare door-is-closed state + OPEN_DOOR = 'open' # Identifier to compare door-is-open state + # ACCESS_DOOR = 'access' # Identifier to compare access positions + + +class Actions(BaseActions): + USE_DOOR = 'use_door' + + +class RewardsDoor(NamedTuple): + USE_DOOR_VALID: float = -0.00 + USE_DOOR_FAIL: float = -0.01 + + +class DoorProperties(NamedTuple): + indicate_door_area: bool = True # Wether the door area should be indicated in the agents' observation. diff --git a/environments/factory/additional/doors/factory_doors.py b/environments/factory/additional/doors/factory_doors.py new file mode 100644 index 0000000..2268cc9 --- /dev/null +++ b/environments/factory/additional/doors/factory_doors.py @@ -0,0 +1,196 @@ +import time +from typing import List, Union, Dict +import random + +import numpy as np + +from environments.factory.additional.doors.doors_collections import Doors +from environments.factory.additional.doors.doors_util import DoorProperties, RewardsDoor, Constants, Actions +from environments.factory.base.base_factory import BaseFactory +from environments.factory.base.objects import Agent, Action +from environments.factory.base.registers import Entities + +from environments import helpers as h + +from environments.factory.base.renderer import RenderEntity +from environments.utility_classes import ObservationProperties + + +def softmax(x): + """Compute softmax values for each sets of scores in x.""" + e_x = np.exp(x - np.max(x)) + return e_x / e_x.sum() + + +def entropy(x): + return -(x * np.log(x + 1e-8)).sum() + + +c = Constants +a = Actions + + +# noinspection PyAttributeOutsideInit, PyAbstractClass +class DoorFactory(BaseFactory): + + @property + def actions_hook(self) -> Union[Action, List[Action]]: + super_actions = super().actions_hook + super_actions.append(Action(str_ident=a.USE_DOOR)) + return super_actions + + @property + def entities_hook(self) -> Dict[(str, Entities)]: + super_entities = super().entities_hook + + parsed_doors = h.one_hot_level(self._parsed_level, c.DOOR_SYMBOL) + parsed_doors = np.pad(parsed_doors, self.obs_prop.pomdp_r, 'constant', constant_values=0) + if np.any(parsed_doors): + door_tiles = [self[c.FLOOR].by_pos(tuple(pos)) for pos in np.argwhere(parsed_doors == c.OCCUPIED_CELL)] + doors = Doors.from_tiles(door_tiles, self._level_shape, indicate_area=self.obs_prop.indicate_door_area, + entity_kwargs=dict() + ) + super_entities.update(({c.DOORS: doors})) + return super_entities + + def __init__(self, *args, + door_properties: DoorProperties = DoorProperties(), rewards_door: RewardsDoor = RewardsDoor(), + env_seed=time.time_ns(), **kwargs): + if isinstance(door_properties, dict): + door_properties = DoorProperties(**door_properties) + if isinstance(rewards_door, dict): + rewards_door = RewardsDoor(**rewards_door) + self.door_properties = door_properties + self.rewards_door = rewards_door + self._door_rng = np.random.default_rng(env_seed) + self._doors: Doors + kwargs.update(env_seed=env_seed) + # TODO: Reset ---> document this + super().__init__(*args, **kwargs) + + def render_assets_hook(self, mode='human'): + additional_assets = super().render_assets_hook() + doors = [] + for i, door in enumerate(self[c.DOORS]): + name, state = 'door_open' if door.is_open else 'door_closed', 'blank' + doors.append(RenderEntity(name, door.pos, 1, 'none', state, i + 1)) + additional_assets.extend(doors) + return additional_assets + + + def step_hook(self) -> (List[dict], dict): + super_reward_info = super().step_hook() + # Step the door close intervall + # TODO: Maybe move this to self.post_step_hook? May collide with reward calculation. + if doors := self[c.DOORS]: + doors.tick_doors() + return super_reward_info + + def do_additional_actions(self, agent: Agent, action: Action) -> (dict, dict): + action_result = super().do_additional_actions(agent, action) + if action_result is None: + if action == a.USE_DOOR: + return self.use_door_action(agent) + else: + return None + else: + return action_result + + def use_door_action(self, agent: Agent): + + # Check if agent really is standing on a door: + door = self[c.DOORS].get_near_position(agent.pos) + if door is not None: + door.use() + valid = c.VALID + self.print(f'{agent.name} just used a {door.name} at {door.pos}') + info_dict = {f'{agent.name}_door_use': 1, f'door_use': 1} + # When he doesn't... + else: + valid = c.NOT_VALID + info_dict = {f'{agent.name}_failed_door_use': 1, 'failed_door_use': 1} + self.print(f'{agent.name} just tried to use a door at {agent.pos}, but there is none.') + + reward = dict(value=self.rewards_door.USE_DOOR_VALID if valid else self.rewards_door.USE_DOOR_FAIL, + reason=a.USE_DOOR, info=info_dict) + + return valid, reward + + def reset_hook(self) -> None: + super().reset_hook() + # There is nothing to reset. + + def check_additional_done(self) -> (bool, dict): + super_done, super_dict = super().check_additional_done() + return super_done, super_dict + + def observations_hook(self) -> Dict[str, np.typing.ArrayLike]: + additional_observations = super().observations_hook() + + additional_observations.update({c.DOORS: self[c.DOORS].as_array()}) + return additional_observations + + def post_step_hook(self) -> List[Dict[str, int]]: + super_post_step = super(DoorFactory, self).post_step_hook() + return super_post_step + + +if __name__ == '__main__': + from environments.utility_classes import AgentRenderOptions as aro + render = True + + door_props = DoorProperties( + indicate_door_area=True + ) + + obs_props = ObservationProperties(render_agents=aro.COMBINED, omit_agent_self=True, + pomdp_r=2, additional_agent_placeholder=None, cast_shadows=True + ) + + move_props = {'allow_square_movement': True, + 'allow_diagonal_movement': False, + 'allow_no_op': False} + import time + global_timings = [] + for i in range(10): + + factory = DoorFactory(n_agents=10, done_at_collision=False, + level_name='rooms', max_steps=1000, + obs_prop=obs_props, parse_doors=True, + verbose=True, + mv_prop=move_props, dirt_prop=door_props, + # inject_agents=[TSPDirtAgent], + ) + + # noinspection DuplicatedCode + n_actions = factory.action_space.n - 1 + _ = factory.observation_space + obs_space = factory.observation_space + obs_space_named = factory.named_observation_space + action_space_named = factory.named_action_space + times = [] + for epoch in range(10): + start_time = time.time() + random_actions = [[random.randint(0, n_actions) for _ + in range(factory.n_agents)] for _ + in range(factory.max_steps+1)] + env_state = factory.reset() + if render: + factory.render() + # tsp_agent = factory.get_injected_agents()[0] + + rwrd = 0 + for agent_i_action in random_actions: + # agent_i_action = tsp_agent.predict() + env_state, step_rwrd, done_bool, info_obj = factory.step(agent_i_action) + rwrd += step_rwrd + if render: + factory.render() + if done_bool: + break + times.append(time.time() - start_time) + # print(f'Factory run {epoch} done, reward is:\n {r}') + print('Mean Time Taken: ', sum(times) / 10) + global_timings.extend(times) + print('Mean Time Taken: ', sum(global_timings) / len(global_timings)) + print('Median Time Taken: ', global_timings[len(global_timings)//2]) diff --git a/environments/factory/base/base_factory.py b/environments/factory/base/base_factory.py index 5ef9227..4555e3d 100644 --- a/environments/factory/base/base_factory.py +++ b/environments/factory/base/base_factory.py @@ -16,7 +16,7 @@ from environments.helpers import Constants as c from environments.helpers import EnvActions as a from environments.helpers import RewardsBase from environments.factory.base.objects import Agent, Floor, Action -from environments.factory.base.registers import Actions, Entities, Agents, Doors, Floors, Walls, PlaceHolders, \ +from environments.factory.base.registers import Actions, Entities, Agents, Floors, Walls, PlaceHolders, \ GlobalPositions from environments.utility_classes import MovementProperties, ObservationProperties, MarlFrameStack from environments.utility_classes import AgentRenderOptions as a_obs @@ -88,8 +88,8 @@ class BaseFactory(gym.Env): mv_prop: MovementProperties = MovementProperties(), obs_prop: ObservationProperties = ObservationProperties(), rewards_base: RewardsBase = RewardsBase(), - parse_doors=False, done_at_collision=False, inject_agents: Union[None, List] = None, - verbose=False, doors_have_area=True, env_seed=time.time_ns(), individual_rewards=False, + done_at_collision=False, inject_agents: Union[None, List] = None, + verbose=False, env_seed=time.time_ns(), individual_rewards=False, class_name='', **kwargs): if class_name: @@ -105,8 +105,6 @@ class BaseFactory(gym.Env): assert obs_prop.frames_to_stack != 1 and \ obs_prop.frames_to_stack >= 0, \ "'frames_to_stack' cannot be negative or 1." - assert doors_have_area or not obs_prop.indicate_door_area, \ - '"indicate_door_area" can only active, when "doors_have_area"' if kwargs: print(f'Following kwargs were passed, but ignored: {kwargs}') @@ -133,9 +131,7 @@ class BaseFactory(gym.Env): self.done_at_collision = done_at_collision self._record_episodes = False - self.parse_doors = parse_doors self._injected_agents = inject_agents or [] - self.doors_have_area = doors_have_area self.individual_rewards = individual_rewards # TODO: Reset ---> document this @@ -174,20 +170,9 @@ class BaseFactory(gym.Env): # NOPOS self._NO_POS_TILE = Floor(c.NO_POS, None) - # Doors - if self.parse_doors: - parsed_doors = h.one_hot_level(self._parsed_level, c.DOOR) - parsed_doors = np.pad(parsed_doors, self.obs_prop.pomdp_r, 'constant', constant_values=0) - if np.any(parsed_doors): - door_tiles = [floor.by_pos(tuple(pos)) for pos in np.argwhere(parsed_doors == c.OCCUPIED_CELL)] - doors = Doors.from_tiles(door_tiles, self._level_shape, have_area=self.obs_prop.indicate_door_area, - entity_kwargs=dict(context=floor) - ) - self._entities.add_additional_items({c.DOORS: doors}) - # Actions # TODO: Move this to Agent init, so that agents can have individual action sets. - self._actions = Actions(self.mv_prop, can_use_doors=self.parse_doors) + self._actions = Actions(self.mv_prop) if additional_actions := self.actions_hook: self._actions.add_additional_items(additional_actions) @@ -263,8 +248,6 @@ class BaseFactory(gym.Env): elif a.NOOP == action_obj: action_valid = c.VALID reward = dict(value=self.rewards_base.NOOP, reason=a.NOOP, info={f'{agent.name}_NOOP': 1, 'NOOP': 1}) - elif a.USE_DOOR == action_obj: - action_valid, reward = self._handle_door_interaction(agent) else: # noinspection PyTupleAssignmentBalance action_valid, reward = self.do_additional_actions(agent, action_obj) @@ -282,12 +265,9 @@ class BaseFactory(gym.Env): for tile in tiles_with_collisions: guests = tile.guests_that_can_collide for i, guest in enumerate(guests): - # This does make a copy, but is faster than.copy() - this_collisions = guests[:] - del this_collisions[i] - assert hasattr(guest, 'step_result') - for collision in this_collisions: - guest.step_result['collisions'].append(collision) + for j, collision in enumerate(guests): + if j != i and hasattr(guest, 'step_result'): + guest.step_result['collisions'].append(collision) done = False if self.done_at_collision: @@ -299,11 +279,6 @@ class BaseFactory(gym.Env): done = done or additional_done info.update(additional_done_info) - # Step the door close intervall - if self.parse_doors: - if doors := self[c.DOORS]: - doors.tick_doors() - # Finalize reward, reward_info = self.build_reward_result(rewards) @@ -319,41 +294,14 @@ class BaseFactory(gym.Env): info.update(post_step_info) obs, _ = self._build_observations() - return obs, reward, done, info - def _handle_door_interaction(self, agent) -> (bool, dict): - if doors := self[c.DOORS]: - # Check if agent really is standing on a door: - if self.doors_have_area: - door = doors.get_near_position(agent.pos) - else: - door = doors.by_pos(agent.pos) - if door is not None: - door.use() - valid = c.VALID - self.print(f'{agent.name} just used a {door.name} at {door.pos}') - info_dict = {f'{agent.name}_door_use': 1, f'door_use': 1} - # When he doesn't... - else: - valid = c.NOT_VALID - info_dict = {f'{agent.name}_failed_door_use': 1, 'failed_door_use': 1} - self.print(f'{agent.name} just tried to use a door at {agent.pos}, but there is none.') - - else: - raise RuntimeError('This should not happen, since the door action should not be available.') - reward = dict(value=self.rewards_base.USE_DOOR_VALID if valid else self.rewards_base.USE_DOOR_FAIL, - reason=a.USE_DOOR, info=info_dict) - - return valid, reward - def _build_observations(self) -> np.typing.ArrayLike: # Observation dict: per_agent_expl_idx = dict() per_agent_obsn = dict() # Generel Observations lvl_obs = self[c.WALLS].as_array() - door_obs = self[c.DOORS].as_array() if self.parse_doors else None if self.obs_prop.render_agents == a_obs.NOT: global_agent_obs = None elif self.obs_prop.omit_agent_self and self.n_agents == 1: @@ -391,8 +339,6 @@ class BaseFactory(gym.Env): obs_dict[c.AGENT] = agent_obs[:] if self[c.AGENT_PLACEHOLDER] and placeholder_obs is not None: obs_dict[c.AGENT_PLACEHOLDER] = placeholder_obs - if self.parse_doors and door_obs is not None: - obs_dict[c.DOORS] = door_obs[:] obs_dict.update(add_obs_dict) obsn = np.vstack(list(obs_dict.values())) if self.obs_prop.pomdp_r: @@ -430,33 +376,11 @@ class BaseFactory(gym.Env): raise e obs_block_light = obsn[light_block_obs] != c.OCCUPIED_CELL - door_shadowing = False - if self.parse_doors: - if doors := self[c.DOORS]: - if door := doors.by_pos(agent.pos): - if door.is_closed: - for group in door.connectivity_subgroups: - if agent.last_pos not in group: - door_shadowing = True - if self._pomdp_r: - blocking = [ - tuple(np.subtract(x, agent.pos) + (self._pomdp_r, self._pomdp_r)) - for x in group] - xs, ys = zip(*blocking) - else: - xs, ys = zip(*group) - - # noinspection PyUnresolvedReferences - obs_block_light[:, xs, ys] = False - light_block_map = Map((np.prod(obs_block_light, axis=0) != True).astype(int).squeeze()) if self._pomdp_r: light_block_map = light_block_map.do_fov(self._pomdp_r, self._pomdp_r, max(self._level_shape)) else: light_block_map = light_block_map.do_fov(*agent.pos, max(self._level_shape)) - if door_shadowing: - # noinspection PyUnboundLocalVariable - light_block_map[xs, ys] = 0 agent.step_result['lightmap'] = light_block_map @@ -550,35 +474,13 @@ class BaseFactory(gym.Env): y_new = agent.y + y_diff new_tile = self[c.FLOOR].by_pos((x_new, y_new)) - if new_tile: + if new_tile and not np.any([x.is_blocking for x in new_tile.guests]): valid = c.VALID else: tile = agent.tile valid = c.VALID return tile, valid - if self.parse_doors and agent.last_pos != c.NO_POS: - if doors := self[c.DOORS]: - if self.doors_have_area: - if door := doors.by_pos(new_tile.pos): - if door.is_closed: - return agent.tile, c.NOT_VALID - else: # door.is_closed: - pass - - if door := doors.by_pos(agent.pos): - if door.is_open: - pass - else: # door.is_closed: - if door.is_linked(agent.last_pos, new_tile.pos): - pass - else: - return agent.tile, c.NOT_VALID - else: - pass - else: - pass - return new_tile, valid def build_reward_result(self, global_env_rewards: list) -> (int, dict): @@ -649,14 +551,10 @@ class BaseFactory(gym.Env): for i, agent in enumerate(self[c.AGENT]): name, state = h.asset_str(agent) agents.append(RenderEntity(name, agent.pos, 1, 'none', state, i + 1, agent.step_result['lightmap'])) - doors = [] - if self.parse_doors: - for i, door in enumerate(self[c.DOORS]): - name, state = 'door_open' if door.is_open else 'door_closed', 'blank' - doors.append(RenderEntity(name, door.pos, 1, 'none', state, i + 1)) + additional_assets = self.render_assets_hook() - return self._renderer.render(walls + doors + additional_assets + agents) + return self._renderer.render(walls + additional_assets + agents) def save_params(self, filepath: Path): # noinspection PyProtectedMember diff --git a/environments/factory/base/objects.py b/environments/factory/base/objects.py index 7afa9d7..3b47ad9 100644 --- a/environments/factory/base/objects.py +++ b/environments/factory/base/objects.py @@ -1,12 +1,10 @@ from collections import defaultdict -from typing import Union +from typing import Union, List -import networkx as nx import numpy as np from environments import helpers as h from environments.helpers import Constants as c -import itertools ########################################################################## # ##################### Base Object Building Blocks ######################### # @@ -88,6 +86,10 @@ class EnvObject(Object): class Entity(EnvObject): """Full Env Entity that lives on the env Grid. Doors, Items, DirtPile etc...""" + @property + def is_blocking(self): + return False + @property def can_collide(self): return False @@ -226,6 +228,21 @@ class GlobalPosition(BoundingMixin, EnvObject): class Floor(EnvObject): + @property + def neighboring_floor_pos(self): + return [x.pos for x in self.neighboring_floor] + + @property + def neighboring_floor(self): + if self._neighboring_floor: + pass + else: + self._neighboring_floor = [x for x in [self._collection.by_pos(np.add(self.pos, pos)) + for pos in h.POS_MASK.reshape(-1, 2) + if not np.all(pos == [0, 0])] + if x] + return self._neighboring_floor + @property def encoding(self): return c.FREE_CELL @@ -254,6 +271,7 @@ class Floor(EnvObject): super(Floor, self).__init__(*args, **kwargs) self._guests = dict() self._pos = tuple(pos) + self._neighboring_floor: List[Floor] = list() def __len__(self): return len(self._guests) @@ -298,94 +316,6 @@ class Wall(Floor): pass -class Door(Entity): - - @property - def can_collide(self): - if self.has_area: - return False if self.is_open else True - else: - return False - - @property - def encoding(self): - # This is important as it shadow is checked by occupation value - return c.CLOSED_DOOR_CELL if self.is_closed else c.OPEN_DOOR_CELL - - @property - def str_state(self): - return 'open' if self.is_open else 'closed' - - @property - def access_area(self): - return [node for node in self.connectivity.nodes - if node not in range(len(self.connectivity_subgroups)) and node != self.pos] - - def __init__(self, *args, context, closed_on_init=True, auto_close_interval=10, has_area=False, **kwargs): - super(Door, self).__init__(*args, **kwargs) - self._state = c.CLOSED_DOOR - self.has_area = has_area - self.auto_close_interval = auto_close_interval - self.time_to_close = -1 - neighbor_pos = list(itertools.product([-1, 1, 0], repeat=2))[:-1] - neighbor_tiles = [context.by_pos(tuple([sum(x) for x in zip(self.pos, diff)])) for diff in neighbor_pos] - neighbor_pos = [x.pos for x in neighbor_tiles if x] - self.connectivity = h.points_to_graph(neighbor_pos) - self.connectivity_subgroups = list(nx.algorithms.components.connected_components(self.connectivity)) - for idx, group in enumerate(self.connectivity_subgroups): - for tile_pos in group: - self.connectivity.add_edge(tile_pos, idx) - if not closed_on_init: - self._open() - - def summarize_state(self): - state_dict = super().summarize_state() - state_dict.update(state=str(self.str_state), time_to_close=int(self.time_to_close)) - return state_dict - - @property - def is_closed(self): - return self._state == c.CLOSED_DOOR - - @property - def is_open(self): - return self._state == c.OPEN_DOOR - - @property - def status(self): - return self._state - - def use(self): - if self._state == c.OPEN_DOOR: - self._close() - else: - self._open() - - def tick(self): - if self.is_open and len(self.tile) == 1 and self.time_to_close: - self.time_to_close -= 1 - elif self.is_open and not self.time_to_close and len(self.tile) == 1: - self.use() - - def _open(self): - self.connectivity.add_edges_from([(self.pos, x) for x in range(len(self.connectivity_subgroups))]) - self._state = c.OPEN_DOOR - self._collection.notify_change_to_value(self) - self.time_to_close = self.auto_close_interval - - def _close(self): - self.connectivity.remove_node(self.pos) - self._state = c.CLOSED_DOOR - self._collection.notify_change_to_value(self) - - def is_linked(self, old_pos, new_pos): - try: - _ = nx.shortest_path(self.connectivity, old_pos, new_pos) - return True - except nx.exception.NetworkXNoPath: - return False - - class Agent(MoveableEntity): @property diff --git a/environments/factory/base/registers.py b/environments/factory/base/registers.py index 2664786..96f47f1 100644 --- a/environments/factory/base/registers.py +++ b/environments/factory/base/registers.py @@ -6,7 +6,7 @@ from typing import List, Union, Dict, Tuple import numpy as np import six -from environments.factory.base.objects import Entity, Floor, Agent, Door, Action, Wall, PlaceHolder, GlobalPosition, \ +from environments.factory.base.objects import Entity, Floor, Agent, Action, Wall, PlaceHolder, GlobalPosition, \ Object, EnvObject from environments.utility_classes import MovementProperties from environments import helpers as h @@ -452,38 +452,6 @@ class Agents(MovingEntityObjectCollection): self._collection[agent.name] = agent -class Doors(EntityCollection): - - def __init__(self, *args, have_area: bool = False, **kwargs): - self.have_area = have_area - self._area_marked = False - super(Doors, self).__init__(*args, is_blocking_light=True, can_collide=True, **kwargs) - - _accepted_objects = Door - - def get_near_position(self, position: (int, int)) -> Union[None, Door]: - try: - return next(door for door in self if position in door.access_area) - except StopIteration: - return None - - def tick_doors(self): - for door in self: - door.tick() - - def as_array(self): - if self.have_area and not self._area_marked: - for door in self: - for pos in door.access_area: - if self._individual_slices: - pass - else: - pos = (0, *pos) - self._lazy_eval_transforms.append((pos, c.ACCESS_DOOR_CELL)) - self._area_marked = True - return super(Doors, self).as_array() - - class Actions(ObjectCollection): _accepted_objects = Action @@ -492,11 +460,10 @@ class Actions(ObjectCollection): return self._movement_actions # noinspection PyTypeChecker - def __init__(self, movement_properties: MovementProperties, can_use_doors=False): + def __init__(self, movement_properties: MovementProperties): self.allow_no_op = movement_properties.allow_no_op self.allow_diagonal_movement = movement_properties.allow_diagonal_movement self.allow_square_movement = movement_properties.allow_square_movement - self.can_use_doors = can_use_doors super(Actions, self).__init__() # Move this to Baseclass, Env init? @@ -507,8 +474,6 @@ class Actions(ObjectCollection): self.add_additional_items([self._accepted_objects(str_ident=direction) for direction in h.EnvActions.diagonal_move()]) self._movement_actions = self._collection.copy() - if self.can_use_doors: - self.add_additional_items([self._accepted_objects(str_ident=h.EnvActions.USE_DOOR)]) if self.allow_no_op: self.add_additional_items([self._accepted_objects(str_ident=h.EnvActions.NOOP)]) diff --git a/environments/helpers.py b/environments/helpers.py index 5298092..c385813 100644 --- a/environments/helpers.py +++ b/environments/helpers.py @@ -33,6 +33,10 @@ IGNORED_DF_COLUMNS = ['Episode', 'Run', # For plotting, which values ar 'train_step', 'step', 'index', 'dirt_amount', 'dirty_tile_count', 'terminal_observation', 'episode'] +POS_MASK = np.asarray([[[-1, -1], [0, -1], [1, -1]], + [[-1, 0], [0, 0], [1, 0]], + [[-1, 1], [0, 1], [1, 1]]]) + class Constants: @@ -42,12 +46,10 @@ class Constants: """ WALL = '#' # Wall tile identifier for resolving the string based map files. - DOOR = 'D' # Door identifier for resolving the string based map files. DANGER_ZONE = 'x' # Dange Zone tile identifier for resolving the string based map files. WALLS = 'Walls' # Identifier of Wall-objects and sets (collections). FLOOR = 'Floor' # Identifier of Floor-objects and sets (collections). - DOORS = 'Doors' # Identifier of Door-objects and sets (collections). LEVEL = 'Level' # Identifier of Level-objects and sets (collections). AGENT = 'Agent' # Identifier of Agent-objects and sets (collections). AGENT_PLACEHOLDER = 'AGENT_PLACEHOLDER' # Identifier of Placeholder-objects and sets (collections). @@ -56,16 +58,9 @@ class Constants: FREE_CELL = 0 # Free-Cell value used in observation OCCUPIED_CELL = 1 # Occupied-Cell value used in observation SHADOWED_CELL = -1 # Shadowed-Cell value used in observation - ACCESS_DOOR_CELL = 1/3 # Access-door-Cell value used in observation - OPEN_DOOR_CELL = 2/3 # Open-door-Cell value used in observation - CLOSED_DOOR_CELL = 3/3 # Closed-door-Cell value used in observation NO_POS = (-9999, -9999) # Invalid Position value used in the environment (something is off-grid) - CLOSED_DOOR = 'closed' # Identifier to compare door-is-closed state - OPEN_DOOR = 'open' # Identifier to compare door-is-open state - # ACCESS_DOOR = 'access' # Identifier to compare access positions - ACTION = 'action' # Identifier of Action-objects and sets (collections). COLLISION = 'collision' # Identifier to use in the context of collitions. VALID = True # Identifier to rename boolean values in the context of actions. @@ -90,7 +85,6 @@ class EnvActions: # Other # MOVE = 'move' NOOP = 'no_op' - USE_DOOR = 'use_door' _ACTIONMAP = defaultdict(lambda: (0, 0), {NORTH: (-1, 0), NORTHEAST: (-1, 1), @@ -100,6 +94,8 @@ class EnvActions: } ) + + @classmethod def is_move(cls, action): """ @@ -166,8 +162,6 @@ class RewardsBase(NamedTuple): MOVEMENTS_VALID: float = -0.001 MOVEMENTS_FAIL: float = -0.05 NOOP: float = -0.01 - USE_DOOR_VALID: float = -0.00 - USE_DOOR_FAIL: float = -0.01 COLLISION: float = -0.5 diff --git a/quickstart/single_agent_train_dirt_env.py b/quickstart/single_agent_train_dirt_env.py index 8f0f9c9..b5602b6 100644 --- a/quickstart/single_agent_train_dirt_env.py +++ b/quickstart/single_agent_train_dirt_env.py @@ -68,7 +68,7 @@ if __name__ == '__main__': omit_agent_self=True, # This is default additional_agent_placeholder=None, # We will not take care of future agents frames_to_stack=3, # To give the agent a notion of time - pomdp_r=2 # the agents view-radius + pomdp_r=2 # the agents' view-radius ) # 'MovementProperties' are for specifying how the agent is allowed to move in the env. move_props = MovementProperties(allow_diagonal_movement=True, # Euclidean style (vertices) @@ -135,7 +135,7 @@ if __name__ == '__main__': env_recorder_callback = EnvRecorder(env_factory, freq=int(train_steps / 400 / 10)) # Model Init - model = model_class("MlpPolicy", env_factory,verbose=1, seed=seed, device='cpu') + model = model_class("MlpPolicy", env_factory, verbose=1, seed=seed, device='cpu') # Model train model.learn(total_timesteps=int(train_steps), callback=[env_monitor_callback, env_recorder_callback]) @@ -166,7 +166,7 @@ if __name__ == '__main__': # retrieve model class model_cls = next(val for key, val in h.MODEL_MAP.items() if key in policy_path.parent.name) - # Load the agent agent + # Load the agent model = model_cls.load(policy_path / 'model.zip', device='cpu') # Load old env kwargs with next(policy_path.glob(env_params_json)).open('r') as f: