mirror of
				https://github.com/illiumst/marl-factory-grid.git
				synced 2025-10-31 04:37:25 +01:00 
			
		
		
		
	Doors are now separate
This commit is contained in:
		
							
								
								
									
										38
									
								
								environments/factory/additional/_template/_collections.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								environments/factory/additional/_template/_collections.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,38 @@ | ||||
| from typing import Union | ||||
|  | ||||
| from environments.factory.additional.doors.doors_entities import Door | ||||
| from environments.factory.base.registers import EntityCollection | ||||
|  | ||||
| from environments.factory.additional.doors.doors_util import Constants as c | ||||
|  | ||||
|  | ||||
| class Doors(EntityCollection): | ||||
|  | ||||
|     def __init__(self, *args, indicate_area=False, **kwargs): | ||||
|         self.indicate_area = indicate_area | ||||
|         self._area_marked = False | ||||
|         super(Doors, self).__init__(*args, is_blocking_light=True, can_collide=True, **kwargs) | ||||
|  | ||||
|     _accepted_objects = Door | ||||
|  | ||||
|     def get_near_position(self, position: (int, int)) -> Union[None, Door]: | ||||
|         try: | ||||
|             return next(door for door in self if position in door.tile.neighboring_floor_pos) | ||||
|         except StopIteration: | ||||
|             return None | ||||
|  | ||||
|     def tick_doors(self): | ||||
|         for door in self: | ||||
|             door.tick() | ||||
|  | ||||
|     def as_array(self): | ||||
|         if not self._area_marked and self.indicate_area: | ||||
|             for door in self: | ||||
|                 for tile in door.tile.neighboring_floor: | ||||
|                     if self._individual_slices: | ||||
|                         pass | ||||
|                     else: | ||||
|                         pos = (0, *tile.pos) | ||||
|                     self._lazy_eval_transforms.append((pos, c.ACCESS_DOOR_CELL)) | ||||
|             self._area_marked = True | ||||
|         return super(Doors, self).as_array() | ||||
							
								
								
									
										71
									
								
								environments/factory/additional/_template/_entities.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										71
									
								
								environments/factory/additional/_template/_entities.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,71 @@ | ||||
| from environments.factory.base.objects import Entity | ||||
| from environments.factory.additional.doors.doors_util import Constants as c | ||||
|  | ||||
|  | ||||
| class Template(Entity): | ||||
|     """Template for new Entity""" | ||||
|  | ||||
|     # How to define / override properties | ||||
|     @property | ||||
|     def is_blocking(self): | ||||
|         return False | ||||
|  | ||||
|     @property | ||||
|     def can_collide(self): | ||||
|         return False if self.template_attr else True | ||||
|  | ||||
|     @property | ||||
|     def encoding(self): | ||||
|         # This is important, as the shadow is checked by occupation value | ||||
|         return c.CLOSED_DOOR_CELL if self.is_closed else c.OPEN_DOOR_CELL | ||||
|  | ||||
|     @property | ||||
|     def str_state(self): | ||||
|         return 'open' if self.is_open else 'closed' | ||||
|  | ||||
|     def __init__(self, *args, closed_on_init=True, auto_close_interval=10, indicate_area=False, **kwargs): | ||||
|         super(Template, self).__init__(*args, **kwargs) | ||||
|         self._state = c.CLOSED_DOOR | ||||
|         self.indicate_area = indicate_area | ||||
|         self.auto_close_interval = auto_close_interval | ||||
|         self.time_to_close = -1 | ||||
|         if not closed_on_init: | ||||
|             self._open() | ||||
|  | ||||
|     def summarize_state(self): | ||||
|         state_dict = super().summarize_state() | ||||
|         state_dict.update(state=str(self.str_state), time_to_close=int(self.time_to_close)) | ||||
|         return state_dict | ||||
|  | ||||
|     @property | ||||
|     def is_closed(self): | ||||
|         return self._state == c.CLOSED_DOOR | ||||
|  | ||||
|     @property | ||||
|     def is_open(self): | ||||
|         return self._state == c.OPEN_DOOR | ||||
|  | ||||
|     @property | ||||
|     def status(self): | ||||
|         return self._state | ||||
|  | ||||
|     def use(self): | ||||
|         if self._state == c.OPEN_DOOR: | ||||
|             self._close() | ||||
|         else: | ||||
|             self._open() | ||||
|  | ||||
|     def tick(self): | ||||
|         if self.is_open and len(self.tile) == 1 and self.time_to_close: | ||||
|             self.time_to_close -= 1 | ||||
|         elif self.is_open and not self.time_to_close and len(self.tile) == 1: | ||||
|             self.use() | ||||
|  | ||||
|     def _open(self): | ||||
|         self._state = c.OPEN_DOOR | ||||
|         self._collection.notify_change_to_value(self) | ||||
|         self.time_to_close = self.auto_close_interval | ||||
|  | ||||
|     def _close(self): | ||||
|         self._state = c.CLOSED_DOOR | ||||
|         self._collection.notify_change_to_value(self) | ||||
							
								
								
									
										31
									
								
								environments/factory/additional/_template/_util.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										31
									
								
								environments/factory/additional/_template/_util.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,31 @@ | ||||
|  | ||||
| from typing import NamedTuple | ||||
|  | ||||
| from environments.helpers import Constants as BaseConstants, EnvActions as BaseActions | ||||
|  | ||||
|  | ||||
| class Constants(BaseConstants): | ||||
|     DOOR         = 'Door'   # Identifier of Single-Door Entities. | ||||
|     DOORS        = 'Doors'  # Identifier of Door-objects and sets (collections). | ||||
|     DOOR_SYMBOL  = 'D'                   # Door identifier for resolving the string based map files. | ||||
|  | ||||
|     ACCESS_DOOR_CELL = 1 / 3  # Access-door-Cell value used in observation | ||||
|     OPEN_DOOR_CELL = 2 / 3  # Open-door-Cell value used in observation | ||||
|     CLOSED_DOOR_CELL = 3 / 3  # Closed-door-Cell value used in observation | ||||
|  | ||||
|     CLOSED_DOOR         = 'closed'              # Identifier to compare door-is-closed state | ||||
|     OPEN_DOOR           = 'open'                # Identifier to compare door-is-open state | ||||
|     # ACCESS_DOOR         = 'access'            # Identifier to compare access positions | ||||
|  | ||||
|  | ||||
| class Actions(BaseActions): | ||||
|     USE_DOOR = 'use_door' | ||||
|  | ||||
|  | ||||
| class RewardsDoor(NamedTuple): | ||||
|     USE_DOOR_VALID: float  = -0.00 | ||||
|     USE_DOOR_FAIL: float   = -0.01 | ||||
|  | ||||
|  | ||||
| class DoorProperties(NamedTuple): | ||||
|     indicate_door_area: bool = True            # Whether the door area should be indicated in the agents' observation. | ||||
							
								
								
									
										196
									
								
								environments/factory/additional/_template/factory_template.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										196
									
								
								environments/factory/additional/_template/factory_template.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,196 @@ | ||||
| import time | ||||
| from typing import List, Union, Dict | ||||
| import random | ||||
|  | ||||
| import numpy as np | ||||
|  | ||||
| from environments.factory.additional.doors.doors_collections import Doors | ||||
| from environments.factory.additional.doors.doors_util import DoorProperties, RewardsDoor, Constants, Actions | ||||
| from environments.factory.base.base_factory import BaseFactory | ||||
| from environments.factory.base.objects import Agent, Action | ||||
| from environments.factory.base.registers import Entities | ||||
|  | ||||
| from environments import helpers as h | ||||
|  | ||||
| from environments.factory.base.renderer import RenderEntity | ||||
| from environments.utility_classes import ObservationProperties | ||||
|  | ||||
|  | ||||
| def softmax(x): | ||||
|     """Compute softmax values for each set of scores in x.""" | ||||
|     e_x = np.exp(x - np.max(x)) | ||||
|     return e_x / e_x.sum() | ||||
|  | ||||
|  | ||||
| def entropy(x): | ||||
|     return -(x * np.log(x + 1e-8)).sum() | ||||
|  | ||||
|  | ||||
| c = Constants | ||||
| a = Actions | ||||
|  | ||||
|  | ||||
| # noinspection PyAttributeOutsideInit, PyAbstractClass | ||||
| class DoorFactory(BaseFactory): | ||||
|  | ||||
|     @property | ||||
|     def actions_hook(self) -> Union[Action, List[Action]]: | ||||
|         super_actions = super().actions_hook | ||||
|         super_actions.append(Action(str_ident=a.USE_DOOR)) | ||||
|         return super_actions | ||||
|  | ||||
|     @property | ||||
|     def entities_hook(self) -> Dict[(str, Entities)]: | ||||
|         super_entities = super().entities_hook | ||||
|  | ||||
|         parsed_doors = h.one_hot_level(self._parsed_level, c.DOOR_SYMBOL) | ||||
|         parsed_doors = np.pad(parsed_doors, self.obs_prop.pomdp_r, 'constant', constant_values=0) | ||||
|         if np.any(parsed_doors): | ||||
|             door_tiles = [self[c.FLOOR].by_pos(tuple(pos)) for pos in np.argwhere(parsed_doors == c.OCCUPIED_CELL)] | ||||
|             doors = Doors.from_tiles(door_tiles, self._level_shape, indicate_area=self.obs_prop.indicate_door_area, | ||||
|                                      entity_kwargs=dict() | ||||
|                                      ) | ||||
|             super_entities.update(({c.DOORS: doors})) | ||||
|         return super_entities | ||||
|  | ||||
|     def __init__(self, *args, | ||||
|                  door_properties: DoorProperties = DoorProperties(), rewards_door: RewardsDoor = RewardsDoor(), | ||||
|                  env_seed=time.time_ns(), **kwargs): | ||||
|         if isinstance(door_properties, dict): | ||||
|             door_properties = DoorProperties(**door_properties) | ||||
|         if isinstance(rewards_door, dict): | ||||
|             rewards_door = RewardsDoor(**rewards_door) | ||||
|         self.door_properties = door_properties | ||||
|         self.rewards_door = rewards_door | ||||
|         self._door_rng = np.random.default_rng(env_seed) | ||||
|         self._doors: Doors | ||||
|         kwargs.update(env_seed=env_seed) | ||||
|         # TODO: Reset ---> document this | ||||
|         super().__init__(*args, **kwargs) | ||||
|  | ||||
|     def render_assets_hook(self, mode='human'): | ||||
|         additional_assets = super().render_assets_hook() | ||||
|         doors = [] | ||||
|         for i, door in enumerate(self[c.DOORS]): | ||||
|             name, state = 'door_open' if door.is_open else 'door_closed', 'blank' | ||||
|             doors.append(RenderEntity(name, door.pos, 1, 'none', state, i + 1)) | ||||
|         additional_assets.extend(doors) | ||||
|         return additional_assets | ||||
|  | ||||
|  | ||||
|     def step_hook(self) -> (List[dict], dict): | ||||
|         super_reward_info = super().step_hook() | ||||
|         # Step the door close interval | ||||
|         # TODO: Maybe move this to self.post_step_hook? May collide with reward calculation. | ||||
|         if doors := self[c.DOORS]: | ||||
|             doors.tick_doors() | ||||
|         return super_reward_info | ||||
|  | ||||
|     def do_additional_actions(self, agent: Agent, action: Action) -> (dict, dict): | ||||
|         action_result = super().do_additional_actions(agent, action) | ||||
|         if action_result is None: | ||||
|             if action == a.USE_DOOR: | ||||
|                 return self.use_door_action(agent) | ||||
|             else: | ||||
|                 return None | ||||
|         else: | ||||
|             return action_result | ||||
|  | ||||
|     def use_door_action(self, agent: Agent): | ||||
|  | ||||
|         # Check if agent really is standing on a door: | ||||
|         door = self[c.DOORS].get_near_position(agent.pos) | ||||
|         if door is not None: | ||||
|             door.use() | ||||
|             valid = c.VALID | ||||
|             self.print(f'{agent.name} just used a {door.name} at {door.pos}') | ||||
|             info_dict = {f'{agent.name}_door_use': 1, f'door_use': 1} | ||||
|         # When he doesn't... | ||||
|         else: | ||||
|             valid = c.NOT_VALID | ||||
|             info_dict = {f'{agent.name}_failed_door_use': 1, 'failed_door_use': 1} | ||||
|             self.print(f'{agent.name} just tried to use a door at {agent.pos}, but there is none.') | ||||
|  | ||||
|         reward = dict(value=self.rewards_door.USE_DOOR_VALID if valid else self.rewards_door.USE_DOOR_FAIL, | ||||
|                       reason=a.USE_DOOR, info=info_dict) | ||||
|  | ||||
|         return valid, reward | ||||
|  | ||||
|     def reset_hook(self) -> None: | ||||
|         super().reset_hook() | ||||
|         # There is nothing to reset. | ||||
|  | ||||
|     def check_additional_done(self) -> (bool, dict): | ||||
|         super_done, super_dict = super().check_additional_done() | ||||
|         return super_done, super_dict | ||||
|  | ||||
|     def observations_hook(self) -> Dict[str, np.typing.ArrayLike]: | ||||
|         additional_observations = super().observations_hook() | ||||
|  | ||||
|         additional_observations.update({c.DOORS: self[c.DOORS].as_array()}) | ||||
|         return additional_observations | ||||
|  | ||||
|     def post_step_hook(self) -> List[Dict[str, int]]: | ||||
|         super_post_step = super(DoorFactory, self).post_step_hook() | ||||
|         return super_post_step | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     from environments.utility_classes import AgentRenderOptions as aro | ||||
|     render = True | ||||
|  | ||||
|     door_props = DoorProperties( | ||||
|         indicate_door_area=True | ||||
|     ) | ||||
|  | ||||
|     obs_props = ObservationProperties(render_agents=aro.COMBINED, omit_agent_self=True, | ||||
|                                       pomdp_r=2, additional_agent_placeholder=None, cast_shadows=True | ||||
|                                       ) | ||||
|  | ||||
|     move_props = {'allow_square_movement': True, | ||||
|                   'allow_diagonal_movement': False, | ||||
|                   'allow_no_op': False} | ||||
|     import time | ||||
|     global_timings = [] | ||||
|     for i in range(10): | ||||
|  | ||||
|         factory = DoorFactory(n_agents=10, done_at_collision=False, | ||||
|                               level_name='rooms', max_steps=1000, | ||||
|                               obs_prop=obs_props, parse_doors=True, | ||||
|                               verbose=True, | ||||
|                               mv_prop=move_props, dirt_prop=door_props, | ||||
|                               # inject_agents=[TSPDirtAgent], | ||||
|                               ) | ||||
|  | ||||
|         # noinspection DuplicatedCode | ||||
|         n_actions = factory.action_space.n - 1 | ||||
|         _ = factory.observation_space | ||||
|         obs_space = factory.observation_space | ||||
|         obs_space_named = factory.named_observation_space | ||||
|         action_space_named = factory.named_action_space | ||||
|         times = [] | ||||
|         for epoch in range(10): | ||||
|             start_time = time.time() | ||||
|             random_actions = [[random.randint(0, n_actions) for _ | ||||
|                                in range(factory.n_agents)] for _ | ||||
|                               in range(factory.max_steps+1)] | ||||
|             env_state = factory.reset() | ||||
|             if render: | ||||
|                 factory.render() | ||||
|             # tsp_agent = factory.get_injected_agents()[0] | ||||
|  | ||||
|             rwrd = 0 | ||||
|             for agent_i_action in random_actions: | ||||
|                 # agent_i_action = tsp_agent.predict() | ||||
|                 env_state, step_rwrd, done_bool, info_obj = factory.step(agent_i_action) | ||||
|                 rwrd += step_rwrd | ||||
|                 if render: | ||||
|                     factory.render() | ||||
|                 if done_bool: | ||||
|                     break | ||||
|             times.append(time.time() - start_time) | ||||
|             # print(f'Factory run {epoch} done, reward is:\n    {r}') | ||||
|         print('Mean Time Taken: ', sum(times) / 10) | ||||
|         global_timings.extend(times) | ||||
|     print('Mean Time Taken: ', sum(global_timings) / len(global_timings)) | ||||
|     print('Median Time Taken: ', global_timings[len(global_timings)//2]) | ||||
| @@ -7,9 +7,17 @@ from environments.factory.additional.btry.factory_battery import BatteryFactory | ||||
| from environments.factory.additional.dest.factory_dest import DestFactory | ||||
| from environments.factory.additional.dirt.dirt_util import DirtProperties | ||||
| from environments.factory.additional.dirt.factory_dirt import DirtFactory | ||||
| from environments.factory.additional.doors.factory_doors import DoorFactory | ||||
| from environments.factory.additional.item.factory_item import ItemFactory | ||||
|  | ||||
|  | ||||
| # noinspection PyAbstractClass | ||||
| class DoorDirtFactory(DoorFactory, DirtFactory): | ||||
|     def __init__(self, *args, **kwargs): | ||||
|         super().__init__(*args, **kwargs) | ||||
|  | ||||
|  | ||||
| # noinspection PyAbstractClass | ||||
| class DirtItemFactory(ItemFactory, DirtFactory): | ||||
|     def __init__(self, *args, **kwargs): | ||||
|         super().__init__(*args, **kwargs) | ||||
| @@ -38,8 +46,6 @@ if __name__ == '__main__': | ||||
|  | ||||
|     render = True | ||||
|  | ||||
|     dirt_props = DirtProperties(1, 0.05, 0.1, 3, 1, 20, 0) | ||||
|  | ||||
|     obs_props = ObservationProperties(render_agents=ARO.COMBINED, omit_agent_self=True, | ||||
|                                       pomdp_r=2, additional_agent_placeholder=None) | ||||
|  | ||||
| @@ -47,13 +53,13 @@ if __name__ == '__main__': | ||||
|                   'allow_diagonal_movement': False, | ||||
|                   'allow_no_op': False} | ||||
|  | ||||
|     factory = DirtBatteryFactory(n_agents=5, done_at_collision=False, | ||||
|     factory = DoorDirtFactory(n_agents=10, done_at_collision=False, | ||||
|                                  level_name='rooms', max_steps=400, | ||||
|                                  obs_prop=obs_props, parse_doors=True, | ||||
|                                  record_episodes=True, verbose=True, | ||||
|                                  btry_prop=BatteryProperties(), | ||||
|                                  mv_prop=move_props, dirt_prop=dirt_props | ||||
|                                  ) | ||||
|                                  dirt_prop=DirtProperties(), | ||||
|                                  mv_prop=move_props) | ||||
|  | ||||
|  | ||||
|     # noinspection DuplicatedCode | ||||
|     n_actions = factory.action_space.n - 1 | ||||
|   | ||||
| @@ -44,7 +44,7 @@ class DirtFactory(BaseFactory): | ||||
|     def entities_hook(self) -> Dict[(str, Entities)]: | ||||
|         super_entities = super().entities_hook | ||||
|         dirt_register = DirtPiles(self.dirt_prop, self._level_shape) | ||||
|         super_entities.update(({c.DIRT: dirt_register})) | ||||
|         super_entities.update({c.DIRT: dirt_register}) | ||||
|         return super_entities | ||||
|  | ||||
|     def __init__(self, *args, | ||||
|   | ||||
							
								
								
									
										0
									
								
								environments/factory/additional/doors/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								environments/factory/additional/doors/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
								
								
									
										38
									
								
								environments/factory/additional/doors/doors_collections.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								environments/factory/additional/doors/doors_collections.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,38 @@ | ||||
| from typing import Union | ||||
|  | ||||
| from environments.factory.additional.doors.doors_entities import Door | ||||
| from environments.factory.base.registers import EntityCollection | ||||
|  | ||||
| from environments.factory.additional.doors.doors_util import Constants as c | ||||
|  | ||||
|  | ||||
| class Doors(EntityCollection): | ||||
|  | ||||
|     def __init__(self, *args, indicate_area=False, **kwargs): | ||||
|         self.indicate_area = indicate_area | ||||
|         self._area_marked = False | ||||
|         super(Doors, self).__init__(*args, is_blocking_light=True, can_collide=True, **kwargs) | ||||
|  | ||||
|     _accepted_objects = Door | ||||
|  | ||||
|     def get_near_position(self, position: (int, int)) -> Union[None, Door]: | ||||
|         try: | ||||
|             return next(door for door in self if position in door.tile.neighboring_floor_pos) | ||||
|         except StopIteration: | ||||
|             return None | ||||
|  | ||||
|     def tick_doors(self): | ||||
|         for door in self: | ||||
|             door.tick() | ||||
|  | ||||
|     def as_array(self): | ||||
|         if not self._area_marked and self.indicate_area: | ||||
|             for door in self: | ||||
|                 for tile in door.tile.neighboring_floor: | ||||
|                     if self._individual_slices: | ||||
|                         pass | ||||
|                     else: | ||||
|                         pos = (0, *tile.pos) | ||||
|                     self._lazy_eval_transforms.append((pos, c.ACCESS_DOOR_CELL)) | ||||
|             self._area_marked = True | ||||
|         return super(Doors, self).as_array() | ||||
							
								
								
									
										69
									
								
								environments/factory/additional/doors/doors_entities.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										69
									
								
								environments/factory/additional/doors/doors_entities.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,69 @@ | ||||
| from environments.factory.base.objects import Entity | ||||
| from environments.factory.additional.doors.doors_util import Constants as c | ||||
|  | ||||
|  | ||||
| class Door(Entity): | ||||
|  | ||||
|     @property | ||||
|     def is_blocking(self): | ||||
|         return False if self.is_open else True | ||||
|  | ||||
|     @property | ||||
|     def can_collide(self): | ||||
|         return False if self.is_open else True | ||||
|  | ||||
|     @property | ||||
|     def encoding(self): | ||||
|         # This is important, as the shadow is checked by occupation value | ||||
|         return c.CLOSED_DOOR_CELL if self.is_closed else c.OPEN_DOOR_CELL | ||||
|  | ||||
|     @property | ||||
|     def str_state(self): | ||||
|         return 'open' if self.is_open else 'closed' | ||||
|  | ||||
|     def __init__(self, *args, closed_on_init=True, auto_close_interval=10, indicate_area=False, **kwargs): | ||||
|         super(Door, self).__init__(*args, **kwargs) | ||||
|         self._state = c.CLOSED_DOOR | ||||
|         self.indicate_area = indicate_area | ||||
|         self.auto_close_interval = auto_close_interval | ||||
|         self.time_to_close = -1 | ||||
|         if not closed_on_init: | ||||
|             self._open() | ||||
|  | ||||
|     def summarize_state(self): | ||||
|         state_dict = super().summarize_state() | ||||
|         state_dict.update(state=str(self.str_state), time_to_close=int(self.time_to_close)) | ||||
|         return state_dict | ||||
|  | ||||
|     @property | ||||
|     def is_closed(self): | ||||
|         return self._state == c.CLOSED_DOOR | ||||
|  | ||||
|     @property | ||||
|     def is_open(self): | ||||
|         return self._state == c.OPEN_DOOR | ||||
|  | ||||
|     @property | ||||
|     def status(self): | ||||
|         return self._state | ||||
|  | ||||
|     def use(self): | ||||
|         if self._state == c.OPEN_DOOR: | ||||
|             self._close() | ||||
|         else: | ||||
|             self._open() | ||||
|  | ||||
|     def tick(self): | ||||
|         if self.is_open and len(self.tile) == 1 and self.time_to_close: | ||||
|             self.time_to_close -= 1 | ||||
|         elif self.is_open and not self.time_to_close and len(self.tile) == 1: | ||||
|             self.use() | ||||
|  | ||||
|     def _open(self): | ||||
|         self._state = c.OPEN_DOOR | ||||
|         self._collection.notify_change_to_value(self) | ||||
|         self.time_to_close = self.auto_close_interval | ||||
|  | ||||
|     def _close(self): | ||||
|         self._state = c.CLOSED_DOOR | ||||
|         self._collection.notify_change_to_value(self) | ||||
							
								
								
									
										31
									
								
								environments/factory/additional/doors/doors_util.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										31
									
								
								environments/factory/additional/doors/doors_util.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,31 @@ | ||||
|  | ||||
| from typing import NamedTuple | ||||
|  | ||||
| from environments.helpers import Constants as BaseConstants, EnvActions as BaseActions | ||||
|  | ||||
|  | ||||
| class Constants(BaseConstants): | ||||
|     DOOR         = 'Door'   # Identifier of Single-Door Entities. | ||||
|     DOORS        = 'Doors'  # Identifier of Door-objects and sets (collections). | ||||
|     DOOR_SYMBOL  = 'D'                   # Door identifier for resolving the string based map files. | ||||
|  | ||||
|     ACCESS_DOOR_CELL = 1 / 3  # Access-door-Cell value used in observation | ||||
|     OPEN_DOOR_CELL = 2 / 3  # Open-door-Cell value used in observation | ||||
|     CLOSED_DOOR_CELL = 3 / 3  # Closed-door-Cell value used in observation | ||||
|  | ||||
|     CLOSED_DOOR         = 'closed'              # Identifier to compare door-is-closed state | ||||
|     OPEN_DOOR           = 'open'                # Identifier to compare door-is-open state | ||||
|     # ACCESS_DOOR         = 'access'            # Identifier to compare access positions | ||||
|  | ||||
|  | ||||
| class Actions(BaseActions): | ||||
|     USE_DOOR = 'use_door' | ||||
|  | ||||
|  | ||||
| class RewardsDoor(NamedTuple): | ||||
|     USE_DOOR_VALID: float  = -0.00 | ||||
|     USE_DOOR_FAIL: float   = -0.01 | ||||
|  | ||||
|  | ||||
| class DoorProperties(NamedTuple): | ||||
|     indicate_door_area: bool = True            # Whether the door area should be indicated in the agents' observation. | ||||
							
								
								
									
										196
									
								
								environments/factory/additional/doors/factory_doors.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										196
									
								
								environments/factory/additional/doors/factory_doors.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,196 @@ | ||||
| import time | ||||
| from typing import List, Union, Dict | ||||
| import random | ||||
|  | ||||
| import numpy as np | ||||
|  | ||||
| from environments.factory.additional.doors.doors_collections import Doors | ||||
| from environments.factory.additional.doors.doors_util import DoorProperties, RewardsDoor, Constants, Actions | ||||
| from environments.factory.base.base_factory import BaseFactory | ||||
| from environments.factory.base.objects import Agent, Action | ||||
| from environments.factory.base.registers import Entities | ||||
|  | ||||
| from environments import helpers as h | ||||
|  | ||||
| from environments.factory.base.renderer import RenderEntity | ||||
| from environments.utility_classes import ObservationProperties | ||||
|  | ||||
|  | ||||
| def softmax(x): | ||||
|     """Compute softmax values for each set of scores in x.""" | ||||
|     e_x = np.exp(x - np.max(x)) | ||||
|     return e_x / e_x.sum() | ||||
|  | ||||
|  | ||||
| def entropy(x): | ||||
|     return -(x * np.log(x + 1e-8)).sum() | ||||
|  | ||||
|  | ||||
| c = Constants | ||||
| a = Actions | ||||
|  | ||||
|  | ||||
| # noinspection PyAttributeOutsideInit, PyAbstractClass | ||||
| class DoorFactory(BaseFactory): | ||||
|  | ||||
|     @property | ||||
|     def actions_hook(self) -> Union[Action, List[Action]]: | ||||
|         super_actions = super().actions_hook | ||||
|         super_actions.append(Action(str_ident=a.USE_DOOR)) | ||||
|         return super_actions | ||||
|  | ||||
|     @property | ||||
|     def entities_hook(self) -> Dict[(str, Entities)]: | ||||
|         super_entities = super().entities_hook | ||||
|  | ||||
|         parsed_doors = h.one_hot_level(self._parsed_level, c.DOOR_SYMBOL) | ||||
|         parsed_doors = np.pad(parsed_doors, self.obs_prop.pomdp_r, 'constant', constant_values=0) | ||||
|         if np.any(parsed_doors): | ||||
|             door_tiles = [self[c.FLOOR].by_pos(tuple(pos)) for pos in np.argwhere(parsed_doors == c.OCCUPIED_CELL)] | ||||
|             doors = Doors.from_tiles(door_tiles, self._level_shape, indicate_area=self.obs_prop.indicate_door_area, | ||||
|                                      entity_kwargs=dict() | ||||
|                                      ) | ||||
|             super_entities.update(({c.DOORS: doors})) | ||||
|         return super_entities | ||||
|  | ||||
|     def __init__(self, *args, | ||||
|                  door_properties: DoorProperties = DoorProperties(), rewards_door: RewardsDoor = RewardsDoor(), | ||||
|                  env_seed=time.time_ns(), **kwargs): | ||||
|         if isinstance(door_properties, dict): | ||||
|             door_properties = DoorProperties(**door_properties) | ||||
|         if isinstance(rewards_door, dict): | ||||
|             rewards_door = RewardsDoor(**rewards_door) | ||||
|         self.door_properties = door_properties | ||||
|         self.rewards_door = rewards_door | ||||
|         self._door_rng = np.random.default_rng(env_seed) | ||||
|         self._doors: Doors | ||||
|         kwargs.update(env_seed=env_seed) | ||||
|         # TODO: Reset ---> document this | ||||
|         super().__init__(*args, **kwargs) | ||||
|  | ||||
|     def render_assets_hook(self, mode='human'): | ||||
|         additional_assets = super().render_assets_hook() | ||||
|         doors = [] | ||||
|         for i, door in enumerate(self[c.DOORS]): | ||||
|             name, state = 'door_open' if door.is_open else 'door_closed', 'blank' | ||||
|             doors.append(RenderEntity(name, door.pos, 1, 'none', state, i + 1)) | ||||
|         additional_assets.extend(doors) | ||||
|         return additional_assets | ||||
|  | ||||
|  | ||||
|     def step_hook(self) -> (List[dict], dict): | ||||
|         super_reward_info = super().step_hook() | ||||
|         # Step the door close intervall | ||||
|         # TODO: Maybe move this to self.post_step_hook? May collide with reward calculation. | ||||
|         if doors := self[c.DOORS]: | ||||
|             doors.tick_doors() | ||||
|         return super_reward_info | ||||
|  | ||||
|     def do_additional_actions(self, agent: Agent, action: Action) -> (dict, dict): | ||||
|         action_result = super().do_additional_actions(agent, action) | ||||
|         if action_result is None: | ||||
|             if action == a.USE_DOOR: | ||||
|                 return self.use_door_action(agent) | ||||
|             else: | ||||
|                 return None | ||||
|         else: | ||||
|             return action_result | ||||
|  | ||||
|     def use_door_action(self, agent: Agent): | ||||
|  | ||||
|         # Check if agent really is standing on a door: | ||||
|         door = self[c.DOORS].get_near_position(agent.pos) | ||||
|         if door is not None: | ||||
|             door.use() | ||||
|             valid = c.VALID | ||||
|             self.print(f'{agent.name} just used a {door.name} at {door.pos}') | ||||
|             info_dict = {f'{agent.name}_door_use': 1, f'door_use': 1} | ||||
|         # When he doesn't... | ||||
|         else: | ||||
|             valid = c.NOT_VALID | ||||
|             info_dict = {f'{agent.name}_failed_door_use': 1, 'failed_door_use': 1} | ||||
|             self.print(f'{agent.name} just tried to use a door at {agent.pos}, but there is none.') | ||||
|  | ||||
|         reward = dict(value=self.rewards_door.USE_DOOR_VALID if valid else self.rewards_door.USE_DOOR_FAIL, | ||||
|                       reason=a.USE_DOOR, info=info_dict) | ||||
|  | ||||
|         return valid, reward | ||||
|  | ||||
|     def reset_hook(self) -> None: | ||||
|         super().reset_hook() | ||||
|         # There is nothing to reset. | ||||
|  | ||||
|     def check_additional_done(self) -> (bool, dict): | ||||
|         super_done, super_dict = super().check_additional_done() | ||||
|         return super_done, super_dict | ||||
|  | ||||
|     def observations_hook(self) -> Dict[str, np.typing.ArrayLike]: | ||||
|         additional_observations = super().observations_hook() | ||||
|  | ||||
|         additional_observations.update({c.DOORS: self[c.DOORS].as_array()}) | ||||
|         return additional_observations | ||||
|  | ||||
|     def post_step_hook(self) -> List[Dict[str, int]]: | ||||
|         super_post_step = super(DoorFactory, self).post_step_hook() | ||||
|         return super_post_step | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     from environments.utility_classes import AgentRenderOptions as aro | ||||
|     render = True | ||||
|  | ||||
|     door_props = DoorProperties( | ||||
|         indicate_door_area=True | ||||
|     ) | ||||
|  | ||||
|     obs_props = ObservationProperties(render_agents=aro.COMBINED, omit_agent_self=True, | ||||
|                                       pomdp_r=2, additional_agent_placeholder=None, cast_shadows=True | ||||
|                                       ) | ||||
|  | ||||
|     move_props = {'allow_square_movement': True, | ||||
|                   'allow_diagonal_movement': False, | ||||
|                   'allow_no_op': False} | ||||
|     import time | ||||
|     global_timings = [] | ||||
|     for i in range(10): | ||||
|  | ||||
|         factory = DoorFactory(n_agents=10, done_at_collision=False, | ||||
|                               level_name='rooms', max_steps=1000, | ||||
|                               obs_prop=obs_props, parse_doors=True, | ||||
|                               verbose=True, | ||||
|                               mv_prop=move_props, dirt_prop=door_props, | ||||
|                               # inject_agents=[TSPDirtAgent], | ||||
|                               ) | ||||
|  | ||||
|         # noinspection DuplicatedCode | ||||
|         n_actions = factory.action_space.n - 1 | ||||
|         _ = factory.observation_space | ||||
|         obs_space = factory.observation_space | ||||
|         obs_space_named = factory.named_observation_space | ||||
|         action_space_named = factory.named_action_space | ||||
|         times = [] | ||||
|         for epoch in range(10): | ||||
|             start_time = time.time() | ||||
|             random_actions = [[random.randint(0, n_actions) for _ | ||||
|                                in range(factory.n_agents)] for _ | ||||
|                               in range(factory.max_steps+1)] | ||||
|             env_state = factory.reset() | ||||
|             if render: | ||||
|                 factory.render() | ||||
|             # tsp_agent = factory.get_injected_agents()[0] | ||||
|  | ||||
|             rwrd = 0 | ||||
|             for agent_i_action in random_actions: | ||||
|                 # agent_i_action = tsp_agent.predict() | ||||
|                 env_state, step_rwrd, done_bool, info_obj = factory.step(agent_i_action) | ||||
|                 rwrd += step_rwrd | ||||
|                 if render: | ||||
|                     factory.render() | ||||
|                 if done_bool: | ||||
|                     break | ||||
|             times.append(time.time() - start_time) | ||||
|             # print(f'Factory run {epoch} done, reward is:\n    {r}') | ||||
|         print('Mean Time Taken: ', sum(times) / 10) | ||||
|         global_timings.extend(times) | ||||
|     print('Mean Time Taken: ', sum(global_timings) / len(global_timings)) | ||||
|     print('Median Time Taken: ', global_timings[len(global_timings)//2]) | ||||
| @@ -16,7 +16,7 @@ from environments.helpers import Constants as c | ||||
| from environments.helpers import EnvActions as a | ||||
| from environments.helpers import RewardsBase | ||||
| from environments.factory.base.objects import Agent, Floor, Action | ||||
| from environments.factory.base.registers import Actions, Entities, Agents, Doors, Floors, Walls, PlaceHolders, \ | ||||
| from environments.factory.base.registers import Actions, Entities, Agents, Floors, Walls, PlaceHolders, \ | ||||
|     GlobalPositions | ||||
| from environments.utility_classes import MovementProperties, ObservationProperties, MarlFrameStack | ||||
| from environments.utility_classes import AgentRenderOptions as a_obs | ||||
| @@ -88,8 +88,8 @@ class BaseFactory(gym.Env): | ||||
|                  mv_prop: MovementProperties = MovementProperties(), | ||||
|                  obs_prop: ObservationProperties = ObservationProperties(), | ||||
|                  rewards_base: RewardsBase = RewardsBase(), | ||||
|                  parse_doors=False, done_at_collision=False, inject_agents: Union[None, List] = None, | ||||
|                  verbose=False, doors_have_area=True, env_seed=time.time_ns(), individual_rewards=False, | ||||
|                  done_at_collision=False, inject_agents: Union[None, List] = None, | ||||
|                  verbose=False, env_seed=time.time_ns(), individual_rewards=False, | ||||
|                  class_name='', **kwargs): | ||||
|  | ||||
|         if class_name: | ||||
| @@ -105,8 +105,6 @@ class BaseFactory(gym.Env): | ||||
|         assert obs_prop.frames_to_stack != 1 and \ | ||||
|                obs_prop.frames_to_stack >= 0, \ | ||||
|                "'frames_to_stack' cannot be negative or 1." | ||||
|         assert doors_have_area or not obs_prop.indicate_door_area, \ | ||||
|             '"indicate_door_area" can only active, when "doors_have_area"' | ||||
|         if kwargs: | ||||
|             print(f'Following kwargs were passed, but ignored: {kwargs}') | ||||
|  | ||||
| @@ -133,9 +131,7 @@ class BaseFactory(gym.Env): | ||||
|  | ||||
|         self.done_at_collision = done_at_collision | ||||
|         self._record_episodes = False | ||||
|         self.parse_doors = parse_doors | ||||
|         self._injected_agents = inject_agents or [] | ||||
|         self.doors_have_area = doors_have_area | ||||
|         self.individual_rewards = individual_rewards | ||||
|  | ||||
|         # TODO: Reset ---> document this | ||||
| @@ -174,20 +170,9 @@ class BaseFactory(gym.Env): | ||||
|         # NOPOS | ||||
|         self._NO_POS_TILE = Floor(c.NO_POS, None) | ||||
|  | ||||
|         # Doors | ||||
|         if self.parse_doors: | ||||
|             parsed_doors = h.one_hot_level(self._parsed_level, c.DOOR) | ||||
|             parsed_doors = np.pad(parsed_doors, self.obs_prop.pomdp_r, 'constant', constant_values=0) | ||||
|             if np.any(parsed_doors): | ||||
|                 door_tiles = [floor.by_pos(tuple(pos)) for pos in np.argwhere(parsed_doors == c.OCCUPIED_CELL)] | ||||
|                 doors = Doors.from_tiles(door_tiles, self._level_shape, have_area=self.obs_prop.indicate_door_area, | ||||
|                                          entity_kwargs=dict(context=floor) | ||||
|                                          ) | ||||
|                 self._entities.add_additional_items({c.DOORS: doors}) | ||||
|  | ||||
|         # Actions | ||||
|         # TODO: Move this to Agent init, so that agents can have individual action sets. | ||||
|         self._actions = Actions(self.mv_prop, can_use_doors=self.parse_doors) | ||||
|         self._actions = Actions(self.mv_prop) | ||||
|         if additional_actions := self.actions_hook: | ||||
|             self._actions.add_additional_items(additional_actions) | ||||
|  | ||||
| @@ -263,8 +248,6 @@ class BaseFactory(gym.Env): | ||||
|             elif a.NOOP == action_obj: | ||||
|                 action_valid = c.VALID | ||||
|                 reward = dict(value=self.rewards_base.NOOP, reason=a.NOOP, info={f'{agent.name}_NOOP': 1, 'NOOP': 1}) | ||||
|             elif a.USE_DOOR == action_obj: | ||||
|                 action_valid, reward = self._handle_door_interaction(agent) | ||||
|             else: | ||||
|                 # noinspection PyTupleAssignmentBalance | ||||
|                 action_valid, reward = self.do_additional_actions(agent, action_obj) | ||||
| @@ -282,12 +265,9 @@ class BaseFactory(gym.Env): | ||||
|         for tile in tiles_with_collisions: | ||||
|             guests = tile.guests_that_can_collide | ||||
|             for i, guest in enumerate(guests): | ||||
|                 # This does make a copy, but is faster than.copy() | ||||
|                 this_collisions = guests[:] | ||||
|                 del this_collisions[i] | ||||
|                 assert hasattr(guest, 'step_result') | ||||
|                 for collision in this_collisions: | ||||
|                     guest.step_result['collisions'].append(collision) | ||||
|                 for j, collision in enumerate(guests): | ||||
|                     if j != i and hasattr(guest, 'step_result'): | ||||
|                         guest.step_result['collisions'].append(collision) | ||||
|  | ||||
|         done = False | ||||
|         if self.done_at_collision: | ||||
| @@ -299,11 +279,6 @@ class BaseFactory(gym.Env): | ||||
|         done = done or additional_done | ||||
|         info.update(additional_done_info) | ||||
|  | ||||
|         # Step the door close intervall | ||||
|         if self.parse_doors: | ||||
|             if doors := self[c.DOORS]: | ||||
|                 doors.tick_doors() | ||||
|  | ||||
|         # Finalize | ||||
|         reward, reward_info = self.build_reward_result(rewards) | ||||
|  | ||||
| @@ -319,41 +294,14 @@ class BaseFactory(gym.Env): | ||||
|             info.update(post_step_info) | ||||
|  | ||||
|         obs, _ = self._build_observations() | ||||
|  | ||||
|         return obs, reward, done, info | ||||
|  | ||||
|     def _handle_door_interaction(self, agent) -> (bool, dict): | ||||
|         if doors := self[c.DOORS]: | ||||
|             # Check if agent really is standing on a door: | ||||
|             if self.doors_have_area: | ||||
|                 door = doors.get_near_position(agent.pos) | ||||
|             else: | ||||
|                 door = doors.by_pos(agent.pos) | ||||
|             if door is not None: | ||||
|                 door.use() | ||||
|                 valid = c.VALID | ||||
|                 self.print(f'{agent.name} just used a {door.name} at {door.pos}') | ||||
|                 info_dict = {f'{agent.name}_door_use': 1, f'door_use': 1} | ||||
|             # When he doesn't... | ||||
|             else: | ||||
|                 valid = c.NOT_VALID | ||||
|                 info_dict = {f'{agent.name}_failed_door_use': 1, 'failed_door_use': 1} | ||||
|                 self.print(f'{agent.name} just tried to use a door at {agent.pos}, but there is none.') | ||||
|  | ||||
|         else: | ||||
|             raise RuntimeError('This should not happen, since the door action should not be available.') | ||||
|         reward = dict(value=self.rewards_base.USE_DOOR_VALID if valid else self.rewards_base.USE_DOOR_FAIL, | ||||
|                       reason=a.USE_DOOR, info=info_dict) | ||||
|  | ||||
|         return valid, reward | ||||
|  | ||||
|     def _build_observations(self) -> np.typing.ArrayLike: | ||||
|         # Observation dict: | ||||
|         per_agent_expl_idx = dict() | ||||
|         per_agent_obsn = dict() | ||||
|         # Generel Observations | ||||
|         lvl_obs = self[c.WALLS].as_array() | ||||
|         door_obs = self[c.DOORS].as_array() if self.parse_doors else None | ||||
|         if self.obs_prop.render_agents == a_obs.NOT: | ||||
|             global_agent_obs = None | ||||
|         elif self.obs_prop.omit_agent_self and self.n_agents == 1: | ||||
| @@ -391,8 +339,6 @@ class BaseFactory(gym.Env): | ||||
|                 obs_dict[c.AGENT] = agent_obs[:] | ||||
|             if self[c.AGENT_PLACEHOLDER] and placeholder_obs is not None: | ||||
|                 obs_dict[c.AGENT_PLACEHOLDER] = placeholder_obs | ||||
|             if self.parse_doors and door_obs is not None: | ||||
|                 obs_dict[c.DOORS] = door_obs[:] | ||||
|             obs_dict.update(add_obs_dict) | ||||
|             obsn = np.vstack(list(obs_dict.values())) | ||||
|             if self.obs_prop.pomdp_r: | ||||
| @@ -430,33 +376,11 @@ class BaseFactory(gym.Env): | ||||
|                     raise e | ||||
|  | ||||
|                 obs_block_light = obsn[light_block_obs] != c.OCCUPIED_CELL | ||||
|                 door_shadowing = False | ||||
|                 if self.parse_doors: | ||||
|                     if doors := self[c.DOORS]: | ||||
|                         if door := doors.by_pos(agent.pos): | ||||
|                             if door.is_closed: | ||||
|                                 for group in door.connectivity_subgroups: | ||||
|                                     if agent.last_pos not in group: | ||||
|                                         door_shadowing = True | ||||
|                                         if self._pomdp_r: | ||||
|                                             blocking = [ | ||||
|                                                 tuple(np.subtract(x, agent.pos) + (self._pomdp_r, self._pomdp_r)) | ||||
|                                                 for x in group] | ||||
|                                             xs, ys = zip(*blocking) | ||||
|                                         else: | ||||
|                                             xs, ys = zip(*group) | ||||
|  | ||||
|                                         # noinspection PyUnresolvedReferences | ||||
|                                         obs_block_light[:, xs, ys] = False | ||||
|  | ||||
|                 light_block_map = Map((np.prod(obs_block_light, axis=0) != True).astype(int).squeeze()) | ||||
|                 if self._pomdp_r: | ||||
|                     light_block_map = light_block_map.do_fov(self._pomdp_r, self._pomdp_r, max(self._level_shape)) | ||||
|                 else: | ||||
|                     light_block_map = light_block_map.do_fov(*agent.pos, max(self._level_shape)) | ||||
|                 if door_shadowing: | ||||
|                     # noinspection PyUnboundLocalVariable | ||||
|                     light_block_map[xs, ys] = 0 | ||||
|  | ||||
|                 agent.step_result['lightmap'] = light_block_map | ||||
|  | ||||
| @@ -550,35 +474,13 @@ class BaseFactory(gym.Env): | ||||
|         y_new = agent.y + y_diff | ||||
|  | ||||
|         new_tile = self[c.FLOOR].by_pos((x_new, y_new)) | ||||
|         if new_tile: | ||||
|         if new_tile and not np.any([x.is_blocking for x in new_tile.guests]): | ||||
|             valid = c.VALID | ||||
|         else: | ||||
|             tile = agent.tile | ||||
|             valid = c.VALID | ||||
|             return tile, valid | ||||
|  | ||||
|         if self.parse_doors and agent.last_pos != c.NO_POS: | ||||
|             if doors := self[c.DOORS]: | ||||
|                 if self.doors_have_area: | ||||
|                     if door := doors.by_pos(new_tile.pos): | ||||
|                         if door.is_closed: | ||||
|                             return agent.tile, c.NOT_VALID | ||||
|                         else:  # door.is_closed: | ||||
|                             pass | ||||
|  | ||||
|                 if door := doors.by_pos(agent.pos): | ||||
|                     if door.is_open: | ||||
|                         pass | ||||
|                     else:  # door.is_closed: | ||||
|                         if door.is_linked(agent.last_pos, new_tile.pos): | ||||
|                             pass | ||||
|                         else: | ||||
|                             return agent.tile, c.NOT_VALID | ||||
|                 else: | ||||
|                     pass | ||||
|         else: | ||||
|             pass | ||||
|  | ||||
|         return new_tile, valid | ||||
|  | ||||
|     def build_reward_result(self, global_env_rewards: list) -> (int, dict): | ||||
| @@ -649,14 +551,10 @@ class BaseFactory(gym.Env): | ||||
|         for i, agent in enumerate(self[c.AGENT]): | ||||
|             name, state = h.asset_str(agent) | ||||
|             agents.append(RenderEntity(name, agent.pos, 1, 'none', state, i + 1, agent.step_result['lightmap'])) | ||||
|         doors = [] | ||||
|         if self.parse_doors: | ||||
|             for i, door in enumerate(self[c.DOORS]): | ||||
|                 name, state = 'door_open' if door.is_open else 'door_closed', 'blank' | ||||
|                 doors.append(RenderEntity(name, door.pos, 1, 'none', state, i + 1)) | ||||
|  | ||||
|         additional_assets = self.render_assets_hook() | ||||
|  | ||||
|         return self._renderer.render(walls + doors + additional_assets + agents) | ||||
|         return self._renderer.render(walls + additional_assets + agents) | ||||
|  | ||||
|     def save_params(self, filepath: Path): | ||||
|         # noinspection PyProtectedMember | ||||
|   | ||||
| @@ -1,12 +1,10 @@ | ||||
| from collections import defaultdict | ||||
| from typing import Union | ||||
| from typing import Union, List | ||||
|  | ||||
| import networkx as nx | ||||
| import numpy as np | ||||
|  | ||||
| from environments import helpers as h | ||||
| from environments.helpers import Constants as c | ||||
| import itertools | ||||
|  | ||||
| ########################################################################## | ||||
| # ##################### Base Object Building Blocks ######################### # | ||||
| @@ -88,6 +86,10 @@ class EnvObject(Object): | ||||
| class Entity(EnvObject): | ||||
|     """Full Env Entity that lives on the env Grid. Doors, Items, DirtPile etc...""" | ||||
|  | ||||
|     @property | ||||
|     def is_blocking(self): | ||||
|         return False | ||||
|  | ||||
|     @property | ||||
|     def can_collide(self): | ||||
|         return False | ||||
| @@ -226,6 +228,21 @@ class GlobalPosition(BoundingMixin, EnvObject): | ||||
|  | ||||
| class Floor(EnvObject): | ||||
|  | ||||
|     @property | ||||
|     def neighboring_floor_pos(self): | ||||
|         return [x.pos for x in self.neighboring_floor] | ||||
|  | ||||
|     @property | ||||
|     def neighboring_floor(self): | ||||
|         if self._neighboring_floor: | ||||
|             pass | ||||
|         else: | ||||
|             self._neighboring_floor = [x for x in [self._collection.by_pos(np.add(self.pos, pos)) | ||||
|                                                    for pos in h.POS_MASK.reshape(-1, 2) | ||||
|                                                    if not np.all(pos == [0, 0])] | ||||
|                                        if x] | ||||
|         return self._neighboring_floor | ||||
|  | ||||
|     @property | ||||
|     def encoding(self): | ||||
|         return c.FREE_CELL | ||||
| @@ -254,6 +271,7 @@ class Floor(EnvObject): | ||||
|         super(Floor, self).__init__(*args, **kwargs) | ||||
|         self._guests = dict() | ||||
|         self._pos = tuple(pos) | ||||
|         self._neighboring_floor: List[Floor] = list() | ||||
|  | ||||
|     def __len__(self): | ||||
|         return len(self._guests) | ||||
| @@ -298,94 +316,6 @@ class Wall(Floor): | ||||
|     pass | ||||
|  | ||||
|  | ||||
| class Door(Entity): | ||||
|  | ||||
|     @property | ||||
|     def can_collide(self): | ||||
|         if self.has_area: | ||||
|             return False if self.is_open else True | ||||
|         else: | ||||
|             return False | ||||
|  | ||||
|     @property | ||||
|     def encoding(self): | ||||
|         # This is important as it shadow is checked by occupation value | ||||
|         return c.CLOSED_DOOR_CELL if self.is_closed else c.OPEN_DOOR_CELL | ||||
|  | ||||
|     @property | ||||
|     def str_state(self): | ||||
|         return 'open' if self.is_open else 'closed' | ||||
|  | ||||
|     @property | ||||
|     def access_area(self): | ||||
|         return [node for node in self.connectivity.nodes | ||||
|                 if node not in range(len(self.connectivity_subgroups)) and node != self.pos] | ||||
|  | ||||
|     def __init__(self, *args, context, closed_on_init=True, auto_close_interval=10, has_area=False, **kwargs): | ||||
|         super(Door, self).__init__(*args, **kwargs) | ||||
|         self._state = c.CLOSED_DOOR | ||||
|         self.has_area = has_area | ||||
|         self.auto_close_interval = auto_close_interval | ||||
|         self.time_to_close = -1 | ||||
|         neighbor_pos = list(itertools.product([-1, 1, 0], repeat=2))[:-1] | ||||
|         neighbor_tiles = [context.by_pos(tuple([sum(x) for x in zip(self.pos, diff)])) for diff in neighbor_pos] | ||||
|         neighbor_pos = [x.pos for x in neighbor_tiles if x] | ||||
|         self.connectivity = h.points_to_graph(neighbor_pos) | ||||
|         self.connectivity_subgroups = list(nx.algorithms.components.connected_components(self.connectivity)) | ||||
|         for idx, group in enumerate(self.connectivity_subgroups): | ||||
|             for tile_pos in group: | ||||
|                 self.connectivity.add_edge(tile_pos, idx) | ||||
|         if not closed_on_init: | ||||
|             self._open() | ||||
|  | ||||
|     def summarize_state(self): | ||||
|         state_dict = super().summarize_state() | ||||
|         state_dict.update(state=str(self.str_state), time_to_close=int(self.time_to_close)) | ||||
|         return state_dict | ||||
|  | ||||
|     @property | ||||
|     def is_closed(self): | ||||
|         return self._state == c.CLOSED_DOOR | ||||
|  | ||||
|     @property | ||||
|     def is_open(self): | ||||
|         return self._state == c.OPEN_DOOR | ||||
|  | ||||
|     @property | ||||
|     def status(self): | ||||
|         return self._state | ||||
|  | ||||
|     def use(self): | ||||
|         if self._state == c.OPEN_DOOR: | ||||
|             self._close() | ||||
|         else: | ||||
|             self._open() | ||||
|  | ||||
|     def tick(self): | ||||
|         if self.is_open and len(self.tile) == 1 and self.time_to_close: | ||||
|             self.time_to_close -= 1 | ||||
|         elif self.is_open and not self.time_to_close and len(self.tile) == 1: | ||||
|             self.use() | ||||
|  | ||||
|     def _open(self): | ||||
|         self.connectivity.add_edges_from([(self.pos, x) for x in range(len(self.connectivity_subgroups))]) | ||||
|         self._state = c.OPEN_DOOR | ||||
|         self._collection.notify_change_to_value(self) | ||||
|         self.time_to_close = self.auto_close_interval | ||||
|  | ||||
|     def _close(self): | ||||
|         self.connectivity.remove_node(self.pos) | ||||
|         self._state = c.CLOSED_DOOR | ||||
|         self._collection.notify_change_to_value(self) | ||||
|  | ||||
|     def is_linked(self, old_pos, new_pos): | ||||
|         try: | ||||
|             _ = nx.shortest_path(self.connectivity, old_pos, new_pos) | ||||
|             return True | ||||
|         except nx.exception.NetworkXNoPath: | ||||
|             return False | ||||
|  | ||||
|  | ||||
| class Agent(MoveableEntity): | ||||
|  | ||||
|     @property | ||||
|   | ||||
| @@ -6,7 +6,7 @@ from typing import List, Union, Dict, Tuple | ||||
| import numpy as np | ||||
| import six | ||||
|  | ||||
| from environments.factory.base.objects import Entity, Floor, Agent, Door, Action, Wall, PlaceHolder, GlobalPosition, \ | ||||
| from environments.factory.base.objects import Entity, Floor, Agent, Action, Wall, PlaceHolder, GlobalPosition, \ | ||||
|     Object, EnvObject | ||||
| from environments.utility_classes import MovementProperties | ||||
| from environments import helpers as h | ||||
| @@ -452,38 +452,6 @@ class Agents(MovingEntityObjectCollection): | ||||
|         self._collection[agent.name] = agent | ||||
|  | ||||
|  | ||||
| class Doors(EntityCollection): | ||||
|  | ||||
|     def __init__(self, *args, have_area: bool = False, **kwargs): | ||||
|         self.have_area = have_area | ||||
|         self._area_marked = False | ||||
|         super(Doors, self).__init__(*args, is_blocking_light=True, can_collide=True, **kwargs) | ||||
|  | ||||
|     _accepted_objects = Door | ||||
|  | ||||
|     def get_near_position(self, position: (int, int)) -> Union[None, Door]: | ||||
|         try: | ||||
|             return next(door for door in self if position in door.access_area) | ||||
|         except StopIteration: | ||||
|             return None | ||||
|  | ||||
|     def tick_doors(self): | ||||
|         for door in self: | ||||
|             door.tick() | ||||
|  | ||||
|     def as_array(self): | ||||
|         if self.have_area and not self._area_marked: | ||||
|             for door in self: | ||||
|                 for pos in door.access_area: | ||||
|                     if self._individual_slices: | ||||
|                         pass | ||||
|                     else: | ||||
|                         pos = (0, *pos) | ||||
|                     self._lazy_eval_transforms.append((pos, c.ACCESS_DOOR_CELL)) | ||||
|             self._area_marked = True | ||||
|         return super(Doors, self).as_array() | ||||
|  | ||||
|  | ||||
| class Actions(ObjectCollection): | ||||
|     _accepted_objects = Action | ||||
|  | ||||
| @@ -492,11 +460,10 @@ class Actions(ObjectCollection): | ||||
|         return self._movement_actions | ||||
|  | ||||
|     # noinspection PyTypeChecker | ||||
|     def __init__(self, movement_properties: MovementProperties, can_use_doors=False): | ||||
|     def __init__(self, movement_properties: MovementProperties): | ||||
|         self.allow_no_op = movement_properties.allow_no_op | ||||
|         self.allow_diagonal_movement = movement_properties.allow_diagonal_movement | ||||
|         self.allow_square_movement = movement_properties.allow_square_movement | ||||
|         self.can_use_doors = can_use_doors | ||||
|         super(Actions, self).__init__() | ||||
|  | ||||
|         # Move this to Baseclass, Env init? | ||||
| @@ -507,8 +474,6 @@ class Actions(ObjectCollection): | ||||
|             self.add_additional_items([self._accepted_objects(str_ident=direction) | ||||
|                                        for direction in h.EnvActions.diagonal_move()]) | ||||
|         self._movement_actions = self._collection.copy() | ||||
|         if self.can_use_doors: | ||||
|             self.add_additional_items([self._accepted_objects(str_ident=h.EnvActions.USE_DOOR)]) | ||||
|         if self.allow_no_op: | ||||
|             self.add_additional_items([self._accepted_objects(str_ident=h.EnvActions.NOOP)]) | ||||
|  | ||||
|   | ||||
| @@ -33,6 +33,10 @@ IGNORED_DF_COLUMNS = ['Episode', 'Run',          # For plotting, which values ar | ||||
|                       'train_step', 'step', 'index', 'dirt_amount', 'dirty_tile_count', 'terminal_observation', | ||||
|                       'episode'] | ||||
|  | ||||
| POS_MASK = np.asarray([[[-1, -1], [0, -1], [1, -1]], | ||||
|                        [[-1,  0], [0,  0], [1,  0]], | ||||
|                        [[-1,  1], [0,  1], [1,  1]]]) | ||||
|  | ||||
|  | ||||
| class Constants: | ||||
|  | ||||
| @@ -42,12 +46,10 @@ class Constants: | ||||
|     """ | ||||
|  | ||||
|     WALL                = '#'                   # Wall tile identifier for resolving the string based map files. | ||||
|     DOOR                = 'D'                   # Door identifier for resolving the string based map files. | ||||
|     DANGER_ZONE         = 'x'                   # Dange Zone tile identifier for resolving the string based map files. | ||||
|  | ||||
|     WALLS               = 'Walls'               # Identifier of Wall-objects and sets (collections). | ||||
|     FLOOR               = 'Floor'               # Identifier of Floor-objects and sets (collections). | ||||
|     DOORS               = 'Doors'               # Identifier of Door-objects and sets (collections). | ||||
|     LEVEL               = 'Level'               # Identifier of Level-objects and sets (collections). | ||||
|     AGENT               = 'Agent'               # Identifier of Agent-objects and sets (collections). | ||||
|     AGENT_PLACEHOLDER   = 'AGENT_PLACEHOLDER'   # Identifier of Placeholder-objects and sets (collections). | ||||
| @@ -56,16 +58,9 @@ class Constants: | ||||
|     FREE_CELL           = 0                     # Free-Cell value used in observation | ||||
|     OCCUPIED_CELL       = 1                     # Occupied-Cell value used in observation | ||||
|     SHADOWED_CELL       = -1                    # Shadowed-Cell value used in observation | ||||
|     ACCESS_DOOR_CELL    = 1/3                   # Access-door-Cell value used in observation | ||||
|     OPEN_DOOR_CELL      = 2/3                   # Open-door-Cell value used in observation | ||||
|     CLOSED_DOOR_CELL    = 3/3                   # Closed-door-Cell value used in observation | ||||
|  | ||||
|     NO_POS              = (-9999, -9999)        # Invalid Position value used in the environment (something is off-grid) | ||||
|  | ||||
|     CLOSED_DOOR         = 'closed'              # Identifier to compare door-is-closed state | ||||
|     OPEN_DOOR           = 'open'                # Identifier to compare door-is-open state | ||||
|     # ACCESS_DOOR         = 'access'            # Identifier to compare access positions | ||||
|  | ||||
|     ACTION              = 'action'              # Identifier of Action-objects and sets (collections). | ||||
|     COLLISION           = 'collision'           # Identifier to use in the context of collitions. | ||||
|     VALID               = True                  # Identifier to rename boolean values in the context of actions. | ||||
| @@ -90,7 +85,6 @@ class EnvActions: | ||||
|     # Other | ||||
|     # MOVE            = 'move' | ||||
|     NOOP            = 'no_op' | ||||
|     USE_DOOR        = 'use_door' | ||||
|  | ||||
|     _ACTIONMAP = defaultdict(lambda: (0, 0), | ||||
|                             {NORTH: (-1, 0),    NORTHEAST: (-1, 1), | ||||
| @@ -100,6 +94,8 @@ class EnvActions: | ||||
|                              } | ||||
|                             ) | ||||
|  | ||||
|  | ||||
|  | ||||
|     @classmethod | ||||
|     def is_move(cls, action): | ||||
|         """ | ||||
| @@ -166,8 +162,6 @@ class RewardsBase(NamedTuple): | ||||
|     MOVEMENTS_VALID: float = -0.001 | ||||
|     MOVEMENTS_FAIL: float  = -0.05 | ||||
|     NOOP: float            = -0.01 | ||||
|     USE_DOOR_VALID: float  = -0.00 | ||||
|     USE_DOOR_FAIL: float   = -0.01 | ||||
|     COLLISION: float       = -0.5 | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -68,7 +68,7 @@ if __name__ == '__main__': | ||||
|                                       omit_agent_self=True,                  # This is default | ||||
|                                       additional_agent_placeholder=None,     # We will not take care of future agents | ||||
|                                       frames_to_stack=3,                     # To give the agent a notion of time | ||||
|                                       pomdp_r=2                              # the agents view-radius | ||||
|                                       pomdp_r=2                              # the agents' view-radius | ||||
|                                       ) | ||||
|     #  'MovementProperties' are for specifying how the agent is allowed to move in the env. | ||||
|     move_props = MovementProperties(allow_diagonal_movement=True,   # Euclidean style (vertices) | ||||
| @@ -135,7 +135,7 @@ if __name__ == '__main__': | ||||
|             env_recorder_callback = EnvRecorder(env_factory, freq=int(train_steps / 400 / 10)) | ||||
|  | ||||
|             # Model Init | ||||
|             model = model_class("MlpPolicy", env_factory,verbose=1, seed=seed, device='cpu') | ||||
|             model = model_class("MlpPolicy", env_factory, verbose=1, seed=seed, device='cpu') | ||||
|  | ||||
|             # Model train | ||||
|             model.learn(total_timesteps=int(train_steps), callback=[env_monitor_callback, env_recorder_callback]) | ||||
| @@ -166,7 +166,7 @@ if __name__ == '__main__': | ||||
|  | ||||
|         # retrieve model class | ||||
|         model_cls = next(val for key, val in h.MODEL_MAP.items() if key in policy_path.parent.name) | ||||
|         # Load the agent agent | ||||
|         # Load the agent | ||||
|         model = model_cls.load(policy_path / 'model.zip', device='cpu') | ||||
|         # Load old env kwargs | ||||
|         with next(policy_path.glob(env_params_json)).open('r') as f: | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Steffen Illium
					Steffen Illium