mirror of
https://github.com/illiumst/marl-factory-grid.git
synced 2025-07-11 23:42:40 +02:00
Doors are now separate
This commit is contained in:
38
environments/factory/additional/_template/_collections.py
Normal file
38
environments/factory/additional/_template/_collections.py
Normal file
@ -0,0 +1,38 @@
|
||||
from typing import Union
|
||||
|
||||
from environments.factory.additional.doors.doors_entities import Door
|
||||
from environments.factory.base.registers import EntityCollection
|
||||
|
||||
from environments.factory.additional.doors.doors_util import Constants as c
|
||||
|
||||
|
||||
class Doors(EntityCollection):
    """Collection holding the ``Door`` entities of a level.

    In addition to storing the doors, the collection can mark (once) the
    floor tiles neighbouring each door in its array representation.
    """

    # Only ``Door`` instances are accepted as members of this collection.
    _accepted_objects = Door

    def __init__(self, *args, indicate_area=False, **kwargs):
        """Create the collection.

        :param indicate_area: If true, ``as_array`` marks the floor tiles next
                              to every door with ``ACCESS_DOOR_CELL``.

        Remaining arguments are forwarded to ``EntityCollection``; the
        collection is always created light-blocking and collidable.
        """
        # Set before the parent initialiser runs, as in the original ordering.
        self.indicate_area = indicate_area
        self._area_marked = False
        super().__init__(*args, is_blocking_light=True, can_collide=True, **kwargs)

    def get_near_position(self, position: (int, int)) -> Union[None, Door]:
        """Return the first door whose tile lists ``position`` among its
        neighbouring floor positions, or ``None`` when no door matches."""
        matching = (door for door in self if position in door.tile.neighboring_floor_pos)
        return next(matching, None)

    def tick_doors(self):
        """Call ``tick`` on every door of the collection."""
        for door in self:
            door.tick()

    def as_array(self):
        """Return the array representation of the parent collection.

        On the first call with ``indicate_area`` enabled, one transform per
        floor tile neighbouring a door is queued beforehand (only when the
        collection does not use individual slices).
        """
        must_mark = self.indicate_area and not self._area_marked
        if must_mark:
            for door in self:
                for tile in door.tile.neighboring_floor:
                    # Nothing is queued when every entity has its own slice.
                    if not self._individual_slices:
                        self._lazy_eval_transforms.append(((0, *tile.pos), c.ACCESS_DOOR_CELL))
            # Remember that the marking was done, so it is not queued again.
            self._area_marked = True
        return super().as_array()
|
71
environments/factory/additional/_template/_entities.py
Normal file
71
environments/factory/additional/_template/_entities.py
Normal file
@ -0,0 +1,71 @@
|
||||
from environments.factory.base.objects import Entity
|
||||
from environments.factory.additional.doors.doors_util import Constants as c
|
||||
|
||||
|
||||
class Template(Entity):
    """Template for new Entity"""

    def __init__(self, *args, closed_on_init=True, auto_close_interval=10, indicate_area=False, **kwargs):
        """Initialise the entity in the closed state.

        :param closed_on_init: When false, the entity is opened right after construction.
        :param auto_close_interval: Value the close-timer is reset to whenever the entity opens.
        :param indicate_area: Stored on the instance; not read anywhere in this class.

        Remaining arguments are forwarded to ``Entity``.
        """
        super().__init__(*args, **kwargs)
        self._state = c.CLOSED_DOOR
        self.indicate_area = indicate_area
        self.auto_close_interval = auto_close_interval
        self.time_to_close = -1
        if not closed_on_init:
            self._open()

    # How to define / override properties
    @property
    def is_blocking(self):
        """The template entity never blocks."""
        return False

    @property
    def can_collide(self):
        """Collidable unless the placeholder attribute ``template_attr`` is truthy."""
        # ``template_attr`` is a placeholder that ``__init__`` never assigns. Reading it
        # via getattr avoids an AttributeError; a missing value counts as falsy.
        return not getattr(self, 'template_attr', None)

    @property
    def encoding(self):
        """Observation value of the entity, depending on its open/closed state."""
        # Per the original note this value matters because shadows are checked by
        # occupation value.
        if self.is_closed:
            return c.CLOSED_DOOR_CELL
        return c.OPEN_DOOR_CELL

    @property
    def str_state(self):
        """Human readable state: ``'open'`` or ``'closed'``."""
        return 'open' if self.is_open else 'closed'

    @property
    def is_closed(self):
        """True while the internal state equals ``CLOSED_DOOR``."""
        return self._state == c.CLOSED_DOOR

    @property
    def is_open(self):
        """True while the internal state equals ``OPEN_DOOR``."""
        return self._state == c.OPEN_DOOR

    @property
    def status(self):
        """The raw internal state identifier."""
        return self._state

    def summarize_state(self):
        """Extend the parent's state summary by ``state`` and ``time_to_close``."""
        state_dict = super().summarize_state()
        state_dict.update(state=str(self.str_state), time_to_close=int(self.time_to_close))
        return state_dict

    def use(self):
        """Toggle the entity: close it when open, open it otherwise."""
        if self.is_open:
            self._close()
        else:
            self._open()

    def tick(self):
        """Advance the auto-close timer by one step.

        Nothing happens unless the entity is open and ``len(self.tile) == 1``
        (presumably: nothing else occupies the tile - TODO confirm). While the
        timer is non-zero it is decremented; once it reached zero the entity
        is toggled via ``use`` and thereby closed.
        """
        if not self.is_open or len(self.tile) != 1:
            return
        if self.time_to_close:
            self.time_to_close -= 1
        else:
            self.use()

    def _open(self):
        """Switch to the open state, notify the collection and restart the timer."""
        self._state = c.OPEN_DOOR
        self._collection.notify_change_to_value(self)
        self.time_to_close = self.auto_close_interval

    def _close(self):
        """Switch to the closed state and notify the collection."""
        self._state = c.CLOSED_DOOR
        self._collection.notify_change_to_value(self)
|
31
environments/factory/additional/_template/_util.py
Normal file
31
environments/factory/additional/_template/_util.py
Normal file
@ -0,0 +1,31 @@
|
||||
|
||||
from typing import NamedTuple
|
||||
|
||||
from environments.helpers import Constants as BaseConstants, EnvActions as BaseActions
|
||||
|
||||
|
||||
class Constants(BaseConstants):
    """Door-specific constants on top of the shared environment constants."""

    # --- Identifiers ---
    DOOR = 'Door'          # Identifier of single door entities.
    DOORS = 'Doors'        # Identifier of door objects and sets (collections).
    DOOR_SYMBOL = 'D'      # Door identifier for resolving the string based map files.

    # --- Cell values used in the observation ---
    ACCESS_DOOR_CELL = 1 / 3    # Access-door cell.
    OPEN_DOOR_CELL = 2 / 3      # Open-door cell.
    CLOSED_DOOR_CELL = 1.0      # Closed-door cell (3 / 3).

    # --- State identifiers ---
    CLOSED_DOOR = 'closed'      # Compared against to detect the closed state.
    OPEN_DOOR = 'open'          # Compared against to detect the open state.
    # ACCESS_DOOR = 'access'    # Identifier to compare access positions
|
||||
|
||||
|
||||
class Actions(BaseActions):
    """Door-related action identifiers on top of the base environment actions."""

    # Identifier of the action with which an agent uses a door.
    USE_DOOR = 'use_door'
|
||||
|
||||
|
||||
class RewardsDoor(NamedTuple):
    """Reward values handed out for the use-door action."""

    # Reward for a valid door use.
    USE_DOOR_VALID: float = -0.0
    # Reward for a failed door use.
    USE_DOOR_FAIL: float = -0.01
|
||||
|
||||
|
||||
class DoorProperties(NamedTuple):
    """Configuration options of the door extension."""

    # Whether the door area should be indicated in the agents' observation.
    indicate_door_area: bool = True
|
196
environments/factory/additional/_template/factory_template.py
Normal file
196
environments/factory/additional/_template/factory_template.py
Normal file
@ -0,0 +1,196 @@
|
||||
import time
|
||||
from typing import List, Union, Dict
|
||||
import random
|
||||
|
||||
import numpy as np
|
||||
|
||||
from environments.factory.additional.doors.doors_collections import Doors
|
||||
from environments.factory.additional.doors.doors_util import DoorProperties, RewardsDoor, Constants, Actions
|
||||
from environments.factory.base.base_factory import BaseFactory
|
||||
from environments.factory.base.objects import Agent, Action
|
||||
from environments.factory.base.registers import Entities
|
||||
|
||||
from environments import helpers as h
|
||||
|
||||
from environments.factory.base.renderer import RenderEntity
|
||||
from environments.utility_classes import ObservationProperties
|
||||
|
||||
|
||||
def softmax(x):
    """Return the softmax of ``x``, normalised over all of its elements.

    The maximum of ``x`` is subtracted before exponentiation, which keeps the
    exponentials from overflowing without changing the result.
    """
    shifted = x - np.max(x)
    exponentials = np.exp(shifted)
    return exponentials / exponentials.sum()
|
||||
|
||||
|
||||
def entropy(x):
    """Return ``-sum(x * log(x + 1e-8))`` over all elements of ``x``.

    The small constant keeps the logarithm finite for entries equal to zero.
    """
    weighted_logs = x * np.log(x + 1e-8)
    return -weighted_logs.sum()
|
||||
|
||||
|
||||
# Short module-level aliases for the door constants and actions.
c, a = Constants, Actions
|
||||
|
||||
|
||||
# noinspection PyAttributeOutsideInit, PyAbstractClass
|
||||
class DoorFactory(BaseFactory):
    """Factory environment extension that adds doors and a ``use_door`` action.

    Doors are parsed from the level via ``DOOR_SYMBOL``, ticked once per step
    (auto-close timer), rendered, and exposed as an additional observation.
    """

    @property
    def actions_hook(self) -> Union[Action, List[Action]]:
        """Extend the inherited actions by the ``use_door`` action."""
        super_actions = super().actions_hook
        super_actions.append(Action(str_ident=a.USE_DOOR))
        return super_actions

    @property
    def entities_hook(self) -> Dict[(str, Entities)]:
        """Extend the inherited entities by a ``Doors`` collection.

        The collection is only added when the parsed level contains at least
        one door symbol.
        """
        super_entities = super().entities_hook

        parsed_doors = h.one_hot_level(self._parsed_level, c.DOOR_SYMBOL)
        # Pad by the observation radius, so door positions line up with the floor tiles.
        parsed_doors = np.pad(parsed_doors, self.obs_prop.pomdp_r, 'constant', constant_values=0)
        if np.any(parsed_doors):
            floor = self[c.FLOOR]
            door_tiles = [floor.by_pos(tuple(pos)) for pos in np.argwhere(parsed_doors == c.OCCUPIED_CELL)]
            # NOTE(review): the flag is read from ``obs_prop`` although this class stores its own
            # ``door_properties.indicate_door_area`` - confirm which one is intended.
            doors = Doors.from_tiles(door_tiles, self._level_shape,
                                     indicate_area=self.obs_prop.indicate_door_area,
                                     entity_kwargs=dict())
            super_entities.update({c.DOORS: doors})
        return super_entities

    def __init__(self, *args,
                 door_properties: DoorProperties = DoorProperties(), rewards_door: RewardsDoor = RewardsDoor(),
                 env_seed=None, **kwargs):
        """Set up door properties and rewards, then initialise the base factory.

        :param door_properties: ``DoorProperties`` instance or a dict with its fields.
        :param rewards_door: ``RewardsDoor`` instance or a dict with its fields.
        :param env_seed: Seed for the random generator; forwarded to the parent class.
                         When ``None``, the current time in nanoseconds is used.
        """
        # The seed default must be computed per call: a ``time.time_ns()`` default in the
        # signature is evaluated only once, which would give every instance the same seed.
        if env_seed is None:
            env_seed = time.time_ns()
        if isinstance(door_properties, dict):
            door_properties = DoorProperties(**door_properties)
        if isinstance(rewards_door, dict):
            rewards_door = RewardsDoor(**rewards_door)
        self.door_properties = door_properties
        self.rewards_door = rewards_door
        self._door_rng = np.random.default_rng(env_seed)
        self._doors: Doors
        kwargs.update(env_seed=env_seed)
        # TODO: Reset ---> document this
        super().__init__(*args, **kwargs)

    def render_assets_hook(self, mode='human'):
        """Extend the inherited render assets by one ``RenderEntity`` per door."""
        additional_assets = super().render_assets_hook()
        doors = self[c.DOORS]
        door_assets = []
        # NOTE(review): assumes the lookup yields None when no doors were registered
        # (``step_hook`` guards the same lookup) - confirm against ``__getitem__``.
        if doors is not None:
            for i, door in enumerate(doors):
                name = 'door_open' if door.is_open else 'door_closed'
                door_assets.append(RenderEntity(name, door.pos, 1, 'none', 'blank', i + 1))
        additional_assets.extend(door_assets)
        return additional_assets

    def step_hook(self) -> (List[dict], dict):
        """Run the inherited step hook, then tick the auto-close timers of all doors."""
        super_reward_info = super().step_hook()
        # Step the door close interval
        # TODO: Maybe move this to self.post_step_hook? May collide with reward calculation.
        if doors := self[c.DOORS]:
            doors.tick_doors()
        return super_reward_info

    def do_additional_actions(self, agent: Agent, action: Action) -> (dict, dict):
        """Handle ``use_door`` unless a parent class already handled the action.

        :return: The parent's result when it is not ``None``; otherwise the result of
                 ``use_door_action`` for a ``use_door`` action, or ``None``.
        """
        action_result = super().do_additional_actions(agent, action)
        if action_result is not None:
            return action_result
        if action == a.USE_DOOR:
            return self.use_door_action(agent)
        return None

    def use_door_action(self, agent: Agent):
        """Let ``agent`` use a door it is standing next to.

        :return: Tuple ``(valid, reward)``; the action is invalid when no door has the
                 agent's position among its neighbouring floor positions.
        """
        # Check whether the agent's position neighbours a door. A level without doors is
        # treated like "no door nearby" instead of failing on the missing collection.
        doors = self[c.DOORS]
        door = doors.get_near_position(agent.pos) if doors is not None else None
        if door is not None:
            door.use()
            valid = c.VALID
            self.print(f'{agent.name} just used a {door.name} at {door.pos}')
            info_dict = {f'{agent.name}_door_use': 1, 'door_use': 1}
        # When there is none...
        else:
            valid = c.NOT_VALID
            info_dict = {f'{agent.name}_failed_door_use': 1, 'failed_door_use': 1}
            self.print(f'{agent.name} just tried to use a door at {agent.pos}, but there is none.')

        reward_value = self.rewards_door.USE_DOOR_VALID if valid else self.rewards_door.USE_DOOR_FAIL
        reward = dict(value=reward_value, reason=a.USE_DOOR, info=info_dict)

        return valid, reward

    def reset_hook(self) -> None:
        """Run the inherited reset hook; doors themselves need no extra reset."""
        super().reset_hook()
        # There is nothing to reset.

    def check_additional_done(self) -> (bool, dict):
        """Pass through the inherited done-check; doors never end an episode."""
        super_done, super_dict = super().check_additional_done()
        return super_done, super_dict

    def observations_hook(self) -> Dict[str, np.typing.ArrayLike]:
        """Extend the inherited observations by the doors' array representation."""
        additional_observations = super().observations_hook()
        doors = self[c.DOORS]
        # Skip the doors slice when the level has no doors (see note in render_assets_hook).
        if doors is not None:
            additional_observations.update({c.DOORS: doors.as_array()})
        return additional_observations

    def post_step_hook(self) -> List[Dict[str, int]]:
        """Pass through the inherited post-step hook."""
        super_post_step = super(DoorFactory, self).post_step_hook()
        return super_post_step
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Smoke test / timing run: 10 factories, 10 random-action episodes each.
    import statistics

    from environments.utility_classes import AgentRenderOptions as aro

    render = True

    door_props = DoorProperties(indicate_door_area=True)

    obs_props = ObservationProperties(render_agents=aro.COMBINED, omit_agent_self=True,
                                      pomdp_r=2, additional_agent_placeholder=None, cast_shadows=True)

    move_props = {'allow_square_movement': True,
                  'allow_diagonal_movement': False,
                  'allow_no_op': False}

    global_timings = []
    for _run in range(10):
        # The door properties go in as ``door_properties`` (the name DoorFactory.__init__
        # accepts). ``parse_doors`` is no longer passed, since the base factory dropped it.
        factory = DoorFactory(n_agents=10, done_at_collision=False,
                              level_name='rooms', max_steps=1000,
                              obs_prop=obs_props,
                              verbose=True,
                              mv_prop=move_props, door_properties=door_props,
                              )

        # noinspection DuplicatedCode
        n_actions = factory.action_space.n - 1
        _ = factory.observation_space
        obs_space = factory.observation_space
        obs_space_named = factory.named_observation_space
        action_space_named = factory.named_action_space
        times = []
        for epoch in range(10):
            start_time = time.time()
            # One random action per agent for every possible step of the episode.
            random_actions = [[random.randint(0, n_actions) for _ in range(factory.n_agents)]
                              for _ in range(factory.max_steps + 1)]
            env_state = factory.reset()
            if render:
                factory.render()

            rwrd = 0
            for agent_i_action in random_actions:
                env_state, step_rwrd, done_bool, info_obj = factory.step(agent_i_action)
                rwrd += step_rwrd
                if render:
                    factory.render()
                if done_bool:
                    break
            times.append(time.time() - start_time)
            # print(f'Factory run {epoch} done, reward is:\n    {r}')
        # Divide by the number of recorded episodes rather than a hard-coded count.
        print('Mean Time Taken: ', sum(times) / len(times))
        global_timings.extend(times)
    print('Mean Time Taken: ', sum(global_timings) / len(global_timings))
    # A median needs ordered data; indexing the unsorted list gave an arbitrary sample.
    print('Median Time Taken: ', statistics.median(global_timings))
|
@ -7,9 +7,17 @@ from environments.factory.additional.btry.factory_battery import BatteryFactory
|
||||
from environments.factory.additional.dest.factory_dest import DestFactory
|
||||
from environments.factory.additional.dirt.dirt_util import DirtProperties
|
||||
from environments.factory.additional.dirt.factory_dirt import DirtFactory
|
||||
from environments.factory.additional.doors.factory_doors import DoorFactory
|
||||
from environments.factory.additional.item.factory_item import ItemFactory
|
||||
|
||||
|
||||
# noinspection PyAbstractClass
|
||||
class DoorDirtFactory(DoorFactory, DirtFactory):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
|
||||
# noinspection PyAbstractClass
|
||||
class DirtItemFactory(ItemFactory, DirtFactory):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
@ -38,8 +46,6 @@ if __name__ == '__main__':
|
||||
|
||||
render = True
|
||||
|
||||
dirt_props = DirtProperties(1, 0.05, 0.1, 3, 1, 20, 0)
|
||||
|
||||
obs_props = ObservationProperties(render_agents=ARO.COMBINED, omit_agent_self=True,
|
||||
pomdp_r=2, additional_agent_placeholder=None)
|
||||
|
||||
@ -47,13 +53,13 @@ if __name__ == '__main__':
|
||||
'allow_diagonal_movement': False,
|
||||
'allow_no_op': False}
|
||||
|
||||
factory = DirtBatteryFactory(n_agents=5, done_at_collision=False,
|
||||
factory = DoorDirtFactory(n_agents=10, done_at_collision=False,
|
||||
level_name='rooms', max_steps=400,
|
||||
obs_prop=obs_props, parse_doors=True,
|
||||
record_episodes=True, verbose=True,
|
||||
btry_prop=BatteryProperties(),
|
||||
mv_prop=move_props, dirt_prop=dirt_props
|
||||
)
|
||||
dirt_prop=DirtProperties(),
|
||||
mv_prop=move_props)
|
||||
|
||||
|
||||
# noinspection DuplicatedCode
|
||||
n_actions = factory.action_space.n - 1
|
||||
|
@ -44,7 +44,7 @@ class DirtFactory(BaseFactory):
|
||||
def entities_hook(self) -> Dict[(str, Entities)]:
|
||||
super_entities = super().entities_hook
|
||||
dirt_register = DirtPiles(self.dirt_prop, self._level_shape)
|
||||
super_entities.update(({c.DIRT: dirt_register}))
|
||||
super_entities.update({c.DIRT: dirt_register})
|
||||
return super_entities
|
||||
|
||||
def __init__(self, *args,
|
||||
|
0
environments/factory/additional/doors/__init__.py
Normal file
0
environments/factory/additional/doors/__init__.py
Normal file
38
environments/factory/additional/doors/doors_collections.py
Normal file
38
environments/factory/additional/doors/doors_collections.py
Normal file
@ -0,0 +1,38 @@
|
||||
from typing import Union
|
||||
|
||||
from environments.factory.additional.doors.doors_entities import Door
|
||||
from environments.factory.base.registers import EntityCollection
|
||||
|
||||
from environments.factory.additional.doors.doors_util import Constants as c
|
||||
|
||||
|
||||
class Doors(EntityCollection):
    """Collection holding the ``Door`` entities of a level.

    In addition to storing the doors, the collection can mark (once) the
    floor tiles neighbouring each door in its array representation.
    """

    # Only ``Door`` instances are accepted as members of this collection.
    _accepted_objects = Door

    def __init__(self, *args, indicate_area=False, **kwargs):
        """Create the collection.

        :param indicate_area: If true, ``as_array`` marks the floor tiles next
                              to every door with ``ACCESS_DOOR_CELL``.

        Remaining arguments are forwarded to ``EntityCollection``; the
        collection is always created light-blocking and collidable.
        """
        # Set before the parent initialiser runs, as in the original ordering.
        self.indicate_area = indicate_area
        self._area_marked = False
        super().__init__(*args, is_blocking_light=True, can_collide=True, **kwargs)

    def get_near_position(self, position: (int, int)) -> Union[None, Door]:
        """Return the first door whose tile lists ``position`` among its
        neighbouring floor positions, or ``None`` when no door matches."""
        matching = (door for door in self if position in door.tile.neighboring_floor_pos)
        return next(matching, None)

    def tick_doors(self):
        """Call ``tick`` on every door of the collection."""
        for door in self:
            door.tick()

    def as_array(self):
        """Return the array representation of the parent collection.

        On the first call with ``indicate_area`` enabled, one transform per
        floor tile neighbouring a door is queued beforehand (only when the
        collection does not use individual slices).
        """
        must_mark = self.indicate_area and not self._area_marked
        if must_mark:
            for door in self:
                for tile in door.tile.neighboring_floor:
                    # Nothing is queued when every entity has its own slice.
                    if not self._individual_slices:
                        self._lazy_eval_transforms.append(((0, *tile.pos), c.ACCESS_DOOR_CELL))
            # Remember that the marking was done, so it is not queued again.
            self._area_marked = True
        return super().as_array()
|
69
environments/factory/additional/doors/doors_entities.py
Normal file
69
environments/factory/additional/doors/doors_entities.py
Normal file
@ -0,0 +1,69 @@
|
||||
from environments.factory.base.objects import Entity
|
||||
from environments.factory.additional.doors.doors_util import Constants as c
|
||||
|
||||
|
||||
class Door(Entity):
    """Door entity that is either open or closed and closes itself again via a timer."""

    def __init__(self, *args, closed_on_init=True, auto_close_interval=10, indicate_area=False, **kwargs):
        """Initialise the door in the closed state.

        :param closed_on_init: When false, the door is opened right after construction.
        :param auto_close_interval: Value the close-timer is reset to whenever the door opens.
        :param indicate_area: Stored on the instance; not read anywhere in this class.

        Remaining arguments are forwarded to ``Entity``.
        """
        super().__init__(*args, **kwargs)
        self._state = c.CLOSED_DOOR
        self.indicate_area = indicate_area
        self.auto_close_interval = auto_close_interval
        self.time_to_close = -1
        if not closed_on_init:
            self._open()

    @property
    def is_blocking(self):
        """A door blocks unless it is open."""
        return not self.is_open

    @property
    def can_collide(self):
        """A door can be collided with unless it is open."""
        return not self.is_open

    @property
    def encoding(self):
        """Observation value of the door, depending on its open/closed state."""
        # Per the original note this value matters because shadows are checked by
        # occupation value.
        if self.is_closed:
            return c.CLOSED_DOOR_CELL
        return c.OPEN_DOOR_CELL

    @property
    def str_state(self):
        """Human readable state: ``'open'`` or ``'closed'``."""
        if self.is_open:
            return 'open'
        return 'closed'

    @property
    def is_closed(self):
        """True while the internal state equals ``CLOSED_DOOR``."""
        return self._state == c.CLOSED_DOOR

    @property
    def is_open(self):
        """True while the internal state equals ``OPEN_DOOR``."""
        return self._state == c.OPEN_DOOR

    @property
    def status(self):
        """The raw internal state identifier."""
        return self._state

    def summarize_state(self):
        """Extend the parent's state summary by ``state`` and ``time_to_close``."""
        summary = super().summarize_state()
        summary.update(state=str(self.str_state), time_to_close=int(self.time_to_close))
        return summary

    def use(self):
        """Toggle the door: close it when open, open it otherwise."""
        if self.is_open:
            self._close()
        else:
            self._open()

    def tick(self):
        """Advance the auto-close timer by one step.

        Nothing happens unless the door is open and ``len(self.tile) == 1``
        (presumably: nothing else occupies the tile - TODO confirm). While the
        timer is non-zero it is decremented; once it reached zero the door is
        toggled via ``use`` and thereby closed.
        """
        if not self.is_open or len(self.tile) != 1:
            return
        if self.time_to_close:
            self.time_to_close -= 1
        else:
            self.use()

    def _open(self):
        """Switch to the open state, notify the collection and restart the timer."""
        self._state = c.OPEN_DOOR
        self._collection.notify_change_to_value(self)
        self.time_to_close = self.auto_close_interval

    def _close(self):
        """Switch to the closed state and notify the collection."""
        self._state = c.CLOSED_DOOR
        self._collection.notify_change_to_value(self)
|
31
environments/factory/additional/doors/doors_util.py
Normal file
31
environments/factory/additional/doors/doors_util.py
Normal file
@ -0,0 +1,31 @@
|
||||
|
||||
from typing import NamedTuple
|
||||
|
||||
from environments.helpers import Constants as BaseConstants, EnvActions as BaseActions
|
||||
|
||||
|
||||
class Constants(BaseConstants):
    """Door-specific constants on top of the shared environment constants."""

    # --- Identifiers ---
    DOOR = 'Door'          # Identifier of single door entities.
    DOORS = 'Doors'        # Identifier of door objects and sets (collections).
    DOOR_SYMBOL = 'D'      # Door identifier for resolving the string based map files.

    # --- Cell values used in the observation ---
    ACCESS_DOOR_CELL = 1 / 3    # Access-door cell.
    OPEN_DOOR_CELL = 2 / 3      # Open-door cell.
    CLOSED_DOOR_CELL = 1.0      # Closed-door cell (3 / 3).

    # --- State identifiers ---
    CLOSED_DOOR = 'closed'      # Compared against to detect the closed state.
    OPEN_DOOR = 'open'          # Compared against to detect the open state.
    # ACCESS_DOOR = 'access'    # Identifier to compare access positions
|
||||
|
||||
|
||||
class Actions(BaseActions):
    """Door-related action identifiers on top of the base environment actions."""

    # Identifier of the action with which an agent uses a door.
    USE_DOOR = 'use_door'
|
||||
|
||||
|
||||
class RewardsDoor(NamedTuple):
    """Reward values handed out for the use-door action."""

    # Reward for a valid door use.
    USE_DOOR_VALID: float = -0.0
    # Reward for a failed door use.
    USE_DOOR_FAIL: float = -0.01
|
||||
|
||||
|
||||
class DoorProperties(NamedTuple):
    """Configuration options of the door extension."""

    # Whether the door area should be indicated in the agents' observation.
    indicate_door_area: bool = True
|
196
environments/factory/additional/doors/factory_doors.py
Normal file
196
environments/factory/additional/doors/factory_doors.py
Normal file
@ -0,0 +1,196 @@
|
||||
import time
|
||||
from typing import List, Union, Dict
|
||||
import random
|
||||
|
||||
import numpy as np
|
||||
|
||||
from environments.factory.additional.doors.doors_collections import Doors
|
||||
from environments.factory.additional.doors.doors_util import DoorProperties, RewardsDoor, Constants, Actions
|
||||
from environments.factory.base.base_factory import BaseFactory
|
||||
from environments.factory.base.objects import Agent, Action
|
||||
from environments.factory.base.registers import Entities
|
||||
|
||||
from environments import helpers as h
|
||||
|
||||
from environments.factory.base.renderer import RenderEntity
|
||||
from environments.utility_classes import ObservationProperties
|
||||
|
||||
|
||||
def softmax(x):
    """Return the softmax of ``x``, normalised over all of its elements.

    The maximum of ``x`` is subtracted before exponentiation, which keeps the
    exponentials from overflowing without changing the result.
    """
    shifted = x - np.max(x)
    exponentials = np.exp(shifted)
    return exponentials / exponentials.sum()
|
||||
|
||||
|
||||
def entropy(x):
    """Return ``-sum(x * log(x + 1e-8))`` over all elements of ``x``.

    The small constant keeps the logarithm finite for entries equal to zero.
    """
    weighted_logs = x * np.log(x + 1e-8)
    return -weighted_logs.sum()
|
||||
|
||||
|
||||
# Short module-level aliases for the door constants and actions.
c, a = Constants, Actions
|
||||
|
||||
|
||||
# noinspection PyAttributeOutsideInit, PyAbstractClass
|
||||
class DoorFactory(BaseFactory):
    """Factory environment extension that adds doors and a ``use_door`` action.

    Doors are parsed from the level via ``DOOR_SYMBOL``, ticked once per step
    (auto-close timer), rendered, and exposed as an additional observation.
    """

    @property
    def actions_hook(self) -> Union[Action, List[Action]]:
        """Extend the inherited actions by the ``use_door`` action."""
        super_actions = super().actions_hook
        super_actions.append(Action(str_ident=a.USE_DOOR))
        return super_actions

    @property
    def entities_hook(self) -> Dict[(str, Entities)]:
        """Extend the inherited entities by a ``Doors`` collection.

        The collection is only added when the parsed level contains at least
        one door symbol.
        """
        super_entities = super().entities_hook

        parsed_doors = h.one_hot_level(self._parsed_level, c.DOOR_SYMBOL)
        # Pad by the observation radius, so door positions line up with the floor tiles.
        parsed_doors = np.pad(parsed_doors, self.obs_prop.pomdp_r, 'constant', constant_values=0)
        if np.any(parsed_doors):
            floor = self[c.FLOOR]
            door_tiles = [floor.by_pos(tuple(pos)) for pos in np.argwhere(parsed_doors == c.OCCUPIED_CELL)]
            # NOTE(review): the flag is read from ``obs_prop`` although this class stores its own
            # ``door_properties.indicate_door_area`` - confirm which one is intended.
            doors = Doors.from_tiles(door_tiles, self._level_shape,
                                     indicate_area=self.obs_prop.indicate_door_area,
                                     entity_kwargs=dict())
            super_entities.update({c.DOORS: doors})
        return super_entities

    def __init__(self, *args,
                 door_properties: DoorProperties = DoorProperties(), rewards_door: RewardsDoor = RewardsDoor(),
                 env_seed=None, **kwargs):
        """Set up door properties and rewards, then initialise the base factory.

        :param door_properties: ``DoorProperties`` instance or a dict with its fields.
        :param rewards_door: ``RewardsDoor`` instance or a dict with its fields.
        :param env_seed: Seed for the random generator; forwarded to the parent class.
                         When ``None``, the current time in nanoseconds is used.
        """
        # The seed default must be computed per call: a ``time.time_ns()`` default in the
        # signature is evaluated only once, which would give every instance the same seed.
        if env_seed is None:
            env_seed = time.time_ns()
        if isinstance(door_properties, dict):
            door_properties = DoorProperties(**door_properties)
        if isinstance(rewards_door, dict):
            rewards_door = RewardsDoor(**rewards_door)
        self.door_properties = door_properties
        self.rewards_door = rewards_door
        self._door_rng = np.random.default_rng(env_seed)
        self._doors: Doors
        kwargs.update(env_seed=env_seed)
        # TODO: Reset ---> document this
        super().__init__(*args, **kwargs)

    def render_assets_hook(self, mode='human'):
        """Extend the inherited render assets by one ``RenderEntity`` per door."""
        additional_assets = super().render_assets_hook()
        doors = self[c.DOORS]
        door_assets = []
        # NOTE(review): assumes the lookup yields None when no doors were registered
        # (``step_hook`` guards the same lookup) - confirm against ``__getitem__``.
        if doors is not None:
            for i, door in enumerate(doors):
                name = 'door_open' if door.is_open else 'door_closed'
                door_assets.append(RenderEntity(name, door.pos, 1, 'none', 'blank', i + 1))
        additional_assets.extend(door_assets)
        return additional_assets

    def step_hook(self) -> (List[dict], dict):
        """Run the inherited step hook, then tick the auto-close timers of all doors."""
        super_reward_info = super().step_hook()
        # Step the door close interval
        # TODO: Maybe move this to self.post_step_hook? May collide with reward calculation.
        if doors := self[c.DOORS]:
            doors.tick_doors()
        return super_reward_info

    def do_additional_actions(self, agent: Agent, action: Action) -> (dict, dict):
        """Handle ``use_door`` unless a parent class already handled the action.

        :return: The parent's result when it is not ``None``; otherwise the result of
                 ``use_door_action`` for a ``use_door`` action, or ``None``.
        """
        action_result = super().do_additional_actions(agent, action)
        if action_result is not None:
            return action_result
        if action == a.USE_DOOR:
            return self.use_door_action(agent)
        return None

    def use_door_action(self, agent: Agent):
        """Let ``agent`` use a door it is standing next to.

        :return: Tuple ``(valid, reward)``; the action is invalid when no door has the
                 agent's position among its neighbouring floor positions.
        """
        # Check whether the agent's position neighbours a door. A level without doors is
        # treated like "no door nearby" instead of failing on the missing collection.
        doors = self[c.DOORS]
        door = doors.get_near_position(agent.pos) if doors is not None else None
        if door is not None:
            door.use()
            valid = c.VALID
            self.print(f'{agent.name} just used a {door.name} at {door.pos}')
            info_dict = {f'{agent.name}_door_use': 1, 'door_use': 1}
        # When there is none...
        else:
            valid = c.NOT_VALID
            info_dict = {f'{agent.name}_failed_door_use': 1, 'failed_door_use': 1}
            self.print(f'{agent.name} just tried to use a door at {agent.pos}, but there is none.')

        reward_value = self.rewards_door.USE_DOOR_VALID if valid else self.rewards_door.USE_DOOR_FAIL
        reward = dict(value=reward_value, reason=a.USE_DOOR, info=info_dict)

        return valid, reward

    def reset_hook(self) -> None:
        """Run the inherited reset hook; doors themselves need no extra reset."""
        super().reset_hook()
        # There is nothing to reset.

    def check_additional_done(self) -> (bool, dict):
        """Pass through the inherited done-check; doors never end an episode."""
        super_done, super_dict = super().check_additional_done()
        return super_done, super_dict

    def observations_hook(self) -> Dict[str, np.typing.ArrayLike]:
        """Extend the inherited observations by the doors' array representation."""
        additional_observations = super().observations_hook()
        doors = self[c.DOORS]
        # Skip the doors slice when the level has no doors (see note in render_assets_hook).
        if doors is not None:
            additional_observations.update({c.DOORS: doors.as_array()})
        return additional_observations

    def post_step_hook(self) -> List[Dict[str, int]]:
        """Pass through the inherited post-step hook."""
        super_post_step = super(DoorFactory, self).post_step_hook()
        return super_post_step
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Smoke test / timing run: 10 factories, 10 random-action episodes each.
    import statistics

    from environments.utility_classes import AgentRenderOptions as aro

    render = True

    door_props = DoorProperties(indicate_door_area=True)

    obs_props = ObservationProperties(render_agents=aro.COMBINED, omit_agent_self=True,
                                      pomdp_r=2, additional_agent_placeholder=None, cast_shadows=True)

    move_props = {'allow_square_movement': True,
                  'allow_diagonal_movement': False,
                  'allow_no_op': False}

    global_timings = []
    for _run in range(10):
        # The door properties go in as ``door_properties`` (the name DoorFactory.__init__
        # accepts). ``parse_doors`` is no longer passed, since the base factory dropped it.
        factory = DoorFactory(n_agents=10, done_at_collision=False,
                              level_name='rooms', max_steps=1000,
                              obs_prop=obs_props,
                              verbose=True,
                              mv_prop=move_props, door_properties=door_props,
                              )

        # noinspection DuplicatedCode
        n_actions = factory.action_space.n - 1
        _ = factory.observation_space
        obs_space = factory.observation_space
        obs_space_named = factory.named_observation_space
        action_space_named = factory.named_action_space
        times = []
        for epoch in range(10):
            start_time = time.time()
            # One random action per agent for every possible step of the episode.
            random_actions = [[random.randint(0, n_actions) for _ in range(factory.n_agents)]
                              for _ in range(factory.max_steps + 1)]
            env_state = factory.reset()
            if render:
                factory.render()

            rwrd = 0
            for agent_i_action in random_actions:
                env_state, step_rwrd, done_bool, info_obj = factory.step(agent_i_action)
                rwrd += step_rwrd
                if render:
                    factory.render()
                if done_bool:
                    break
            times.append(time.time() - start_time)
            # print(f'Factory run {epoch} done, reward is:\n    {r}')
        # Divide by the number of recorded episodes rather than a hard-coded count.
        print('Mean Time Taken: ', sum(times) / len(times))
        global_timings.extend(times)
    print('Mean Time Taken: ', sum(global_timings) / len(global_timings))
    # A median needs ordered data; indexing the unsorted list gave an arbitrary sample.
    print('Median Time Taken: ', statistics.median(global_timings))
|
@ -16,7 +16,7 @@ from environments.helpers import Constants as c
|
||||
from environments.helpers import EnvActions as a
|
||||
from environments.helpers import RewardsBase
|
||||
from environments.factory.base.objects import Agent, Floor, Action
|
||||
from environments.factory.base.registers import Actions, Entities, Agents, Doors, Floors, Walls, PlaceHolders, \
|
||||
from environments.factory.base.registers import Actions, Entities, Agents, Floors, Walls, PlaceHolders, \
|
||||
GlobalPositions
|
||||
from environments.utility_classes import MovementProperties, ObservationProperties, MarlFrameStack
|
||||
from environments.utility_classes import AgentRenderOptions as a_obs
|
||||
@ -88,8 +88,8 @@ class BaseFactory(gym.Env):
|
||||
mv_prop: MovementProperties = MovementProperties(),
|
||||
obs_prop: ObservationProperties = ObservationProperties(),
|
||||
rewards_base: RewardsBase = RewardsBase(),
|
||||
parse_doors=False, done_at_collision=False, inject_agents: Union[None, List] = None,
|
||||
verbose=False, doors_have_area=True, env_seed=time.time_ns(), individual_rewards=False,
|
||||
done_at_collision=False, inject_agents: Union[None, List] = None,
|
||||
verbose=False, env_seed=time.time_ns(), individual_rewards=False,
|
||||
class_name='', **kwargs):
|
||||
|
||||
if class_name:
|
||||
@ -105,8 +105,6 @@ class BaseFactory(gym.Env):
|
||||
assert obs_prop.frames_to_stack != 1 and \
|
||||
obs_prop.frames_to_stack >= 0, \
|
||||
"'frames_to_stack' cannot be negative or 1."
|
||||
assert doors_have_area or not obs_prop.indicate_door_area, \
|
||||
'"indicate_door_area" can only active, when "doors_have_area"'
|
||||
if kwargs:
|
||||
print(f'Following kwargs were passed, but ignored: {kwargs}')
|
||||
|
||||
@ -133,9 +131,7 @@ class BaseFactory(gym.Env):
|
||||
|
||||
self.done_at_collision = done_at_collision
|
||||
self._record_episodes = False
|
||||
self.parse_doors = parse_doors
|
||||
self._injected_agents = inject_agents or []
|
||||
self.doors_have_area = doors_have_area
|
||||
self.individual_rewards = individual_rewards
|
||||
|
||||
# TODO: Reset ---> document this
|
||||
@ -174,20 +170,9 @@ class BaseFactory(gym.Env):
|
||||
# NOPOS
|
||||
self._NO_POS_TILE = Floor(c.NO_POS, None)
|
||||
|
||||
# Doors
|
||||
if self.parse_doors:
|
||||
parsed_doors = h.one_hot_level(self._parsed_level, c.DOOR)
|
||||
parsed_doors = np.pad(parsed_doors, self.obs_prop.pomdp_r, 'constant', constant_values=0)
|
||||
if np.any(parsed_doors):
|
||||
door_tiles = [floor.by_pos(tuple(pos)) for pos in np.argwhere(parsed_doors == c.OCCUPIED_CELL)]
|
||||
doors = Doors.from_tiles(door_tiles, self._level_shape, have_area=self.obs_prop.indicate_door_area,
|
||||
entity_kwargs=dict(context=floor)
|
||||
)
|
||||
self._entities.add_additional_items({c.DOORS: doors})
|
||||
|
||||
# Actions
|
||||
# TODO: Move this to Agent init, so that agents can have individual action sets.
|
||||
self._actions = Actions(self.mv_prop, can_use_doors=self.parse_doors)
|
||||
self._actions = Actions(self.mv_prop)
|
||||
if additional_actions := self.actions_hook:
|
||||
self._actions.add_additional_items(additional_actions)
|
||||
|
||||
@ -263,8 +248,6 @@ class BaseFactory(gym.Env):
|
||||
elif a.NOOP == action_obj:
|
||||
action_valid = c.VALID
|
||||
reward = dict(value=self.rewards_base.NOOP, reason=a.NOOP, info={f'{agent.name}_NOOP': 1, 'NOOP': 1})
|
||||
elif a.USE_DOOR == action_obj:
|
||||
action_valid, reward = self._handle_door_interaction(agent)
|
||||
else:
|
||||
# noinspection PyTupleAssignmentBalance
|
||||
action_valid, reward = self.do_additional_actions(agent, action_obj)
|
||||
@ -282,11 +265,8 @@ class BaseFactory(gym.Env):
|
||||
for tile in tiles_with_collisions:
|
||||
guests = tile.guests_that_can_collide
|
||||
for i, guest in enumerate(guests):
|
||||
# This does make a copy, but is faster than .copy()
|
||||
this_collisions = guests[:]
|
||||
del this_collisions[i]
|
||||
assert hasattr(guest, 'step_result')
|
||||
for collision in this_collisions:
|
||||
for j, collision in enumerate(guests):
|
||||
if j != i and hasattr(guest, 'step_result'):
|
||||
guest.step_result['collisions'].append(collision)
|
||||
|
||||
done = False
|
||||
@ -299,11 +279,6 @@ class BaseFactory(gym.Env):
|
||||
done = done or additional_done
|
||||
info.update(additional_done_info)
|
||||
|
||||
# Step the door close interval
|
||||
if self.parse_doors:
|
||||
if doors := self[c.DOORS]:
|
||||
doors.tick_doors()
|
||||
|
||||
# Finalize
|
||||
reward, reward_info = self.build_reward_result(rewards)
|
||||
|
||||
@ -319,41 +294,14 @@ class BaseFactory(gym.Env):
|
||||
info.update(post_step_info)
|
||||
|
||||
obs, _ = self._build_observations()
|
||||
|
||||
return obs, reward, done, info
|
||||
|
||||
def _handle_door_interaction(self, agent) -> (bool, dict):
|
||||
if doors := self[c.DOORS]:
|
||||
# Check if agent really is standing on a door:
|
||||
if self.doors_have_area:
|
||||
door = doors.get_near_position(agent.pos)
|
||||
else:
|
||||
door = doors.by_pos(agent.pos)
|
||||
if door is not None:
|
||||
door.use()
|
||||
valid = c.VALID
|
||||
self.print(f'{agent.name} just used a {door.name} at {door.pos}')
|
||||
info_dict = {f'{agent.name}_door_use': 1, f'door_use': 1}
|
||||
# When he doesn't...
|
||||
else:
|
||||
valid = c.NOT_VALID
|
||||
info_dict = {f'{agent.name}_failed_door_use': 1, 'failed_door_use': 1}
|
||||
self.print(f'{agent.name} just tried to use a door at {agent.pos}, but there is none.')
|
||||
|
||||
else:
|
||||
raise RuntimeError('This should not happen, since the door action should not be available.')
|
||||
reward = dict(value=self.rewards_base.USE_DOOR_VALID if valid else self.rewards_base.USE_DOOR_FAIL,
|
||||
reason=a.USE_DOOR, info=info_dict)
|
||||
|
||||
return valid, reward
|
||||
|
||||
def _build_observations(self) -> np.typing.ArrayLike:
|
||||
# Observation dict:
|
||||
per_agent_expl_idx = dict()
|
||||
per_agent_obsn = dict()
|
||||
# General Observations
|
||||
lvl_obs = self[c.WALLS].as_array()
|
||||
door_obs = self[c.DOORS].as_array() if self.parse_doors else None
|
||||
if self.obs_prop.render_agents == a_obs.NOT:
|
||||
global_agent_obs = None
|
||||
elif self.obs_prop.omit_agent_self and self.n_agents == 1:
|
||||
@ -391,8 +339,6 @@ class BaseFactory(gym.Env):
|
||||
obs_dict[c.AGENT] = agent_obs[:]
|
||||
if self[c.AGENT_PLACEHOLDER] and placeholder_obs is not None:
|
||||
obs_dict[c.AGENT_PLACEHOLDER] = placeholder_obs
|
||||
if self.parse_doors and door_obs is not None:
|
||||
obs_dict[c.DOORS] = door_obs[:]
|
||||
obs_dict.update(add_obs_dict)
|
||||
obsn = np.vstack(list(obs_dict.values()))
|
||||
if self.obs_prop.pomdp_r:
|
||||
@ -430,33 +376,11 @@ class BaseFactory(gym.Env):
|
||||
raise e
|
||||
|
||||
obs_block_light = obsn[light_block_obs] != c.OCCUPIED_CELL
|
||||
door_shadowing = False
|
||||
if self.parse_doors:
|
||||
if doors := self[c.DOORS]:
|
||||
if door := doors.by_pos(agent.pos):
|
||||
if door.is_closed:
|
||||
for group in door.connectivity_subgroups:
|
||||
if agent.last_pos not in group:
|
||||
door_shadowing = True
|
||||
if self._pomdp_r:
|
||||
blocking = [
|
||||
tuple(np.subtract(x, agent.pos) + (self._pomdp_r, self._pomdp_r))
|
||||
for x in group]
|
||||
xs, ys = zip(*blocking)
|
||||
else:
|
||||
xs, ys = zip(*group)
|
||||
|
||||
# noinspection PyUnresolvedReferences
|
||||
obs_block_light[:, xs, ys] = False
|
||||
|
||||
light_block_map = Map((np.prod(obs_block_light, axis=0) != True).astype(int).squeeze())
|
||||
if self._pomdp_r:
|
||||
light_block_map = light_block_map.do_fov(self._pomdp_r, self._pomdp_r, max(self._level_shape))
|
||||
else:
|
||||
light_block_map = light_block_map.do_fov(*agent.pos, max(self._level_shape))
|
||||
if door_shadowing:
|
||||
# noinspection PyUnboundLocalVariable
|
||||
light_block_map[xs, ys] = 0
|
||||
|
||||
agent.step_result['lightmap'] = light_block_map
|
||||
|
||||
@ -550,35 +474,13 @@ class BaseFactory(gym.Env):
|
||||
y_new = agent.y + y_diff
|
||||
|
||||
new_tile = self[c.FLOOR].by_pos((x_new, y_new))
|
||||
if new_tile:
|
||||
if new_tile and not np.any([x.is_blocking for x in new_tile.guests]):
|
||||
valid = c.VALID
|
||||
else:
|
||||
tile = agent.tile
|
||||
valid = c.VALID
|
||||
return tile, valid
|
||||
|
||||
if self.parse_doors and agent.last_pos != c.NO_POS:
|
||||
if doors := self[c.DOORS]:
|
||||
if self.doors_have_area:
|
||||
if door := doors.by_pos(new_tile.pos):
|
||||
if door.is_closed:
|
||||
return agent.tile, c.NOT_VALID
|
||||
else: # door.is_closed:
|
||||
pass
|
||||
|
||||
if door := doors.by_pos(agent.pos):
|
||||
if door.is_open:
|
||||
pass
|
||||
else: # door.is_closed:
|
||||
if door.is_linked(agent.last_pos, new_tile.pos):
|
||||
pass
|
||||
else:
|
||||
return agent.tile, c.NOT_VALID
|
||||
else:
|
||||
pass
|
||||
else:
|
||||
pass
|
||||
|
||||
return new_tile, valid
|
||||
|
||||
def build_reward_result(self, global_env_rewards: list) -> (int, dict):
|
||||
@ -649,14 +551,10 @@ class BaseFactory(gym.Env):
|
||||
for i, agent in enumerate(self[c.AGENT]):
|
||||
name, state = h.asset_str(agent)
|
||||
agents.append(RenderEntity(name, agent.pos, 1, 'none', state, i + 1, agent.step_result['lightmap']))
|
||||
doors = []
|
||||
if self.parse_doors:
|
||||
for i, door in enumerate(self[c.DOORS]):
|
||||
name, state = 'door_open' if door.is_open else 'door_closed', 'blank'
|
||||
doors.append(RenderEntity(name, door.pos, 1, 'none', state, i + 1))
|
||||
|
||||
additional_assets = self.render_assets_hook()
|
||||
|
||||
return self._renderer.render(walls + doors + additional_assets + agents)
|
||||
return self._renderer.render(walls + additional_assets + agents)
|
||||
|
||||
def save_params(self, filepath: Path):
|
||||
# noinspection PyProtectedMember
|
||||
|
@ -1,12 +1,10 @@
|
||||
from collections import defaultdict
|
||||
from typing import Union
|
||||
from typing import Union, List
|
||||
|
||||
import networkx as nx
|
||||
import numpy as np
|
||||
|
||||
from environments import helpers as h
|
||||
from environments.helpers import Constants as c
|
||||
import itertools
|
||||
|
||||
##########################################################################
|
||||
# ##################### Base Object Building Blocks ######################### #
|
||||
@ -88,6 +86,10 @@ class EnvObject(Object):
|
||||
class Entity(EnvObject):
|
||||
"""Full Env Entity that lives on the env Grid. Doors, Items, DirtPile etc..."""
|
||||
|
||||
@property
|
||||
def is_blocking(self):
|
||||
return False
|
||||
|
||||
@property
|
||||
def can_collide(self):
|
||||
return False
|
||||
@ -226,6 +228,21 @@ class GlobalPosition(BoundingMixin, EnvObject):
|
||||
|
||||
class Floor(EnvObject):
|
||||
|
||||
@property
|
||||
def neighboring_floor_pos(self):
|
||||
return [x.pos for x in self.neighboring_floor]
|
||||
|
||||
@property
|
||||
def neighboring_floor(self):
|
||||
if self._neighboring_floor:
|
||||
pass
|
||||
else:
|
||||
self._neighboring_floor = [x for x in [self._collection.by_pos(np.add(self.pos, pos))
|
||||
for pos in h.POS_MASK.reshape(-1, 2)
|
||||
if not np.all(pos == [0, 0])]
|
||||
if x]
|
||||
return self._neighboring_floor
|
||||
|
||||
@property
|
||||
def encoding(self):
|
||||
return c.FREE_CELL
|
||||
@ -254,6 +271,7 @@ class Floor(EnvObject):
|
||||
super(Floor, self).__init__(*args, **kwargs)
|
||||
self._guests = dict()
|
||||
self._pos = tuple(pos)
|
||||
self._neighboring_floor: List[Floor] = list()
|
||||
|
||||
def __len__(self):
|
||||
return len(self._guests)
|
||||
@ -298,94 +316,6 @@ class Wall(Floor):
|
||||
pass
|
||||
|
||||
|
||||
class Door(Entity):
|
||||
|
||||
@property
|
||||
def can_collide(self):
|
||||
if self.has_area:
|
||||
return False if self.is_open else True
|
||||
else:
|
||||
return False
|
||||
|
||||
@property
|
||||
def encoding(self):
|
||||
# This is important as it shadow is checked by occupation value
|
||||
return c.CLOSED_DOOR_CELL if self.is_closed else c.OPEN_DOOR_CELL
|
||||
|
||||
@property
|
||||
def str_state(self):
|
||||
return 'open' if self.is_open else 'closed'
|
||||
|
||||
@property
|
||||
def access_area(self):
|
||||
return [node for node in self.connectivity.nodes
|
||||
if node not in range(len(self.connectivity_subgroups)) and node != self.pos]
|
||||
|
||||
def __init__(self, *args, context, closed_on_init=True, auto_close_interval=10, has_area=False, **kwargs):
|
||||
super(Door, self).__init__(*args, **kwargs)
|
||||
self._state = c.CLOSED_DOOR
|
||||
self.has_area = has_area
|
||||
self.auto_close_interval = auto_close_interval
|
||||
self.time_to_close = -1
|
||||
neighbor_pos = list(itertools.product([-1, 1, 0], repeat=2))[:-1]
|
||||
neighbor_tiles = [context.by_pos(tuple([sum(x) for x in zip(self.pos, diff)])) for diff in neighbor_pos]
|
||||
neighbor_pos = [x.pos for x in neighbor_tiles if x]
|
||||
self.connectivity = h.points_to_graph(neighbor_pos)
|
||||
self.connectivity_subgroups = list(nx.algorithms.components.connected_components(self.connectivity))
|
||||
for idx, group in enumerate(self.connectivity_subgroups):
|
||||
for tile_pos in group:
|
||||
self.connectivity.add_edge(tile_pos, idx)
|
||||
if not closed_on_init:
|
||||
self._open()
|
||||
|
||||
def summarize_state(self):
|
||||
state_dict = super().summarize_state()
|
||||
state_dict.update(state=str(self.str_state), time_to_close=int(self.time_to_close))
|
||||
return state_dict
|
||||
|
||||
@property
|
||||
def is_closed(self):
|
||||
return self._state == c.CLOSED_DOOR
|
||||
|
||||
@property
|
||||
def is_open(self):
|
||||
return self._state == c.OPEN_DOOR
|
||||
|
||||
@property
|
||||
def status(self):
|
||||
return self._state
|
||||
|
||||
def use(self):
|
||||
if self._state == c.OPEN_DOOR:
|
||||
self._close()
|
||||
else:
|
||||
self._open()
|
||||
|
||||
def tick(self):
|
||||
if self.is_open and len(self.tile) == 1 and self.time_to_close:
|
||||
self.time_to_close -= 1
|
||||
elif self.is_open and not self.time_to_close and len(self.tile) == 1:
|
||||
self.use()
|
||||
|
||||
def _open(self):
|
||||
self.connectivity.add_edges_from([(self.pos, x) for x in range(len(self.connectivity_subgroups))])
|
||||
self._state = c.OPEN_DOOR
|
||||
self._collection.notify_change_to_value(self)
|
||||
self.time_to_close = self.auto_close_interval
|
||||
|
||||
def _close(self):
|
||||
self.connectivity.remove_node(self.pos)
|
||||
self._state = c.CLOSED_DOOR
|
||||
self._collection.notify_change_to_value(self)
|
||||
|
||||
def is_linked(self, old_pos, new_pos):
|
||||
try:
|
||||
_ = nx.shortest_path(self.connectivity, old_pos, new_pos)
|
||||
return True
|
||||
except nx.exception.NetworkXNoPath:
|
||||
return False
|
||||
|
||||
|
||||
class Agent(MoveableEntity):
|
||||
|
||||
@property
|
||||
|
@ -6,7 +6,7 @@ from typing import List, Union, Dict, Tuple
|
||||
import numpy as np
|
||||
import six
|
||||
|
||||
from environments.factory.base.objects import Entity, Floor, Agent, Door, Action, Wall, PlaceHolder, GlobalPosition, \
|
||||
from environments.factory.base.objects import Entity, Floor, Agent, Action, Wall, PlaceHolder, GlobalPosition, \
|
||||
Object, EnvObject
|
||||
from environments.utility_classes import MovementProperties
|
||||
from environments import helpers as h
|
||||
@ -452,38 +452,6 @@ class Agents(MovingEntityObjectCollection):
|
||||
self._collection[agent.name] = agent
|
||||
|
||||
|
||||
class Doors(EntityCollection):
|
||||
|
||||
def __init__(self, *args, have_area: bool = False, **kwargs):
|
||||
self.have_area = have_area
|
||||
self._area_marked = False
|
||||
super(Doors, self).__init__(*args, is_blocking_light=True, can_collide=True, **kwargs)
|
||||
|
||||
_accepted_objects = Door
|
||||
|
||||
def get_near_position(self, position: (int, int)) -> Union[None, Door]:
|
||||
try:
|
||||
return next(door for door in self if position in door.access_area)
|
||||
except StopIteration:
|
||||
return None
|
||||
|
||||
def tick_doors(self):
|
||||
for door in self:
|
||||
door.tick()
|
||||
|
||||
def as_array(self):
|
||||
if self.have_area and not self._area_marked:
|
||||
for door in self:
|
||||
for pos in door.access_area:
|
||||
if self._individual_slices:
|
||||
pass
|
||||
else:
|
||||
pos = (0, *pos)
|
||||
self._lazy_eval_transforms.append((pos, c.ACCESS_DOOR_CELL))
|
||||
self._area_marked = True
|
||||
return super(Doors, self).as_array()
|
||||
|
||||
|
||||
class Actions(ObjectCollection):
|
||||
_accepted_objects = Action
|
||||
|
||||
@ -492,11 +460,10 @@ class Actions(ObjectCollection):
|
||||
return self._movement_actions
|
||||
|
||||
# noinspection PyTypeChecker
|
||||
def __init__(self, movement_properties: MovementProperties, can_use_doors=False):
|
||||
def __init__(self, movement_properties: MovementProperties):
|
||||
self.allow_no_op = movement_properties.allow_no_op
|
||||
self.allow_diagonal_movement = movement_properties.allow_diagonal_movement
|
||||
self.allow_square_movement = movement_properties.allow_square_movement
|
||||
self.can_use_doors = can_use_doors
|
||||
super(Actions, self).__init__()
|
||||
|
||||
# Move this to Baseclass, Env init?
|
||||
@ -507,8 +474,6 @@ class Actions(ObjectCollection):
|
||||
self.add_additional_items([self._accepted_objects(str_ident=direction)
|
||||
for direction in h.EnvActions.diagonal_move()])
|
||||
self._movement_actions = self._collection.copy()
|
||||
if self.can_use_doors:
|
||||
self.add_additional_items([self._accepted_objects(str_ident=h.EnvActions.USE_DOOR)])
|
||||
if self.allow_no_op:
|
||||
self.add_additional_items([self._accepted_objects(str_ident=h.EnvActions.NOOP)])
|
||||
|
||||
|
@ -33,6 +33,10 @@ IGNORED_DF_COLUMNS = ['Episode', 'Run', # For plotting, which values ar
|
||||
'train_step', 'step', 'index', 'dirt_amount', 'dirty_tile_count', 'terminal_observation',
|
||||
'episode']
|
||||
|
||||
POS_MASK = np.asarray([[[-1, -1], [0, -1], [1, -1]],
|
||||
[[-1, 0], [0, 0], [1, 0]],
|
||||
[[-1, 1], [0, 1], [1, 1]]])
|
||||
|
||||
|
||||
class Constants:
|
||||
|
||||
@ -42,12 +46,10 @@ class Constants:
|
||||
"""
|
||||
|
||||
WALL = '#' # Wall tile identifier for resolving the string based map files.
|
||||
DOOR = 'D' # Door identifier for resolving the string based map files.
|
||||
DANGER_ZONE = 'x' # Danger Zone tile identifier for resolving the string based map files.
|
||||
|
||||
WALLS = 'Walls' # Identifier of Wall-objects and sets (collections).
|
||||
FLOOR = 'Floor' # Identifier of Floor-objects and sets (collections).
|
||||
DOORS = 'Doors' # Identifier of Door-objects and sets (collections).
|
||||
LEVEL = 'Level' # Identifier of Level-objects and sets (collections).
|
||||
AGENT = 'Agent' # Identifier of Agent-objects and sets (collections).
|
||||
AGENT_PLACEHOLDER = 'AGENT_PLACEHOLDER' # Identifier of Placeholder-objects and sets (collections).
|
||||
@ -56,16 +58,9 @@ class Constants:
|
||||
FREE_CELL = 0 # Free-Cell value used in observation
|
||||
OCCUPIED_CELL = 1 # Occupied-Cell value used in observation
|
||||
SHADOWED_CELL = -1 # Shadowed-Cell value used in observation
|
||||
ACCESS_DOOR_CELL = 1/3 # Access-door-Cell value used in observation
|
||||
OPEN_DOOR_CELL = 2/3 # Open-door-Cell value used in observation
|
||||
CLOSED_DOOR_CELL = 3/3 # Closed-door-Cell value used in observation
|
||||
|
||||
NO_POS = (-9999, -9999) # Invalid Position value used in the environment (something is off-grid)
|
||||
|
||||
CLOSED_DOOR = 'closed' # Identifier to compare door-is-closed state
|
||||
OPEN_DOOR = 'open' # Identifier to compare door-is-open state
|
||||
# ACCESS_DOOR = 'access' # Identifier to compare access positions
|
||||
|
||||
ACTION = 'action' # Identifier of Action-objects and sets (collections).
|
||||
COLLISION = 'collision' # Identifier to use in the context of collisions.
|
||||
VALID = True # Identifier to rename boolean values in the context of actions.
|
||||
@ -90,7 +85,6 @@ class EnvActions:
|
||||
# Other
|
||||
# MOVE = 'move'
|
||||
NOOP = 'no_op'
|
||||
USE_DOOR = 'use_door'
|
||||
|
||||
_ACTIONMAP = defaultdict(lambda: (0, 0),
|
||||
{NORTH: (-1, 0), NORTHEAST: (-1, 1),
|
||||
@ -100,6 +94,8 @@ class EnvActions:
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
|
||||
@classmethod
|
||||
def is_move(cls, action):
|
||||
"""
|
||||
@ -166,8 +162,6 @@ class RewardsBase(NamedTuple):
|
||||
MOVEMENTS_VALID: float = -0.001
|
||||
MOVEMENTS_FAIL: float = -0.05
|
||||
NOOP: float = -0.01
|
||||
USE_DOOR_VALID: float = -0.00
|
||||
USE_DOOR_FAIL: float = -0.01
|
||||
COLLISION: float = -0.5
|
||||
|
||||
|
||||
|
@ -68,7 +68,7 @@ if __name__ == '__main__':
|
||||
omit_agent_self=True, # This is default
|
||||
additional_agent_placeholder=None, # We will not take care of future agents
|
||||
frames_to_stack=3, # To give the agent a notion of time
|
||||
pomdp_r=2 # the agents view-radius
|
||||
pomdp_r=2 # the agents' view-radius
|
||||
)
|
||||
# 'MovementProperties' are for specifying how the agent is allowed to move in the env.
|
||||
move_props = MovementProperties(allow_diagonal_movement=True, # Euclidean style (vertices)
|
||||
@ -166,7 +166,7 @@ if __name__ == '__main__':
|
||||
|
||||
# retrieve model class
|
||||
model_cls = next(val for key, val in h.MODEL_MAP.items() if key in policy_path.parent.name)
|
||||
# Load the agent agent
|
||||
# Load the agent
|
||||
model = model_cls.load(policy_path / 'model.zip', device='cpu')
|
||||
# Load old env kwargs
|
||||
with next(policy_path.glob(env_params_json)).open('r') as f:
|
||||
|
Reference in New Issue
Block a user