Doors are now separate

This commit is contained in:
Steffen Illium
2023-05-08 10:26:05 +02:00
parent 6c2df735d4
commit a08ae73656
17 changed files with 725 additions and 262 deletions

View File

@ -0,0 +1,38 @@
from typing import Union
from environments.factory.additional.doors.doors_entities import Door
from environments.factory.base.registers import EntityCollection
from environments.factory.additional.doors.doors_util import Constants as c
class Doors(EntityCollection):
    """Collection holding every `Door` entity of a level.

    Besides the usual collection behaviour it can, once, queue observation
    transforms that mark the floor tiles around each door.
    """

    # Entity type stored in this collection.
    _accepted_objects = Door

    def __init__(self, *args, indicate_area=False, **kwargs):
        """Create the collection.

        :param indicate_area: If true, `as_array` marks the floor tiles that
                              neighbour a door with `ACCESS_DOOR_CELL`.
        :param args: Forwarded to `EntityCollection`.
        :param kwargs: Forwarded to `EntityCollection`; `is_blocking_light`
                       and `can_collide` are always passed as True.
        """
        self.indicate_area = indicate_area
        # Flipped to True after the area transforms were queued once.
        self._area_marked = False
        super().__init__(*args, is_blocking_light=True, can_collide=True, **kwargs)

    def get_near_position(self, position: (int, int)) -> Union[None, Door]:
        """Return the first door whose tile lists `position` as a neighbouring floor position.

        :param position: Position to look up.
        :return: The matching door, or None when no door is adjacent.
        """
        adjacent_doors = (door for door in self if position in door.tile.neighboring_floor_pos)
        return next(adjacent_doors, None)

    def tick_doors(self):
        """Advance every door by one step (see `Door.tick`)."""
        for door in self:
            door.tick()

    def as_array(self):
        """Return the array representation built by the parent collection.

        On the first call with `indicate_area` enabled, one transform per
        floor tile neighbouring a door is queued beforehand.
        """
        marking_pending = self.indicate_area and not self._area_marked
        if marking_pending:
            for door in self:
                for tile in door.tile.neighboring_floor:
                    # NOTE(review): indentation is not recoverable from this view; the
                    # transform is queued only in the branch that actually defines the
                    # position (no individual slices) - confirm against the repository.
                    if not self._individual_slices:
                        slice_pos = (0, *tile.pos)
                        self._lazy_eval_transforms.append((slice_pos, c.ACCESS_DOOR_CELL))
            self._area_marked = True
        return super().as_array()

View File

@ -0,0 +1,71 @@
from environments.factory.base.objects import Entity
from environments.factory.additional.doors.doors_util import Constants as c
class Template(Entity):
    """Template for new Entity"""

    def __init__(self, *args, closed_on_init=True, auto_close_interval=10, indicate_area=False, **kwargs):
        """Create the entity in closed state and optionally open it right away.

        :param closed_on_init: When false, the entity is opened at the end of construction.
        :param auto_close_interval: Value `time_to_close` is reset to whenever the entity opens.
        :param indicate_area: Stored on the instance; not read inside this class.
        :param args: Forwarded to `Entity`.
        :param kwargs: Forwarded to `Entity`.
        """
        super().__init__(*args, **kwargs)
        self._state = c.CLOSED_DOOR
        self.indicate_area = indicate_area
        self.auto_close_interval = auto_close_interval
        self.time_to_close = -1
        if not closed_on_init:
            self._open()

    # How to define / override properties
    @property
    def is_blocking(self):
        """This template entity never blocks."""
        return False

    @property
    def can_collide(self):
        """Collidable unless `template_attr` is truthy."""
        # NOTE(review): `template_attr` is never assigned in this class - it must come
        # from elsewhere, otherwise this property raises AttributeError.
        return not self.template_attr

    @property
    def encoding(self):
        """Observation value of the entity, depending on its open/closed state."""
        # Important: shadowing is checked via the occupation value.
        if self.is_closed:
            return c.CLOSED_DOOR_CELL
        return c.OPEN_DOOR_CELL

    @property
    def str_state(self):
        """Human-readable state: 'open' or 'closed'."""
        if self.is_open:
            return 'open'
        return 'closed'

    @property
    def is_closed(self):
        """True while the internal state equals `CLOSED_DOOR`."""
        return self._state == c.CLOSED_DOOR

    @property
    def is_open(self):
        """True while the internal state equals `OPEN_DOOR`."""
        return self._state == c.OPEN_DOOR

    @property
    def status(self):
        """Raw internal state identifier."""
        return self._state

    def summarize_state(self):
        """Extend the parent's state summary with `state` and `time_to_close`."""
        summary = super().summarize_state()
        summary.update(state=str(self.str_state), time_to_close=int(self.time_to_close))
        return summary

    def use(self):
        """Toggle the entity: close it when open, open it otherwise."""
        if self._state == c.OPEN_DOOR:
            self._close()
            return
        self._open()

    def tick(self):
        """Count down towards auto-closing, closing once the counter reaches zero.

        Nothing happens unless the entity is open and its tile reports exactly
        one guest (presumably the entity itself - confirm).
        """
        if not self.is_open or len(self.tile) != 1:
            return
        if self.time_to_close:
            self.time_to_close -= 1
        else:
            self.use()

    def _open(self):
        """Switch to open, notify the collection and restart the auto-close counter."""
        self._state = c.OPEN_DOOR
        self._collection.notify_change_to_value(self)
        self.time_to_close = self.auto_close_interval

    def _close(self):
        """Switch to closed and notify the collection."""
        self._state = c.CLOSED_DOOR
        self._collection.notify_change_to_value(self)

View File

@ -0,0 +1,31 @@
from typing import NamedTuple
from environments.helpers import Constants as BaseConstants, EnvActions as BaseActions
class Constants(BaseConstants):
    """Door-related constants, added on top of the shared environment constants."""
    DOOR = 'Door'  # Identifier of single door entities.
    DOORS = 'Doors'  # Identifier of door collections (sets of door objects).
    DOOR_SYMBOL = 'D'  # Door symbol used when resolving the string-based map files.
    ACCESS_DOOR_CELL = 1 / 3  # Observation value of a cell from which a door can be accessed.
    OPEN_DOOR_CELL = 2 / 3  # Observation value of a cell holding an open door.
    CLOSED_DOOR_CELL = 3 / 3  # Observation value of a cell holding a closed door.
    CLOSED_DOOR = 'closed'  # State identifier compared against for "door is closed".
    OPEN_DOOR = 'open'  # State identifier compared against for "door is open".
    # ACCESS_DOOR = 'access' # Identifier to compare access positions
class Actions(BaseActions):
    """Door-related action identifiers, added on top of the shared environment actions."""
    USE_DOOR = 'use_door'  # Identifier of the action that toggles a nearby door.
class RewardsDoor(NamedTuple):
    """Reward values for the door-use action."""
    USE_DOOR_VALID: float = -0.00  # Reward when a door was found and used.
    USE_DOOR_FAIL: float = -0.01  # Reward when the door-use action found no door.
class DoorProperties(NamedTuple):
    """Configuration options for doors."""
    indicate_door_area: bool = True  # Whether the door area should be indicated in the agents' observation.

View File

@ -0,0 +1,196 @@
import time
from typing import List, Union, Dict
import random
import numpy as np
from environments.factory.additional.doors.doors_collections import Doors
from environments.factory.additional.doors.doors_util import DoorProperties, RewardsDoor, Constants, Actions
from environments.factory.base.base_factory import BaseFactory
from environments.factory.base.objects import Agent, Action
from environments.factory.base.registers import Entities
from environments import helpers as h
from environments.factory.base.renderer import RenderEntity
from environments.utility_classes import ObservationProperties
def softmax(x):
    """Return the softmax of the scores in `x`, computed over the whole array.

    The maximum is subtracted before exponentiating for numerical stability.
    """
    shifted = x - np.max(x)
    exponentials = np.exp(shifted)
    return exponentials / exponentials.sum()
def entropy(x):
    """Return the entropy (natural log) of `x`; 1e-8 is added inside the log to avoid log(0)."""
    log_terms = np.log(x + 1e-8)
    return -(x * log_terms).sum()
# Short module-level aliases for the door constants and door actions used throughout this file.
c = Constants
a = Actions
# noinspection PyAttributeOutsideInit, PyAbstractClass
class DoorFactory(BaseFactory):
    """Factory environment that adds doors, a door-use action and door observations."""

    def __init__(self, *args,
                 door_properties: DoorProperties = DoorProperties(), rewards_door: RewardsDoor = RewardsDoor(),
                 env_seed=time.time_ns(), **kwargs):
        """Store the door configuration and initialise the base factory.

        :param door_properties: `DoorProperties` instance or a dict of its fields.
        :param rewards_door: `RewardsDoor` instance or a dict of its fields.
        :param env_seed: Seed for the door RNG; also forwarded to the base factory.
        :param args: Forwarded to `BaseFactory`.
        :param kwargs: Forwarded to `BaseFactory`.
        """
        # NOTE(review): the `env_seed` default is evaluated once when the class is
        # defined, so all instances created without a seed share the same value.
        if isinstance(door_properties, dict):
            door_properties = DoorProperties(**door_properties)
        if isinstance(rewards_door, dict):
            rewards_door = RewardsDoor(**rewards_door)
        self.door_properties = door_properties
        self.rewards_door = rewards_door
        self._door_rng = np.random.default_rng(env_seed)
        self._doors: Doors
        # TODO: Reset ---> document this
        super().__init__(*args, env_seed=env_seed, **kwargs)

    @property
    def actions_hook(self) -> Union[Action, List[Action]]:
        """Actions of the parent plus the door-use action."""
        inherited_actions = super().actions_hook
        inherited_actions.append(Action(str_ident=a.USE_DOOR))
        return inherited_actions

    @property
    def entities_hook(self) -> Dict[(str, Entities)]:
        """Entities of the parent plus a `Doors` collection when the level contains doors."""
        inherited_entities = super().entities_hook
        door_mask = h.one_hot_level(self._parsed_level, c.DOOR_SYMBOL)
        door_mask = np.pad(door_mask, self.obs_prop.pomdp_r, 'constant', constant_values=0)
        if not np.any(door_mask):
            return inherited_entities
        door_tiles = [self[c.FLOOR].by_pos(tuple(pos)) for pos in np.argwhere(door_mask == c.OCCUPIED_CELL)]
        # NOTE(review): the flag is read from `obs_prop`, not from `self.door_properties` - confirm intended.
        door_collection = Doors.from_tiles(door_tiles, self._level_shape,
                                           indicate_area=self.obs_prop.indicate_door_area,
                                           entity_kwargs=dict()
                                           )
        inherited_entities.update({c.DOORS: door_collection})
        return inherited_entities

    def render_assets_hook(self, mode='human'):
        """Render assets of the parent plus one render entity per door."""
        assets = super().render_assets_hook()
        door_assets = [
            RenderEntity('door_open' if door.is_open else 'door_closed', door.pos, 1, 'none', 'blank', idx + 1)
            for idx, door in enumerate(self[c.DOORS])
        ]
        assets.extend(door_assets)
        return assets

    def step_hook(self) -> (List[dict], dict):
        """Run the parent's step hook, then tick all doors."""
        reward_info = super().step_hook()
        # Advance the door auto-close interval.
        # TODO: Maybe move this to self.post_step_hook? May collide with reward calculation.
        doors = self[c.DOORS]
        if doors:
            doors.tick_doors()
        return reward_info

    def do_additional_actions(self, agent: Agent, action: Action) -> (dict, dict):
        """Handle the door-use action unless the parent already handled `action`.

        :return: The parent's result if it is not None, the door-use result for
                 the door-use action, otherwise None.
        """
        inherited_result = super().do_additional_actions(agent, action)
        if inherited_result is not None:
            return inherited_result
        if action == a.USE_DOOR:
            return self.use_door_action(agent)
        return None

    def use_door_action(self, agent: Agent):
        """Toggle the door next to `agent`, if there is one.

        :return: Tuple of validity flag and reward dict (`value`, `reason`, `info`).
        """
        # Look for a door whose neighbouring floor contains the agent's position.
        door = self[c.DOORS].get_near_position(agent.pos)
        if door is None:
            # No door within reach.
            valid = c.NOT_VALID
            info_dict = {f'{agent.name}_failed_door_use': 1, 'failed_door_use': 1}
            self.print(f'{agent.name} just tried to use a door at {agent.pos}, but there is none.')
        else:
            door.use()
            valid = c.VALID
            self.print(f'{agent.name} just used a {door.name} at {door.pos}')
            info_dict = {f'{agent.name}_door_use': 1, 'door_use': 1}

        if valid:
            reward_value = self.rewards_door.USE_DOOR_VALID
        else:
            reward_value = self.rewards_door.USE_DOOR_FAIL
        reward = dict(value=reward_value, reason=a.USE_DOOR, info=info_dict)
        return valid, reward

    def reset_hook(self) -> None:
        """Delegate to the parent; doors need no additional reset."""
        super().reset_hook()

    def check_additional_done(self) -> (bool, dict):
        """Return the parent's done flag and info unchanged."""
        done, done_info = super().check_additional_done()
        return done, done_info

    def observations_hook(self) -> Dict[str, np.typing.ArrayLike]:
        """Observations of the parent plus the door array under the doors key."""
        observations = super().observations_hook()
        observations.update({c.DOORS: self[c.DOORS].as_array()})
        return observations

    def post_step_hook(self) -> List[Dict[str, int]]:
        """Return the parent's post-step info unchanged."""
        post_step_info = super().post_step_hook()
        return post_step_info
if __name__ == '__main__':
    # Manual smoke test / timing run: drives a DoorFactory with random actions.
    from statistics import median

    from environments.utility_classes import AgentRenderOptions as aro

    render = True

    door_props = DoorProperties(
        indicate_door_area=True
    )

    obs_props = ObservationProperties(render_agents=aro.COMBINED, omit_agent_self=True,
                                      pomdp_r=2, additional_agent_placeholder=None, cast_shadows=True
                                      )

    move_props = {'allow_square_movement': True,
                  'allow_diagonal_movement': False,
                  'allow_no_op': False}

    global_timings = []
    for _ in range(10):
        # `door_properties` is the keyword DoorFactory.__init__ accepts; the former
        # `dirt_prop=` / `parse_doors=` keywords only ended up in the ignored kwargs.
        factory = DoorFactory(n_agents=10, done_at_collision=False,
                              level_name='rooms', max_steps=1000,
                              obs_prop=obs_props,
                              verbose=True,
                              mv_prop=move_props, door_properties=door_props,
                              )

        # noinspection DuplicatedCode
        n_actions = factory.action_space.n - 1
        obs_space = factory.observation_space
        obs_space_named = factory.named_observation_space
        action_space_named = factory.named_action_space
        times = []
        for epoch in range(10):
            start_time = time.time()
            # One random action per agent for every possible step of the episode.
            random_actions = [[random.randint(0, n_actions) for _
                               in range(factory.n_agents)] for _
                              in range(factory.max_steps+1)]
            env_state = factory.reset()
            if render:
                factory.render()
            rwrd = 0
            for agent_i_action in random_actions:
                env_state, step_rwrd, done_bool, info_obj = factory.step(agent_i_action)
                rwrd += step_rwrd
                if render:
                    factory.render()
                if done_bool:
                    break
            times.append(time.time() - start_time)
        print('Mean Time Taken: ', sum(times) / len(times))
        global_timings.extend(times)
    print('Mean Time Taken: ', sum(global_timings) / len(global_timings))
    # The timings are in run order, so a real median has to be computed, not indexed.
    print('Median Time Taken: ', median(global_timings))

View File

@ -7,9 +7,17 @@ from environments.factory.additional.btry.factory_battery import BatteryFactory
from environments.factory.additional.dest.factory_dest import DestFactory
from environments.factory.additional.dirt.dirt_util import DirtProperties
from environments.factory.additional.dirt.factory_dirt import DirtFactory
from environments.factory.additional.doors.factory_doors import DoorFactory
from environments.factory.additional.item.factory_item import ItemFactory
# noinspection PyAbstractClass
class DoorDirtFactory(DoorFactory, DirtFactory):
    """Environment combining the door and dirt factories through multiple inheritance."""

    def __init__(self, *args, **kwargs):
        # All arguments are handed on unchanged along the MRO.
        super().__init__(*args, **kwargs)
# noinspection PyAbstractClass
class DirtItemFactory(ItemFactory, DirtFactory):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
@ -38,8 +46,6 @@ if __name__ == '__main__':
render = True
dirt_props = DirtProperties(1, 0.05, 0.1, 3, 1, 20, 0)
obs_props = ObservationProperties(render_agents=ARO.COMBINED, omit_agent_self=True,
pomdp_r=2, additional_agent_placeholder=None)
@ -47,13 +53,13 @@ if __name__ == '__main__':
'allow_diagonal_movement': False,
'allow_no_op': False}
factory = DirtBatteryFactory(n_agents=5, done_at_collision=False,
factory = DoorDirtFactory(n_agents=10, done_at_collision=False,
level_name='rooms', max_steps=400,
obs_prop=obs_props, parse_doors=True,
record_episodes=True, verbose=True,
btry_prop=BatteryProperties(),
mv_prop=move_props, dirt_prop=dirt_props
)
dirt_prop=DirtProperties(),
mv_prop=move_props)
# noinspection DuplicatedCode
n_actions = factory.action_space.n - 1

View File

@ -44,7 +44,7 @@ class DirtFactory(BaseFactory):
def entities_hook(self) -> Dict[(str, Entities)]:
super_entities = super().entities_hook
dirt_register = DirtPiles(self.dirt_prop, self._level_shape)
super_entities.update(({c.DIRT: dirt_register}))
super_entities.update({c.DIRT: dirt_register})
return super_entities
def __init__(self, *args,

View File

@ -0,0 +1,38 @@
from typing import Union
from environments.factory.additional.doors.doors_entities import Door
from environments.factory.base.registers import EntityCollection
from environments.factory.additional.doors.doors_util import Constants as c
class Doors(EntityCollection):
    """Collection holding every `Door` entity of a level.

    Besides the usual collection behaviour it can, once, queue observation
    transforms that mark the floor tiles around each door.
    """

    # Entity type stored in this collection.
    _accepted_objects = Door

    def __init__(self, *args, indicate_area=False, **kwargs):
        """Create the collection.

        :param indicate_area: If true, `as_array` marks the floor tiles that
                              neighbour a door with `ACCESS_DOOR_CELL`.
        :param args: Forwarded to `EntityCollection`.
        :param kwargs: Forwarded to `EntityCollection`; `is_blocking_light`
                       and `can_collide` are always passed as True.
        """
        self.indicate_area = indicate_area
        # Flipped to True after the area transforms were queued once.
        self._area_marked = False
        super().__init__(*args, is_blocking_light=True, can_collide=True, **kwargs)

    def get_near_position(self, position: (int, int)) -> Union[None, Door]:
        """Return the first door whose tile lists `position` as a neighbouring floor position.

        :param position: Position to look up.
        :return: The matching door, or None when no door is adjacent.
        """
        adjacent_doors = (door for door in self if position in door.tile.neighboring_floor_pos)
        return next(adjacent_doors, None)

    def tick_doors(self):
        """Advance every door by one step (see `Door.tick`)."""
        for door in self:
            door.tick()

    def as_array(self):
        """Return the array representation built by the parent collection.

        On the first call with `indicate_area` enabled, one transform per
        floor tile neighbouring a door is queued beforehand.
        """
        marking_pending = self.indicate_area and not self._area_marked
        if marking_pending:
            for door in self:
                for tile in door.tile.neighboring_floor:
                    # NOTE(review): indentation is not recoverable from this view; the
                    # transform is queued only in the branch that actually defines the
                    # position (no individual slices) - confirm against the repository.
                    if not self._individual_slices:
                        slice_pos = (0, *tile.pos)
                        self._lazy_eval_transforms.append((slice_pos, c.ACCESS_DOOR_CELL))
            self._area_marked = True
        return super().as_array()

View File

@ -0,0 +1,69 @@
from environments.factory.base.objects import Entity
from environments.factory.additional.doors.doors_util import Constants as c
class Door(Entity):
    """Door entity that can be opened and closed and closes itself after an interval."""

    def __init__(self, *args, closed_on_init=True, auto_close_interval=10, indicate_area=False, **kwargs):
        """Create the door in closed state and optionally open it right away.

        :param closed_on_init: When false, the door is opened at the end of construction.
        :param auto_close_interval: Value `time_to_close` is reset to whenever the door opens.
        :param indicate_area: Stored on the instance; not read inside this class.
        :param args: Forwarded to `Entity`.
        :param kwargs: Forwarded to `Entity`.
        """
        super().__init__(*args, **kwargs)
        self._state = c.CLOSED_DOOR
        self.indicate_area = indicate_area
        self.auto_close_interval = auto_close_interval
        self.time_to_close = -1
        if not closed_on_init:
            self._open()

    @property
    def is_blocking(self):
        """A door blocks exactly while it is not open."""
        return not self.is_open

    @property
    def can_collide(self):
        """A door can be collided with exactly while it is not open."""
        return not self.is_open

    @property
    def encoding(self):
        """Observation value of the door, depending on its open/closed state."""
        # Important: shadowing is checked via the occupation value.
        if self.is_closed:
            return c.CLOSED_DOOR_CELL
        return c.OPEN_DOOR_CELL

    @property
    def str_state(self):
        """Human-readable state: 'open' or 'closed'."""
        if self.is_open:
            return 'open'
        return 'closed'

    @property
    def is_closed(self):
        """True while the internal state equals `CLOSED_DOOR`."""
        return self._state == c.CLOSED_DOOR

    @property
    def is_open(self):
        """True while the internal state equals `OPEN_DOOR`."""
        return self._state == c.OPEN_DOOR

    @property
    def status(self):
        """Raw internal state identifier."""
        return self._state

    def summarize_state(self):
        """Extend the parent's state summary with `state` and `time_to_close`."""
        summary = super().summarize_state()
        summary.update(state=str(self.str_state), time_to_close=int(self.time_to_close))
        return summary

    def use(self):
        """Toggle the door: close it when open, open it otherwise."""
        if self._state == c.OPEN_DOOR:
            self._close()
            return
        self._open()

    def tick(self):
        """Count down towards auto-closing, closing once the counter reaches zero.

        Nothing happens unless the door is open and its tile reports exactly
        one guest (presumably the door itself - confirm).
        """
        if not self.is_open or len(self.tile) != 1:
            return
        if self.time_to_close:
            self.time_to_close -= 1
        else:
            self.use()

    def _open(self):
        """Switch to open, notify the collection and restart the auto-close counter."""
        self._state = c.OPEN_DOOR
        self._collection.notify_change_to_value(self)
        self.time_to_close = self.auto_close_interval

    def _close(self):
        """Switch to closed and notify the collection."""
        self._state = c.CLOSED_DOOR
        self._collection.notify_change_to_value(self)

View File

@ -0,0 +1,31 @@
from typing import NamedTuple
from environments.helpers import Constants as BaseConstants, EnvActions as BaseActions
class Constants(BaseConstants):
    """Door-related constants, added on top of the shared environment constants."""
    DOOR = 'Door'  # Identifier of single door entities.
    DOORS = 'Doors'  # Identifier of door collections (sets of door objects).
    DOOR_SYMBOL = 'D'  # Door symbol used when resolving the string-based map files.
    ACCESS_DOOR_CELL = 1 / 3  # Observation value of a cell from which a door can be accessed.
    OPEN_DOOR_CELL = 2 / 3  # Observation value of a cell holding an open door.
    CLOSED_DOOR_CELL = 3 / 3  # Observation value of a cell holding a closed door.
    CLOSED_DOOR = 'closed'  # State identifier compared against for "door is closed".
    OPEN_DOOR = 'open'  # State identifier compared against for "door is open".
    # ACCESS_DOOR = 'access' # Identifier to compare access positions
class Actions(BaseActions):
    """Door-related action identifiers, added on top of the shared environment actions."""
    USE_DOOR = 'use_door'  # Identifier of the action that toggles a nearby door.
class RewardsDoor(NamedTuple):
    """Reward values for the door-use action."""
    USE_DOOR_VALID: float = -0.00  # Reward when a door was found and used.
    USE_DOOR_FAIL: float = -0.01  # Reward when the door-use action found no door.
class DoorProperties(NamedTuple):
    """Configuration options for doors."""
    indicate_door_area: bool = True  # Whether the door area should be indicated in the agents' observation.

View File

@ -0,0 +1,196 @@
import time
from typing import List, Union, Dict
import random
import numpy as np
from environments.factory.additional.doors.doors_collections import Doors
from environments.factory.additional.doors.doors_util import DoorProperties, RewardsDoor, Constants, Actions
from environments.factory.base.base_factory import BaseFactory
from environments.factory.base.objects import Agent, Action
from environments.factory.base.registers import Entities
from environments import helpers as h
from environments.factory.base.renderer import RenderEntity
from environments.utility_classes import ObservationProperties
def softmax(x):
    """Return the softmax of the scores in `x`, computed over the whole array.

    The maximum is subtracted before exponentiating for numerical stability.
    """
    shifted = x - np.max(x)
    exponentials = np.exp(shifted)
    return exponentials / exponentials.sum()
def entropy(x):
    """Return the entropy (natural log) of `x`; 1e-8 is added inside the log to avoid log(0)."""
    log_terms = np.log(x + 1e-8)
    return -(x * log_terms).sum()
# Short module-level aliases for the door constants and door actions used throughout this file.
c = Constants
a = Actions
# noinspection PyAttributeOutsideInit, PyAbstractClass
class DoorFactory(BaseFactory):
    """Factory environment that adds doors, a door-use action and door observations."""

    def __init__(self, *args,
                 door_properties: DoorProperties = DoorProperties(), rewards_door: RewardsDoor = RewardsDoor(),
                 env_seed=time.time_ns(), **kwargs):
        """Store the door configuration and initialise the base factory.

        :param door_properties: `DoorProperties` instance or a dict of its fields.
        :param rewards_door: `RewardsDoor` instance or a dict of its fields.
        :param env_seed: Seed for the door RNG; also forwarded to the base factory.
        :param args: Forwarded to `BaseFactory`.
        :param kwargs: Forwarded to `BaseFactory`.
        """
        # NOTE(review): the `env_seed` default is evaluated once when the class is
        # defined, so all instances created without a seed share the same value.
        if isinstance(door_properties, dict):
            door_properties = DoorProperties(**door_properties)
        if isinstance(rewards_door, dict):
            rewards_door = RewardsDoor(**rewards_door)
        self.door_properties = door_properties
        self.rewards_door = rewards_door
        self._door_rng = np.random.default_rng(env_seed)
        self._doors: Doors
        # TODO: Reset ---> document this
        super().__init__(*args, env_seed=env_seed, **kwargs)

    @property
    def actions_hook(self) -> Union[Action, List[Action]]:
        """Actions of the parent plus the door-use action."""
        inherited_actions = super().actions_hook
        inherited_actions.append(Action(str_ident=a.USE_DOOR))
        return inherited_actions

    @property
    def entities_hook(self) -> Dict[(str, Entities)]:
        """Entities of the parent plus a `Doors` collection when the level contains doors."""
        inherited_entities = super().entities_hook
        door_mask = h.one_hot_level(self._parsed_level, c.DOOR_SYMBOL)
        door_mask = np.pad(door_mask, self.obs_prop.pomdp_r, 'constant', constant_values=0)
        if not np.any(door_mask):
            return inherited_entities
        door_tiles = [self[c.FLOOR].by_pos(tuple(pos)) for pos in np.argwhere(door_mask == c.OCCUPIED_CELL)]
        # NOTE(review): the flag is read from `obs_prop`, not from `self.door_properties` - confirm intended.
        door_collection = Doors.from_tiles(door_tiles, self._level_shape,
                                           indicate_area=self.obs_prop.indicate_door_area,
                                           entity_kwargs=dict()
                                           )
        inherited_entities.update({c.DOORS: door_collection})
        return inherited_entities

    def render_assets_hook(self, mode='human'):
        """Render assets of the parent plus one render entity per door."""
        assets = super().render_assets_hook()
        door_assets = [
            RenderEntity('door_open' if door.is_open else 'door_closed', door.pos, 1, 'none', 'blank', idx + 1)
            for idx, door in enumerate(self[c.DOORS])
        ]
        assets.extend(door_assets)
        return assets

    def step_hook(self) -> (List[dict], dict):
        """Run the parent's step hook, then tick all doors."""
        reward_info = super().step_hook()
        # Advance the door auto-close interval.
        # TODO: Maybe move this to self.post_step_hook? May collide with reward calculation.
        doors = self[c.DOORS]
        if doors:
            doors.tick_doors()
        return reward_info

    def do_additional_actions(self, agent: Agent, action: Action) -> (dict, dict):
        """Handle the door-use action unless the parent already handled `action`.

        :return: The parent's result if it is not None, the door-use result for
                 the door-use action, otherwise None.
        """
        inherited_result = super().do_additional_actions(agent, action)
        if inherited_result is not None:
            return inherited_result
        if action == a.USE_DOOR:
            return self.use_door_action(agent)
        return None

    def use_door_action(self, agent: Agent):
        """Toggle the door next to `agent`, if there is one.

        :return: Tuple of validity flag and reward dict (`value`, `reason`, `info`).
        """
        # Look for a door whose neighbouring floor contains the agent's position.
        door = self[c.DOORS].get_near_position(agent.pos)
        if door is None:
            # No door within reach.
            valid = c.NOT_VALID
            info_dict = {f'{agent.name}_failed_door_use': 1, 'failed_door_use': 1}
            self.print(f'{agent.name} just tried to use a door at {agent.pos}, but there is none.')
        else:
            door.use()
            valid = c.VALID
            self.print(f'{agent.name} just used a {door.name} at {door.pos}')
            info_dict = {f'{agent.name}_door_use': 1, 'door_use': 1}

        if valid:
            reward_value = self.rewards_door.USE_DOOR_VALID
        else:
            reward_value = self.rewards_door.USE_DOOR_FAIL
        reward = dict(value=reward_value, reason=a.USE_DOOR, info=info_dict)
        return valid, reward

    def reset_hook(self) -> None:
        """Delegate to the parent; doors need no additional reset."""
        super().reset_hook()

    def check_additional_done(self) -> (bool, dict):
        """Return the parent's done flag and info unchanged."""
        done, done_info = super().check_additional_done()
        return done, done_info

    def observations_hook(self) -> Dict[str, np.typing.ArrayLike]:
        """Observations of the parent plus the door array under the doors key."""
        observations = super().observations_hook()
        observations.update({c.DOORS: self[c.DOORS].as_array()})
        return observations

    def post_step_hook(self) -> List[Dict[str, int]]:
        """Return the parent's post-step info unchanged."""
        post_step_info = super().post_step_hook()
        return post_step_info
if __name__ == '__main__':
    # Manual smoke test / timing run: drives a DoorFactory with random actions.
    from statistics import median

    from environments.utility_classes import AgentRenderOptions as aro

    render = True

    door_props = DoorProperties(
        indicate_door_area=True
    )

    obs_props = ObservationProperties(render_agents=aro.COMBINED, omit_agent_self=True,
                                      pomdp_r=2, additional_agent_placeholder=None, cast_shadows=True
                                      )

    move_props = {'allow_square_movement': True,
                  'allow_diagonal_movement': False,
                  'allow_no_op': False}

    global_timings = []
    for _ in range(10):
        # `door_properties` is the keyword DoorFactory.__init__ accepts; the former
        # `dirt_prop=` / `parse_doors=` keywords only ended up in the ignored kwargs.
        factory = DoorFactory(n_agents=10, done_at_collision=False,
                              level_name='rooms', max_steps=1000,
                              obs_prop=obs_props,
                              verbose=True,
                              mv_prop=move_props, door_properties=door_props,
                              )

        # noinspection DuplicatedCode
        n_actions = factory.action_space.n - 1
        obs_space = factory.observation_space
        obs_space_named = factory.named_observation_space
        action_space_named = factory.named_action_space
        times = []
        for epoch in range(10):
            start_time = time.time()
            # One random action per agent for every possible step of the episode.
            random_actions = [[random.randint(0, n_actions) for _
                               in range(factory.n_agents)] for _
                              in range(factory.max_steps+1)]
            env_state = factory.reset()
            if render:
                factory.render()
            rwrd = 0
            for agent_i_action in random_actions:
                env_state, step_rwrd, done_bool, info_obj = factory.step(agent_i_action)
                rwrd += step_rwrd
                if render:
                    factory.render()
                if done_bool:
                    break
            times.append(time.time() - start_time)
        print('Mean Time Taken: ', sum(times) / len(times))
        global_timings.extend(times)
    print('Mean Time Taken: ', sum(global_timings) / len(global_timings))
    # The timings are in run order, so a real median has to be computed, not indexed.
    print('Median Time Taken: ', median(global_timings))

View File

@ -16,7 +16,7 @@ from environments.helpers import Constants as c
from environments.helpers import EnvActions as a
from environments.helpers import RewardsBase
from environments.factory.base.objects import Agent, Floor, Action
from environments.factory.base.registers import Actions, Entities, Agents, Doors, Floors, Walls, PlaceHolders, \
from environments.factory.base.registers import Actions, Entities, Agents, Floors, Walls, PlaceHolders, \
GlobalPositions
from environments.utility_classes import MovementProperties, ObservationProperties, MarlFrameStack
from environments.utility_classes import AgentRenderOptions as a_obs
@ -88,8 +88,8 @@ class BaseFactory(gym.Env):
mv_prop: MovementProperties = MovementProperties(),
obs_prop: ObservationProperties = ObservationProperties(),
rewards_base: RewardsBase = RewardsBase(),
parse_doors=False, done_at_collision=False, inject_agents: Union[None, List] = None,
verbose=False, doors_have_area=True, env_seed=time.time_ns(), individual_rewards=False,
done_at_collision=False, inject_agents: Union[None, List] = None,
verbose=False, env_seed=time.time_ns(), individual_rewards=False,
class_name='', **kwargs):
if class_name:
@ -105,8 +105,6 @@ class BaseFactory(gym.Env):
assert obs_prop.frames_to_stack != 1 and \
obs_prop.frames_to_stack >= 0, \
"'frames_to_stack' cannot be negative or 1."
assert doors_have_area or not obs_prop.indicate_door_area, \
'"indicate_door_area" can only active, when "doors_have_area"'
if kwargs:
print(f'Following kwargs were passed, but ignored: {kwargs}')
@ -133,9 +131,7 @@ class BaseFactory(gym.Env):
self.done_at_collision = done_at_collision
self._record_episodes = False
self.parse_doors = parse_doors
self._injected_agents = inject_agents or []
self.doors_have_area = doors_have_area
self.individual_rewards = individual_rewards
# TODO: Reset ---> document this
@ -174,20 +170,9 @@ class BaseFactory(gym.Env):
# NOPOS
self._NO_POS_TILE = Floor(c.NO_POS, None)
# Doors
if self.parse_doors:
parsed_doors = h.one_hot_level(self._parsed_level, c.DOOR)
parsed_doors = np.pad(parsed_doors, self.obs_prop.pomdp_r, 'constant', constant_values=0)
if np.any(parsed_doors):
door_tiles = [floor.by_pos(tuple(pos)) for pos in np.argwhere(parsed_doors == c.OCCUPIED_CELL)]
doors = Doors.from_tiles(door_tiles, self._level_shape, have_area=self.obs_prop.indicate_door_area,
entity_kwargs=dict(context=floor)
)
self._entities.add_additional_items({c.DOORS: doors})
# Actions
# TODO: Move this to Agent init, so that agents can have individual action sets.
self._actions = Actions(self.mv_prop, can_use_doors=self.parse_doors)
self._actions = Actions(self.mv_prop)
if additional_actions := self.actions_hook:
self._actions.add_additional_items(additional_actions)
@ -263,8 +248,6 @@ class BaseFactory(gym.Env):
elif a.NOOP == action_obj:
action_valid = c.VALID
reward = dict(value=self.rewards_base.NOOP, reason=a.NOOP, info={f'{agent.name}_NOOP': 1, 'NOOP': 1})
elif a.USE_DOOR == action_obj:
action_valid, reward = self._handle_door_interaction(agent)
else:
# noinspection PyTupleAssignmentBalance
action_valid, reward = self.do_additional_actions(agent, action_obj)
@ -282,12 +265,9 @@ class BaseFactory(gym.Env):
for tile in tiles_with_collisions:
guests = tile.guests_that_can_collide
for i, guest in enumerate(guests):
# This does make a copy, but is faster than.copy()
this_collisions = guests[:]
del this_collisions[i]
assert hasattr(guest, 'step_result')
for collision in this_collisions:
guest.step_result['collisions'].append(collision)
for j, collision in enumerate(guests):
if j != i and hasattr(guest, 'step_result'):
guest.step_result['collisions'].append(collision)
done = False
if self.done_at_collision:
@ -299,11 +279,6 @@ class BaseFactory(gym.Env):
done = done or additional_done
info.update(additional_done_info)
# Step the door close intervall
if self.parse_doors:
if doors := self[c.DOORS]:
doors.tick_doors()
# Finalize
reward, reward_info = self.build_reward_result(rewards)
@ -319,41 +294,14 @@ class BaseFactory(gym.Env):
info.update(post_step_info)
obs, _ = self._build_observations()
return obs, reward, done, info
def _handle_door_interaction(self, agent) -> (bool, dict):
if doors := self[c.DOORS]:
# Check if agent really is standing on a door:
if self.doors_have_area:
door = doors.get_near_position(agent.pos)
else:
door = doors.by_pos(agent.pos)
if door is not None:
door.use()
valid = c.VALID
self.print(f'{agent.name} just used a {door.name} at {door.pos}')
info_dict = {f'{agent.name}_door_use': 1, f'door_use': 1}
# When he doesn't...
else:
valid = c.NOT_VALID
info_dict = {f'{agent.name}_failed_door_use': 1, 'failed_door_use': 1}
self.print(f'{agent.name} just tried to use a door at {agent.pos}, but there is none.')
else:
raise RuntimeError('This should not happen, since the door action should not be available.')
reward = dict(value=self.rewards_base.USE_DOOR_VALID if valid else self.rewards_base.USE_DOOR_FAIL,
reason=a.USE_DOOR, info=info_dict)
return valid, reward
def _build_observations(self) -> np.typing.ArrayLike:
# Observation dict:
per_agent_expl_idx = dict()
per_agent_obsn = dict()
# Generel Observations
lvl_obs = self[c.WALLS].as_array()
door_obs = self[c.DOORS].as_array() if self.parse_doors else None
if self.obs_prop.render_agents == a_obs.NOT:
global_agent_obs = None
elif self.obs_prop.omit_agent_self and self.n_agents == 1:
@ -391,8 +339,6 @@ class BaseFactory(gym.Env):
obs_dict[c.AGENT] = agent_obs[:]
if self[c.AGENT_PLACEHOLDER] and placeholder_obs is not None:
obs_dict[c.AGENT_PLACEHOLDER] = placeholder_obs
if self.parse_doors and door_obs is not None:
obs_dict[c.DOORS] = door_obs[:]
obs_dict.update(add_obs_dict)
obsn = np.vstack(list(obs_dict.values()))
if self.obs_prop.pomdp_r:
@ -430,33 +376,11 @@ class BaseFactory(gym.Env):
raise e
obs_block_light = obsn[light_block_obs] != c.OCCUPIED_CELL
door_shadowing = False
if self.parse_doors:
if doors := self[c.DOORS]:
if door := doors.by_pos(agent.pos):
if door.is_closed:
for group in door.connectivity_subgroups:
if agent.last_pos not in group:
door_shadowing = True
if self._pomdp_r:
blocking = [
tuple(np.subtract(x, agent.pos) + (self._pomdp_r, self._pomdp_r))
for x in group]
xs, ys = zip(*blocking)
else:
xs, ys = zip(*group)
# noinspection PyUnresolvedReferences
obs_block_light[:, xs, ys] = False
light_block_map = Map((np.prod(obs_block_light, axis=0) != True).astype(int).squeeze())
if self._pomdp_r:
light_block_map = light_block_map.do_fov(self._pomdp_r, self._pomdp_r, max(self._level_shape))
else:
light_block_map = light_block_map.do_fov(*agent.pos, max(self._level_shape))
if door_shadowing:
# noinspection PyUnboundLocalVariable
light_block_map[xs, ys] = 0
agent.step_result['lightmap'] = light_block_map
@ -550,35 +474,13 @@ class BaseFactory(gym.Env):
y_new = agent.y + y_diff
new_tile = self[c.FLOOR].by_pos((x_new, y_new))
if new_tile:
if new_tile and not np.any([x.is_blocking for x in new_tile.guests]):
valid = c.VALID
else:
tile = agent.tile
valid = c.VALID
return tile, valid
if self.parse_doors and agent.last_pos != c.NO_POS:
if doors := self[c.DOORS]:
if self.doors_have_area:
if door := doors.by_pos(new_tile.pos):
if door.is_closed:
return agent.tile, c.NOT_VALID
else: # door.is_closed:
pass
if door := doors.by_pos(agent.pos):
if door.is_open:
pass
else: # door.is_closed:
if door.is_linked(agent.last_pos, new_tile.pos):
pass
else:
return agent.tile, c.NOT_VALID
else:
pass
else:
pass
return new_tile, valid
def build_reward_result(self, global_env_rewards: list) -> (int, dict):
@ -649,14 +551,10 @@ class BaseFactory(gym.Env):
for i, agent in enumerate(self[c.AGENT]):
name, state = h.asset_str(agent)
agents.append(RenderEntity(name, agent.pos, 1, 'none', state, i + 1, agent.step_result['lightmap']))
doors = []
if self.parse_doors:
for i, door in enumerate(self[c.DOORS]):
name, state = 'door_open' if door.is_open else 'door_closed', 'blank'
doors.append(RenderEntity(name, door.pos, 1, 'none', state, i + 1))
additional_assets = self.render_assets_hook()
return self._renderer.render(walls + doors + additional_assets + agents)
return self._renderer.render(walls + additional_assets + agents)
def save_params(self, filepath: Path):
# noinspection PyProtectedMember

View File

@ -1,12 +1,10 @@
from collections import defaultdict
from typing import Union
from typing import Union, List
import networkx as nx
import numpy as np
from environments import helpers as h
from environments.helpers import Constants as c
import itertools
##########################################################################
# ##################### Base Object Building Blocks ######################### #
@ -88,6 +86,10 @@ class EnvObject(Object):
class Entity(EnvObject):
"""Full Env Entity that lives on the env Grid. Doors, Items, DirtPile etc..."""
@property
def is_blocking(self):
return False
@property
def can_collide(self):
return False
@ -226,6 +228,21 @@ class GlobalPosition(BoundingMixin, EnvObject):
class Floor(EnvObject):
@property
def neighboring_floor_pos(self):
return [x.pos for x in self.neighboring_floor]
@property
def neighboring_floor(self):
if self._neighboring_floor:
pass
else:
self._neighboring_floor = [x for x in [self._collection.by_pos(np.add(self.pos, pos))
for pos in h.POS_MASK.reshape(-1, 2)
if not np.all(pos == [0, 0])]
if x]
return self._neighboring_floor
@property
def encoding(self):
return c.FREE_CELL
@ -254,6 +271,7 @@ class Floor(EnvObject):
super(Floor, self).__init__(*args, **kwargs)
self._guests = dict()
self._pos = tuple(pos)
self._neighboring_floor: List[Floor] = list()
def __len__(self):
return len(self._guests)
@ -298,94 +316,6 @@ class Wall(Floor):
pass
class Door(Entity):
    """Entity that toggles between an open and a closed state.

    On construction the door looks up the tiles at its eight surrounding
    positions and builds a connectivity graph from those that exist. Each
    connected component of that graph gets an integer hub node. Opening the
    door links the door position to every hub; closing it removes the door
    position from the graph again.
    """

    @property
    def can_collide(self):
        """Return True only for a door created with ``has_area`` that is not open."""
        if self.has_area:
            return not self.is_open
        return False

    @property
    def encoding(self):
        """Observation value of the door, chosen by its open/closed state."""
        # This is important, as the shadow is checked by occupation value.
        return c.CLOSED_DOOR_CELL if self.is_closed else c.OPEN_DOOR_CELL

    @property
    def str_state(self):
        """Return ``'open'`` or ``'closed'``."""
        return 'open' if self.is_open else 'closed'

    @property
    def access_area(self):
        """Return all graph nodes except the integer hub nodes and the door position."""
        hub_nodes = range(len(self.connectivity_subgroups))
        return [node for node in self.connectivity.nodes
                if node not in hub_nodes and node != self.pos]

    def __init__(self, *args, context, closed_on_init=True, auto_close_interval=10, has_area=False, **kwargs):
        """Create the door and build its local connectivity graph.

        :param context: Object offering ``by_pos(position)``; used to look up the surrounding tiles.
        :param closed_on_init: If False, the door is opened right after construction.
        :param auto_close_interval: Value ``time_to_close`` is reset to whenever the door opens.
        :param has_area: Stored as ``self.has_area``; consulted by ``can_collide``.
        """
        super(Door, self).__init__(*args, **kwargs)
        self._state = c.CLOSED_DOOR
        self.has_area = has_area
        self.auto_close_interval = auto_close_interval
        self.time_to_close = -1
        # (0, 0) is the last element of this product, so the slice drops the door's own cell.
        neighbor_offsets = list(itertools.product([-1, 1, 0], repeat=2))[:-1]
        neighbor_tiles = [context.by_pos(tuple(p + d for p, d in zip(self.pos, offset)))
                          for offset in neighbor_offsets]
        neighbor_pos = [tile.pos for tile in neighbor_tiles if tile]
        self.connectivity = h.points_to_graph(neighbor_pos)
        self.connectivity_subgroups = list(nx.algorithms.components.connected_components(self.connectivity))
        # Attach every position of a component to an integer hub node named after the component index.
        for idx, group in enumerate(self.connectivity_subgroups):
            for tile_pos in group:
                self.connectivity.add_edge(tile_pos, idx)
        if not closed_on_init:
            self._open()

    def summarize_state(self):
        """Extend the parent summary with the door state and the remaining time to close."""
        state_dict = super().summarize_state()
        state_dict.update(state=str(self.str_state), time_to_close=int(self.time_to_close))
        return state_dict

    @property
    def is_closed(self):
        """True while the internal state equals ``c.CLOSED_DOOR``."""
        return self._state == c.CLOSED_DOOR

    @property
    def is_open(self):
        """True while the internal state equals ``c.OPEN_DOOR``."""
        return self._state == c.OPEN_DOOR

    @property
    def status(self):
        """Raw internal state value."""
        return self._state

    def use(self):
        """Toggle the door: close it when open, open it otherwise."""
        if self._state == c.OPEN_DOOR:
            self._close()
        else:
            self._open()

    def tick(self):
        """Advance the auto-close countdown by one step.

        Only acts while the door is open and ``len(self.tile) == 1``
        (presumably: the door is the only guest on its tile - confirm).
        Counts ``time_to_close`` down and closes the door once it hits zero.
        """
        if not (self.is_open and len(self.tile) == 1):
            return
        if self.time_to_close:
            self.time_to_close -= 1
        else:
            self.use()

    def _open(self):
        """Link the door position to every hub node, mark the door open and restart the countdown."""
        self.connectivity.add_edges_from([(self.pos, x) for x in range(len(self.connectivity_subgroups))])
        self._state = c.OPEN_DOOR
        self._collection.notify_change_to_value(self)
        self.time_to_close = self.auto_close_interval

    def _close(self):
        """Remove the door position from the graph and mark the door closed."""
        self.connectivity.remove_node(self.pos)
        self._state = c.CLOSED_DOOR
        self._collection.notify_change_to_value(self)

    def is_linked(self, old_pos, new_pos):
        """Return True if the connectivity graph holds a path from ``old_pos`` to ``new_pos``.

        Returns False both when no path exists and when either position is
        not a node of the graph at all (e.g. the door's own position while
        the door is closed), instead of letting ``NodeNotFound`` escape.
        """
        try:
            nx.shortest_path(self.connectivity, old_pos, new_pos)
        except (nx.exception.NetworkXNoPath, nx.exception.NodeNotFound):
            return False
        return True
class Agent(MoveableEntity):
@property

View File

@ -6,7 +6,7 @@ from typing import List, Union, Dict, Tuple
import numpy as np
import six
from environments.factory.base.objects import Entity, Floor, Agent, Door, Action, Wall, PlaceHolder, GlobalPosition, \
from environments.factory.base.objects import Entity, Floor, Agent, Action, Wall, PlaceHolder, GlobalPosition, \
Object, EnvObject
from environments.utility_classes import MovementProperties
from environments import helpers as h
@ -452,38 +452,6 @@ class Agents(MovingEntityObjectCollection):
self._collection[agent.name] = agent
class Doors(EntityCollection):
    """Collection of ``Door`` entities, optionally marking their access areas in the array view."""

    def __init__(self, *args, have_area: bool = False, **kwargs):
        """Store the area settings, then initialise the parent as light-blocking and collidable."""
        self.have_area = have_area
        self._area_marked = False
        super().__init__(*args, is_blocking_light=True, can_collide=True, **kwargs)

    _accepted_objects = Door

    def get_near_position(self, position: (int, int)) -> Union[None, Door]:
        """Return the first door whose ``access_area`` contains ``position``, or None."""
        matching_doors = (door for door in self if position in door.access_area)
        return next(matching_doors, None)

    def tick_doors(self):
        """Call ``tick`` on every door in the collection."""
        for door in self:
            door.tick()

    def as_array(self):
        """Return the parent's array view, registering the access-area cells once beforehand.

        The marking runs only when ``have_area`` is set and has not run yet.
        With individual slices nothing is registered, but the pass is still
        flagged as done.
        """
        if self._area_marked or not self.have_area:
            return super().as_array()
        for door in self:
            for area_pos in door.access_area:
                if not self._individual_slices:
                    # Prefix the slice index 0 to address the shared slice.
                    target = (0, *area_pos)
                    self._lazy_eval_transforms.append((target, c.ACCESS_DOOR_CELL))
        self._area_marked = True
        return super().as_array()
class Actions(ObjectCollection):
_accepted_objects = Action
@ -492,11 +460,10 @@ class Actions(ObjectCollection):
return self._movement_actions
# noinspection PyTypeChecker
def __init__(self, movement_properties: MovementProperties, can_use_doors=False):
def __init__(self, movement_properties: MovementProperties):
self.allow_no_op = movement_properties.allow_no_op
self.allow_diagonal_movement = movement_properties.allow_diagonal_movement
self.allow_square_movement = movement_properties.allow_square_movement
self.can_use_doors = can_use_doors
super(Actions, self).__init__()
# Move this to Baseclass, Env init?
@ -507,8 +474,6 @@ class Actions(ObjectCollection):
self.add_additional_items([self._accepted_objects(str_ident=direction)
for direction in h.EnvActions.diagonal_move()])
self._movement_actions = self._collection.copy()
if self.can_use_doors:
self.add_additional_items([self._accepted_objects(str_ident=h.EnvActions.USE_DOOR)])
if self.allow_no_op:
self.add_additional_items([self._accepted_objects(str_ident=h.EnvActions.NOOP)])

View File

@ -33,6 +33,10 @@ IGNORED_DF_COLUMNS = ['Episode', 'Run', # For plotting, which values ar
'train_step', 'step', 'index', 'dirt_amount', 'dirty_tile_count', 'terminal_observation',
'episode']
# 3x3 grid of two-component position offsets, each component in {-1, 0, 1}.
# The centre entry [0, 0] is the zero offset (the position itself); the other
# eight entries are the offsets to the surrounding positions.
POS_MASK = np.asarray([[[-1, -1], [0, -1], [1, -1]],
[[-1, 0], [0, 0], [1, 0]],
[[-1, 1], [0, 1], [1, 1]]])
class Constants:
@ -42,12 +46,10 @@ class Constants:
"""
WALL = '#' # Wall tile identifier for resolving the string based map files.
DOOR = 'D' # Door identifier for resolving the string based map files.
DANGER_ZONE = 'x' # Danger Zone tile identifier for resolving the string based map files.
WALLS = 'Walls' # Identifier of Wall-objects and sets (collections).
FLOOR = 'Floor' # Identifier of Floor-objects and sets (collections).
DOORS = 'Doors' # Identifier of Door-objects and sets (collections).
LEVEL = 'Level' # Identifier of Level-objects and sets (collections).
AGENT = 'Agent' # Identifier of Agent-objects and sets (collections).
AGENT_PLACEHOLDER = 'AGENT_PLACEHOLDER' # Identifier of Placeholder-objects and sets (collections).
@ -56,16 +58,9 @@ class Constants:
FREE_CELL = 0 # Free-Cell value used in observation
OCCUPIED_CELL = 1 # Occupied-Cell value used in observation
SHADOWED_CELL = -1 # Shadowed-Cell value used in observation
ACCESS_DOOR_CELL = 1/3 # Access-door-Cell value used in observation
OPEN_DOOR_CELL = 2/3 # Open-door-Cell value used in observation
CLOSED_DOOR_CELL = 3/3 # Closed-door-Cell value used in observation
NO_POS = (-9999, -9999) # Invalid Position value used in the environment (something is off-grid)
CLOSED_DOOR = 'closed' # Identifier to compare door-is-closed state
OPEN_DOOR = 'open' # Identifier to compare door-is-open state
# ACCESS_DOOR = 'access' # Identifier to compare access positions
ACTION = 'action' # Identifier of Action-objects and sets (collections).
COLLISION = 'collision' # Identifier to use in the context of collisions.
VALID = True # Identifier to rename boolean values in the context of actions.
@ -90,7 +85,6 @@ class EnvActions:
# Other
# MOVE = 'move'
NOOP = 'no_op'
USE_DOOR = 'use_door'
_ACTIONMAP = defaultdict(lambda: (0, 0),
{NORTH: (-1, 0), NORTHEAST: (-1, 1),
@ -100,6 +94,8 @@ class EnvActions:
}
)
@classmethod
def is_move(cls, action):
"""
@ -166,8 +162,6 @@ class RewardsBase(NamedTuple):
MOVEMENTS_VALID: float = -0.001
MOVEMENTS_FAIL: float = -0.05
NOOP: float = -0.01
USE_DOOR_VALID: float = -0.00
USE_DOOR_FAIL: float = -0.01
COLLISION: float = -0.5

View File

@ -68,7 +68,7 @@ if __name__ == '__main__':
omit_agent_self=True, # This is default
additional_agent_placeholder=None, # We will not take care of future agents
frames_to_stack=3, # To give the agent a notion of time
pomdp_r=2 # the agents view-radius
pomdp_r=2 # the agents' view-radius
)
# 'MovementProperties' are for specifying how the agent is allowed to move in the env.
move_props = MovementProperties(allow_diagonal_movement=True, # Euclidean style (vertices)
@ -135,7 +135,7 @@ if __name__ == '__main__':
env_recorder_callback = EnvRecorder(env_factory, freq=int(train_steps / 400 / 10))
# Model Init
model = model_class("MlpPolicy", env_factory,verbose=1, seed=seed, device='cpu')
model = model_class("MlpPolicy", env_factory, verbose=1, seed=seed, device='cpu')
# Model train
model.learn(total_timesteps=int(train_steps), callback=[env_monitor_callback, env_recorder_callback])
@ -166,7 +166,7 @@ if __name__ == '__main__':
# retrieve model class
model_cls = next(val for key, val in h.MODEL_MAP.items() if key in policy_path.parent.name)
# Load the agent agent
# Load the agent
model = model_cls.load(policy_path / 'model.zip', device='cpu')
# Load old env kwargs
with next(policy_path.glob(env_params_json)).open('r') as f: