mirror of
https://github.com/illiumst/marl-factory-grid.git
synced 2025-07-11 23:42:40 +02:00
Doors are now separate
This commit is contained in:
38
environments/factory/additional/_template/_collections.py
Normal file
38
environments/factory/additional/_template/_collections.py
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
from typing import Union
|
||||||
|
|
||||||
|
from environments.factory.additional.doors.doors_entities import Door
|
||||||
|
from environments.factory.base.registers import EntityCollection
|
||||||
|
|
||||||
|
from environments.factory.additional.doors.doors_util import Constants as c
|
||||||
|
|
||||||
|
|
||||||
|
class Doors(EntityCollection):
    """Collection of door entities that can optionally mark the cells next to each door."""

    # Presumably the entity type the base collection accepts; verify against EntityCollection.
    _accepted_objects = Door

    def __init__(self, *args, indicate_area=False, **kwargs):
        """Create the collection.

        :param indicate_area: If true, ``as_array`` queues ``ACCESS_DOOR_CELL`` markers
                              for the floor tiles neighbouring each door.
        All other arguments are forwarded to the base collection, which is always
        created with ``is_blocking_light=True`` and ``can_collide=True``.
        """
        self.indicate_area = indicate_area
        self._area_marked = False
        super().__init__(*args, is_blocking_light=True, can_collide=True, **kwargs)

    def get_near_position(self, position: (int, int)) -> Union[None, Door]:
        """Return the first door that has ``position`` among its neighbouring floor positions, else None."""
        candidates = (door for door in self if position in door.tile.neighboring_floor_pos)
        return next(candidates, None)

    def tick_doors(self):
        """Call ``tick`` on every door in the collection."""
        for door in self:
            door.tick()

    def as_array(self):
        """Return the base class array, queueing the door-area markers on first use.

        The markers are queued at most once; afterwards ``_area_marked`` stays set.
        """
        needs_marking = self.indicate_area and not self._area_marked
        if needs_marking:
            for door in self:
                for tile in door.tile.neighboring_floor:
                    # Nothing is queued when individual slices are in use.
                    if not self._individual_slices:
                        self._lazy_eval_transforms.append(((0, *tile.pos), c.ACCESS_DOOR_CELL))
            self._area_marked = True
        return super().as_array()
|
71
environments/factory/additional/_template/_entities.py
Normal file
71
environments/factory/additional/_template/_entities.py
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
from environments.factory.base.objects import Entity
|
||||||
|
from environments.factory.additional.doors.doors_util import Constants as c
|
||||||
|
|
||||||
|
|
||||||
|
class Template(Entity):
    """Template for new Entity"""

    def __init__(self, *args, closed_on_init=True, auto_close_interval=10, indicate_area=False, **kwargs):
        """Set up the entity in the closed state and open it right away if requested.

        :param closed_on_init: When false, ``_open`` is called at the end of construction.
        :param auto_close_interval: Value ``time_to_close`` is reset to whenever the entity opens.
        :param indicate_area: Stored on the instance; not used inside this class.
        """
        super().__init__(*args, **kwargs)
        self._state = c.CLOSED_DOOR
        self.indicate_area = indicate_area
        self.auto_close_interval = auto_close_interval
        self.time_to_close = -1
        if not closed_on_init:
            self._open()

    # How to define / override properties
    @property
    def is_blocking(self):
        """The template entity never blocks."""
        return False

    @property
    def can_collide(self):
        """Collisions are possible unless ``template_attr`` is truthy."""
        # NOTE(review): template_attr is not assigned anywhere in this class;
        # presumably a concrete entity has to provide it - confirm.
        return not self.template_attr

    @property
    def encoding(self):
        """Observation value for the current state."""
        # The encoding matters because shadows are checked by occupation value.
        if self.is_closed:
            return c.CLOSED_DOOR_CELL
        return c.OPEN_DOOR_CELL

    @property
    def str_state(self):
        """Current state as the string 'open' or 'closed'."""
        if self.is_open:
            return 'open'
        return 'closed'

    @property
    def is_closed(self):
        """True while the state equals ``CLOSED_DOOR``."""
        return self._state == c.CLOSED_DOOR

    @property
    def is_open(self):
        """True while the state equals ``OPEN_DOOR``."""
        return self._state == c.OPEN_DOOR

    @property
    def status(self):
        """The raw state identifier."""
        return self._state

    def summarize_state(self):
        """Extend the parent's state summary with the state string and the close countdown."""
        summary = super().summarize_state()
        summary.update({'state': str(self.str_state), 'time_to_close': int(self.time_to_close)})
        return summary

    def use(self):
        """Toggle the state: close when open, open otherwise."""
        toggle = self._close if self._state == c.OPEN_DOOR else self._open
        toggle()

    def tick(self):
        """Advance the auto-close countdown by one step.

        Only acts while the entity is open and ``len(self.tile) == 1``
        (presumably: nothing else occupies the tile - verify). A non-zero
        countdown is decremented; at zero the entity is toggled via ``use``.
        """
        if not self.is_open or len(self.tile) != 1:
            return
        if self.time_to_close:
            self.time_to_close -= 1
        else:
            self.use()

    def _open(self):
        """Switch to the open state, notify the collection and restart the countdown."""
        self._state = c.OPEN_DOOR
        self._collection.notify_change_to_value(self)
        self.time_to_close = self.auto_close_interval

    def _close(self):
        """Switch to the closed state and notify the collection."""
        self._state = c.CLOSED_DOOR
        self._collection.notify_change_to_value(self)
|
31
environments/factory/additional/_template/_util.py
Normal file
31
environments/factory/additional/_template/_util.py
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
|
||||||
|
from typing import NamedTuple
|
||||||
|
|
||||||
|
from environments.helpers import Constants as BaseConstants, EnvActions as BaseActions
|
||||||
|
|
||||||
|
|
||||||
|
class Constants(BaseConstants):
    """Door-related identifiers and observation values on top of the base constants."""

    # --- Identifiers ---
    DOOR = 'Door'        # Identifier of single door entities.
    DOORS = 'Doors'      # Identifier of door objects and sets (collections).
    DOOR_SYMBOL = 'D'    # Door marker used when resolving the string based map files.

    # --- Cell values used in the observation ---
    ACCESS_DOOR_CELL = 1 / 3    # Cell from which a door can be accessed.
    OPEN_DOOR_CELL = 2 / 3      # Cell holding an open door.
    CLOSED_DOOR_CELL = 3 / 3    # Cell holding a closed door.

    # --- State identifiers ---
    CLOSED_DOOR = 'closed'    # Compared against to detect the closed state.
    OPEN_DOOR = 'open'        # Compared against to detect the open state.
    # ACCESS_DOOR = 'access'  # Identifier to compare access positions
|
||||||
|
|
||||||
|
|
||||||
|
class Actions(BaseActions):
    """Base environment actions extended by the door action."""

    # Identifier of the action that operates a door.
    USE_DOOR = 'use_door'
|
||||||
|
|
||||||
|
|
||||||
|
class RewardsDoor(NamedTuple):
    """Reward values attached to the ``use_door`` action."""

    # Reward when the action was valid.
    USE_DOOR_VALID: float = -0.00
    # Reward (a small penalty by default) when the action was not valid.
    USE_DOOR_FAIL: float = -0.01
|
||||||
|
|
||||||
|
|
||||||
|
class DoorProperties(NamedTuple):
    """Configuration options for doors."""

    # Whether the door area should be indicated in the agents' observation.
    indicate_door_area: bool = True
|
196
environments/factory/additional/_template/factory_template.py
Normal file
196
environments/factory/additional/_template/factory_template.py
Normal file
@ -0,0 +1,196 @@
|
|||||||
|
import time
|
||||||
|
from typing import List, Union, Dict
|
||||||
|
import random
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from environments.factory.additional.doors.doors_collections import Doors
|
||||||
|
from environments.factory.additional.doors.doors_util import DoorProperties, RewardsDoor, Constants, Actions
|
||||||
|
from environments.factory.base.base_factory import BaseFactory
|
||||||
|
from environments.factory.base.objects import Agent, Action
|
||||||
|
from environments.factory.base.registers import Entities
|
||||||
|
|
||||||
|
from environments import helpers as h
|
||||||
|
|
||||||
|
from environments.factory.base.renderer import RenderEntity
|
||||||
|
from environments.utility_classes import ObservationProperties
|
||||||
|
|
||||||
|
|
||||||
|
def softmax(x, axis=None):
    """Compute the softmax of the scores in ``x``.

    The maximum is subtracted before exponentiation, which leaves the result
    unchanged but keeps ``np.exp`` from overflowing for large scores.

    :param x: Array-like of scores.
    :param axis: Axis along which to normalise. ``None`` (default) normalises
                 over all elements, exactly as the original single-argument form.
    :return: Array of the same shape as ``x`` whose entries sum to 1 along ``axis``.
    """
    scores = np.asarray(x)
    # keepdims is only needed when reducing along one axis, so that the
    # reduced array still broadcasts against ``scores``.
    keepdims = axis is not None
    e_x = np.exp(scores - np.max(scores, axis=axis, keepdims=keepdims))
    return e_x / e_x.sum(axis=axis, keepdims=keepdims)
|
||||||
|
|
||||||
|
|
||||||
|
def entropy(x):
    """Return ``-sum(x * log(x + 1e-8))`` over all elements of the array ``x``.

    The small constant keeps the logarithm finite for zero entries.
    """
    eps = 1e-8
    weighted_logs = x * np.log(x + eps)
    return -weighted_logs.sum()
|
||||||
|
|
||||||
|
|
||||||
|
c = Constants
|
||||||
|
a = Actions
|
||||||
|
|
||||||
|
|
||||||
|
# noinspection PyAttributeOutsideInit, PyAbstractClass
|
||||||
|
class DoorFactory(BaseFactory):
    """Factory environment that adds doors and a ``use_door`` action.

    Doors are created from the ``c.DOOR_SYMBOL`` cells of the parsed level,
    ticked once per step, rendered, and exposed under the ``c.DOORS`` observation key.
    """

    @property
    def actions_hook(self) -> Union[Action, List[Action]]:
        """Return the parent's actions with the ``use_door`` action appended."""
        actions = super().actions_hook
        actions.append(Action(str_ident=a.USE_DOOR))
        return actions

    @property
    def entities_hook(self) -> Dict[str, Entities]:
        """Return the parent's entities plus a ``Doors`` collection when the level contains doors."""
        entities = super().entities_hook

        door_mask = h.one_hot_level(self._parsed_level, c.DOOR_SYMBOL)
        # Zero-pad by the POMDP radius (presumably to match the padded level layout - verify).
        door_mask = np.pad(door_mask, self.obs_prop.pomdp_r, 'constant', constant_values=0)
        if np.any(door_mask):
            door_tiles = [self[c.FLOOR].by_pos(tuple(pos))
                          for pos in np.argwhere(door_mask == c.OCCUPIED_CELL)]
            # NOTE(review): the flag is read from obs_prop, not from self.door_properties;
            # confirm which of the two is intended.
            doors = Doors.from_tiles(door_tiles, self._level_shape,
                                     indicate_area=self.obs_prop.indicate_door_area,
                                     entity_kwargs=dict())
            entities.update({c.DOORS: doors})
        return entities

    def __init__(self, *args,
                 door_properties: DoorProperties = DoorProperties(), rewards_door: RewardsDoor = RewardsDoor(),
                 env_seed=None, **kwargs):
        """Store the door configuration and initialise the parent environment.

        :param door_properties: ``DoorProperties`` instance, or a dict of its fields.
        :param rewards_door: ``RewardsDoor`` instance, or a dict of its fields.
        :param env_seed: Seed for the door RNG; it is also forwarded to the parent.
                         ``None`` (default) draws a fresh ``time.time_ns()`` value
                         for each instance.
        """
        # Resolve the seed per call. A ``time.time_ns()`` default in the signature is
        # evaluated only once, at definition time, and would be shared by all instances.
        if env_seed is None:
            env_seed = time.time_ns()
        if isinstance(door_properties, dict):
            door_properties = DoorProperties(**door_properties)
        if isinstance(rewards_door, dict):
            rewards_door = RewardsDoor(**rewards_door)
        self.door_properties = door_properties
        self.rewards_door = rewards_door
        self._door_rng = np.random.default_rng(env_seed)
        self._doors: Doors
        kwargs.update(env_seed=env_seed)
        # TODO: Reset ---> document this
        super().__init__(*args, **kwargs)

    def render_assets_hook(self, mode='human'):
        """Return the parent's render assets extended by one ``RenderEntity`` per door."""
        additional_assets = super().render_assets_hook()
        door_assets = [
            RenderEntity('door_open' if door.is_open else 'door_closed', door.pos, 1, 'none', 'blank', i + 1)
            for i, door in enumerate(self[c.DOORS])
        ]
        additional_assets.extend(door_assets)
        return additional_assets

    def step_hook(self) -> (List[dict], dict):
        """Run the parent's step hook, then tick all doors."""
        super_reward_info = super().step_hook()
        # Step the door close interval.
        # TODO: Maybe move this to self.post_step_hook? May collide with reward calculation.
        if doors := self[c.DOORS]:
            doors.tick_doors()
        return super_reward_info

    def do_additional_actions(self, agent: Agent, action: Action) -> (dict, dict):
        """Let the parent handle ``action`` first; handle ``use_door`` if the parent returned None."""
        action_result = super().do_additional_actions(agent, action)
        if action_result is not None:
            return action_result
        if action == a.USE_DOOR:
            return self.use_door_action(agent)
        return None

    def use_door_action(self, agent: Agent):
        """Toggle the door next to ``agent`` and build the matching reward.

        :return: Tuple ``(valid, reward)``; ``reward`` is a dict with the keys
                 ``value``, ``reason`` and ``info``.
        """
        # Look for a door that lists the agent's position among its neighbouring floor positions.
        door = self[c.DOORS].get_near_position(agent.pos)
        if door is not None:
            door.use()
            valid = c.VALID
            self.print(f'{agent.name} just used a {door.name} at {door.pos}')
            info_dict = {f'{agent.name}_door_use': 1, 'door_use': 1}
        else:
            # No door in reach.
            valid = c.NOT_VALID
            info_dict = {f'{agent.name}_failed_door_use': 1, 'failed_door_use': 1}
            self.print(f'{agent.name} just tried to use a door at {agent.pos}, but there is none.')

        reward = dict(value=self.rewards_door.USE_DOOR_VALID if valid else self.rewards_door.USE_DOOR_FAIL,
                      reason=a.USE_DOOR, info=info_dict)
        return valid, reward

    def reset_hook(self) -> None:
        """Run the parent's reset hook; there is nothing door-specific to reset."""
        super().reset_hook()

    def check_additional_done(self) -> (bool, dict):
        """Return the parent's done flag and info dict unchanged."""
        super_done, super_dict = super().check_additional_done()
        return super_done, super_dict

    def observations_hook(self) -> Dict[str, np.typing.ArrayLike]:
        """Return the parent's observations plus the doors array under ``c.DOORS``."""
        additional_observations = super().observations_hook()
        additional_observations.update({c.DOORS: self[c.DOORS].as_array()})
        return additional_observations

    def post_step_hook(self) -> List[Dict[str, int]]:
        """Return the parent's post-step result unchanged."""
        return super().post_step_hook()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Manual smoke run: build the environment several times, drive it with
    # random actions and report how long the episodes take.
    from statistics import median

    from environments.utility_classes import AgentRenderOptions as aro

    render = True

    door_props = DoorProperties(indicate_door_area=True)

    obs_props = ObservationProperties(render_agents=aro.COMBINED, omit_agent_self=True,
                                      pomdp_r=2, additional_agent_placeholder=None, cast_shadows=True)

    move_props = {'allow_square_movement': True,
                  'allow_diagonal_movement': False,
                  'allow_no_op': False}

    global_timings = []
    for i in range(10):

        # The door configuration goes in under ``door_properties``, the keyword
        # DoorFactory.__init__ declares (it was passed as ``dirt_prop`` before).
        factory = DoorFactory(n_agents=10, done_at_collision=False,
                              level_name='rooms', max_steps=1000,
                              obs_prop=obs_props, parse_doors=True,
                              verbose=True,
                              mv_prop=move_props, door_properties=door_props,
                              )

        # noinspection DuplicatedCode
        n_actions = factory.action_space.n - 1
        # The spaces are read as in the original script; the values themselves are unused.
        _ = factory.observation_space
        obs_space = factory.observation_space
        obs_space_named = factory.named_observation_space
        action_space_named = factory.named_action_space

        times = []
        for epoch in range(10):
            start_time = time.time()
            # One random action per agent for every possible step of the episode.
            random_actions = [[random.randint(0, n_actions) for _ in range(factory.n_agents)]
                              for _ in range(factory.max_steps + 1)]
            env_state = factory.reset()
            if render:
                factory.render()

            rwrd = 0
            for agent_i_action in random_actions:
                env_state, step_rwrd, done_bool, info_obj = factory.step(agent_i_action)
                rwrd += step_rwrd
                if render:
                    factory.render()
                if done_bool:
                    break
            times.append(time.time() - start_time)
        # Divide by the number of recorded runs rather than a hard-coded count.
        print('Mean Time Taken: ', sum(times) / len(times))
        global_timings.extend(times)
    print('Mean Time Taken: ', sum(global_timings) / len(global_timings))
    # A median needs ordered data; indexing the unsorted list returned an arbitrary run.
    print('Median Time Taken: ', median(global_timings))
|
@ -7,9 +7,17 @@ from environments.factory.additional.btry.factory_battery import BatteryFactory
|
|||||||
from environments.factory.additional.dest.factory_dest import DestFactory
|
from environments.factory.additional.dest.factory_dest import DestFactory
|
||||||
from environments.factory.additional.dirt.dirt_util import DirtProperties
|
from environments.factory.additional.dirt.dirt_util import DirtProperties
|
||||||
from environments.factory.additional.dirt.factory_dirt import DirtFactory
|
from environments.factory.additional.dirt.factory_dirt import DirtFactory
|
||||||
|
from environments.factory.additional.doors.factory_doors import DoorFactory
|
||||||
from environments.factory.additional.item.factory_item import ItemFactory
|
from environments.factory.additional.item.factory_item import ItemFactory
|
||||||
|
|
||||||
|
|
||||||
|
# noinspection PyAbstractClass
|
||||||
|
class DoorDirtFactory(DoorFactory, DirtFactory):
    """Environment that inherits from both the door and the dirt factory."""

    def __init__(self, *args, **kwargs):
        # No state of its own: all arguments are handed on along the MRO.
        super().__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
# noinspection PyAbstractClass
|
||||||
class DirtItemFactory(ItemFactory, DirtFactory):
|
class DirtItemFactory(ItemFactory, DirtFactory):
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
@ -38,8 +46,6 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
render = True
|
render = True
|
||||||
|
|
||||||
dirt_props = DirtProperties(1, 0.05, 0.1, 3, 1, 20, 0)
|
|
||||||
|
|
||||||
obs_props = ObservationProperties(render_agents=ARO.COMBINED, omit_agent_self=True,
|
obs_props = ObservationProperties(render_agents=ARO.COMBINED, omit_agent_self=True,
|
||||||
pomdp_r=2, additional_agent_placeholder=None)
|
pomdp_r=2, additional_agent_placeholder=None)
|
||||||
|
|
||||||
@ -47,13 +53,13 @@ if __name__ == '__main__':
|
|||||||
'allow_diagonal_movement': False,
|
'allow_diagonal_movement': False,
|
||||||
'allow_no_op': False}
|
'allow_no_op': False}
|
||||||
|
|
||||||
factory = DirtBatteryFactory(n_agents=5, done_at_collision=False,
|
factory = DoorDirtFactory(n_agents=10, done_at_collision=False,
|
||||||
level_name='rooms', max_steps=400,
|
level_name='rooms', max_steps=400,
|
||||||
obs_prop=obs_props, parse_doors=True,
|
obs_prop=obs_props, parse_doors=True,
|
||||||
record_episodes=True, verbose=True,
|
record_episodes=True, verbose=True,
|
||||||
btry_prop=BatteryProperties(),
|
dirt_prop=DirtProperties(),
|
||||||
mv_prop=move_props, dirt_prop=dirt_props
|
mv_prop=move_props)
|
||||||
)
|
|
||||||
|
|
||||||
# noinspection DuplicatedCode
|
# noinspection DuplicatedCode
|
||||||
n_actions = factory.action_space.n - 1
|
n_actions = factory.action_space.n - 1
|
||||||
|
@ -44,7 +44,7 @@ class DirtFactory(BaseFactory):
|
|||||||
def entities_hook(self) -> Dict[(str, Entities)]:
|
def entities_hook(self) -> Dict[(str, Entities)]:
|
||||||
super_entities = super().entities_hook
|
super_entities = super().entities_hook
|
||||||
dirt_register = DirtPiles(self.dirt_prop, self._level_shape)
|
dirt_register = DirtPiles(self.dirt_prop, self._level_shape)
|
||||||
super_entities.update(({c.DIRT: dirt_register}))
|
super_entities.update({c.DIRT: dirt_register})
|
||||||
return super_entities
|
return super_entities
|
||||||
|
|
||||||
def __init__(self, *args,
|
def __init__(self, *args,
|
||||||
|
0
environments/factory/additional/doors/__init__.py
Normal file
0
environments/factory/additional/doors/__init__.py
Normal file
38
environments/factory/additional/doors/doors_collections.py
Normal file
38
environments/factory/additional/doors/doors_collections.py
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
from typing import Union
|
||||||
|
|
||||||
|
from environments.factory.additional.doors.doors_entities import Door
|
||||||
|
from environments.factory.base.registers import EntityCollection
|
||||||
|
|
||||||
|
from environments.factory.additional.doors.doors_util import Constants as c
|
||||||
|
|
||||||
|
|
||||||
|
class Doors(EntityCollection):
    """Collection of ``Door`` entities that can optionally mark the cells next to each door."""

    # Presumably the entity type the base collection accepts; verify against EntityCollection.
    _accepted_objects = Door

    def __init__(self, *args, indicate_area=False, **kwargs):
        """Create the collection.

        :param indicate_area: If true, ``as_array`` queues ``ACCESS_DOOR_CELL`` markers
                              for the floor tiles neighbouring each door.
        All other arguments are forwarded to the base collection, which is always
        created with ``is_blocking_light=True`` and ``can_collide=True``.
        """
        self.indicate_area = indicate_area
        self._area_marked = False
        super().__init__(*args, is_blocking_light=True, can_collide=True, **kwargs)

    def get_near_position(self, position: (int, int)) -> Union[None, Door]:
        """Return the first door that has ``position`` among its neighbouring floor positions, else None."""
        candidates = (door for door in self if position in door.tile.neighboring_floor_pos)
        return next(candidates, None)

    def tick_doors(self):
        """Call ``tick`` on every door in the collection."""
        for door in self:
            door.tick()

    def as_array(self):
        """Return the base class array, queueing the door-area markers on first use.

        The markers are queued at most once; afterwards ``_area_marked`` stays set.
        """
        needs_marking = self.indicate_area and not self._area_marked
        if needs_marking:
            for door in self:
                for tile in door.tile.neighboring_floor:
                    # Nothing is queued when individual slices are in use.
                    if not self._individual_slices:
                        self._lazy_eval_transforms.append(((0, *tile.pos), c.ACCESS_DOOR_CELL))
            self._area_marked = True
        return super().as_array()
|
69
environments/factory/additional/doors/doors_entities.py
Normal file
69
environments/factory/additional/doors/doors_entities.py
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
from environments.factory.base.objects import Entity
|
||||||
|
from environments.factory.additional.doors.doors_util import Constants as c
|
||||||
|
|
||||||
|
|
||||||
|
class Door(Entity):
    """Door entity that is either open or closed and closes itself again after a countdown."""

    def __init__(self, *args, closed_on_init=True, auto_close_interval=10, indicate_area=False, **kwargs):
        """Set up the door in the closed state and open it right away if requested.

        :param closed_on_init: When false, ``_open`` is called at the end of construction.
        :param auto_close_interval: Value ``time_to_close`` is reset to whenever the door opens.
        :param indicate_area: Stored on the instance; not used inside this class.
        """
        super().__init__(*args, **kwargs)
        self._state = c.CLOSED_DOOR
        self.indicate_area = indicate_area
        self.auto_close_interval = auto_close_interval
        self.time_to_close = -1
        if not closed_on_init:
            self._open()

    @property
    def is_blocking(self):
        """A door blocks unless it is open."""
        return not self.is_open

    @property
    def can_collide(self):
        """Collisions are possible unless the door is open."""
        return not self.is_open

    @property
    def encoding(self):
        """Observation value for the current door state."""
        # The encoding matters because shadows are checked by occupation value.
        if self.is_closed:
            return c.CLOSED_DOOR_CELL
        return c.OPEN_DOOR_CELL

    @property
    def str_state(self):
        """Current state as the string 'open' or 'closed'."""
        if self.is_open:
            return 'open'
        return 'closed'

    @property
    def is_closed(self):
        """True while the state equals ``CLOSED_DOOR``."""
        return self._state == c.CLOSED_DOOR

    @property
    def is_open(self):
        """True while the state equals ``OPEN_DOOR``."""
        return self._state == c.OPEN_DOOR

    @property
    def status(self):
        """The raw state identifier."""
        return self._state

    def summarize_state(self):
        """Extend the parent's state summary with the state string and the close countdown."""
        summary = super().summarize_state()
        summary.update({'state': str(self.str_state), 'time_to_close': int(self.time_to_close)})
        return summary

    def use(self):
        """Toggle the door: close it when open, open it otherwise."""
        toggle = self._close if self._state == c.OPEN_DOOR else self._open
        toggle()

    def tick(self):
        """Advance the auto-close countdown by one step.

        Only acts while the door is open and ``len(self.tile) == 1``
        (presumably: nothing else occupies the door tile - verify). A non-zero
        countdown is decremented; at zero the door is toggled via ``use``.
        """
        if not self.is_open or len(self.tile) != 1:
            return
        if self.time_to_close:
            self.time_to_close -= 1
        else:
            self.use()

    def _open(self):
        """Switch to the open state, notify the collection and restart the countdown."""
        self._state = c.OPEN_DOOR
        self._collection.notify_change_to_value(self)
        self.time_to_close = self.auto_close_interval

    def _close(self):
        """Switch to the closed state and notify the collection."""
        self._state = c.CLOSED_DOOR
        self._collection.notify_change_to_value(self)
|
31
environments/factory/additional/doors/doors_util.py
Normal file
31
environments/factory/additional/doors/doors_util.py
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
|
||||||
|
from typing import NamedTuple
|
||||||
|
|
||||||
|
from environments.helpers import Constants as BaseConstants, EnvActions as BaseActions
|
||||||
|
|
||||||
|
|
||||||
|
class Constants(BaseConstants):
    """Door-related identifiers and observation values on top of the base constants."""

    # --- Identifiers ---
    DOOR = 'Door'        # Identifier of single door entities.
    DOORS = 'Doors'      # Identifier of door objects and sets (collections).
    DOOR_SYMBOL = 'D'    # Door marker used when resolving the string based map files.

    # --- Cell values used in the observation ---
    ACCESS_DOOR_CELL = 1 / 3    # Cell from which a door can be accessed.
    OPEN_DOOR_CELL = 2 / 3      # Cell holding an open door.
    CLOSED_DOOR_CELL = 3 / 3    # Cell holding a closed door.

    # --- State identifiers ---
    CLOSED_DOOR = 'closed'    # Compared against to detect the closed state.
    OPEN_DOOR = 'open'        # Compared against to detect the open state.
    # ACCESS_DOOR = 'access'  # Identifier to compare access positions
|
||||||
|
|
||||||
|
|
||||||
|
class Actions(BaseActions):
    """Base environment actions extended by the door action."""

    # Identifier of the action that operates a door.
    USE_DOOR = 'use_door'
|
||||||
|
|
||||||
|
|
||||||
|
class RewardsDoor(NamedTuple):
    """Reward values attached to the ``use_door`` action."""

    # Reward when the action was valid.
    USE_DOOR_VALID: float = -0.00
    # Reward (a small penalty by default) when the action was not valid.
    USE_DOOR_FAIL: float = -0.01
|
||||||
|
|
||||||
|
|
||||||
|
class DoorProperties(NamedTuple):
    """Configuration options for doors."""

    # Whether the door area should be indicated in the agents' observation.
    indicate_door_area: bool = True
|
196
environments/factory/additional/doors/factory_doors.py
Normal file
196
environments/factory/additional/doors/factory_doors.py
Normal file
@ -0,0 +1,196 @@
|
|||||||
|
import time
|
||||||
|
from typing import List, Union, Dict
|
||||||
|
import random
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from environments.factory.additional.doors.doors_collections import Doors
|
||||||
|
from environments.factory.additional.doors.doors_util import DoorProperties, RewardsDoor, Constants, Actions
|
||||||
|
from environments.factory.base.base_factory import BaseFactory
|
||||||
|
from environments.factory.base.objects import Agent, Action
|
||||||
|
from environments.factory.base.registers import Entities
|
||||||
|
|
||||||
|
from environments import helpers as h
|
||||||
|
|
||||||
|
from environments.factory.base.renderer import RenderEntity
|
||||||
|
from environments.utility_classes import ObservationProperties
|
||||||
|
|
||||||
|
|
||||||
|
def softmax(x, axis=None):
    """Compute the softmax of the scores in ``x``.

    The maximum is subtracted before exponentiation, which leaves the result
    unchanged but keeps ``np.exp`` from overflowing for large scores.

    :param x: Array-like of scores.
    :param axis: Axis along which to normalise. ``None`` (default) normalises
                 over all elements, exactly as the original single-argument form.
    :return: Array of the same shape as ``x`` whose entries sum to 1 along ``axis``.
    """
    scores = np.asarray(x)
    # keepdims is only needed when reducing along one axis, so that the
    # reduced array still broadcasts against ``scores``.
    keepdims = axis is not None
    e_x = np.exp(scores - np.max(scores, axis=axis, keepdims=keepdims))
    return e_x / e_x.sum(axis=axis, keepdims=keepdims)
|
||||||
|
|
||||||
|
|
||||||
|
def entropy(x):
    """Return ``-sum(x * log(x + 1e-8))`` over all elements of the array ``x``.

    The small constant keeps the logarithm finite for zero entries.
    """
    eps = 1e-8
    weighted_logs = x * np.log(x + eps)
    return -weighted_logs.sum()
|
||||||
|
|
||||||
|
|
||||||
|
c = Constants
|
||||||
|
a = Actions
|
||||||
|
|
||||||
|
|
||||||
|
# noinspection PyAttributeOutsideInit, PyAbstractClass
|
||||||
|
class DoorFactory(BaseFactory):
    """Factory environment that adds doors and a ``use_door`` action.

    Doors are created from the ``c.DOOR_SYMBOL`` cells of the parsed level,
    ticked once per step, rendered, and exposed under the ``c.DOORS`` observation key.
    """

    @property
    def actions_hook(self) -> Union[Action, List[Action]]:
        """Return the parent's actions with the ``use_door`` action appended."""
        actions = super().actions_hook
        actions.append(Action(str_ident=a.USE_DOOR))
        return actions

    @property
    def entities_hook(self) -> Dict[str, Entities]:
        """Return the parent's entities plus a ``Doors`` collection when the level contains doors."""
        entities = super().entities_hook

        door_mask = h.one_hot_level(self._parsed_level, c.DOOR_SYMBOL)
        # Zero-pad by the POMDP radius (presumably to match the padded level layout - verify).
        door_mask = np.pad(door_mask, self.obs_prop.pomdp_r, 'constant', constant_values=0)
        if np.any(door_mask):
            door_tiles = [self[c.FLOOR].by_pos(tuple(pos))
                          for pos in np.argwhere(door_mask == c.OCCUPIED_CELL)]
            # NOTE(review): the flag is read from obs_prop, not from self.door_properties;
            # confirm which of the two is intended.
            doors = Doors.from_tiles(door_tiles, self._level_shape,
                                     indicate_area=self.obs_prop.indicate_door_area,
                                     entity_kwargs=dict())
            entities.update({c.DOORS: doors})
        return entities

    def __init__(self, *args,
                 door_properties: DoorProperties = DoorProperties(), rewards_door: RewardsDoor = RewardsDoor(),
                 env_seed=None, **kwargs):
        """Store the door configuration and initialise the parent environment.

        :param door_properties: ``DoorProperties`` instance, or a dict of its fields.
        :param rewards_door: ``RewardsDoor`` instance, or a dict of its fields.
        :param env_seed: Seed for the door RNG; it is also forwarded to the parent.
                         ``None`` (default) draws a fresh ``time.time_ns()`` value
                         for each instance.
        """
        # Resolve the seed per call. A ``time.time_ns()`` default in the signature is
        # evaluated only once, at definition time, and would be shared by all instances.
        if env_seed is None:
            env_seed = time.time_ns()
        if isinstance(door_properties, dict):
            door_properties = DoorProperties(**door_properties)
        if isinstance(rewards_door, dict):
            rewards_door = RewardsDoor(**rewards_door)
        self.door_properties = door_properties
        self.rewards_door = rewards_door
        self._door_rng = np.random.default_rng(env_seed)
        self._doors: Doors
        kwargs.update(env_seed=env_seed)
        # TODO: Reset ---> document this
        super().__init__(*args, **kwargs)

    def render_assets_hook(self, mode='human'):
        """Return the parent's render assets extended by one ``RenderEntity`` per door."""
        additional_assets = super().render_assets_hook()
        door_assets = [
            RenderEntity('door_open' if door.is_open else 'door_closed', door.pos, 1, 'none', 'blank', i + 1)
            for i, door in enumerate(self[c.DOORS])
        ]
        additional_assets.extend(door_assets)
        return additional_assets

    def step_hook(self) -> (List[dict], dict):
        """Run the parent's step hook, then tick all doors."""
        super_reward_info = super().step_hook()
        # Step the door close interval.
        # TODO: Maybe move this to self.post_step_hook? May collide with reward calculation.
        if doors := self[c.DOORS]:
            doors.tick_doors()
        return super_reward_info

    def do_additional_actions(self, agent: Agent, action: Action) -> (dict, dict):
        """Let the parent handle ``action`` first; handle ``use_door`` if the parent returned None."""
        action_result = super().do_additional_actions(agent, action)
        if action_result is not None:
            return action_result
        if action == a.USE_DOOR:
            return self.use_door_action(agent)
        return None

    def use_door_action(self, agent: Agent):
        """Toggle the door next to ``agent`` and build the matching reward.

        :return: Tuple ``(valid, reward)``; ``reward`` is a dict with the keys
                 ``value``, ``reason`` and ``info``.
        """
        # Look for a door that lists the agent's position among its neighbouring floor positions.
        door = self[c.DOORS].get_near_position(agent.pos)
        if door is not None:
            door.use()
            valid = c.VALID
            self.print(f'{agent.name} just used a {door.name} at {door.pos}')
            info_dict = {f'{agent.name}_door_use': 1, 'door_use': 1}
        else:
            # No door in reach.
            valid = c.NOT_VALID
            info_dict = {f'{agent.name}_failed_door_use': 1, 'failed_door_use': 1}
            self.print(f'{agent.name} just tried to use a door at {agent.pos}, but there is none.')

        reward = dict(value=self.rewards_door.USE_DOOR_VALID if valid else self.rewards_door.USE_DOOR_FAIL,
                      reason=a.USE_DOOR, info=info_dict)
        return valid, reward

    def reset_hook(self) -> None:
        """Run the parent's reset hook; there is nothing door-specific to reset."""
        super().reset_hook()

    def check_additional_done(self) -> (bool, dict):
        """Return the parent's done flag and info dict unchanged."""
        super_done, super_dict = super().check_additional_done()
        return super_done, super_dict

    def observations_hook(self) -> Dict[str, np.typing.ArrayLike]:
        """Return the parent's observations plus the doors array under ``c.DOORS``."""
        additional_observations = super().observations_hook()
        additional_observations.update({c.DOORS: self[c.DOORS].as_array()})
        return additional_observations

    def post_step_hook(self) -> List[Dict[str, int]]:
        """Return the parent's post-step result unchanged."""
        return super().post_step_hook()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Manual smoke run: build the environment several times, drive it with
    # random actions and report how long the episodes take.
    from statistics import median

    from environments.utility_classes import AgentRenderOptions as aro

    render = True

    door_props = DoorProperties(indicate_door_area=True)

    obs_props = ObservationProperties(render_agents=aro.COMBINED, omit_agent_self=True,
                                      pomdp_r=2, additional_agent_placeholder=None, cast_shadows=True)

    move_props = {'allow_square_movement': True,
                  'allow_diagonal_movement': False,
                  'allow_no_op': False}

    global_timings = []
    for i in range(10):

        # The door configuration goes in under ``door_properties``, the keyword
        # DoorFactory.__init__ declares (it was passed as ``dirt_prop`` before).
        factory = DoorFactory(n_agents=10, done_at_collision=False,
                              level_name='rooms', max_steps=1000,
                              obs_prop=obs_props, parse_doors=True,
                              verbose=True,
                              mv_prop=move_props, door_properties=door_props,
                              )

        # noinspection DuplicatedCode
        n_actions = factory.action_space.n - 1
        # The spaces are read as in the original script; the values themselves are unused.
        _ = factory.observation_space
        obs_space = factory.observation_space
        obs_space_named = factory.named_observation_space
        action_space_named = factory.named_action_space

        times = []
        for epoch in range(10):
            start_time = time.time()
            # One random action per agent for every possible step of the episode.
            random_actions = [[random.randint(0, n_actions) for _ in range(factory.n_agents)]
                              for _ in range(factory.max_steps + 1)]
            env_state = factory.reset()
            if render:
                factory.render()

            rwrd = 0
            for agent_i_action in random_actions:
                env_state, step_rwrd, done_bool, info_obj = factory.step(agent_i_action)
                rwrd += step_rwrd
                if render:
                    factory.render()
                if done_bool:
                    break
            times.append(time.time() - start_time)
        # Divide by the number of recorded runs rather than a hard-coded count.
        print('Mean Time Taken: ', sum(times) / len(times))
        global_timings.extend(times)
    print('Mean Time Taken: ', sum(global_timings) / len(global_timings))
    # A median needs ordered data; indexing the unsorted list returned an arbitrary run.
    print('Median Time Taken: ', median(global_timings))
|
@ -16,7 +16,7 @@ from environments.helpers import Constants as c
|
|||||||
from environments.helpers import EnvActions as a
|
from environments.helpers import EnvActions as a
|
||||||
from environments.helpers import RewardsBase
|
from environments.helpers import RewardsBase
|
||||||
from environments.factory.base.objects import Agent, Floor, Action
|
from environments.factory.base.objects import Agent, Floor, Action
|
||||||
from environments.factory.base.registers import Actions, Entities, Agents, Doors, Floors, Walls, PlaceHolders, \
|
from environments.factory.base.registers import Actions, Entities, Agents, Floors, Walls, PlaceHolders, \
|
||||||
GlobalPositions
|
GlobalPositions
|
||||||
from environments.utility_classes import MovementProperties, ObservationProperties, MarlFrameStack
|
from environments.utility_classes import MovementProperties, ObservationProperties, MarlFrameStack
|
||||||
from environments.utility_classes import AgentRenderOptions as a_obs
|
from environments.utility_classes import AgentRenderOptions as a_obs
|
||||||
@ -88,8 +88,8 @@ class BaseFactory(gym.Env):
|
|||||||
mv_prop: MovementProperties = MovementProperties(),
|
mv_prop: MovementProperties = MovementProperties(),
|
||||||
obs_prop: ObservationProperties = ObservationProperties(),
|
obs_prop: ObservationProperties = ObservationProperties(),
|
||||||
rewards_base: RewardsBase = RewardsBase(),
|
rewards_base: RewardsBase = RewardsBase(),
|
||||||
parse_doors=False, done_at_collision=False, inject_agents: Union[None, List] = None,
|
done_at_collision=False, inject_agents: Union[None, List] = None,
|
||||||
verbose=False, doors_have_area=True, env_seed=time.time_ns(), individual_rewards=False,
|
verbose=False, env_seed=time.time_ns(), individual_rewards=False,
|
||||||
class_name='', **kwargs):
|
class_name='', **kwargs):
|
||||||
|
|
||||||
if class_name:
|
if class_name:
|
||||||
@ -105,8 +105,6 @@ class BaseFactory(gym.Env):
|
|||||||
assert obs_prop.frames_to_stack != 1 and \
|
assert obs_prop.frames_to_stack != 1 and \
|
||||||
obs_prop.frames_to_stack >= 0, \
|
obs_prop.frames_to_stack >= 0, \
|
||||||
"'frames_to_stack' cannot be negative or 1."
|
"'frames_to_stack' cannot be negative or 1."
|
||||||
assert doors_have_area or not obs_prop.indicate_door_area, \
|
|
||||||
'"indicate_door_area" can only active, when "doors_have_area"'
|
|
||||||
if kwargs:
|
if kwargs:
|
||||||
print(f'Following kwargs were passed, but ignored: {kwargs}')
|
print(f'Following kwargs were passed, but ignored: {kwargs}')
|
||||||
|
|
||||||
@ -133,9 +131,7 @@ class BaseFactory(gym.Env):
|
|||||||
|
|
||||||
self.done_at_collision = done_at_collision
|
self.done_at_collision = done_at_collision
|
||||||
self._record_episodes = False
|
self._record_episodes = False
|
||||||
self.parse_doors = parse_doors
|
|
||||||
self._injected_agents = inject_agents or []
|
self._injected_agents = inject_agents or []
|
||||||
self.doors_have_area = doors_have_area
|
|
||||||
self.individual_rewards = individual_rewards
|
self.individual_rewards = individual_rewards
|
||||||
|
|
||||||
# TODO: Reset ---> document this
|
# TODO: Reset ---> document this
|
||||||
@ -174,20 +170,9 @@ class BaseFactory(gym.Env):
|
|||||||
# NOPOS
|
# NOPOS
|
||||||
self._NO_POS_TILE = Floor(c.NO_POS, None)
|
self._NO_POS_TILE = Floor(c.NO_POS, None)
|
||||||
|
|
||||||
# Doors
|
|
||||||
if self.parse_doors:
|
|
||||||
parsed_doors = h.one_hot_level(self._parsed_level, c.DOOR)
|
|
||||||
parsed_doors = np.pad(parsed_doors, self.obs_prop.pomdp_r, 'constant', constant_values=0)
|
|
||||||
if np.any(parsed_doors):
|
|
||||||
door_tiles = [floor.by_pos(tuple(pos)) for pos in np.argwhere(parsed_doors == c.OCCUPIED_CELL)]
|
|
||||||
doors = Doors.from_tiles(door_tiles, self._level_shape, have_area=self.obs_prop.indicate_door_area,
|
|
||||||
entity_kwargs=dict(context=floor)
|
|
||||||
)
|
|
||||||
self._entities.add_additional_items({c.DOORS: doors})
|
|
||||||
|
|
||||||
# Actions
|
# Actions
|
||||||
# TODO: Move this to Agent init, so that agents can have individual action sets.
|
# TODO: Move this to Agent init, so that agents can have individual action sets.
|
||||||
self._actions = Actions(self.mv_prop, can_use_doors=self.parse_doors)
|
self._actions = Actions(self.mv_prop)
|
||||||
if additional_actions := self.actions_hook:
|
if additional_actions := self.actions_hook:
|
||||||
self._actions.add_additional_items(additional_actions)
|
self._actions.add_additional_items(additional_actions)
|
||||||
|
|
||||||
@ -263,8 +248,6 @@ class BaseFactory(gym.Env):
|
|||||||
elif a.NOOP == action_obj:
|
elif a.NOOP == action_obj:
|
||||||
action_valid = c.VALID
|
action_valid = c.VALID
|
||||||
reward = dict(value=self.rewards_base.NOOP, reason=a.NOOP, info={f'{agent.name}_NOOP': 1, 'NOOP': 1})
|
reward = dict(value=self.rewards_base.NOOP, reason=a.NOOP, info={f'{agent.name}_NOOP': 1, 'NOOP': 1})
|
||||||
elif a.USE_DOOR == action_obj:
|
|
||||||
action_valid, reward = self._handle_door_interaction(agent)
|
|
||||||
else:
|
else:
|
||||||
# noinspection PyTupleAssignmentBalance
|
# noinspection PyTupleAssignmentBalance
|
||||||
action_valid, reward = self.do_additional_actions(agent, action_obj)
|
action_valid, reward = self.do_additional_actions(agent, action_obj)
|
||||||
@ -282,12 +265,9 @@ class BaseFactory(gym.Env):
|
|||||||
for tile in tiles_with_collisions:
|
for tile in tiles_with_collisions:
|
||||||
guests = tile.guests_that_can_collide
|
guests = tile.guests_that_can_collide
|
||||||
for i, guest in enumerate(guests):
|
for i, guest in enumerate(guests):
|
||||||
# This does make a copy, but is faster than.copy()
|
for j, collision in enumerate(guests):
|
||||||
this_collisions = guests[:]
|
if j != i and hasattr(guest, 'step_result'):
|
||||||
del this_collisions[i]
|
guest.step_result['collisions'].append(collision)
|
||||||
assert hasattr(guest, 'step_result')
|
|
||||||
for collision in this_collisions:
|
|
||||||
guest.step_result['collisions'].append(collision)
|
|
||||||
|
|
||||||
done = False
|
done = False
|
||||||
if self.done_at_collision:
|
if self.done_at_collision:
|
||||||
@ -299,11 +279,6 @@ class BaseFactory(gym.Env):
|
|||||||
done = done or additional_done
|
done = done or additional_done
|
||||||
info.update(additional_done_info)
|
info.update(additional_done_info)
|
||||||
|
|
||||||
# Step the door close intervall
|
|
||||||
if self.parse_doors:
|
|
||||||
if doors := self[c.DOORS]:
|
|
||||||
doors.tick_doors()
|
|
||||||
|
|
||||||
# Finalize
|
# Finalize
|
||||||
reward, reward_info = self.build_reward_result(rewards)
|
reward, reward_info = self.build_reward_result(rewards)
|
||||||
|
|
||||||
@ -319,41 +294,14 @@ class BaseFactory(gym.Env):
|
|||||||
info.update(post_step_info)
|
info.update(post_step_info)
|
||||||
|
|
||||||
obs, _ = self._build_observations()
|
obs, _ = self._build_observations()
|
||||||
|
|
||||||
return obs, reward, done, info
|
return obs, reward, done, info
|
||||||
|
|
||||||
def _handle_door_interaction(self, agent) -> (bool, dict):
    """Let ``agent`` toggle a door and build the matching reward entry.

    The door is looked up next to the agent when ``self.doors_have_area`` is set,
    otherwise directly at the agent's position.

    :param agent: The agent that issued the use-door action.
    :return: Tuple of (action validity, reward dict with ``value``, ``reason`` and ``info``).
    :raises RuntimeError: If the environment holds no doors at all.
    """
    doors = self[c.DOORS]
    if not doors:
        raise RuntimeError('This should not happen, since the door action should not be available.')

    # Pick the lookup strategy first, then resolve the door for the agent's position.
    if self.doors_have_area:
        door = doors.get_near_position(agent.pos)
    else:
        door = doors.by_pos(agent.pos)

    if door is None:
        # No door within reach: the action fails.
        valid = c.NOT_VALID
        info_dict = {f'{agent.name}_failed_door_use': 1, 'failed_door_use': 1}
        self.print(f'{agent.name} just tried to use a door at {agent.pos}, but there is none.')
    else:
        door.use()
        valid = c.VALID
        self.print(f'{agent.name} just used a {door.name} at {door.pos}')
        info_dict = {f'{agent.name}_door_use': 1, 'door_use': 1}

    if valid:
        reward_value = self.rewards_base.USE_DOOR_VALID
    else:
        reward_value = self.rewards_base.USE_DOOR_FAIL
    reward = dict(value=reward_value, reason=a.USE_DOOR, info=info_dict)

    return valid, reward
|
|
||||||
|
|
||||||
def _build_observations(self) -> np.typing.ArrayLike:
|
def _build_observations(self) -> np.typing.ArrayLike:
|
||||||
# Observation dict:
|
# Observation dict:
|
||||||
per_agent_expl_idx = dict()
|
per_agent_expl_idx = dict()
|
||||||
per_agent_obsn = dict()
|
per_agent_obsn = dict()
|
||||||
# Generel Observations
|
# Generel Observations
|
||||||
lvl_obs = self[c.WALLS].as_array()
|
lvl_obs = self[c.WALLS].as_array()
|
||||||
door_obs = self[c.DOORS].as_array() if self.parse_doors else None
|
|
||||||
if self.obs_prop.render_agents == a_obs.NOT:
|
if self.obs_prop.render_agents == a_obs.NOT:
|
||||||
global_agent_obs = None
|
global_agent_obs = None
|
||||||
elif self.obs_prop.omit_agent_self and self.n_agents == 1:
|
elif self.obs_prop.omit_agent_self and self.n_agents == 1:
|
||||||
@ -391,8 +339,6 @@ class BaseFactory(gym.Env):
|
|||||||
obs_dict[c.AGENT] = agent_obs[:]
|
obs_dict[c.AGENT] = agent_obs[:]
|
||||||
if self[c.AGENT_PLACEHOLDER] and placeholder_obs is not None:
|
if self[c.AGENT_PLACEHOLDER] and placeholder_obs is not None:
|
||||||
obs_dict[c.AGENT_PLACEHOLDER] = placeholder_obs
|
obs_dict[c.AGENT_PLACEHOLDER] = placeholder_obs
|
||||||
if self.parse_doors and door_obs is not None:
|
|
||||||
obs_dict[c.DOORS] = door_obs[:]
|
|
||||||
obs_dict.update(add_obs_dict)
|
obs_dict.update(add_obs_dict)
|
||||||
obsn = np.vstack(list(obs_dict.values()))
|
obsn = np.vstack(list(obs_dict.values()))
|
||||||
if self.obs_prop.pomdp_r:
|
if self.obs_prop.pomdp_r:
|
||||||
@ -430,33 +376,11 @@ class BaseFactory(gym.Env):
|
|||||||
raise e
|
raise e
|
||||||
|
|
||||||
obs_block_light = obsn[light_block_obs] != c.OCCUPIED_CELL
|
obs_block_light = obsn[light_block_obs] != c.OCCUPIED_CELL
|
||||||
door_shadowing = False
|
|
||||||
if self.parse_doors:
|
|
||||||
if doors := self[c.DOORS]:
|
|
||||||
if door := doors.by_pos(agent.pos):
|
|
||||||
if door.is_closed:
|
|
||||||
for group in door.connectivity_subgroups:
|
|
||||||
if agent.last_pos not in group:
|
|
||||||
door_shadowing = True
|
|
||||||
if self._pomdp_r:
|
|
||||||
blocking = [
|
|
||||||
tuple(np.subtract(x, agent.pos) + (self._pomdp_r, self._pomdp_r))
|
|
||||||
for x in group]
|
|
||||||
xs, ys = zip(*blocking)
|
|
||||||
else:
|
|
||||||
xs, ys = zip(*group)
|
|
||||||
|
|
||||||
# noinspection PyUnresolvedReferences
|
|
||||||
obs_block_light[:, xs, ys] = False
|
|
||||||
|
|
||||||
light_block_map = Map((np.prod(obs_block_light, axis=0) != True).astype(int).squeeze())
|
light_block_map = Map((np.prod(obs_block_light, axis=0) != True).astype(int).squeeze())
|
||||||
if self._pomdp_r:
|
if self._pomdp_r:
|
||||||
light_block_map = light_block_map.do_fov(self._pomdp_r, self._pomdp_r, max(self._level_shape))
|
light_block_map = light_block_map.do_fov(self._pomdp_r, self._pomdp_r, max(self._level_shape))
|
||||||
else:
|
else:
|
||||||
light_block_map = light_block_map.do_fov(*agent.pos, max(self._level_shape))
|
light_block_map = light_block_map.do_fov(*agent.pos, max(self._level_shape))
|
||||||
if door_shadowing:
|
|
||||||
# noinspection PyUnboundLocalVariable
|
|
||||||
light_block_map[xs, ys] = 0
|
|
||||||
|
|
||||||
agent.step_result['lightmap'] = light_block_map
|
agent.step_result['lightmap'] = light_block_map
|
||||||
|
|
||||||
@ -550,35 +474,13 @@ class BaseFactory(gym.Env):
|
|||||||
y_new = agent.y + y_diff
|
y_new = agent.y + y_diff
|
||||||
|
|
||||||
new_tile = self[c.FLOOR].by_pos((x_new, y_new))
|
new_tile = self[c.FLOOR].by_pos((x_new, y_new))
|
||||||
if new_tile:
|
if new_tile and not np.any([x.is_blocking for x in new_tile.guests]):
|
||||||
valid = c.VALID
|
valid = c.VALID
|
||||||
else:
|
else:
|
||||||
tile = agent.tile
|
tile = agent.tile
|
||||||
valid = c.VALID
|
valid = c.VALID
|
||||||
return tile, valid
|
return tile, valid
|
||||||
|
|
||||||
if self.parse_doors and agent.last_pos != c.NO_POS:
|
|
||||||
if doors := self[c.DOORS]:
|
|
||||||
if self.doors_have_area:
|
|
||||||
if door := doors.by_pos(new_tile.pos):
|
|
||||||
if door.is_closed:
|
|
||||||
return agent.tile, c.NOT_VALID
|
|
||||||
else: # door.is_closed:
|
|
||||||
pass
|
|
||||||
|
|
||||||
if door := doors.by_pos(agent.pos):
|
|
||||||
if door.is_open:
|
|
||||||
pass
|
|
||||||
else: # door.is_closed:
|
|
||||||
if door.is_linked(agent.last_pos, new_tile.pos):
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
return agent.tile, c.NOT_VALID
|
|
||||||
else:
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
pass
|
|
||||||
|
|
||||||
return new_tile, valid
|
return new_tile, valid
|
||||||
|
|
||||||
def build_reward_result(self, global_env_rewards: list) -> (int, dict):
|
def build_reward_result(self, global_env_rewards: list) -> (int, dict):
|
||||||
@ -649,14 +551,10 @@ class BaseFactory(gym.Env):
|
|||||||
for i, agent in enumerate(self[c.AGENT]):
|
for i, agent in enumerate(self[c.AGENT]):
|
||||||
name, state = h.asset_str(agent)
|
name, state = h.asset_str(agent)
|
||||||
agents.append(RenderEntity(name, agent.pos, 1, 'none', state, i + 1, agent.step_result['lightmap']))
|
agents.append(RenderEntity(name, agent.pos, 1, 'none', state, i + 1, agent.step_result['lightmap']))
|
||||||
doors = []
|
|
||||||
if self.parse_doors:
|
|
||||||
for i, door in enumerate(self[c.DOORS]):
|
|
||||||
name, state = 'door_open' if door.is_open else 'door_closed', 'blank'
|
|
||||||
doors.append(RenderEntity(name, door.pos, 1, 'none', state, i + 1))
|
|
||||||
additional_assets = self.render_assets_hook()
|
additional_assets = self.render_assets_hook()
|
||||||
|
|
||||||
return self._renderer.render(walls + doors + additional_assets + agents)
|
return self._renderer.render(walls + additional_assets + agents)
|
||||||
|
|
||||||
def save_params(self, filepath: Path):
|
def save_params(self, filepath: Path):
|
||||||
# noinspection PyProtectedMember
|
# noinspection PyProtectedMember
|
||||||
|
@ -1,12 +1,10 @@
|
|||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from typing import Union
|
from typing import Union, List
|
||||||
|
|
||||||
import networkx as nx
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from environments import helpers as h
|
from environments import helpers as h
|
||||||
from environments.helpers import Constants as c
|
from environments.helpers import Constants as c
|
||||||
import itertools
|
|
||||||
|
|
||||||
##########################################################################
|
##########################################################################
|
||||||
# ##################### Base Object Building Blocks ######################### #
|
# ##################### Base Object Building Blocks ######################### #
|
||||||
@ -88,6 +86,10 @@ class EnvObject(Object):
|
|||||||
class Entity(EnvObject):
|
class Entity(EnvObject):
|
||||||
"""Full Env Entity that lives on the env Grid. Doors, Items, DirtPile etc..."""
|
"""Full Env Entity that lives on the env Grid. Doors, Items, DirtPile etc..."""
|
||||||
|
|
||||||
|
@property
def is_blocking(self):
    """Whether this entity blocks movement onto its tile; plain entities never do."""
    return False
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def can_collide(self):
|
def can_collide(self):
|
||||||
return False
|
return False
|
||||||
@ -226,6 +228,21 @@ class GlobalPosition(BoundingMixin, EnvObject):
|
|||||||
|
|
||||||
class Floor(EnvObject):
|
class Floor(EnvObject):
|
||||||
|
|
||||||
|
@property
def neighboring_floor_pos(self):
    """Return the ``pos`` of every tile in ``self.neighboring_floor``, in the same order."""
    return [tile.pos for tile in self.neighboring_floor]
|
||||||
|
|
||||||
|
@property
def neighboring_floor(self):
    """Return the tiles surrounding this one, looked up through the owning collection.

    The result is stored in ``self._neighboring_floor`` and reused on later accesses
    as long as it is non-empty; an empty result is recomputed on every access.
    """
    if not self._neighboring_floor:
        found = []
        # Walk every offset of the position mask except the centre (0, 0).
        for offset in h.POS_MASK.reshape(-1, 2):
            if np.all(offset == [0, 0]):
                continue
            candidate = self._collection.by_pos(np.add(self.pos, offset))
            # Keep only lookups that yielded a truthy tile.
            if candidate:
                found.append(candidate)
        self._neighboring_floor = found
    return self._neighboring_floor
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def encoding(self):
|
def encoding(self):
|
||||||
return c.FREE_CELL
|
return c.FREE_CELL
|
||||||
@ -254,6 +271,7 @@ class Floor(EnvObject):
|
|||||||
super(Floor, self).__init__(*args, **kwargs)
|
super(Floor, self).__init__(*args, **kwargs)
|
||||||
self._guests = dict()
|
self._guests = dict()
|
||||||
self._pos = tuple(pos)
|
self._pos = tuple(pos)
|
||||||
|
self._neighboring_floor: List[Floor] = list()
|
||||||
|
|
||||||
def __len__(self):
|
def __len__(self):
|
||||||
return len(self._guests)
|
return len(self._guests)
|
||||||
@ -298,94 +316,6 @@ class Wall(Floor):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class Door(Entity):
    """Door entity that can be opened and closed and closes itself after a while.

    Each door keeps a small connectivity graph of its neighbouring tile positions.
    While the door is open, its own position is linked to every neighbour group;
    closing removes the door's node from the graph again.
    """

    def __init__(self, *args, context, closed_on_init=True, auto_close_interval=10, has_area=False, **kwargs):
        """Create the door and build the connectivity graph of its surroundings.

        :param context: Collection offering ``by_pos``; used to resolve the neighbouring tiles.
        :param closed_on_init: When False, the door is opened right after construction.
        :param auto_close_interval: Value ``time_to_close`` is reset to whenever the door opens.
        :param has_area: Stored flag; only doors with an area can collide (see ``can_collide``).
        """
        super(Door, self).__init__(*args, **kwargs)
        self._state = c.CLOSED_DOOR
        self.has_area = has_area
        self.auto_close_interval = auto_close_interval
        self.time_to_close = -1

        # All eight offsets around the door; the trailing (0, 0) of the product is dropped.
        offsets = list(itertools.product([-1, 1, 0], repeat=2))[:-1]
        surrounding_tiles = []
        for diff in offsets:
            target = tuple([sum(pair) for pair in zip(self.pos, diff)])
            surrounding_tiles.append(context.by_pos(target))
        reachable_pos = [tile.pos for tile in surrounding_tiles if tile]

        self.connectivity = h.points_to_graph(reachable_pos)
        self.connectivity_subgroups = list(nx.algorithms.components.connected_components(self.connectivity))
        # Tie every position to an integer node that stands for its connected group.
        for group_idx, members in enumerate(self.connectivity_subgroups):
            for member_pos in members:
                self.connectivity.add_edge(member_pos, group_idx)

        if not closed_on_init:
            self._open()

    @property
    def can_collide(self):
        """True only for a door that has an area and is currently not open."""
        return bool(self.has_area) and not self.is_open

    @property
    def encoding(self):
        """Observation value of the door, depending on whether it is closed or open."""
        # Carried over from the original note: the shadow check relies on the occupation value.
        if self.is_closed:
            return c.CLOSED_DOOR_CELL
        return c.OPEN_DOOR_CELL

    @property
    def str_state(self):
        """Return 'open' or 'closed'."""
        if self.is_open:
            return 'open'
        return 'closed'

    @property
    def access_area(self):
        """Graph nodes that are neither group index nodes nor the door's own position."""
        group_ids = range(len(self.connectivity_subgroups))
        area = []
        for node in self.connectivity.nodes:
            if node not in group_ids and node != self.pos:
                area.append(node)
        return area

    @property
    def is_closed(self):
        """True while the internal state equals ``c.CLOSED_DOOR``."""
        return self._state == c.CLOSED_DOOR

    @property
    def is_open(self):
        """True while the internal state equals ``c.OPEN_DOOR``."""
        return self._state == c.OPEN_DOOR

    @property
    def status(self):
        """The raw internal state value."""
        return self._state

    def summarize_state(self):
        """Extend the parent summary with the door state and the remaining close timer."""
        summary = super().summarize_state()
        summary.update(state=str(self.str_state), time_to_close=int(self.time_to_close))
        return summary

    def use(self):
        """Toggle the door: close it when open, open it otherwise."""
        if self._state == c.OPEN_DOOR:
            self._close()
        else:
            self._open()

    def tick(self):
        """Advance the auto-close timer of an open door whose tile has length one."""
        if not (self.is_open and len(self.tile) == 1):
            return
        if self.time_to_close:
            self.time_to_close -= 1
        else:
            # Timer ran out: toggling an open door closes it.
            self.use()

    def _open(self):
        """Link the door to all neighbour groups, mark it open and restart the timer."""
        group_count = len(self.connectivity_subgroups)
        links = [(self.pos, group_idx) for group_idx in range(group_count)]
        self.connectivity.add_edges_from(links)
        self._state = c.OPEN_DOOR
        self._collection.notify_change_to_value(self)
        self.time_to_close = self.auto_close_interval

    def _close(self):
        """Remove the door node from the graph and mark the door closed."""
        self.connectivity.remove_node(self.pos)
        self._state = c.CLOSED_DOOR
        self._collection.notify_change_to_value(self)

    def is_linked(self, old_pos, new_pos):
        """Return True if the connectivity graph holds a path from ``old_pos`` to ``new_pos``."""
        try:
            nx.shortest_path(self.connectivity, old_pos, new_pos)
        except nx.exception.NetworkXNoPath:
            return False
        return True
|
|
||||||
|
|
||||||
|
|
||||||
class Agent(MoveableEntity):
|
class Agent(MoveableEntity):
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
@ -6,7 +6,7 @@ from typing import List, Union, Dict, Tuple
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import six
|
import six
|
||||||
|
|
||||||
from environments.factory.base.objects import Entity, Floor, Agent, Door, Action, Wall, PlaceHolder, GlobalPosition, \
|
from environments.factory.base.objects import Entity, Floor, Agent, Action, Wall, PlaceHolder, GlobalPosition, \
|
||||||
Object, EnvObject
|
Object, EnvObject
|
||||||
from environments.utility_classes import MovementProperties
|
from environments.utility_classes import MovementProperties
|
||||||
from environments import helpers as h
|
from environments import helpers as h
|
||||||
@ -452,38 +452,6 @@ class Agents(MovingEntityObjectCollection):
|
|||||||
self._collection[agent.name] = agent
|
self._collection[agent.name] = agent
|
||||||
|
|
||||||
|
|
||||||
class Doors(EntityCollection):
    """Collection of ``Door`` entities; registered as light-blocking and collidable."""

    _accepted_objects = Door

    def __init__(self, *args, have_area: bool = False, **kwargs):
        """Store the area flag and initialise the underlying collection.

        :param have_area: When True, door access areas are marked once in ``as_array``.
        """
        self.have_area = have_area
        self._area_marked = False
        super().__init__(*args, is_blocking_light=True, can_collide=True, **kwargs)

    def get_near_position(self, position: (int, int)) -> Union[None, Door]:
        """Return the first door whose access area contains ``position``, else None."""
        for door in self:
            if position in door.access_area:
                return door
        return None

    def tick_doors(self):
        """Call ``tick`` on every door of the collection."""
        for door in self:
            door.tick()

    def as_array(self):
        """Return the array of the parent collection, marking access areas once beforehand."""
        if self.have_area and not self._area_marked:
            for door in self:
                for pos in door.access_area:
                    # Nothing is queued when the collection uses individual slices.
                    if not self._individual_slices:
                        self._lazy_eval_transforms.append(((0, *pos), c.ACCESS_DOOR_CELL))
            self._area_marked = True
        return super().as_array()
|
|
||||||
|
|
||||||
|
|
||||||
class Actions(ObjectCollection):
|
class Actions(ObjectCollection):
|
||||||
_accepted_objects = Action
|
_accepted_objects = Action
|
||||||
|
|
||||||
@ -492,11 +460,10 @@ class Actions(ObjectCollection):
|
|||||||
return self._movement_actions
|
return self._movement_actions
|
||||||
|
|
||||||
# noinspection PyTypeChecker
|
# noinspection PyTypeChecker
|
||||||
def __init__(self, movement_properties: MovementProperties, can_use_doors=False):
|
def __init__(self, movement_properties: MovementProperties):
|
||||||
self.allow_no_op = movement_properties.allow_no_op
|
self.allow_no_op = movement_properties.allow_no_op
|
||||||
self.allow_diagonal_movement = movement_properties.allow_diagonal_movement
|
self.allow_diagonal_movement = movement_properties.allow_diagonal_movement
|
||||||
self.allow_square_movement = movement_properties.allow_square_movement
|
self.allow_square_movement = movement_properties.allow_square_movement
|
||||||
self.can_use_doors = can_use_doors
|
|
||||||
super(Actions, self).__init__()
|
super(Actions, self).__init__()
|
||||||
|
|
||||||
# Move this to Baseclass, Env init?
|
# Move this to Baseclass, Env init?
|
||||||
@ -507,8 +474,6 @@ class Actions(ObjectCollection):
|
|||||||
self.add_additional_items([self._accepted_objects(str_ident=direction)
|
self.add_additional_items([self._accepted_objects(str_ident=direction)
|
||||||
for direction in h.EnvActions.diagonal_move()])
|
for direction in h.EnvActions.diagonal_move()])
|
||||||
self._movement_actions = self._collection.copy()
|
self._movement_actions = self._collection.copy()
|
||||||
if self.can_use_doors:
|
|
||||||
self.add_additional_items([self._accepted_objects(str_ident=h.EnvActions.USE_DOOR)])
|
|
||||||
if self.allow_no_op:
|
if self.allow_no_op:
|
||||||
self.add_additional_items([self._accepted_objects(str_ident=h.EnvActions.NOOP)])
|
self.add_additional_items([self._accepted_objects(str_ident=h.EnvActions.NOOP)])
|
||||||
|
|
||||||
|
@ -33,6 +33,10 @@ IGNORED_DF_COLUMNS = ['Episode', 'Run', # For plotting, which values ar
|
|||||||
'train_step', 'step', 'index', 'dirt_amount', 'dirty_tile_count', 'terminal_observation',
|
'train_step', 'step', 'index', 'dirt_amount', 'dirty_tile_count', 'terminal_observation',
|
||||||
'episode']
|
'episode']
|
||||||
|
|
||||||
|
POS_MASK = np.asarray([[[-1, -1], [0, -1], [1, -1]],
|
||||||
|
[[-1, 0], [0, 0], [1, 0]],
|
||||||
|
[[-1, 1], [0, 1], [1, 1]]])
|
||||||
|
|
||||||
|
|
||||||
class Constants:
|
class Constants:
|
||||||
|
|
||||||
@ -42,12 +46,10 @@ class Constants:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
WALL = '#' # Wall tile identifier for resolving the string based map files.
|
WALL = '#' # Wall tile identifier for resolving the string based map files.
|
||||||
DOOR = 'D' # Door identifier for resolving the string based map files.
|
|
||||||
DANGER_ZONE = 'x' # Dange Zone tile identifier for resolving the string based map files.
|
DANGER_ZONE = 'x' # Dange Zone tile identifier for resolving the string based map files.
|
||||||
|
|
||||||
WALLS = 'Walls' # Identifier of Wall-objects and sets (collections).
|
WALLS = 'Walls' # Identifier of Wall-objects and sets (collections).
|
||||||
FLOOR = 'Floor' # Identifier of Floor-objects and sets (collections).
|
FLOOR = 'Floor' # Identifier of Floor-objects and sets (collections).
|
||||||
DOORS = 'Doors' # Identifier of Door-objects and sets (collections).
|
|
||||||
LEVEL = 'Level' # Identifier of Level-objects and sets (collections).
|
LEVEL = 'Level' # Identifier of Level-objects and sets (collections).
|
||||||
AGENT = 'Agent' # Identifier of Agent-objects and sets (collections).
|
AGENT = 'Agent' # Identifier of Agent-objects and sets (collections).
|
||||||
AGENT_PLACEHOLDER = 'AGENT_PLACEHOLDER' # Identifier of Placeholder-objects and sets (collections).
|
AGENT_PLACEHOLDER = 'AGENT_PLACEHOLDER' # Identifier of Placeholder-objects and sets (collections).
|
||||||
@ -56,16 +58,9 @@ class Constants:
|
|||||||
FREE_CELL = 0 # Free-Cell value used in observation
|
FREE_CELL = 0 # Free-Cell value used in observation
|
||||||
OCCUPIED_CELL = 1 # Occupied-Cell value used in observation
|
OCCUPIED_CELL = 1 # Occupied-Cell value used in observation
|
||||||
SHADOWED_CELL = -1 # Shadowed-Cell value used in observation
|
SHADOWED_CELL = -1 # Shadowed-Cell value used in observation
|
||||||
ACCESS_DOOR_CELL = 1/3 # Access-door-Cell value used in observation
|
|
||||||
OPEN_DOOR_CELL = 2/3 # Open-door-Cell value used in observation
|
|
||||||
CLOSED_DOOR_CELL = 3/3 # Closed-door-Cell value used in observation
|
|
||||||
|
|
||||||
NO_POS = (-9999, -9999) # Invalid Position value used in the environment (something is off-grid)
|
NO_POS = (-9999, -9999) # Invalid Position value used in the environment (something is off-grid)
|
||||||
|
|
||||||
CLOSED_DOOR = 'closed' # Identifier to compare door-is-closed state
|
|
||||||
OPEN_DOOR = 'open' # Identifier to compare door-is-open state
|
|
||||||
# ACCESS_DOOR = 'access' # Identifier to compare access positions
|
|
||||||
|
|
||||||
ACTION = 'action' # Identifier of Action-objects and sets (collections).
|
ACTION = 'action' # Identifier of Action-objects and sets (collections).
|
||||||
COLLISION = 'collision' # Identifier to use in the context of collitions.
|
COLLISION = 'collision' # Identifier to use in the context of collitions.
|
||||||
VALID = True # Identifier to rename boolean values in the context of actions.
|
VALID = True # Identifier to rename boolean values in the context of actions.
|
||||||
@ -90,7 +85,6 @@ class EnvActions:
|
|||||||
# Other
|
# Other
|
||||||
# MOVE = 'move'
|
# MOVE = 'move'
|
||||||
NOOP = 'no_op'
|
NOOP = 'no_op'
|
||||||
USE_DOOR = 'use_door'
|
|
||||||
|
|
||||||
_ACTIONMAP = defaultdict(lambda: (0, 0),
|
_ACTIONMAP = defaultdict(lambda: (0, 0),
|
||||||
{NORTH: (-1, 0), NORTHEAST: (-1, 1),
|
{NORTH: (-1, 0), NORTHEAST: (-1, 1),
|
||||||
@ -100,6 +94,8 @@ class EnvActions:
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def is_move(cls, action):
|
def is_move(cls, action):
|
||||||
"""
|
"""
|
||||||
@ -166,8 +162,6 @@ class RewardsBase(NamedTuple):
|
|||||||
MOVEMENTS_VALID: float = -0.001
|
MOVEMENTS_VALID: float = -0.001
|
||||||
MOVEMENTS_FAIL: float = -0.05
|
MOVEMENTS_FAIL: float = -0.05
|
||||||
NOOP: float = -0.01
|
NOOP: float = -0.01
|
||||||
USE_DOOR_VALID: float = -0.00
|
|
||||||
USE_DOOR_FAIL: float = -0.01
|
|
||||||
COLLISION: float = -0.5
|
COLLISION: float = -0.5
|
||||||
|
|
||||||
|
|
||||||
|
@ -68,7 +68,7 @@ if __name__ == '__main__':
|
|||||||
omit_agent_self=True, # This is default
|
omit_agent_self=True, # This is default
|
||||||
additional_agent_placeholder=None, # We will not take care of future agents
|
additional_agent_placeholder=None, # We will not take care of future agents
|
||||||
frames_to_stack=3, # To give the agent a notion of time
|
frames_to_stack=3, # To give the agent a notion of time
|
||||||
pomdp_r=2 # the agents view-radius
|
pomdp_r=2 # the agents' view-radius
|
||||||
)
|
)
|
||||||
# 'MovementProperties' are for specifying how the agent is allowed to move in the env.
|
# 'MovementProperties' are for specifying how the agent is allowed to move in the env.
|
||||||
move_props = MovementProperties(allow_diagonal_movement=True, # Euclidean style (vertices)
|
move_props = MovementProperties(allow_diagonal_movement=True, # Euclidean style (vertices)
|
||||||
@ -135,7 +135,7 @@ if __name__ == '__main__':
|
|||||||
env_recorder_callback = EnvRecorder(env_factory, freq=int(train_steps / 400 / 10))
|
env_recorder_callback = EnvRecorder(env_factory, freq=int(train_steps / 400 / 10))
|
||||||
|
|
||||||
# Model Init
|
# Model Init
|
||||||
model = model_class("MlpPolicy", env_factory,verbose=1, seed=seed, device='cpu')
|
model = model_class("MlpPolicy", env_factory, verbose=1, seed=seed, device='cpu')
|
||||||
|
|
||||||
# Model train
|
# Model train
|
||||||
model.learn(total_timesteps=int(train_steps), callback=[env_monitor_callback, env_recorder_callback])
|
model.learn(total_timesteps=int(train_steps), callback=[env_monitor_callback, env_recorder_callback])
|
||||||
@ -166,7 +166,7 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
# retrieve model class
|
# retrieve model class
|
||||||
model_cls = next(val for key, val in h.MODEL_MAP.items() if key in policy_path.parent.name)
|
model_cls = next(val for key, val in h.MODEL_MAP.items() if key in policy_path.parent.name)
|
||||||
# Load the agent agent
|
# Load the agent
|
||||||
model = model_cls.load(policy_path / 'model.zip', device='cpu')
|
model = model_cls.load(policy_path / 'model.zip', device='cpu')
|
||||||
# Load old env kwargs
|
# Load old env kwargs
|
||||||
with next(policy_path.glob(env_params_json)).open('r') as f:
|
with next(policy_path.glob(env_params_json)).open('r') as f:
|
||||||
|
Reference in New Issue
Block a user