Doors are now separate

This commit is contained in:
Steffen Illium
2023-05-08 10:26:05 +02:00
parent 6c2df735d4
commit a08ae73656
17 changed files with 725 additions and 262 deletions

View File

@ -0,0 +1,38 @@
from typing import Union
from environments.factory.additional.doors.doors_entities import Door
from environments.factory.base.registers import EntityCollection
from environments.factory.additional.doors.doors_util import Constants as c
class Doors(EntityCollection):
    """Collection of ``Door`` entities.

    The parent collection is always initialised with ``is_blocking_light=True``
    and ``can_collide=True``. Optionally, the floor tiles next to each door are
    marked with ``c.ACCESS_DOOR_CELL`` the first time the array is requested.
    """

    # Only ``Door`` objects are accepted by this collection.
    _accepted_objects = Door

    def __init__(self, *args, indicate_area=False, **kwargs):
        """Set up the collection.

        :param indicate_area: If true, ``as_array`` queues access-cell markers
            for the tiles neighbouring every door.
        """
        # Both attributes are assigned before the parent constructor runs.
        self.indicate_area = indicate_area
        self._area_marked = False
        super().__init__(*args, is_blocking_light=True, can_collide=True, **kwargs)

    def get_near_position(self, position: (int, int)) -> Union[None, Door]:
        """Return the first door that lists ``position`` among its tile's
        neighbouring floor positions, or ``None`` when there is no such door."""
        matching_doors = (door for door in self if position in door.tile.neighboring_floor_pos)
        return next(matching_doors, None)

    def tick_doors(self):
        """Call ``tick`` on every door of the collection."""
        for current_door in self:
            current_door.tick()

    def as_array(self):
        """Return the parent's array representation.

        On the first call with ``indicate_area`` enabled, one transform per
        neighbouring floor tile is queued in ``_lazy_eval_transforms``
        (skipped when ``_individual_slices`` is set).
        """
        marking_pending = not self._area_marked and self.indicate_area
        if marking_pending:
            for current_door in self:
                for neighbour in current_door.tile.neighboring_floor:
                    if not self._individual_slices:
                        # Slice index 0 followed by the tile coordinates.
                        target = (0, *neighbour.pos)
                        self._lazy_eval_transforms.append((target, c.ACCESS_DOOR_CELL))
            # Remember that the markers were queued so this happens only once.
            self._area_marked = True
        return super().as_array()

View File

@ -0,0 +1,71 @@
from environments.factory.base.objects import Entity
from environments.factory.additional.doors.doors_util import Constants as c
class Template(Entity):
    """Template for a new entity type, modelled on a door with an open/closed state."""

    def __init__(self, *args, closed_on_init=True, auto_close_interval=10, indicate_area=False, **kwargs):
        """Create the entity in the closed state and optionally open it.

        :param closed_on_init: When false, ``_open`` is called at the end of construction.
        :param auto_close_interval: Value ``time_to_close`` is reset to whenever the entity opens.
        :param indicate_area: Stored on the instance as ``indicate_area``.
        """
        super().__init__(*args, **kwargs)
        self._state = c.CLOSED_DOOR
        self.indicate_area = indicate_area
        self.auto_close_interval = auto_close_interval
        self.time_to_close = -1
        if not closed_on_init:
            self._open()

    # --- How to define / override properties ---

    @property
    def is_blocking(self):
        """This template never blocks."""
        return False

    @property
    def can_collide(self):
        """Collidable unless ``template_attr`` is truthy."""
        # NOTE(review): ``template_attr`` is not assigned anywhere in this class;
        # presumably it is a placeholder to be replaced - confirm before use.
        return not self.template_attr

    @property
    def encoding(self):
        """Observation value: ``c.CLOSED_DOOR_CELL`` when closed, else ``c.OPEN_DOOR_CELL``."""
        # Per the original author's note, shadowing is checked via this occupation value.
        if self.is_closed:
            return c.CLOSED_DOOR_CELL
        return c.OPEN_DOOR_CELL

    @property
    def str_state(self):
        """Human readable state, either ``'open'`` or ``'closed'``."""
        if self.is_open:
            return 'open'
        return 'closed'

    @property
    def is_closed(self):
        """True while the internal state equals ``c.CLOSED_DOOR``."""
        return self._state == c.CLOSED_DOOR

    @property
    def is_open(self):
        """True while the internal state equals ``c.OPEN_DOOR``."""
        return self._state == c.OPEN_DOOR

    @property
    def status(self):
        """The raw internal state value."""
        return self._state

    def summarize_state(self):
        """Extend the parent's summary with ``state`` and ``time_to_close``."""
        summary = super().summarize_state()
        summary.update(state=str(self.str_state), time_to_close=int(self.time_to_close))
        return summary

    def use(self):
        """Toggle the state: close when open, open otherwise."""
        toggle = self._close if self._state == c.OPEN_DOOR else self._open
        toggle()

    def tick(self):
        """Advance the auto-close countdown by one step.

        Nothing happens unless the entity is open and ``len(self.tile) == 1``.
        While the countdown is non-zero it is decremented; once it reaches
        zero the entity is toggled via ``use``.
        """
        if not self.is_open or len(self.tile) != 1:
            return
        if self.time_to_close:
            self.time_to_close -= 1
        else:
            self.use()

    def _open(self):
        """Switch to the open state, notify the collection and restart the countdown."""
        self._state = c.OPEN_DOOR
        self._collection.notify_change_to_value(self)
        self.time_to_close = self.auto_close_interval

    def _close(self):
        """Switch to the closed state and notify the collection."""
        self._state = c.CLOSED_DOOR
        self._collection.notify_change_to_value(self)

View File

@ -0,0 +1,31 @@
from typing import NamedTuple
from environments.helpers import Constants as BaseConstants, EnvActions as BaseActions
class Constants(BaseConstants):
    """Door related identifiers and observation values on top of the base constants."""

    # --- Identifiers ---
    DOOR = 'Door'                 # Identifier of single door entities.
    DOORS = 'Doors'               # Identifier of door objects and sets (collections).
    DOOR_SYMBOL = 'D'             # Door identifier for resolving the string based map files.

    # --- Cell values used in the observation ---
    ACCESS_DOOR_CELL = 1 / 3      # Access-door cell value.
    OPEN_DOOR_CELL = 2 / 3        # Open-door cell value.
    CLOSED_DOOR_CELL = 3 / 3      # Closed-door cell value.

    # --- State identifiers ---
    CLOSED_DOOR = 'closed'        # Identifier to compare the door-is-closed state.
    OPEN_DOOR = 'open'            # Identifier to compare the door-is-open state.
    # ACCESS_DOOR = 'access'      # Identifier to compare access positions (currently disabled).
class Actions(BaseActions):
    """Door related action identifiers on top of the base actions."""

    # Identifier of the action with which an agent operates a door.
    USE_DOOR = 'use_door'
class RewardsDoor(NamedTuple):
    """Reward values handed out for door interactions."""

    # Reward for a successful door use.
    USE_DOOR_VALID: float = -0.00
    # Reward for a failed door use.
    USE_DOOR_FAIL: float = -0.01
class DoorProperties(NamedTuple):
    """Configuration options of the door extension."""

    # Whether the door area should be indicated in the agents' observation.
    indicate_door_area: bool = True

View File

@ -0,0 +1,196 @@
import time
from typing import List, Union, Dict
import random
import numpy as np
from environments.factory.additional.doors.doors_collections import Doors
from environments.factory.additional.doors.doors_util import DoorProperties, RewardsDoor, Constants, Actions
from environments.factory.base.base_factory import BaseFactory
from environments.factory.base.objects import Agent, Action
from environments.factory.base.registers import Entities
from environments import helpers as h
from environments.factory.base.renderer import RenderEntity
from environments.utility_classes import ObservationProperties
def softmax(x, axis=None):
    """Return the numerically stable softmax of ``x``.

    :param x: Array-like of scores.
    :param axis: Axis along which each set of scores is normalised. The
        default ``None`` normalises over all elements at once, which is the
        behaviour of the original single-argument function.
    :return: Array of the same shape as ``x`` whose entries sum to one along
        ``axis`` (or in total when ``axis`` is ``None``).
    """
    scores = np.asarray(x)
    # Subtracting the maximum keeps ``np.exp`` from overflowing for large scores.
    shifted = scores - np.max(scores, axis=axis, keepdims=True)
    exp_scores = np.exp(shifted)
    return exp_scores / np.sum(exp_scores, axis=axis, keepdims=True)
def entropy(x, eps=1e-8):
    """Return the entropy ``-sum(x * log(x + eps))`` of ``x``.

    :param x: Array-like of probabilities; plain lists are accepted as well.
    :param eps: Small constant added inside the logarithm so that zero
        entries do not produce ``log(0)``.
    :return: The summed entropy over all elements.
    """
    probs = np.asarray(x)
    return -(probs * np.log(probs + eps)).sum()
c = Constants
a = Actions
# noinspection PyAttributeOutsideInit, PyAbstractClass
class DoorFactory(BaseFactory):
    """Factory extension that adds doors, a ``use_door`` action and door rewards.

    Doors are parsed from the level via ``c.DOOR_SYMBOL``, registered under
    ``c.DOORS`` and ticked from ``step_hook``.
    """

    @property
    def actions_hook(self) -> Union[Action, List[Action]]:
        """Return the parent's actions extended by the ``use_door`` action."""
        super_actions = super().actions_hook
        super_actions.append(Action(str_ident=a.USE_DOOR))
        return super_actions

    @property
    def entities_hook(self) -> Dict[(str, Entities)]:
        """Return the parent's entities, plus a ``Doors`` collection if the level contains doors."""
        super_entities = super().entities_hook

        parsed_doors = h.one_hot_level(self._parsed_level, c.DOOR_SYMBOL)
        # Pad by the POMDP radius, as configured in the observation properties.
        parsed_doors = np.pad(parsed_doors, self.obs_prop.pomdp_r, 'constant', constant_values=0)
        if np.any(parsed_doors):
            door_tiles = [self[c.FLOOR].by_pos(tuple(pos)) for pos in np.argwhere(parsed_doors == c.OCCUPIED_CELL)]
            # NOTE(review): the flag is read from ``obs_prop`` while
            # ``self.door_properties.indicate_door_area`` is not consulted here -
            # confirm which of the two is intended.
            doors = Doors.from_tiles(door_tiles, self._level_shape,
                                     indicate_area=self.obs_prop.indicate_door_area,
                                     entity_kwargs=dict()
                                     )
            super_entities.update({c.DOORS: doors})
        return super_entities

    def __init__(self, *args,
                 door_properties: DoorProperties = DoorProperties(), rewards_door: RewardsDoor = RewardsDoor(),
                 env_seed=None, **kwargs):
        """Create the door factory.

        :param door_properties: ``DoorProperties`` instance or a dict of its fields.
        :param rewards_door: ``RewardsDoor`` instance or a dict of its fields.
        :param env_seed: Seed for the door RNG, also forwarded to the parent
            class. When ``None`` a seed is drawn from ``time.time_ns()`` at
            construction time.
        """
        # The seed must be drawn per instance. A ``time.time_ns()`` default in the
        # signature is evaluated only once, when the function is defined, and
        # would hand the very same seed to every environment of the process.
        if env_seed is None:
            env_seed = time.time_ns()
        if isinstance(door_properties, dict):
            door_properties = DoorProperties(**door_properties)
        if isinstance(rewards_door, dict):
            rewards_door = RewardsDoor(**rewards_door)
        self.door_properties = door_properties
        self.rewards_door = rewards_door
        self._door_rng = np.random.default_rng(env_seed)
        kwargs.update(env_seed=env_seed)
        # TODO: Reset ---> document this
        super().__init__(*args, **kwargs)

    def render_assets_hook(self, mode='human'):
        """Return the parent's render assets extended by one ``RenderEntity`` per door."""
        additional_assets = super().render_assets_hook()
        additional_assets.extend(
            RenderEntity('door_open' if door.is_open else 'door_closed', door.pos, 1, 'none', 'blank', idx)
            for idx, door in enumerate(self[c.DOORS], start=1)
        )
        return additional_assets

    def step_hook(self) -> (List[dict], dict):
        """Run the parent's step hook, then advance the door close interval."""
        super_reward_info = super().step_hook()
        # Step the door close interval.
        # TODO: Maybe move this to self.post_step_hook? May collide with reward calculation.
        if doors := self[c.DOORS]:
            doors.tick_doors()
        return super_reward_info

    def do_additional_actions(self, agent: Agent, action: Action) -> (dict, dict):
        """Handle ``use_door`` unless the parent class already handled ``action``.

        :return: The parent's result if it is not ``None``; otherwise the
            ``(valid, reward)`` pair of ``use_door_action`` for the door
            action, or ``None`` for any other action.
        """
        action_result = super().do_additional_actions(agent, action)
        if action_result is not None:
            return action_result
        if action == a.USE_DOOR:
            return self.use_door_action(agent)
        return None

    def use_door_action(self, agent: Agent):
        """Let ``agent`` use a door it stands next to.

        :return: Tuple ``(valid, reward)`` where ``reward`` is a dict with the
            keys ``value``, ``reason`` and ``info``.
        """
        # Look for a door that has the agent's position as a neighbouring floor position.
        door = self[c.DOORS].get_near_position(agent.pos)
        if door is not None:
            door.use()
            valid = c.VALID
            self.print(f'{agent.name} just used a {door.name} at {door.pos}')
            info_dict = {f'{agent.name}_door_use': 1, 'door_use': 1}
        else:
            # There is no door within reach of the agent.
            valid = c.NOT_VALID
            info_dict = {f'{agent.name}_failed_door_use': 1, 'failed_door_use': 1}
            self.print(f'{agent.name} just tried to use a door at {agent.pos}, but there is none.')

        reward = dict(value=self.rewards_door.USE_DOOR_VALID if valid else self.rewards_door.USE_DOOR_FAIL,
                      reason=a.USE_DOOR, info=info_dict)
        return valid, reward

    def reset_hook(self) -> None:
        """Delegate to the parent; the door extension itself has nothing to reset."""
        super().reset_hook()

    def check_additional_done(self) -> (bool, dict):
        """Return the parent's done flag and info unchanged."""
        super_done, super_dict = super().check_additional_done()
        return super_done, super_dict

    def observations_hook(self) -> Dict[str, np.typing.ArrayLike]:
        """Return the parent's observations extended by the door array under ``c.DOORS``."""
        additional_observations = super().observations_hook()
        additional_observations.update({c.DOORS: self[c.DOORS].as_array()})
        return additional_observations

    def post_step_hook(self) -> List[Dict[str, int]]:
        """Return the parent's post-step info unchanged."""
        super_post_step = super(DoorFactory, self).post_step_hook()
        return super_post_step
if __name__ == '__main__':
    # Manual smoke/benchmark run: 10 factories x 10 episodes of random actions.
    import statistics

    from environments.utility_classes import AgentRenderOptions as aro

    render = True

    door_props = DoorProperties(
        indicate_door_area=True
    )

    obs_props = ObservationProperties(render_agents=aro.COMBINED, omit_agent_self=True,
                                      pomdp_r=2, additional_agent_placeholder=None, cast_shadows=True
                                      )

    move_props = {'allow_square_movement': True,
                  'allow_diagonal_movement': False,
                  'allow_no_op': False}

    global_timings = []
    for _run in range(10):
        # The door properties have to be passed as ``door_properties``; under any
        # other keyword they end up in the ignored kwargs of the base class.
        # ``parse_doors`` is no longer passed, doors are handled by this factory itself.
        factory = DoorFactory(n_agents=10, done_at_collision=False,
                              level_name='rooms', max_steps=1000,
                              obs_prop=obs_props,
                              verbose=True,
                              mv_prop=move_props, door_properties=door_props,
                              )

        # noinspection DuplicatedCode
        n_actions = factory.action_space.n - 1
        obs_space = factory.observation_space
        obs_space_named = factory.named_observation_space
        action_space_named = factory.named_action_space
        times = []
        for epoch in range(10):
            start_time = time.time()
            # One random action per agent for every step of the episode.
            random_actions = [[random.randint(0, n_actions) for _
                               in range(factory.n_agents)] for _
                              in range(factory.max_steps+1)]
            env_state = factory.reset()
            if render:
                factory.render()
            rwrd = 0
            for agent_i_action in random_actions:
                env_state, step_rwrd, done_bool, info_obj = factory.step(agent_i_action)
                rwrd += step_rwrd
                if render:
                    factory.render()
                if done_bool:
                    break
            times.append(time.time() - start_time)
        print('Mean Time Taken: ', sum(times) / len(times))
        global_timings.extend(times)
    print('Mean Time Taken: ', sum(global_timings) / len(global_timings))
    # The timings are in run order, so a real median needs the sorted values.
    print('Median Time Taken: ', statistics.median(global_timings))

View File

@ -7,9 +7,17 @@ from environments.factory.additional.btry.factory_battery import BatteryFactory
from environments.factory.additional.dest.factory_dest import DestFactory from environments.factory.additional.dest.factory_dest import DestFactory
from environments.factory.additional.dirt.dirt_util import DirtProperties from environments.factory.additional.dirt.dirt_util import DirtProperties
from environments.factory.additional.dirt.factory_dirt import DirtFactory from environments.factory.additional.dirt.factory_dirt import DirtFactory
from environments.factory.additional.doors.factory_doors import DoorFactory
from environments.factory.additional.item.factory_item import ItemFactory from environments.factory.additional.item.factory_item import ItemFactory
# noinspection PyAbstractClass
class DoorDirtFactory(DoorFactory, DirtFactory):
    """Environment that combines the door and the dirt factory extensions."""

    def __init__(self, *args, **kwargs):
        # Pure pass-through; all arguments are resolved along the MRO.
        super().__init__(*args, **kwargs)
# noinspection PyAbstractClass
class DirtItemFactory(ItemFactory, DirtFactory): class DirtItemFactory(ItemFactory, DirtFactory):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
@ -38,8 +46,6 @@ if __name__ == '__main__':
render = True render = True
dirt_props = DirtProperties(1, 0.05, 0.1, 3, 1, 20, 0)
obs_props = ObservationProperties(render_agents=ARO.COMBINED, omit_agent_self=True, obs_props = ObservationProperties(render_agents=ARO.COMBINED, omit_agent_self=True,
pomdp_r=2, additional_agent_placeholder=None) pomdp_r=2, additional_agent_placeholder=None)
@ -47,13 +53,13 @@ if __name__ == '__main__':
'allow_diagonal_movement': False, 'allow_diagonal_movement': False,
'allow_no_op': False} 'allow_no_op': False}
factory = DirtBatteryFactory(n_agents=5, done_at_collision=False, factory = DoorDirtFactory(n_agents=10, done_at_collision=False,
level_name='rooms', max_steps=400, level_name='rooms', max_steps=400,
obs_prop=obs_props, parse_doors=True, obs_prop=obs_props, parse_doors=True,
record_episodes=True, verbose=True, record_episodes=True, verbose=True,
btry_prop=BatteryProperties(), dirt_prop=DirtProperties(),
mv_prop=move_props, dirt_prop=dirt_props mv_prop=move_props)
)
# noinspection DuplicatedCode # noinspection DuplicatedCode
n_actions = factory.action_space.n - 1 n_actions = factory.action_space.n - 1

View File

@ -44,7 +44,7 @@ class DirtFactory(BaseFactory):
def entities_hook(self) -> Dict[(str, Entities)]: def entities_hook(self) -> Dict[(str, Entities)]:
super_entities = super().entities_hook super_entities = super().entities_hook
dirt_register = DirtPiles(self.dirt_prop, self._level_shape) dirt_register = DirtPiles(self.dirt_prop, self._level_shape)
super_entities.update(({c.DIRT: dirt_register})) super_entities.update({c.DIRT: dirt_register})
return super_entities return super_entities
def __init__(self, *args, def __init__(self, *args,

View File

@ -0,0 +1,38 @@
from typing import Union
from environments.factory.additional.doors.doors_entities import Door
from environments.factory.base.registers import EntityCollection
from environments.factory.additional.doors.doors_util import Constants as c
class Doors(EntityCollection):
    """Collection of ``Door`` entities.

    The parent collection is always initialised with ``is_blocking_light=True``
    and ``can_collide=True``. Optionally, the floor tiles next to each door are
    marked with ``c.ACCESS_DOOR_CELL`` the first time the array is requested.
    """

    # Only ``Door`` objects are accepted by this collection.
    _accepted_objects = Door

    def __init__(self, *args, indicate_area=False, **kwargs):
        """Set up the collection.

        :param indicate_area: If true, ``as_array`` queues access-cell markers
            for the tiles neighbouring every door.
        """
        # Both attributes are assigned before the parent constructor runs.
        self.indicate_area = indicate_area
        self._area_marked = False
        super().__init__(*args, is_blocking_light=True, can_collide=True, **kwargs)

    def get_near_position(self, position: (int, int)) -> Union[None, Door]:
        """Return the first door that lists ``position`` among its tile's
        neighbouring floor positions, or ``None`` when there is no such door."""
        matching_doors = (door for door in self if position in door.tile.neighboring_floor_pos)
        return next(matching_doors, None)

    def tick_doors(self):
        """Call ``tick`` on every door of the collection."""
        for current_door in self:
            current_door.tick()

    def as_array(self):
        """Return the parent's array representation.

        On the first call with ``indicate_area`` enabled, one transform per
        neighbouring floor tile is queued in ``_lazy_eval_transforms``
        (skipped when ``_individual_slices`` is set).
        """
        marking_pending = not self._area_marked and self.indicate_area
        if marking_pending:
            for current_door in self:
                for neighbour in current_door.tile.neighboring_floor:
                    if not self._individual_slices:
                        # Slice index 0 followed by the tile coordinates.
                        target = (0, *neighbour.pos)
                        self._lazy_eval_transforms.append((target, c.ACCESS_DOOR_CELL))
            # Remember that the markers were queued so this happens only once.
            self._area_marked = True
        return super().as_array()

View File

@ -0,0 +1,69 @@
from environments.factory.base.objects import Entity
from environments.factory.additional.doors.doors_util import Constants as c
class Door(Entity):
    """Door entity with an open/closed state and an auto-close countdown."""

    def __init__(self, *args, closed_on_init=True, auto_close_interval=10, indicate_area=False, **kwargs):
        """Create the door in the closed state and optionally open it.

        :param closed_on_init: When false, ``_open`` is called at the end of construction.
        :param auto_close_interval: Value ``time_to_close`` is reset to whenever the door opens.
        :param indicate_area: Stored on the instance as ``indicate_area``.
        """
        super().__init__(*args, **kwargs)
        self._state = c.CLOSED_DOOR
        self.indicate_area = indicate_area
        self.auto_close_interval = auto_close_interval
        self.time_to_close = -1
        if not closed_on_init:
            self._open()

    @property
    def is_blocking(self):
        """A door blocks unless it is open."""
        return not self.is_open

    @property
    def can_collide(self):
        """A door can be collided with unless it is open."""
        return not self.is_open

    @property
    def encoding(self):
        """Observation value: ``c.CLOSED_DOOR_CELL`` when closed, else ``c.OPEN_DOOR_CELL``."""
        # Per the original author's note, shadowing is checked via this occupation value.
        if self.is_closed:
            return c.CLOSED_DOOR_CELL
        return c.OPEN_DOOR_CELL

    @property
    def str_state(self):
        """Human readable state, either ``'open'`` or ``'closed'``."""
        if self.is_open:
            return 'open'
        return 'closed'

    @property
    def is_closed(self):
        """True while the internal state equals ``c.CLOSED_DOOR``."""
        return self._state == c.CLOSED_DOOR

    @property
    def is_open(self):
        """True while the internal state equals ``c.OPEN_DOOR``."""
        return self._state == c.OPEN_DOOR

    @property
    def status(self):
        """The raw internal state value."""
        return self._state

    def summarize_state(self):
        """Extend the parent's summary with ``state`` and ``time_to_close``."""
        summary = super().summarize_state()
        summary.update(state=str(self.str_state), time_to_close=int(self.time_to_close))
        return summary

    def use(self):
        """Toggle the door: close it when open, open it otherwise."""
        toggle = self._close if self._state == c.OPEN_DOOR else self._open
        toggle()

    def tick(self):
        """Advance the auto-close countdown by one step.

        Nothing happens unless the door is open and ``len(self.tile) == 1``.
        While the countdown is non-zero it is decremented; once it reaches
        zero the door is toggled via ``use``.
        """
        if not self.is_open or len(self.tile) != 1:
            return
        if self.time_to_close:
            self.time_to_close -= 1
        else:
            self.use()

    def _open(self):
        """Switch to the open state, notify the collection and restart the countdown."""
        self._state = c.OPEN_DOOR
        self._collection.notify_change_to_value(self)
        self.time_to_close = self.auto_close_interval

    def _close(self):
        """Switch to the closed state and notify the collection."""
        self._state = c.CLOSED_DOOR
        self._collection.notify_change_to_value(self)

View File

@ -0,0 +1,31 @@
from typing import NamedTuple
from environments.helpers import Constants as BaseConstants, EnvActions as BaseActions
class Constants(BaseConstants):
    """Door related identifiers and observation values on top of the base constants."""

    # --- Identifiers ---
    DOOR = 'Door'                 # Identifier of single door entities.
    DOORS = 'Doors'               # Identifier of door objects and sets (collections).
    DOOR_SYMBOL = 'D'             # Door identifier for resolving the string based map files.

    # --- Cell values used in the observation ---
    ACCESS_DOOR_CELL = 1 / 3      # Access-door cell value.
    OPEN_DOOR_CELL = 2 / 3        # Open-door cell value.
    CLOSED_DOOR_CELL = 3 / 3      # Closed-door cell value.

    # --- State identifiers ---
    CLOSED_DOOR = 'closed'        # Identifier to compare the door-is-closed state.
    OPEN_DOOR = 'open'            # Identifier to compare the door-is-open state.
    # ACCESS_DOOR = 'access'      # Identifier to compare access positions (currently disabled).
class Actions(BaseActions):
    """Door related action identifiers on top of the base actions."""

    # Identifier of the action with which an agent operates a door.
    USE_DOOR = 'use_door'
class RewardsDoor(NamedTuple):
    """Reward values handed out for door interactions."""

    # Reward for a successful door use.
    USE_DOOR_VALID: float = -0.00
    # Reward for a failed door use.
    USE_DOOR_FAIL: float = -0.01
class DoorProperties(NamedTuple):
    """Configuration options of the door extension."""

    # Whether the door area should be indicated in the agents' observation.
    indicate_door_area: bool = True

View File

@ -0,0 +1,196 @@
import time
from typing import List, Union, Dict
import random
import numpy as np
from environments.factory.additional.doors.doors_collections import Doors
from environments.factory.additional.doors.doors_util import DoorProperties, RewardsDoor, Constants, Actions
from environments.factory.base.base_factory import BaseFactory
from environments.factory.base.objects import Agent, Action
from environments.factory.base.registers import Entities
from environments import helpers as h
from environments.factory.base.renderer import RenderEntity
from environments.utility_classes import ObservationProperties
def softmax(x, axis=None):
    """Return the numerically stable softmax of ``x``.

    :param x: Array-like of scores.
    :param axis: Axis along which each set of scores is normalised. The
        default ``None`` normalises over all elements at once, which is the
        behaviour of the original single-argument function.
    :return: Array of the same shape as ``x`` whose entries sum to one along
        ``axis`` (or in total when ``axis`` is ``None``).
    """
    scores = np.asarray(x)
    # Subtracting the maximum keeps ``np.exp`` from overflowing for large scores.
    shifted = scores - np.max(scores, axis=axis, keepdims=True)
    exp_scores = np.exp(shifted)
    return exp_scores / np.sum(exp_scores, axis=axis, keepdims=True)
def entropy(x, eps=1e-8):
    """Return the entropy ``-sum(x * log(x + eps))`` of ``x``.

    :param x: Array-like of probabilities; plain lists are accepted as well.
    :param eps: Small constant added inside the logarithm so that zero
        entries do not produce ``log(0)``.
    :return: The summed entropy over all elements.
    """
    probs = np.asarray(x)
    return -(probs * np.log(probs + eps)).sum()
c = Constants
a = Actions
# noinspection PyAttributeOutsideInit, PyAbstractClass
class DoorFactory(BaseFactory):
    """Factory extension that adds doors, a ``use_door`` action and door rewards.

    Doors are parsed from the level via ``c.DOOR_SYMBOL``, registered under
    ``c.DOORS`` and ticked from ``step_hook``.
    """

    @property
    def actions_hook(self) -> Union[Action, List[Action]]:
        """Return the parent's actions extended by the ``use_door`` action."""
        super_actions = super().actions_hook
        super_actions.append(Action(str_ident=a.USE_DOOR))
        return super_actions

    @property
    def entities_hook(self) -> Dict[(str, Entities)]:
        """Return the parent's entities, plus a ``Doors`` collection if the level contains doors."""
        super_entities = super().entities_hook

        parsed_doors = h.one_hot_level(self._parsed_level, c.DOOR_SYMBOL)
        # Pad by the POMDP radius, as configured in the observation properties.
        parsed_doors = np.pad(parsed_doors, self.obs_prop.pomdp_r, 'constant', constant_values=0)
        if np.any(parsed_doors):
            door_tiles = [self[c.FLOOR].by_pos(tuple(pos)) for pos in np.argwhere(parsed_doors == c.OCCUPIED_CELL)]
            # NOTE(review): the flag is read from ``obs_prop`` while
            # ``self.door_properties.indicate_door_area`` is not consulted here -
            # confirm which of the two is intended.
            doors = Doors.from_tiles(door_tiles, self._level_shape,
                                     indicate_area=self.obs_prop.indicate_door_area,
                                     entity_kwargs=dict()
                                     )
            super_entities.update({c.DOORS: doors})
        return super_entities

    def __init__(self, *args,
                 door_properties: DoorProperties = DoorProperties(), rewards_door: RewardsDoor = RewardsDoor(),
                 env_seed=None, **kwargs):
        """Create the door factory.

        :param door_properties: ``DoorProperties`` instance or a dict of its fields.
        :param rewards_door: ``RewardsDoor`` instance or a dict of its fields.
        :param env_seed: Seed for the door RNG, also forwarded to the parent
            class. When ``None`` a seed is drawn from ``time.time_ns()`` at
            construction time.
        """
        # The seed must be drawn per instance. A ``time.time_ns()`` default in the
        # signature is evaluated only once, when the function is defined, and
        # would hand the very same seed to every environment of the process.
        if env_seed is None:
            env_seed = time.time_ns()
        if isinstance(door_properties, dict):
            door_properties = DoorProperties(**door_properties)
        if isinstance(rewards_door, dict):
            rewards_door = RewardsDoor(**rewards_door)
        self.door_properties = door_properties
        self.rewards_door = rewards_door
        self._door_rng = np.random.default_rng(env_seed)
        kwargs.update(env_seed=env_seed)
        # TODO: Reset ---> document this
        super().__init__(*args, **kwargs)

    def render_assets_hook(self, mode='human'):
        """Return the parent's render assets extended by one ``RenderEntity`` per door."""
        additional_assets = super().render_assets_hook()
        additional_assets.extend(
            RenderEntity('door_open' if door.is_open else 'door_closed', door.pos, 1, 'none', 'blank', idx)
            for idx, door in enumerate(self[c.DOORS], start=1)
        )
        return additional_assets

    def step_hook(self) -> (List[dict], dict):
        """Run the parent's step hook, then advance the door close interval."""
        super_reward_info = super().step_hook()
        # Step the door close interval.
        # TODO: Maybe move this to self.post_step_hook? May collide with reward calculation.
        if doors := self[c.DOORS]:
            doors.tick_doors()
        return super_reward_info

    def do_additional_actions(self, agent: Agent, action: Action) -> (dict, dict):
        """Handle ``use_door`` unless the parent class already handled ``action``.

        :return: The parent's result if it is not ``None``; otherwise the
            ``(valid, reward)`` pair of ``use_door_action`` for the door
            action, or ``None`` for any other action.
        """
        action_result = super().do_additional_actions(agent, action)
        if action_result is not None:
            return action_result
        if action == a.USE_DOOR:
            return self.use_door_action(agent)
        return None

    def use_door_action(self, agent: Agent):
        """Let ``agent`` use a door it stands next to.

        :return: Tuple ``(valid, reward)`` where ``reward`` is a dict with the
            keys ``value``, ``reason`` and ``info``.
        """
        # Look for a door that has the agent's position as a neighbouring floor position.
        door = self[c.DOORS].get_near_position(agent.pos)
        if door is not None:
            door.use()
            valid = c.VALID
            self.print(f'{agent.name} just used a {door.name} at {door.pos}')
            info_dict = {f'{agent.name}_door_use': 1, 'door_use': 1}
        else:
            # There is no door within reach of the agent.
            valid = c.NOT_VALID
            info_dict = {f'{agent.name}_failed_door_use': 1, 'failed_door_use': 1}
            self.print(f'{agent.name} just tried to use a door at {agent.pos}, but there is none.')

        reward = dict(value=self.rewards_door.USE_DOOR_VALID if valid else self.rewards_door.USE_DOOR_FAIL,
                      reason=a.USE_DOOR, info=info_dict)
        return valid, reward

    def reset_hook(self) -> None:
        """Delegate to the parent; the door extension itself has nothing to reset."""
        super().reset_hook()

    def check_additional_done(self) -> (bool, dict):
        """Return the parent's done flag and info unchanged."""
        super_done, super_dict = super().check_additional_done()
        return super_done, super_dict

    def observations_hook(self) -> Dict[str, np.typing.ArrayLike]:
        """Return the parent's observations extended by the door array under ``c.DOORS``."""
        additional_observations = super().observations_hook()
        additional_observations.update({c.DOORS: self[c.DOORS].as_array()})
        return additional_observations

    def post_step_hook(self) -> List[Dict[str, int]]:
        """Return the parent's post-step info unchanged."""
        super_post_step = super(DoorFactory, self).post_step_hook()
        return super_post_step
if __name__ == '__main__':
    # Manual smoke/benchmark run: 10 factories x 10 episodes of random actions.
    import statistics

    from environments.utility_classes import AgentRenderOptions as aro

    render = True

    door_props = DoorProperties(
        indicate_door_area=True
    )

    obs_props = ObservationProperties(render_agents=aro.COMBINED, omit_agent_self=True,
                                      pomdp_r=2, additional_agent_placeholder=None, cast_shadows=True
                                      )

    move_props = {'allow_square_movement': True,
                  'allow_diagonal_movement': False,
                  'allow_no_op': False}

    global_timings = []
    for _run in range(10):
        # The door properties have to be passed as ``door_properties``; under any
        # other keyword they end up in the ignored kwargs of the base class.
        # ``parse_doors`` is no longer passed, doors are handled by this factory itself.
        factory = DoorFactory(n_agents=10, done_at_collision=False,
                              level_name='rooms', max_steps=1000,
                              obs_prop=obs_props,
                              verbose=True,
                              mv_prop=move_props, door_properties=door_props,
                              )

        # noinspection DuplicatedCode
        n_actions = factory.action_space.n - 1
        obs_space = factory.observation_space
        obs_space_named = factory.named_observation_space
        action_space_named = factory.named_action_space
        times = []
        for epoch in range(10):
            start_time = time.time()
            # One random action per agent for every step of the episode.
            random_actions = [[random.randint(0, n_actions) for _
                               in range(factory.n_agents)] for _
                              in range(factory.max_steps+1)]
            env_state = factory.reset()
            if render:
                factory.render()
            rwrd = 0
            for agent_i_action in random_actions:
                env_state, step_rwrd, done_bool, info_obj = factory.step(agent_i_action)
                rwrd += step_rwrd
                if render:
                    factory.render()
                if done_bool:
                    break
            times.append(time.time() - start_time)
        print('Mean Time Taken: ', sum(times) / len(times))
        global_timings.extend(times)
    print('Mean Time Taken: ', sum(global_timings) / len(global_timings))
    # The timings are in run order, so a real median needs the sorted values.
    print('Median Time Taken: ', statistics.median(global_timings))

View File

@ -16,7 +16,7 @@ from environments.helpers import Constants as c
from environments.helpers import EnvActions as a from environments.helpers import EnvActions as a
from environments.helpers import RewardsBase from environments.helpers import RewardsBase
from environments.factory.base.objects import Agent, Floor, Action from environments.factory.base.objects import Agent, Floor, Action
from environments.factory.base.registers import Actions, Entities, Agents, Doors, Floors, Walls, PlaceHolders, \ from environments.factory.base.registers import Actions, Entities, Agents, Floors, Walls, PlaceHolders, \
GlobalPositions GlobalPositions
from environments.utility_classes import MovementProperties, ObservationProperties, MarlFrameStack from environments.utility_classes import MovementProperties, ObservationProperties, MarlFrameStack
from environments.utility_classes import AgentRenderOptions as a_obs from environments.utility_classes import AgentRenderOptions as a_obs
@ -88,8 +88,8 @@ class BaseFactory(gym.Env):
mv_prop: MovementProperties = MovementProperties(), mv_prop: MovementProperties = MovementProperties(),
obs_prop: ObservationProperties = ObservationProperties(), obs_prop: ObservationProperties = ObservationProperties(),
rewards_base: RewardsBase = RewardsBase(), rewards_base: RewardsBase = RewardsBase(),
parse_doors=False, done_at_collision=False, inject_agents: Union[None, List] = None, done_at_collision=False, inject_agents: Union[None, List] = None,
verbose=False, doors_have_area=True, env_seed=time.time_ns(), individual_rewards=False, verbose=False, env_seed=time.time_ns(), individual_rewards=False,
class_name='', **kwargs): class_name='', **kwargs):
if class_name: if class_name:
@ -105,8 +105,6 @@ class BaseFactory(gym.Env):
assert obs_prop.frames_to_stack != 1 and \ assert obs_prop.frames_to_stack != 1 and \
obs_prop.frames_to_stack >= 0, \ obs_prop.frames_to_stack >= 0, \
"'frames_to_stack' cannot be negative or 1." "'frames_to_stack' cannot be negative or 1."
assert doors_have_area or not obs_prop.indicate_door_area, \
'"indicate_door_area" can only active, when "doors_have_area"'
if kwargs: if kwargs:
print(f'Following kwargs were passed, but ignored: {kwargs}') print(f'Following kwargs were passed, but ignored: {kwargs}')
@ -133,9 +131,7 @@ class BaseFactory(gym.Env):
self.done_at_collision = done_at_collision self.done_at_collision = done_at_collision
self._record_episodes = False self._record_episodes = False
self.parse_doors = parse_doors
self._injected_agents = inject_agents or [] self._injected_agents = inject_agents or []
self.doors_have_area = doors_have_area
self.individual_rewards = individual_rewards self.individual_rewards = individual_rewards
# TODO: Reset ---> document this # TODO: Reset ---> document this
@ -174,20 +170,9 @@ class BaseFactory(gym.Env):
# NOPOS # NOPOS
self._NO_POS_TILE = Floor(c.NO_POS, None) self._NO_POS_TILE = Floor(c.NO_POS, None)
# Doors
if self.parse_doors:
parsed_doors = h.one_hot_level(self._parsed_level, c.DOOR)
parsed_doors = np.pad(parsed_doors, self.obs_prop.pomdp_r, 'constant', constant_values=0)
if np.any(parsed_doors):
door_tiles = [floor.by_pos(tuple(pos)) for pos in np.argwhere(parsed_doors == c.OCCUPIED_CELL)]
doors = Doors.from_tiles(door_tiles, self._level_shape, have_area=self.obs_prop.indicate_door_area,
entity_kwargs=dict(context=floor)
)
self._entities.add_additional_items({c.DOORS: doors})
# Actions # Actions
# TODO: Move this to Agent init, so that agents can have individual action sets. # TODO: Move this to Agent init, so that agents can have individual action sets.
self._actions = Actions(self.mv_prop, can_use_doors=self.parse_doors) self._actions = Actions(self.mv_prop)
if additional_actions := self.actions_hook: if additional_actions := self.actions_hook:
self._actions.add_additional_items(additional_actions) self._actions.add_additional_items(additional_actions)
@ -263,8 +248,6 @@ class BaseFactory(gym.Env):
elif a.NOOP == action_obj: elif a.NOOP == action_obj:
action_valid = c.VALID action_valid = c.VALID
reward = dict(value=self.rewards_base.NOOP, reason=a.NOOP, info={f'{agent.name}_NOOP': 1, 'NOOP': 1}) reward = dict(value=self.rewards_base.NOOP, reason=a.NOOP, info={f'{agent.name}_NOOP': 1, 'NOOP': 1})
elif a.USE_DOOR == action_obj:
action_valid, reward = self._handle_door_interaction(agent)
else: else:
# noinspection PyTupleAssignmentBalance # noinspection PyTupleAssignmentBalance
action_valid, reward = self.do_additional_actions(agent, action_obj) action_valid, reward = self.do_additional_actions(agent, action_obj)
@ -282,12 +265,9 @@ class BaseFactory(gym.Env):
for tile in tiles_with_collisions: for tile in tiles_with_collisions:
guests = tile.guests_that_can_collide guests = tile.guests_that_can_collide
for i, guest in enumerate(guests): for i, guest in enumerate(guests):
# This does make a copy, but is faster than.copy() for j, collision in enumerate(guests):
this_collisions = guests[:] if j != i and hasattr(guest, 'step_result'):
del this_collisions[i] guest.step_result['collisions'].append(collision)
assert hasattr(guest, 'step_result')
for collision in this_collisions:
guest.step_result['collisions'].append(collision)
done = False done = False
if self.done_at_collision: if self.done_at_collision:
@ -299,11 +279,6 @@ class BaseFactory(gym.Env):
done = done or additional_done done = done or additional_done
info.update(additional_done_info) info.update(additional_done_info)
# Step the door close intervall
if self.parse_doors:
if doors := self[c.DOORS]:
doors.tick_doors()
# Finalize # Finalize
reward, reward_info = self.build_reward_result(rewards) reward, reward_info = self.build_reward_result(rewards)
@ -319,41 +294,14 @@ class BaseFactory(gym.Env):
info.update(post_step_info) info.update(post_step_info)
obs, _ = self._build_observations() obs, _ = self._build_observations()
return obs, reward, done, info return obs, reward, done, info
def _handle_door_interaction(self, agent) -> (bool, dict):
if doors := self[c.DOORS]:
# Check if agent really is standing on a door:
if self.doors_have_area:
door = doors.get_near_position(agent.pos)
else:
door = doors.by_pos(agent.pos)
if door is not None:
door.use()
valid = c.VALID
self.print(f'{agent.name} just used a {door.name} at {door.pos}')
info_dict = {f'{agent.name}_door_use': 1, f'door_use': 1}
# When he doesn't...
else:
valid = c.NOT_VALID
info_dict = {f'{agent.name}_failed_door_use': 1, 'failed_door_use': 1}
self.print(f'{agent.name} just tried to use a door at {agent.pos}, but there is none.')
else:
raise RuntimeError('This should not happen, since the door action should not be available.')
reward = dict(value=self.rewards_base.USE_DOOR_VALID if valid else self.rewards_base.USE_DOOR_FAIL,
reason=a.USE_DOOR, info=info_dict)
return valid, reward
def _build_observations(self) -> np.typing.ArrayLike: def _build_observations(self) -> np.typing.ArrayLike:
# Observation dict: # Observation dict:
per_agent_expl_idx = dict() per_agent_expl_idx = dict()
per_agent_obsn = dict() per_agent_obsn = dict()
# Generel Observations # Generel Observations
lvl_obs = self[c.WALLS].as_array() lvl_obs = self[c.WALLS].as_array()
door_obs = self[c.DOORS].as_array() if self.parse_doors else None
if self.obs_prop.render_agents == a_obs.NOT: if self.obs_prop.render_agents == a_obs.NOT:
global_agent_obs = None global_agent_obs = None
elif self.obs_prop.omit_agent_self and self.n_agents == 1: elif self.obs_prop.omit_agent_self and self.n_agents == 1:
@ -391,8 +339,6 @@ class BaseFactory(gym.Env):
obs_dict[c.AGENT] = agent_obs[:] obs_dict[c.AGENT] = agent_obs[:]
if self[c.AGENT_PLACEHOLDER] and placeholder_obs is not None: if self[c.AGENT_PLACEHOLDER] and placeholder_obs is not None:
obs_dict[c.AGENT_PLACEHOLDER] = placeholder_obs obs_dict[c.AGENT_PLACEHOLDER] = placeholder_obs
if self.parse_doors and door_obs is not None:
obs_dict[c.DOORS] = door_obs[:]
obs_dict.update(add_obs_dict) obs_dict.update(add_obs_dict)
obsn = np.vstack(list(obs_dict.values())) obsn = np.vstack(list(obs_dict.values()))
if self.obs_prop.pomdp_r: if self.obs_prop.pomdp_r:
@ -430,33 +376,11 @@ class BaseFactory(gym.Env):
raise e raise e
obs_block_light = obsn[light_block_obs] != c.OCCUPIED_CELL obs_block_light = obsn[light_block_obs] != c.OCCUPIED_CELL
door_shadowing = False
if self.parse_doors:
if doors := self[c.DOORS]:
if door := doors.by_pos(agent.pos):
if door.is_closed:
for group in door.connectivity_subgroups:
if agent.last_pos not in group:
door_shadowing = True
if self._pomdp_r:
blocking = [
tuple(np.subtract(x, agent.pos) + (self._pomdp_r, self._pomdp_r))
for x in group]
xs, ys = zip(*blocking)
else:
xs, ys = zip(*group)
# noinspection PyUnresolvedReferences
obs_block_light[:, xs, ys] = False
light_block_map = Map((np.prod(obs_block_light, axis=0) != True).astype(int).squeeze()) light_block_map = Map((np.prod(obs_block_light, axis=0) != True).astype(int).squeeze())
if self._pomdp_r: if self._pomdp_r:
light_block_map = light_block_map.do_fov(self._pomdp_r, self._pomdp_r, max(self._level_shape)) light_block_map = light_block_map.do_fov(self._pomdp_r, self._pomdp_r, max(self._level_shape))
else: else:
light_block_map = light_block_map.do_fov(*agent.pos, max(self._level_shape)) light_block_map = light_block_map.do_fov(*agent.pos, max(self._level_shape))
if door_shadowing:
# noinspection PyUnboundLocalVariable
light_block_map[xs, ys] = 0
agent.step_result['lightmap'] = light_block_map agent.step_result['lightmap'] = light_block_map
@ -550,35 +474,13 @@ class BaseFactory(gym.Env):
y_new = agent.y + y_diff y_new = agent.y + y_diff
new_tile = self[c.FLOOR].by_pos((x_new, y_new)) new_tile = self[c.FLOOR].by_pos((x_new, y_new))
if new_tile: if new_tile and not np.any([x.is_blocking for x in new_tile.guests]):
valid = c.VALID valid = c.VALID
else: else:
tile = agent.tile tile = agent.tile
valid = c.VALID valid = c.VALID
return tile, valid return tile, valid
if self.parse_doors and agent.last_pos != c.NO_POS:
if doors := self[c.DOORS]:
if self.doors_have_area:
if door := doors.by_pos(new_tile.pos):
if door.is_closed:
return agent.tile, c.NOT_VALID
else: # door.is_closed:
pass
if door := doors.by_pos(agent.pos):
if door.is_open:
pass
else: # door.is_closed:
if door.is_linked(agent.last_pos, new_tile.pos):
pass
else:
return agent.tile, c.NOT_VALID
else:
pass
else:
pass
return new_tile, valid return new_tile, valid
def build_reward_result(self, global_env_rewards: list) -> (int, dict): def build_reward_result(self, global_env_rewards: list) -> (int, dict):
@ -649,14 +551,10 @@ class BaseFactory(gym.Env):
for i, agent in enumerate(self[c.AGENT]): for i, agent in enumerate(self[c.AGENT]):
name, state = h.asset_str(agent) name, state = h.asset_str(agent)
agents.append(RenderEntity(name, agent.pos, 1, 'none', state, i + 1, agent.step_result['lightmap'])) agents.append(RenderEntity(name, agent.pos, 1, 'none', state, i + 1, agent.step_result['lightmap']))
doors = []
if self.parse_doors:
for i, door in enumerate(self[c.DOORS]):
name, state = 'door_open' if door.is_open else 'door_closed', 'blank'
doors.append(RenderEntity(name, door.pos, 1, 'none', state, i + 1))
additional_assets = self.render_assets_hook() additional_assets = self.render_assets_hook()
return self._renderer.render(walls + doors + additional_assets + agents) return self._renderer.render(walls + additional_assets + agents)
def save_params(self, filepath: Path): def save_params(self, filepath: Path):
# noinspection PyProtectedMember # noinspection PyProtectedMember

View File

@ -1,12 +1,10 @@
from collections import defaultdict from collections import defaultdict
from typing import Union from typing import Union, List
import networkx as nx
import numpy as np import numpy as np
from environments import helpers as h from environments import helpers as h
from environments.helpers import Constants as c from environments.helpers import Constants as c
import itertools
########################################################################## ##########################################################################
# ##################### Base Object Building Blocks ######################### # # ##################### Base Object Building Blocks ######################### #
@ -88,6 +86,10 @@ class EnvObject(Object):
class Entity(EnvObject): class Entity(EnvObject):
"""Full Env Entity that lives on the env Grid. Doors, Items, DirtPile etc...""" """Full Env Entity that lives on the env Grid. Doors, Items, DirtPile etc..."""
@property
def is_blocking(self) -> bool:
    """Report whether this entity blocks its tile; the base implementation never does."""
    return False
@property @property
def can_collide(self): def can_collide(self):
return False return False
@ -226,6 +228,21 @@ class GlobalPosition(BoundingMixin, EnvObject):
class Floor(EnvObject): class Floor(EnvObject):
@property
def neighboring_floor_pos(self):
    """Return the ``pos`` of every tile in ``self.neighboring_floor``, in the same order."""
    positions = []
    for neighbor in self.neighboring_floor:
        positions.append(neighbor.pos)
    return positions
@property
def neighboring_floor(self):
    """Return the tiles surrounding this tile, computing them on first access.

    Every offset in ``h.POS_MASK`` except ``[0, 0]`` is added to ``self.pos`` and looked up
    through ``self._collection.by_pos``; only truthy lookup results are kept.
    The result is stored in ``self._neighboring_floor`` and reused while that list is
    non-empty (an empty result is therefore recomputed on the next access).
    """
    if not self._neighboring_floor:
        # Collect all lookups first, then filter, so that every ``by_pos`` call happens
        # before any result is tested for truthiness.
        looked_up = []
        for offset in h.POS_MASK.reshape(-1, 2):
            if np.all(offset == [0, 0]):
                continue  # the zero offset is this tile itself
            looked_up.append(self._collection.by_pos(np.add(self.pos, offset)))
        self._neighboring_floor = [tile for tile in looked_up if tile]
    return self._neighboring_floor
@property @property
def encoding(self): def encoding(self):
return c.FREE_CELL return c.FREE_CELL
@ -254,6 +271,7 @@ class Floor(EnvObject):
super(Floor, self).__init__(*args, **kwargs) super(Floor, self).__init__(*args, **kwargs)
self._guests = dict() self._guests = dict()
self._pos = tuple(pos) self._pos = tuple(pos)
self._neighboring_floor: List[Floor] = list()
def __len__(self): def __len__(self):
return len(self._guests) return len(self._guests)
@ -298,94 +316,6 @@ class Wall(Floor):
pass pass
class Door(Entity):
    """Grid entity modelling a door that can be opened and closed.

    On construction the door looks up the tiles around its own position and builds a
    connectivity graph over their positions. Each connected component of that graph gets
    an integer "hub" node (its index in ``connectivity_subgroups``) that is linked to every
    position of the component. Opening the door links the door's own position to all hubs;
    closing it removes the door's position from the graph again.
    """

    @property
    def can_collide(self):
        """Return True only for a door with an area that is currently not open."""
        if not self.has_area:
            return False
        return not self.is_open

    @property
    def encoding(self):
        """Return the observation value matching the current open/closed state."""
        # This is important, as shadowing is checked by occupation value.
        return c.CLOSED_DOOR_CELL if self.is_closed else c.OPEN_DOOR_CELL

    @property
    def str_state(self):
        """Return ``'open'`` or ``'closed'``."""
        return 'open' if self.is_open else 'closed'

    @property
    def access_area(self):
        """Return all graph nodes that are neither hub indices nor the door's own position."""
        hub_nodes = range(len(self.connectivity_subgroups))
        return [node for node in self.connectivity.nodes
                if node not in hub_nodes and node != self.pos]

    def __init__(self, *args, context, closed_on_init=True, auto_close_interval=10, has_area=False, **kwargs):
        """Create the door and derive its connectivity from the surrounding tiles.

        :param context: Object queried through ``by_pos`` for the surrounding tiles
                        (presumably the floor collection; confirm against the caller).
        :param closed_on_init: If False, the door is opened right after construction.
        :param auto_close_interval: Value ``time_to_close`` is reset to whenever the door opens.
        :param has_area: Stored as ``self.has_area``; controls ``can_collide``.
        """
        super(Door, self).__init__(*args, **kwargs)
        self._state = c.CLOSED_DOOR
        self.has_area = has_area
        self.auto_close_interval = auto_close_interval
        self.time_to_close = -1

        # All eight surrounding offsets; the zero offset (the door itself) is excluded explicitly.
        offsets = [diff for diff in itertools.product([-1, 1, 0], repeat=2) if diff != (0, 0)]
        neighbor_tiles = [context.by_pos(tuple(p + d for p, d in zip(self.pos, diff))) for diff in offsets]
        neighbor_pos = [tile.pos for tile in neighbor_tiles if tile]

        self.connectivity = h.points_to_graph(neighbor_pos)
        self.connectivity_subgroups = list(nx.algorithms.components.connected_components(self.connectivity))
        # Attach every position of a component to that component's integer hub node.
        for idx, group in enumerate(self.connectivity_subgroups):
            for tile_pos in group:
                self.connectivity.add_edge(tile_pos, idx)

        if not closed_on_init:
            self._open()

    def summarize_state(self):
        """Return the parent's state summary extended by ``state`` and ``time_to_close``."""
        state_dict = super().summarize_state()
        state_dict.update(state=str(self.str_state), time_to_close=int(self.time_to_close))
        return state_dict

    @property
    def is_closed(self):
        """True while the internal state equals ``c.CLOSED_DOOR``."""
        return self._state == c.CLOSED_DOOR

    @property
    def is_open(self):
        """True while the internal state equals ``c.OPEN_DOOR``."""
        return self._state == c.OPEN_DOOR

    @property
    def status(self):
        """Return the raw internal state value."""
        return self._state

    def use(self):
        """Toggle the door: close it when open, open it otherwise."""
        if self.is_open:
            self._close()
        else:
            self._open()

    def tick(self):
        """Advance the auto-close countdown by one step.

        Only acts while the door is open and ``len(self.tile) == 1`` (presumably: nothing
        but the door occupies the tile; confirm against the tile implementation). The
        countdown is decremented while it is non-zero; once it reaches zero the door is
        toggled via ``use``.
        """
        if self.is_open and len(self.tile) == 1:
            if self.time_to_close:
                self.time_to_close -= 1
            else:
                self.use()

    def _open(self):
        """Link the door position to every hub node, mark it open and restart the countdown."""
        self.connectivity.add_edges_from([(self.pos, x) for x in range(len(self.connectivity_subgroups))])
        self._state = c.OPEN_DOOR
        self._collection.notify_change_to_value(self)
        self.time_to_close = self.auto_close_interval

    def _close(self):
        """Remove the door position from the graph and mark the door closed."""
        self.connectivity.remove_node(self.pos)
        self._state = c.CLOSED_DOOR
        self._collection.notify_change_to_value(self)

    def is_linked(self, old_pos, new_pos):
        """Return True if a path from ``old_pos`` to ``new_pos`` exists in the connectivity graph.

        NOTE(review): positions that are not nodes of the graph are not handled here; the
        error raised by networkx propagates, exactly as before.
        """
        return nx.has_path(self.connectivity, old_pos, new_pos)
class Agent(MoveableEntity): class Agent(MoveableEntity):
@property @property

View File

@ -6,7 +6,7 @@ from typing import List, Union, Dict, Tuple
import numpy as np import numpy as np
import six import six
from environments.factory.base.objects import Entity, Floor, Agent, Door, Action, Wall, PlaceHolder, GlobalPosition, \ from environments.factory.base.objects import Entity, Floor, Agent, Action, Wall, PlaceHolder, GlobalPosition, \
Object, EnvObject Object, EnvObject
from environments.utility_classes import MovementProperties from environments.utility_classes import MovementProperties
from environments import helpers as h from environments import helpers as h
@ -452,38 +452,6 @@ class Agents(MovingEntityObjectCollection):
self._collection[agent.name] = agent self._collection[agent.name] = agent
# Collection of Door entities.
# NOTE(review): this block is shown without indentation; the comments below describe only
# what the statements themselves establish and flag where the nesting cannot be determined.
class Doors(EntityCollection):
# Stores `have_area` on the collection and always initialises the parent collection with
# is_blocking_light=True and can_collide=True.
def __init__(self, *args, have_area: bool = False, **kwargs):
self.have_area = have_area
# Remembers whether `as_array` has already queued the access-area cells.
self._area_marked = False
super(Doors, self).__init__(*args, is_blocking_light=True, can_collide=True, **kwargs)
# Presumably the entity type this collection accepts -- confirm against EntityCollection.
_accepted_objects = Door
# Returns the first door whose `access_area` contains `position`, or None when no door matches.
def get_near_position(self, position: (int, int)) -> Union[None, Door]:
try:
return next(door for door in self if position in door.access_area)
except StopIteration:
return None
# Calls `tick()` on every door in the collection.
def tick_doors(self):
for door in self:
door.tick()
# Returns the parent's array representation. When `have_area` is set and the area has not been
# marked yet, (position, c.ACCESS_DOOR_CELL) pairs for the doors' access-area positions are
# appended to `self._lazy_eval_transforms` beforehand.
# NOTE(review): nothing is done in the `_individual_slices` branch; whether the append and the
# `_area_marked = True` assignment sit inside the `else`, the loops or the outer `if` cannot be
# determined from this flattened view -- verify against the properly indented file.
def as_array(self):
if self.have_area and not self._area_marked:
for door in self:
for pos in door.access_area:
if self._individual_slices:
pass
else:
# Prefix the position with slice index 0.
pos = (0, *pos)
self._lazy_eval_transforms.append((pos, c.ACCESS_DOOR_CELL))
self._area_marked = True
return super(Doors, self).as_array()
class Actions(ObjectCollection): class Actions(ObjectCollection):
_accepted_objects = Action _accepted_objects = Action
@ -492,11 +460,10 @@ class Actions(ObjectCollection):
return self._movement_actions return self._movement_actions
# noinspection PyTypeChecker # noinspection PyTypeChecker
def __init__(self, movement_properties: MovementProperties, can_use_doors=False): def __init__(self, movement_properties: MovementProperties):
self.allow_no_op = movement_properties.allow_no_op self.allow_no_op = movement_properties.allow_no_op
self.allow_diagonal_movement = movement_properties.allow_diagonal_movement self.allow_diagonal_movement = movement_properties.allow_diagonal_movement
self.allow_square_movement = movement_properties.allow_square_movement self.allow_square_movement = movement_properties.allow_square_movement
self.can_use_doors = can_use_doors
super(Actions, self).__init__() super(Actions, self).__init__()
# Move this to Baseclass, Env init? # Move this to Baseclass, Env init?
@ -507,8 +474,6 @@ class Actions(ObjectCollection):
self.add_additional_items([self._accepted_objects(str_ident=direction) self.add_additional_items([self._accepted_objects(str_ident=direction)
for direction in h.EnvActions.diagonal_move()]) for direction in h.EnvActions.diagonal_move()])
self._movement_actions = self._collection.copy() self._movement_actions = self._collection.copy()
if self.can_use_doors:
self.add_additional_items([self._accepted_objects(str_ident=h.EnvActions.USE_DOOR)])
if self.allow_no_op: if self.allow_no_op:
self.add_additional_items([self._accepted_objects(str_ident=h.EnvActions.NOOP)]) self.add_additional_items([self._accepted_objects(str_ident=h.EnvActions.NOOP)])

View File

@ -33,6 +33,10 @@ IGNORED_DF_COLUMNS = ['Episode', 'Run', # For plotting, which values ar
'train_step', 'step', 'index', 'dirt_amount', 'dirty_tile_count', 'terminal_observation', 'train_step', 'step', 'index', 'dirt_amount', 'dirty_tile_count', 'terminal_observation',
'episode'] 'episode']
# 3x3 grid of 2-D integer offsets covering a position and its eight surrounding cells;
# the centre entry [0, 0] is the position itself.
POS_MASK = np.asarray([[[first, second] for first in (-1, 0, 1)] for second in (-1, 0, 1)])
class Constants: class Constants:
@ -42,12 +46,10 @@ class Constants:
""" """
WALL = '#' # Wall tile identifier for resolving the string based map files. WALL = '#' # Wall tile identifier for resolving the string based map files.
DOOR = 'D' # Door identifier for resolving the string based map files.
DANGER_ZONE = 'x' # Danger Zone tile identifier for resolving the string based map files. DANGER_ZONE = 'x' # Danger Zone tile identifier for resolving the string based map files.
WALLS = 'Walls' # Identifier of Wall-objects and sets (collections). WALLS = 'Walls' # Identifier of Wall-objects and sets (collections).
FLOOR = 'Floor' # Identifier of Floor-objects and sets (collections). FLOOR = 'Floor' # Identifier of Floor-objects and sets (collections).
DOORS = 'Doors' # Identifier of Door-objects and sets (collections).
LEVEL = 'Level' # Identifier of Level-objects and sets (collections). LEVEL = 'Level' # Identifier of Level-objects and sets (collections).
AGENT = 'Agent' # Identifier of Agent-objects and sets (collections). AGENT = 'Agent' # Identifier of Agent-objects and sets (collections).
AGENT_PLACEHOLDER = 'AGENT_PLACEHOLDER' # Identifier of Placeholder-objects and sets (collections). AGENT_PLACEHOLDER = 'AGENT_PLACEHOLDER' # Identifier of Placeholder-objects and sets (collections).
@ -56,16 +58,9 @@ class Constants:
FREE_CELL = 0 # Free-Cell value used in observation FREE_CELL = 0 # Free-Cell value used in observation
OCCUPIED_CELL = 1 # Occupied-Cell value used in observation OCCUPIED_CELL = 1 # Occupied-Cell value used in observation
SHADOWED_CELL = -1 # Shadowed-Cell value used in observation SHADOWED_CELL = -1 # Shadowed-Cell value used in observation
ACCESS_DOOR_CELL = 1/3 # Access-door-Cell value used in observation
OPEN_DOOR_CELL = 2/3 # Open-door-Cell value used in observation
CLOSED_DOOR_CELL = 3/3 # Closed-door-Cell value used in observation
NO_POS = (-9999, -9999) # Invalid Position value used in the environment (something is off-grid) NO_POS = (-9999, -9999) # Invalid Position value used in the environment (something is off-grid)
CLOSED_DOOR = 'closed' # Identifier to compare door-is-closed state
OPEN_DOOR = 'open' # Identifier to compare door-is-open state
# ACCESS_DOOR = 'access' # Identifier to compare access positions
ACTION = 'action' # Identifier of Action-objects and sets (collections). ACTION = 'action' # Identifier of Action-objects and sets (collections).
COLLISION = 'collision' # Identifier to use in the context of collisions. COLLISION = 'collision' # Identifier to use in the context of collisions.
VALID = True # Identifier to rename boolean values in the context of actions. VALID = True # Identifier to rename boolean values in the context of actions.
@ -90,7 +85,6 @@ class EnvActions:
# Other # Other
# MOVE = 'move' # MOVE = 'move'
NOOP = 'no_op' NOOP = 'no_op'
USE_DOOR = 'use_door'
_ACTIONMAP = defaultdict(lambda: (0, 0), _ACTIONMAP = defaultdict(lambda: (0, 0),
{NORTH: (-1, 0), NORTHEAST: (-1, 1), {NORTH: (-1, 0), NORTHEAST: (-1, 1),
@ -100,6 +94,8 @@ class EnvActions:
} }
) )
@classmethod @classmethod
def is_move(cls, action): def is_move(cls, action):
""" """
@ -166,8 +162,6 @@ class RewardsBase(NamedTuple):
MOVEMENTS_VALID: float = -0.001 MOVEMENTS_VALID: float = -0.001
MOVEMENTS_FAIL: float = -0.05 MOVEMENTS_FAIL: float = -0.05
NOOP: float = -0.01 NOOP: float = -0.01
USE_DOOR_VALID: float = -0.00
USE_DOOR_FAIL: float = -0.01
COLLISION: float = -0.5 COLLISION: float = -0.5

View File

@ -68,7 +68,7 @@ if __name__ == '__main__':
omit_agent_self=True, # This is default omit_agent_self=True, # This is default
additional_agent_placeholder=None, # We will not take care of future agents additional_agent_placeholder=None, # We will not take care of future agents
frames_to_stack=3, # To give the agent a notion of time frames_to_stack=3, # To give the agent a notion of time
pomdp_r=2 # the agents view-radius pomdp_r=2 # the agents' view-radius
) )
# 'MovementProperties' are for specifying how the agent is allowed to move in the env. # 'MovementProperties' are for specifying how the agent is allowed to move in the env.
move_props = MovementProperties(allow_diagonal_movement=True, # Euclidean style (vertices) move_props = MovementProperties(allow_diagonal_movement=True, # Euclidean style (vertices)
@ -135,7 +135,7 @@ if __name__ == '__main__':
env_recorder_callback = EnvRecorder(env_factory, freq=int(train_steps / 400 / 10)) env_recorder_callback = EnvRecorder(env_factory, freq=int(train_steps / 400 / 10))
# Model Init # Model Init
model = model_class("MlpPolicy", env_factory,verbose=1, seed=seed, device='cpu') model = model_class("MlpPolicy", env_factory, verbose=1, seed=seed, device='cpu')
# Model train # Model train
model.learn(total_timesteps=int(train_steps), callback=[env_monitor_callback, env_recorder_callback]) model.learn(total_timesteps=int(train_steps), callback=[env_monitor_callback, env_recorder_callback])
@ -166,7 +166,7 @@ if __name__ == '__main__':
# retrieve model class # retrieve model class
model_cls = next(val for key, val in h.MODEL_MAP.items() if key in policy_path.parent.name) model_cls = next(val for key, val in h.MODEL_MAP.items() if key in policy_path.parent.name)
# Load the agent agent # Load the agent
model = model_cls.load(policy_path / 'model.zip', device='cpu') model = model_cls.load(policy_path / 'model.zip', device='cpu')
# Load old env kwargs # Load old env kwargs
with next(policy_path.glob(env_params_json)).open('r') as f: with next(policy_path.glob(env_params_json)).open('r') as f: