Items and combination of item and dirt
This commit is contained in:
BIN
environments/factory/assets/drop_off.png
Normal file
BIN
environments/factory/assets/drop_off.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 2.3 KiB |
BIN
environments/factory/assets/item.png
Normal file
BIN
environments/factory/assets/item.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 3.0 KiB |
@ -1,3 +1,4 @@
|
||||
import abc
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import List, Union, Iterable
|
||||
@ -10,6 +11,7 @@ import yaml
|
||||
from gym.wrappers import FrameStack
|
||||
|
||||
from environments.factory.base.shadow_casting import Map
|
||||
from environments.factory.renderer import Renderer, RenderEntity
|
||||
from environments.helpers import Constants as c, Constants
|
||||
from environments import helpers as h
|
||||
from environments.factory.base.objects import Slice, Agent, Tile, Action
|
||||
@ -28,20 +30,7 @@ class BaseFactory(gym.Env):
|
||||
|
||||
@property
|
||||
def observation_space(self):
|
||||
if self.combin_agent_slices_in_obs and self.omit_agent_slice_in_obs:
|
||||
if self.n_agents > 1:
|
||||
slices = self._slices.n - (self._agents.n - 1)
|
||||
else:
|
||||
slices = self._slices.n - 1
|
||||
elif self.combin_agent_slices_in_obs and not self.omit_agent_slice_in_obs:
|
||||
slices = self._slices.n - (self._agents.n - 1)
|
||||
elif not self.combin_agent_slices_in_obs and self.omit_agent_slice_in_obs:
|
||||
slices = self._slices.n - self._agents.n
|
||||
elif not self.combin_agent_slices_in_obs and not self.omit_agent_slice_in_obs:
|
||||
slices = self._slices.n
|
||||
else:
|
||||
raise RuntimeError('This should not happen!')
|
||||
|
||||
slices = self._slices.n_observable_slices
|
||||
level_shape = (self.pomdp_r * 2 + 1, self.pomdp_r * 2 + 1) if self.pomdp_r else self._level_shape
|
||||
space = spaces.Box(low=0, high=1, shape=(slices, *level_shape), dtype=np.float32)
|
||||
return space
|
||||
@ -54,36 +43,6 @@ class BaseFactory(gym.Env):
|
||||
def movement_actions(self):
|
||||
return self._actions.movement_actions
|
||||
|
||||
@property
|
||||
def additional_actions(self) -> Union[str, List[str]]:
|
||||
"""
|
||||
When heriting from this Base Class, you musst implement this methode!!!
|
||||
|
||||
:return: A list of Actions-object holding all additional actions.
|
||||
:rtype: List[Action]
|
||||
"""
|
||||
raise NotImplementedError('Please register additional actions ')
|
||||
|
||||
@property
|
||||
def additional_entities(self) -> Union[Entities, List[Entities]]:
|
||||
"""
|
||||
When heriting from this Base Class, you musst implement this methode!!!
|
||||
|
||||
:return: A single Entites collection or a list of such.
|
||||
:rtype: Union[Entities, List[Entities]]
|
||||
"""
|
||||
raise NotImplementedError('Please register additional entities.')
|
||||
|
||||
@property
|
||||
def additional_slices(self) -> Union[Slice, List[Slice]]:
|
||||
"""
|
||||
When heriting from this Base Class, you musst implement this methode!!!
|
||||
|
||||
:return: A list of Slice-objects.
|
||||
:rtype: List[Slice]
|
||||
"""
|
||||
raise NotImplementedError('Please register additional slices.')
|
||||
|
||||
def __enter__(self):
|
||||
return self if self.frames_to_stack == 0 else FrameStack(self, self.frames_to_stack)
|
||||
|
||||
@ -94,17 +53,20 @@ class BaseFactory(gym.Env):
|
||||
movement_properties: MovementProperties = MovementProperties(), parse_doors=False,
|
||||
combin_agent_slices_in_obs: bool = False, frames_to_stack=0, record_episodes=False,
|
||||
omit_agent_slice_in_obs=False, done_at_collision=False, cast_shadows=True,
|
||||
verbose=False, doors_have_area=True, **kwargs):
|
||||
verbose=False, doors_have_area=True, env_seed=time.time_ns(), **kwargs):
|
||||
assert frames_to_stack != 1 and frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1."
|
||||
|
||||
# Attribute Assignment
|
||||
self._base_rng = np.random.default_rng(kwargs.get('seed', default=time.time_ns()))
|
||||
self.env_seed = env_seed
|
||||
self._base_rng = np.random.default_rng(self.env_seed)
|
||||
self.movement_properties = movement_properties
|
||||
self.level_name = level_name
|
||||
self._level_shape = None
|
||||
self.verbose = verbose
|
||||
self._renderer = None # expensive - don't use it when not required !
|
||||
|
||||
self.n_agents = n_agents
|
||||
|
||||
self.max_steps = max_steps
|
||||
self.pomdp_r = pomdp_r
|
||||
self.combin_agent_slices_in_obs = combin_agent_slices_in_obs
|
||||
@ -132,25 +94,37 @@ class BaseFactory(gym.Env):
|
||||
# Level
|
||||
level_filepath = Path(__file__).parent.parent / h.LEVELS_DIR / f'{self.level_name}.txt'
|
||||
parsed_level = h.parse_level(level_filepath)
|
||||
level = [Slice(c.LEVEL.name, h.one_hot_level(parsed_level), is_blocking_light=True)]
|
||||
level = [Slice(c.LEVEL, h.one_hot_level(parsed_level), is_blocking_light=True)]
|
||||
self._level_shape = level[0].shape
|
||||
|
||||
# Doors
|
||||
parsed_doors = h.one_hot_level(parsed_level, c.DOOR)
|
||||
if parsed_doors.any():
|
||||
doors = [Slice(c.DOORS.name, parsed_doors, is_blocking_light=True)]
|
||||
doors = [Slice(c.DOORS, parsed_doors, is_blocking_light=True)]
|
||||
else:
|
||||
doors = []
|
||||
|
||||
# Agents
|
||||
agents = []
|
||||
for i in range(self.n_agents):
|
||||
agents.append(Slice(f'{c.AGENT.name}#{i}', np.zeros_like(level[0].slice, dtype=np.float32)))
|
||||
state_slices.register_additional_items(level+doors+agents)
|
||||
agent_names = [f'{c.AGENT.value}#{i}' for i in range(self.n_agents)]
|
||||
|
||||
# Additional Slices from SubDomains
|
||||
if additional_slices := self.additional_slices:
|
||||
state_slices.register_additional_items(additional_slices)
|
||||
if self.combin_agent_slices_in_obs and self.omit_agent_slice_in_obs:
|
||||
if self.n_agents == 1:
|
||||
observables = [False]
|
||||
else:
|
||||
observables = [True] + ([False] * (self.n_agents - 1))
|
||||
elif self.combin_agent_slices_in_obs and not self.omit_agent_slice_in_obs:
|
||||
observables = [True] + ([False] * (self.n_agents - 1))
|
||||
elif not self.combin_agent_slices_in_obs and self.omit_agent_slice_in_obs:
|
||||
observables = [False] + ([True] * (self.n_agents - 1))
|
||||
elif not self.combin_agent_slices_in_obs and not self.omit_agent_slice_in_obs:
|
||||
observables = [True] * self.n_agents
|
||||
else:
|
||||
raise RuntimeError('This should not happen!')
|
||||
|
||||
for observable, agent_name in zip(observables, agent_names):
|
||||
agents.append(Slice(agent_name, np.zeros_like(level[0].slice, dtype=np.float32), is_observable=observable))
|
||||
state_slices.register_additional_items(level+doors+agents+self.additional_slices)
|
||||
return state_slices
|
||||
|
||||
def _init_obs_cube(self) -> np.ndarray:
|
||||
@ -198,18 +172,6 @@ class BaseFactory(gym.Env):
|
||||
obs = self._get_observations()
|
||||
return obs
|
||||
|
||||
def pre_step(self) -> None:
|
||||
pass
|
||||
|
||||
def do_additional_reset(self) -> None:
|
||||
pass
|
||||
|
||||
def do_additional_step(self) -> dict:
|
||||
return {}
|
||||
|
||||
def post_step(self) -> dict:
|
||||
return {}
|
||||
|
||||
def step(self, actions):
|
||||
actions = [actions] if isinstance(actions, int) or np.isscalar(actions) else actions
|
||||
assert isinstance(actions, Iterable), f'"actions" has to be in [{int, list}]'
|
||||
@ -217,31 +179,22 @@ class BaseFactory(gym.Env):
|
||||
done = False
|
||||
|
||||
# Pre step Hook for later use
|
||||
self.pre_step()
|
||||
self.hook_pre_step()
|
||||
|
||||
# Move this in a seperate function?
|
||||
for action, agent in zip(actions, self._agents):
|
||||
agent.clear_temp_sate()
|
||||
action_name = self._actions[action]
|
||||
if self._actions.is_moving_action(action):
|
||||
valid = self._move_or_colide(agent, action_name)
|
||||
elif self._actions.is_no_op(action):
|
||||
action_obj = self._actions[action]
|
||||
if self._actions.is_moving_action(action_obj):
|
||||
valid = self._move_or_colide(agent, action_obj)
|
||||
elif self._actions.is_no_op(action_obj):
|
||||
valid = c.VALID.value
|
||||
elif self._actions.is_door_usage(action):
|
||||
# Check if agent really is standing on a door:
|
||||
if self.doors_have_area:
|
||||
door = self._doors.get_near_position(agent.pos)
|
||||
else:
|
||||
door = self._doors.by_pos(agent.pos)
|
||||
if door is not None:
|
||||
door.use()
|
||||
valid = c.VALID.value
|
||||
# When he doesn't...
|
||||
else:
|
||||
valid = c.NOT_VALID.value
|
||||
elif self._actions.is_door_usage(action_obj):
|
||||
valid = self._handle_door_interaction(agent)
|
||||
else:
|
||||
valid = self.do_additional_actions(agent, action)
|
||||
agent.temp_action = action
|
||||
valid = self.do_additional_actions(agent, action_obj)
|
||||
assert valid is not None, 'This should not happen, every Action musst be detected correctly!'
|
||||
agent.temp_action = action_obj
|
||||
agent.temp_valid = valid
|
||||
|
||||
# In-between step Hook for later use
|
||||
@ -275,12 +228,25 @@ class BaseFactory(gym.Env):
|
||||
info.update(self._summarize_state())
|
||||
|
||||
# Post step Hook for later use
|
||||
info.update(self.post_step())
|
||||
info.update(self.hook_post_step())
|
||||
|
||||
obs = self._get_observations()
|
||||
|
||||
return obs, reward, done, info
|
||||
|
||||
def _handle_door_interaction(self, agent):
    """
    Let ``agent`` use a door, if one is found for its position.

    The door is looked up with ``get_near_position`` when ``doors_have_area``
    is set and with ``by_pos`` otherwise.

    :param agent: The agent performing the door action.
    :return: ``c.VALID.value`` when a door was found and used,
             ``c.NOT_VALID.value`` when no door was found.
    """
    lookup = self._doors.get_near_position if self.doors_have_area else self._doors.by_pos
    door = lookup(agent.pos)
    if door is None:
        # No door available for this agent position.
        return c.NOT_VALID.value
    door.use()
    return c.VALID.value
|
||||
|
||||
def _flush_state(self):
|
||||
self._obs_cube[np.arange(len(self._slices)) != self._slices.get_idx(c.LEVEL)] = c.FREE_CELL.value
|
||||
if self.parse_doors:
|
||||
@ -291,7 +257,7 @@ class BaseFactory(gym.Env):
|
||||
self._obs_cube[self._slices.get_idx(c.DOORS)][door.pos] = c.CLOSED_DOOR.value
|
||||
for agent in self._agents:
|
||||
self._obs_cube[self._slices.get_idx_by_name(agent.name)][agent.pos] = c.OCCUPIED_CELL.value
|
||||
if agent.last_pos != h.NO_POS:
|
||||
if agent.last_pos != c.NO_POS:
|
||||
self._obs_cube[self._slices.get_idx_by_name(agent.name)][agent.last_pos] = c.FREE_CELL.value
|
||||
|
||||
def _get_observations(self) -> np.ndarray:
|
||||
@ -318,8 +284,8 @@ class BaseFactory(gym.Env):
|
||||
obs = self._obs_cube
|
||||
|
||||
if self.cast_shadows:
|
||||
obs_block_light = [obs[idx] != c.OCCUPIED_CELL.value for idx, slice
|
||||
in enumerate(self._slices) if slice.is_blocking_light]
|
||||
obs_block_light = [obs[idx] != c.OCCUPIED_CELL.value for idx, obs_slice
|
||||
in enumerate(self._slices) if obs_slice.is_blocking_light]
|
||||
door_shadowing = False
|
||||
if door := self._doors.by_pos(agent.pos):
|
||||
if door.is_closed:
|
||||
@ -332,6 +298,7 @@ class BaseFactory(gym.Env):
|
||||
xs, ys = zip(*blocking)
|
||||
else:
|
||||
xs, ys = zip(*group)
|
||||
# noinspection PyTypeChecker
|
||||
obs_block_light[self._slices.get_idx(c.LEVEL)][xs, ys] = False
|
||||
|
||||
light_block_map = Map((np.prod(obs_block_light, axis=0) != True).astype(int))
|
||||
@ -340,9 +307,14 @@ class BaseFactory(gym.Env):
|
||||
else:
|
||||
light_block_map = light_block_map.do_fov(*agent.pos, max(self._level_shape))
|
||||
if door_shadowing:
|
||||
# noinspection PyUnboundLocalVariable
|
||||
light_block_map[xs, ys] = 0
|
||||
agent.temp_light_map = light_block_map
|
||||
obs = (obs * light_block_map) - ((1 - light_block_map) * obs[self._slices.get_idx(c.LEVEL)])
|
||||
for obs_idx in range(obs.shape[0]):
|
||||
if self._slices[obs_idx].can_be_shadowed:
|
||||
obs[obs_idx] = (obs[obs_idx] * light_block_map) - (
|
||||
(1 - light_block_map) * obs[self._slices.get_idx(c.LEVEL)]
|
||||
)
|
||||
|
||||
if self.combin_agent_slices_in_obs and self.n_agents > 1:
|
||||
agent_obs = np.sum(obs[[key for key, l_slice in self._slices.items() if c.AGENT.name in l_slice.name and
|
||||
@ -357,9 +329,6 @@ class BaseFactory(gym.Env):
|
||||
else:
|
||||
return obs
|
||||
|
||||
def do_additional_actions(self, agent: Agent, action: int) -> bool:
|
||||
raise NotImplementedError
|
||||
|
||||
def get_all_tiles_with_collisions(self) -> List[Tile]:
|
||||
tiles_with_collisions = list()
|
||||
for tile in self._tiles:
|
||||
@ -392,7 +361,7 @@ class BaseFactory(gym.Env):
|
||||
valid = c.VALID
|
||||
return tile, valid
|
||||
|
||||
if self.parse_doors and agent.last_pos != h.NO_POS:
|
||||
if self.parse_doors and agent.last_pos != c.NO_POS:
|
||||
if door := self._doors.by_pos(new_tile.pos):
|
||||
if door.can_collide:
|
||||
return agent.tile, c.NOT_VALID
|
||||
@ -416,10 +385,63 @@ class BaseFactory(gym.Env):
|
||||
|
||||
def calculate_reward(self) -> (int, dict):
|
||||
# Returns: Reward, Info
|
||||
raise NotImplementedError
|
||||
info_dict = dict()
|
||||
reward = 0
|
||||
|
||||
for agent in self._agents:
|
||||
if self._actions.is_moving_action(agent.temp_action):
|
||||
if agent.temp_valid:
|
||||
# info_dict.update(movement=1)
|
||||
reward -= 0.00
|
||||
else:
|
||||
# self.print('collision')
|
||||
reward -= 0.01
|
||||
self.print(f'{agent.name} just hit the wall at {agent.pos}.')
|
||||
info_dict.update({f'{agent.name}_vs_LEVEL': 1})
|
||||
|
||||
elif self._actions.is_door_usage(agent.temp_action):
|
||||
if agent.temp_valid:
|
||||
self.print(f'{agent.name} did just use the door at {agent.pos}.')
|
||||
info_dict.update(door_used=1)
|
||||
else:
|
||||
reward -= 0.01
|
||||
self.print(f'{agent.name} just tried to use a door at {agent.pos}, but failed.')
|
||||
info_dict.update({f'{agent.name}_failed_action': 1})
|
||||
info_dict.update({f'{agent.name}_failed_door_open': 1})
|
||||
elif self._actions.is_no_op(agent.temp_action):
|
||||
info_dict.update(no_op=1)
|
||||
reward -= 0.00
|
||||
|
||||
additional_reward, additional_info_dict = self.calculate_additional_reward(agent)
|
||||
reward += additional_reward
|
||||
info_dict.update(additional_info_dict)
|
||||
|
||||
for other_agent in agent.temp_collisions:
|
||||
info_dict.update({f'{agent.name}_vs_{other_agent.name}': 1})
|
||||
|
||||
self.print(f"reward is {reward}")
|
||||
return reward, info_dict
|
||||
|
||||
def render(self, mode='human'):
|
||||
raise NotImplementedError
|
||||
if not self._renderer: # lazy init
|
||||
height, width = self._obs_cube.shape[1:]
|
||||
self._renderer = Renderer(width, height, view_radius=self.pomdp_r, fps=5)
|
||||
|
||||
walls = [RenderEntity('wall', pos)
|
||||
for pos in np.argwhere(self._slices.by_enum(c.LEVEL).slice == c.OCCUPIED_CELL.value)]
|
||||
|
||||
agents = []
|
||||
for i, agent in enumerate(self._agents):
|
||||
name, state = h.asset_str(agent)
|
||||
agents.append(RenderEntity(name, agent.pos, 1, 'none', state, i + 1, agent.temp_light_map))
|
||||
doors = []
|
||||
if self.parse_doors:
|
||||
for i, door in enumerate(self._doors):
|
||||
name, state = 'door_open' if door.is_open else 'door_closed', 'blank'
|
||||
doors.append(RenderEntity(name, door.pos, 1, 'none', state, i + 1))
|
||||
additional_assets = self.render_additional_assets()
|
||||
|
||||
self._renderer.render(walls + doors + additional_assets + agents)
|
||||
|
||||
def save_params(self, filepath: Path):
|
||||
# noinspection PyProtectedMember
|
||||
@ -440,3 +462,66 @@ class BaseFactory(gym.Env):
|
||||
def print(self, string):
|
||||
if self.verbose:
|
||||
print(string)
|
||||
|
||||
# Properties which are called by the base class to extend beyond attributes of the base class
|
||||
@property
def additional_actions(self) -> Union[Action, List[Action]]:
    """
    Extension point for actions that go beyond those of the base class.

    The base implementation contributes no actions. Subclasses may override
    this property to supply their own.

    :return: A new, empty list.
    :rtype: List[Action]
    """
    return list()
|
||||
|
||||
@property
def additional_entities(self) -> Union[Entities, List[Entities]]:
    """
    Extension point for entity collections that go beyond those of the base class.

    The base implementation contributes no entities. Subclasses may override
    this property to supply a single Entities collection or a list of such.

    :return: A new, empty list.
    :rtype: Union[Entities, List[Entities]]
    """
    return list()
|
||||
|
||||
@property
def additional_slices(self) -> Union[Slice, List[Slice]]:
    """
    Extension point for state slices that go beyond those of the base class.

    The base implementation contributes no slices. Subclasses may override
    this property to supply their own Slice objects.

    :return: A new, empty list.
    :rtype: List[Slice]
    """
    return list()
|
||||
|
||||
# Functions which provide additions to functions of the base class
|
||||
# Always call super!!!!!!
|
||||
@abc.abstractmethod
def do_additional_reset(self) -> None:
    """
    Reset extension point; the base implementation does nothing.

    Overriding implementations are expected to call ``super()``.
    """
    return None
|
||||
|
||||
@abc.abstractmethod
def do_additional_step(self) -> dict:
    """
    Step extension point; the base implementation reports nothing.

    Overriding implementations are expected to call ``super()``.

    :return: A new, empty dict.
    """
    return dict()
|
||||
|
||||
@abc.abstractmethod
def do_additional_actions(self, agent: Agent, action: int) -> Union[None, bool]:
    """
    Extension point for actions the base class does not handle itself.

    ``step`` asserts that the result is not ``None``, so ``None`` here means
    "action not recognised by this class".
    NOTE(review): ``step`` appears to pass the looked-up action object rather
    than an int index - confirm the ``action`` annotation.

    :param agent: The agent performing the action.
    :param action: The action to perform.
    :return: Always ``None`` in the base implementation.
    """
    result = None
    return result
|
||||
|
||||
@abc.abstractmethod
def calculate_additional_reward(self, agent: Agent) -> (int, dict):
    """
    Per-agent reward extension point, used by ``calculate_reward``.

    :param agent: The agent the reward is calculated for.
    :return: A ``(reward, info)`` tuple; the base implementation yields
             ``0`` and a new, empty dict.
    """
    reward, info = 0, dict()
    return reward, info
|
||||
|
||||
@abc.abstractmethod
def render_additional_assets(self):
    """
    Rendering extension point, used by ``render``.

    :return: A new, empty list; ``render`` concatenates the result with the
             wall, door and agent render entities.
    """
    return list()
|
||||
|
||||
# Hooks for in between operations.
|
||||
# Always call super!!!!!!
|
||||
@abc.abstractmethod
def hook_pre_step(self) -> None:
    """
    Hook invoked by ``step`` before the agents' actions are processed.

    The base implementation does nothing. Overriding implementations are
    expected to call ``super()``.
    """
    return None
|
||||
|
||||
@abc.abstractmethod
def hook_post_step(self) -> dict:
    """
    Hook invoked by ``step``; its result is merged into the step's info dict.

    The base implementation reports nothing. Overriding implementations are
    expected to call ``super()``.

    :return: A new, empty dict.
    """
    return dict()
|
||||
|
@ -1,8 +1,5 @@
|
||||
import itertools
|
||||
|
||||
import networkx as nx
|
||||
import numpy as np
|
||||
from environments import helpers as h
|
||||
from environments.helpers import Constants as c
|
||||
import itertools
|
||||
|
||||
@ -16,35 +13,32 @@ class Object:
|
||||
def __bool__(self):
|
||||
return True
|
||||
|
||||
@property
|
||||
def i(self):
|
||||
return self._identifier
|
||||
|
||||
@property
|
||||
def name(self):
|
||||
return self._identifier
|
||||
return self._name
|
||||
|
||||
def __init__(self, identifier, **kwargs):
|
||||
self._identifier = identifier
|
||||
def __init__(self, name, name_is_identifier=False, **kwargs):
|
||||
name = name.name if hasattr(name, 'name') else name
|
||||
self._name = f'{self.__class__.__name__}#{name}' if name_is_identifier else name
|
||||
if kwargs:
|
||||
print(f'Following kwargs were passed, but ignored: {kwargs}')
|
||||
|
||||
def __repr__(self):
|
||||
return f'{self.__class__.__name__}({self._identifier})'
|
||||
return f'{self.__class__.__name__}({self.name})'
|
||||
|
||||
|
||||
class Action(Object):
|
||||
|
||||
@property
|
||||
def name(self):
|
||||
return self.i
|
||||
|
||||
def __init__(self, *args):
|
||||
super(Action, self).__init__(*args)
|
||||
|
||||
|
||||
class Slice(Object):
|
||||
|
||||
@property
|
||||
def is_observable(self):
|
||||
return self._is_observable
|
||||
|
||||
@property
|
||||
def shape(self):
|
||||
return self.slice.shape
|
||||
@ -57,10 +51,16 @@ class Slice(Object):
|
||||
def free_tiles(self):
|
||||
return np.argwhere(self.slice == c.FREE_CELL.value)
|
||||
|
||||
def __init__(self, identifier, arrayslice, is_blocking_light=False):
|
||||
def __init__(self, identifier, arrayslice, is_blocking_light=False, can_be_shadowed=True, is_observable=True):
|
||||
super(Slice, self).__init__(identifier)
|
||||
self.slice = arrayslice
|
||||
self.is_blocking_light = is_blocking_light
|
||||
self.can_be_shadowed = can_be_shadowed
|
||||
self._is_observable = is_observable
|
||||
|
||||
def set_slice(self, new_slice: np.ndarray):
    """
    Replace the stored array with ``new_slice``.

    :param new_slice: The replacement array; its shape must equal the shape
                      of the currently stored array (checked by assertion).
    """
    current_shape, incoming_shape = self.slice.shape, new_slice.shape
    assert current_shape == incoming_shape
    self.slice = new_slice
|
||||
|
||||
|
||||
class Wall(Object):
|
||||
@ -89,8 +89,8 @@ class Tile(Object):
|
||||
def pos(self):
|
||||
return self._pos
|
||||
|
||||
def __init__(self, i, pos):
|
||||
super(Tile, self).__init__(i)
|
||||
def __init__(self, i, pos, **kwargs):
|
||||
super(Tile, self).__init__(i, **kwargs)
|
||||
self._guests = dict()
|
||||
self._pos = tuple(pos)
|
||||
|
||||
@ -164,7 +164,7 @@ class MoveableEntity(Entity):
|
||||
if self._last_tile:
|
||||
return self._last_tile.pos
|
||||
else:
|
||||
return h.NO_POS
|
||||
return c.NO_POS
|
||||
|
||||
@property
|
||||
def direction_of_view(self):
|
||||
@ -206,8 +206,8 @@ class Door(Entity):
|
||||
return [node for node in self.connectivity.nodes
|
||||
if node not in range(len(self.connectivity_subgroups)) and node != self.pos]
|
||||
|
||||
def __init__(self, *args, context, closed_on_init=True, auto_close_interval=10, has_area=False):
|
||||
super(Door, self).__init__(*args)
|
||||
def __init__(self, *args, context, closed_on_init=True, auto_close_interval=10, has_area=False, **kwargs):
|
||||
super(Door, self).__init__(*args, **kwargs)
|
||||
self._state = c.CLOSED_DOOR
|
||||
self.has_area = has_area
|
||||
self.auto_close_interval = auto_close_interval
|
||||
@ -270,8 +270,8 @@ class Door(Entity):
|
||||
|
||||
class Agent(MoveableEntity):
|
||||
|
||||
def __init__(self, *args):
|
||||
super(Agent, self).__init__(*args)
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(Agent, self).__init__(*args, **kwargs)
|
||||
self.clear_temp_sate()
|
||||
|
||||
# noinspection PyAttributeOutsideInit
|
||||
@ -280,5 +280,5 @@ class Agent(MoveableEntity):
|
||||
# if attr.startswith('temp'):
|
||||
self.temp_collisions = []
|
||||
self.temp_valid = None
|
||||
self.temp_action = -1
|
||||
self.temp_action = None
|
||||
self.temp_light_map = None
|
||||
|
@ -1,9 +1,7 @@
|
||||
import itertools
|
||||
import random
|
||||
from enum import Enum
|
||||
from typing import List, Union
|
||||
|
||||
import networkx as nx
|
||||
import numpy as np
|
||||
|
||||
from environments.factory.base.objects import Entity, Tile, Agent, Door, Slice, Action
|
||||
@ -16,11 +14,8 @@ class Register:
|
||||
_accepted_objects = Entity
|
||||
|
||||
@classmethod
|
||||
def from_argwhere_coordinates(cls, positions: (int, int), tiles):
|
||||
entities = [cls._accepted_objects(i, tiles.by_pos(position)) for i, position in enumerate(positions)]
|
||||
registered_obj = cls()
|
||||
registered_obj.register_additional_items(entities)
|
||||
return registered_obj
|
||||
def from_argwhere_coordinates(cls, positions: [(int, int)], tiles):
|
||||
return cls.from_tiles([tiles.by_pos(position) for position in positions])
|
||||
|
||||
@property
|
||||
def name(self):
|
||||
@ -72,8 +67,8 @@ class Register:
|
||||
def by_name(self, item):
|
||||
return self[self._names[item]]
|
||||
|
||||
def by_enum(self, enum: Enum):
|
||||
return self[self._names[enum.name]]
|
||||
def by_enum(self, enum_obj: Enum):
|
||||
return self[self._names[enum_obj.name]]
|
||||
|
||||
def __repr__(self):
|
||||
return f'{self.__class__.__name__}({self._register})'
|
||||
@ -84,13 +79,13 @@ class Register:
|
||||
def get_idx_by_name(self, item):
|
||||
return self._names[item]
|
||||
|
||||
def get_idx(self, enum: Enum):
|
||||
return self._names[enum.name]
|
||||
def get_idx(self, enum_obj: Enum):
|
||||
return self._names[enum_obj.name]
|
||||
|
||||
@classmethod
|
||||
def from_tiles(cls, tiles, **kwargs):
|
||||
entities = [cls._accepted_objects(f'{cls._accepted_objects.__name__.upper()}#{i}', tile, **kwargs)
|
||||
for i, tile in enumerate(tiles)]
|
||||
# objects_name = cls._accepted_objects.__name__
|
||||
entities = [cls._accepted_objects(i, tile, name_is_identifier=True, **kwargs) for i, tile in enumerate(tiles)]
|
||||
registered_obj = cls()
|
||||
registered_obj.register_additional_items(entities)
|
||||
return registered_obj
|
||||
@ -98,14 +93,6 @@ class Register:
|
||||
|
||||
class EntityRegister(Register):
|
||||
|
||||
@classmethod
|
||||
def from_argwhere_coordinates(cls, argwhere_coordinates, **kwargs):
|
||||
tiles = cls()
|
||||
tiles.register_additional_items(
|
||||
[cls._accepted_objects(i, pos, **kwargs) for i, pos in enumerate(argwhere_coordinates)]
|
||||
)
|
||||
return tiles
|
||||
|
||||
def __init__(self):
|
||||
super(EntityRegister, self).__init__()
|
||||
self._tiles = dict()
|
||||
@ -141,6 +128,15 @@ class Entities(Register):
|
||||
class FloorTiles(EntityRegister):
|
||||
_accepted_objects = Tile
|
||||
|
||||
@classmethod
def from_argwhere_coordinates(cls, argwhere_coordinates):
    """
    Alternate constructor: build a register with one object per coordinate.

    Each coordinate is turned into a ``cls._accepted_objects`` instance whose
    first argument is its running index (used as identifier).

    :param argwhere_coordinates: Iterable of positions.
    :return: A new register holding the created objects.
    """
    register = cls()
    make_tile = cls._accepted_objects
    created = []
    for index, position in enumerate(argwhere_coordinates):
        # noinspection PyTypeChecker
        created.append(make_tile(index, position, name_is_identifier=True))
    register.register_additional_items(created)
    return register
|
||||
|
||||
@property
|
||||
def occupied_tiles(self):
|
||||
tiles = [tile for tile in self if tile.is_occupied()]
|
||||
@ -148,7 +144,7 @@ class FloorTiles(EntityRegister):
|
||||
return tiles
|
||||
|
||||
@property
|
||||
def empty_tiles(self):
|
||||
def empty_tiles(self) -> List[Tile]:
|
||||
tiles = [tile for tile in self if tile.is_empty()]
|
||||
random.shuffle(tiles)
|
||||
return tiles
|
||||
@ -185,6 +181,7 @@ class Actions(Register):
|
||||
def movement_actions(self):
|
||||
return self._movement_actions
|
||||
|
||||
# noinspection PyTypeChecker
|
||||
def __init__(self, movement_properties: MovementProperties, can_use_doors=False):
|
||||
self.allow_no_op = movement_properties.allow_no_op
|
||||
self.allow_diagonal_movement = movement_properties.allow_diagonal_movement
|
||||
@ -193,43 +190,47 @@ class Actions(Register):
|
||||
super(Actions, self).__init__()
|
||||
|
||||
if self.allow_square_movement:
|
||||
self.register_additional_items([self._accepted_objects(direction) for direction in h.MANHATTAN_MOVES])
|
||||
self.register_additional_items([self._accepted_objects(direction) for direction in h.ManhattanMoves])
|
||||
if self.allow_diagonal_movement:
|
||||
self.register_additional_items([self._accepted_objects(direction) for direction in h.DIAGONAL_MOVES])
|
||||
self.register_additional_items([self._accepted_objects(direction) for direction in h.DiagonalMoves])
|
||||
self._movement_actions = self._register.copy()
|
||||
if self.can_use_doors:
|
||||
self.register_additional_items([self._accepted_objects('use_door')])
|
||||
self.register_additional_items([self._accepted_objects(h.EnvActions.USE_DOOR)])
|
||||
if self.allow_no_op:
|
||||
self.register_additional_items([self._accepted_objects('no-op')])
|
||||
self.register_additional_items([self._accepted_objects(h.EnvActions.NOOP)])
|
||||
|
||||
def is_moving_action(self, action: Union[int]):
|
||||
#if isinstance(action, Action):
|
||||
# return (action.name in h.MANHATTAN_MOVES and self.allow_square_movement) or \
|
||||
# (action.name in h.DIAGONAL_MOVES and self.allow_diagonal_movement)
|
||||
#else:
|
||||
return action in self.movement_actions.keys()
|
||||
return action in self.movement_actions.values()
|
||||
|
||||
def is_no_op(self, action: Union[str, int]):
|
||||
if isinstance(action, str):
|
||||
action = self.by_name(action)
|
||||
return self[action].name == 'no-op'
|
||||
def is_no_op(self, action: Union[str, Action, int]):
|
||||
if isinstance(action, int):
|
||||
action = self[action]
|
||||
if isinstance(action, Action):
|
||||
action = action.name
|
||||
return action == h.EnvActions.NOOP.name
|
||||
|
||||
def is_door_usage(self, action: Union[str, int]):
|
||||
if isinstance(action, str):
|
||||
action = self.by_name(action)
|
||||
return self[action].name == 'use_door'
|
||||
if isinstance(action, int):
|
||||
action = self[action]
|
||||
if isinstance(action, Action):
|
||||
action = action.name
|
||||
return action == h.EnvActions.USE_DOOR.name
|
||||
|
||||
|
||||
class StateSlices(Register):
|
||||
|
||||
_accepted_objects = Slice
|
||||
@property
def n_observable_slices(self):
    """Number of registered slices whose ``is_observable`` flag is truthy."""
    return sum(1 for registered_slice in self if registered_slice.is_observable)
|
||||
|
||||
|
||||
@property
|
||||
def AGENTSTARTIDX(self):
|
||||
if self._agent_start_idx:
|
||||
return self._agent_start_idx
|
||||
else:
|
||||
self._agent_start_idx = min([idx for idx, x in self.items() if c.AGENT.name in x.name])
|
||||
self._agent_start_idx = min([idx for idx, x in self.items() if c.AGENT.value in x.name])
|
||||
return self._agent_start_idx
|
||||
|
||||
def __init__(self):
|
||||
|
229
environments/factory/double_task_factory.py
Normal file
229
environments/factory/double_task_factory.py
Normal file
@ -0,0 +1,229 @@
|
||||
import time
|
||||
from collections import deque
|
||||
from enum import Enum
|
||||
from typing import List, Union, NamedTuple
|
||||
import numpy as np
|
||||
|
||||
from environments.factory.simple_factory import SimpleFactory
|
||||
from environments.helpers import Constants as c
|
||||
from environments import helpers as h
|
||||
from environments.factory.base.objects import Agent, Slice, Entity, Action
|
||||
from environments.factory.base.registers import Entities
|
||||
|
||||
from environments.factory.renderer import RenderEntity
|
||||
|
||||
|
||||
# Names used by the item sub-task.
PICK_UP = 'pick_up'
DROP_OFF = 'drop_off'  # name DropOffLocation passes to the Entity constructor
# Marker values stored in the item slice.
NO_ITEM = 0  # written back into a cell after its item was picked up
ITEM_DROP_OFF = -1  # cell value that do_item_action treats as the drop-off location
|
||||
|
||||
|
||||
def inventory_slice_name(agent_i):
    """
    Build the name of an agent's inventory slice.

    :param agent_i: Either an agent index (int), which is expanded to
                    ``<c.AGENT.value>#<index>``, or any other value (e.g. an
                    agent name), which is used as-is.
    :return: ``<c.INVENTORY.name>_<agent part>``.
    """
    agent_part = f'{c.AGENT.value}#{agent_i}' if isinstance(agent_i, int) else agent_i
    return f'{c.INVENTORY.name}_{agent_part}'
|
||||
|
||||
|
||||
class DropOffLocation(Entity):
    """
    Entity at which agents deliver items.

    Delivered items are kept in a bounded ``deque``.
    NOTE(review): a ``deque`` with ``maxlen`` discards its oldest element when
    appended to while full, so ``place_item`` never rejects an item - confirm
    this is intended.
    """

    def __init__(self, *args, storage_size_until_full: int = 5, **kwargs):
        """
        :param storage_size_until_full: Capacity of the internal storage.
        :param args: Forwarded to ``Entity`` after the ``DROP_OFF`` name.
        :param kwargs: Forwarded to ``Entity``.
        """
        super().__init__(DROP_OFF, *args, **kwargs)
        self.storage = deque(maxlen=storage_size_until_full)

    @property
    def is_full(self):
        """Whether the storage currently holds ``maxlen`` items."""
        return len(self.storage) == self.storage.maxlen

    def place_item(self, item):
        """
        Store ``item`` at this location.

        :return: Always ``True``.
        """
        self.storage.append(item)
        return True
|
||||
|
||||
|
||||
class ItemProperties(NamedTuple):
    """Configuration values for the item task of ``DoubleTaskFactory``."""
    n_items: int = 1  # How many items are there at the same time
    spawn_frequency: int = 5  # Spawn frequency in steps
    max_dropoff_storage_size: int = 5  # How many items are needed until the drop off is full
    max_agent_storage_size: int = 5  # How many items are needed until the agent inventory is full
    agent_can_interact: bool = True  # Whether agents have the possibility to interact with the domain items
|
||||
|
||||
|
||||
# noinspection PyAttributeOutsideInit,PyUnresolvedReferences
|
||||
class DoubleTaskFactory(SimpleFactory):
|
||||
# noinspection PyMissingConstructor
|
||||
def __init__(self, item_properties: ItemProperties, *args, with_dirt=False, env_seed=None, **kwargs):
    """
    Set up the item task and initialise the parent factory.

    :param item_properties: Configuration of the item task.
    :param args: Forwarded to the parent constructor.
    :param with_dirt: If ``True`` the ``SimpleFactory`` behaviour is kept
                      (``super`` calls resolve relative to this class); if
                      ``False`` ``SimpleFactory`` is skipped in ``super`` calls.
    :param env_seed: Seed for the item random generator, also forwarded to the
                     parent constructor. ``None`` (default) draws a fresh seed
                     from ``time.time_ns()`` for every instance.
    :param kwargs: Forwarded to the parent constructor. ``pomdp_r``, if given,
                   is checked against ``item_properties.n_items``.
    :raises AssertionError: If ``pomdp_r`` is set and ``n_items`` is not
                            smaller than ``pomdp_r ** 2``.
    """
    if env_seed is None:
        # A ``time.time_ns()`` default in the signature is evaluated only once,
        # at definition time, so every instance would share the same seed.
        env_seed = time.time_ns()
    self.item_properties = item_properties
    kwargs.update(env_seed=env_seed)
    self._item_rng = np.random.default_rng(env_seed)

    # ``pomdp_r`` may be absent, 0 or None; all mean "no partial observability".
    pomdp_r = kwargs.get('pomdp_r') or 0
    assert not pomdp_r or item_properties.n_items < pomdp_r ** 2, \
        "'n_items' has to be smaller than 'pomdp_r' ** 2."

    # Anchor for all ``super(self._super, self)`` calls of this class. Naming the
    # class explicitly (rather than ``self.__class__``) keeps those calls from
    # recursing endlessly when this class is subclassed.
    self._super = DoubleTaskFactory if with_dirt else SimpleFactory
    super(self._super, self).__init__(*args, **kwargs)
|
||||
|
||||
@property
def additional_actions(self) -> Union[Action, List[Action]]:
    """
    Actions of the parent class plus the item action.

    :return: The parent's list of additional actions with an ``Action`` for
             ``h.EnvActions.ITEM_ACTION`` appended.
    :rtype: List[Action]
    """
    actions = super(self._super, self).additional_actions
    item_action = Action(h.EnvActions.ITEM_ACTION)
    actions.append(item_action)
    return actions
|
||||
|
||||
@property
def additional_entities(self) -> Union[Entities, List[Entities]]:
    """
    Entities of the parent class; the item task adds none of its own here.

    :return: Whatever the parent's ``additional_entities`` yields.
    """
    return super(self._super, self).additional_entities
|
||||
|
||||
@property
def additional_slices(self) -> Union[Slice, List[Slice]]:
    """
    Slices of the parent class plus the item slice and one inventory slice per agent.

    All added slices start as zero arrays of the level's shape; inventory
    slices are created with ``can_be_shadowed=False``.

    :return: The extended list of slices.
    :rtype: List[Slice]
    """
    slices = super(self._super, self).additional_slices
    slices.append(Slice(c.ITEM, np.zeros(self._level_shape)))
    for agent_i in range(self.n_agents):
        inventory = Slice(inventory_slice_name(agent_i), np.zeros(self._level_shape), can_be_shadowed=False)
        slices.append(inventory)
    return slices
|
||||
|
||||
def _flush_state(self):
    """
    Write the item slice and every agent's inventory slice into the observation cube.

    Runs the parent's ``_flush_state`` first.
    """
    super(self._super, self)._flush_state()

    # Flush environmental item state
    slice_idx = self._slices.get_idx(c.ITEM)
    self._obs_cube[slice_idx] = self._slices[slice_idx].slice

    # Flush per agent inventory state
    for agent in self._agents:
        agent_slice_idx = self._slices.get_idx_by_name(inventory_slice_name(agent.name))
        # Clear the inventory slice before it is redrawn.
        self._slices[agent_slice_idx].slice[:] = 0
        if len(agent.inventory) > 0:
            # Row width used to lay the inventory out: the view radius if set, else the level's first dimension.
            max_x = self.pomdp_r if self.pomdp_r else self._level_shape[0]
            # Anchor cell: the origin without a view radius, else the agent position shifted by max_x (clamped at 0).
            x, y = (0, 0) if not self.pomdp_r else (max(agent.x - max_x, 0), max(agent.y - max_x, 0))
            for item in agent.inventory:
                # The item's value itself selects the cell (offset from the anchor) it is written to.
                x_diff, y_diff = divmod(item, max_x)
                self._slices[agent_slice_idx].slice[int(x+x_diff), int(y+y_diff)] = item
            # NOTE(review): nesting level reconstructed from a flattened diff - confirm this write
            # belongs inside the inventory check rather than directly in the agent loop.
            self._obs_cube[agent_slice_idx] = self._slices[agent_slice_idx].slice
|
||||
|
||||
def _is_item_action(self, action):
    """Tell whether ``action`` denotes the item action.

    ``action`` may be an integer index into ``self._actions``, an ``Action``
    object, or anything directly comparable to the action name.
    """
    resolved = self._actions[action] if isinstance(action, int) else action
    label = resolved.name if isinstance(resolved, Action) else resolved
    return label == h.EnvActions.ITEM_ACTION.name
|
||||
|
||||
def do_item_action(self, agent: Agent):
    """Perform the item action for ``agent`` on the cell it stands on.

    On the drop-off cell the first inventory entry is handed to the drop-off
    and its result is returned. On an item cell the item is moved into the
    inventory if there is room. Everything else is reported as not valid.
    """
    item_slice = self._slices.by_enum(c.ITEM).slice

    item = item_slice[agent.pos]
    if not item:
        # Nothing to interact with on this cell.
        return c.NOT_VALID

    if item == ITEM_DROP_OFF:
        if not agent.inventory:
            return c.NOT_VALID
        return self._item_drop_off.place_item(agent.inventory.pop(0))

    if item != NO_ITEM:
        if len(agent.inventory) >= self.item_properties.max_agent_storage_size:
            # Inventory is full; the item stays where it is.
            return c.NOT_VALID
        agent.inventory.append(item_slice[agent.pos])
        item_slice[agent.pos] = NO_ITEM
    return c.VALID
|
||||
|
||||
def do_additional_actions(self, agent: Agent, action: int) -> Union[None, bool]:
    """Handle the item action unless the parent already handled ``action``.

    Returns the parent's result when it is not ``None``. Otherwise returns
    ``None`` for actions that are not the item action, ``False`` when agents
    may not interact with items, and the boolean outcome of
    ``do_item_action`` in all remaining cases.
    """
    inherited_result = super(self._super, self).do_additional_actions(agent, action)
    if inherited_result is not None:
        return inherited_result
    if not self._is_item_action(action):
        return None
    if not self.item_properties.agent_can_interact:
        return False
    return bool(self.do_item_action(agent))
|
||||
|
||||
def do_additional_reset(self) -> None:
    """Reset the item task: drop-off, items, spawn countdown, inventories."""
    super(self._super, self).do_additional_reset()

    self.spawn_drop_off_location()
    self.spawn_items(self.item_properties.n_items)
    self._next_item_spawn = self.item_properties.spawn_frequency

    # Every agent starts the episode with a fresh, empty inventory.
    for each_agent in self._agents:
        each_agent.inventory = []
|
||||
|
||||
def do_additional_step(self) -> dict:
    """Advance the item spawn countdown and respawn items when it hits zero.

    Returns the info dict produced by the parent's ``do_additional_step``.
    """
    info_dict = super(self._super, self).do_additional_step()

    if self._next_item_spawn:
        # Countdown still running.
        self._next_item_spawn -= 1
        return info_dict

    target_count = self.item_properties.n_items
    occupied_cells = np.sum(self._slices.by_enum(c.ITEM).slice.astype(bool))
    # One non-zero cell is not counted as an item -- presumably the drop-off
    # marker that lives in the same slice.
    missing_items = target_count - (occupied_cells - 1)
    if missing_items:
        self.spawn_items(missing_items)
        self._next_item_spawn = self.item_properties.spawn_frequency
    else:
        self.print('No Items are spawning, limit is reached.')
    return info_dict
|
||||
|
||||
def spawn_drop_off_location(self):
    """Create the drop-off on the first empty tile and mark it in the item slice."""
    target_tile = self._tiles.empty_tiles[0]
    drop_off = DropOffLocation(target_tile,
                               storage_size_until_full=self.item_properties.max_dropoff_storage_size)
    self._item_drop_off = drop_off
    target_tile.enter(drop_off)
    self._slices.by_enum(c.ITEM).slice[target_tile.pos] = ITEM_DROP_OFF
|
||||
|
||||
def calculate_additional_reward(self, agent: Agent) -> (int, dict):
    """Add the item-task reward for ``agent`` on top of the parent's reward.

    Only item actions change the reward: ``+1`` for a valid action on the
    drop-off position, ``+0.1`` for any other valid item action and ``-0.1``
    for a failed one. A matching per-agent key is recorded in the info dict.
    """
    reward, info_dict = super(self._super, self).calculate_additional_reward(agent)

    if not self._is_item_action(agent.temp_action):
        return reward, info_dict

    if not agent.temp_valid:
        info_dict.update({f'{agent.name}_failed_item_action': 1})
        reward -= 0.1
    elif agent.pos == self._item_drop_off.pos:
        info_dict.update({f'{agent.name}_item_dropoff': 1})
        reward += 1
    else:
        info_dict.update({f'{agent.name}_item_pickup': 1})
        reward += 0.1
    return reward, info_dict
|
||||
|
||||
def render_additional_assets(self, mode='human'):
    """Return the parent's render entities plus one per occupied item cell.

    Cells holding the drop-off marker are rendered with the ``DROP_OFF``
    asset, all other non-empty cells with the item asset.
    """
    additional_assets = super(self._super, self).render_additional_assets()
    item_slice = self._slices.by_enum(c.ITEM).slice

    item_entities = []
    for tile in self._tiles:
        if item_slice[tile.pos] != NO_ITEM:
            asset_name = DROP_OFF if item_slice[tile.pos] == ITEM_DROP_OFF else c.ITEM.value
            item_entities.append(RenderEntity(asset_name, tile.pos))

    additional_assets.extend(item_entities)
    return additional_assets
|
||||
|
||||
def spawn_items(self, n_items):
    """Place up to ``n_items`` items on the first empty tiles.

    The value written for each item is its 1-based position within this
    call, so the numbering restarts at 1 on every spawn.
    """
    target_tiles = self._tiles.empty_tiles[:n_items]
    item_slice = self._slices.by_enum(c.ITEM).slice
    for position, tile in enumerate(target_tiles):
        item_slice[tile.pos] = position + 1
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke run: drive the environment with random actions and render it.
    import random
    render = True

    item_props = ItemProperties()

    env_kwargs = dict(n_agents=1, done_at_collision=False, frames_to_stack=0,
                      level_name='rooms', max_steps=400,
                      omit_agent_slice_in_obs=True, parse_doors=True, pomdp_r=3,
                      record_episodes=False, verbose=False)
    factory = DoubleTaskFactory(item_props, **env_kwargs)

    # Highest valid action index (randint is inclusive on both ends).
    n_actions = factory.action_space.n - 1
    _ = factory.observation_space

    for epoch in range(100):
        # Draw the whole action sequence up front, before the reset.
        random_actions = []
        for _step in range(200):
            random_actions.append([random.randint(0, n_actions) for _ in range(factory.n_agents)])
        env_state = factory.reset()
        rew = 0
        for agent_i_action in random_actions:
            env_state, step_r, done_bool, info_obj = factory.step(agent_i_action)
            rew += step_r
            if render:
                factory.render()
            if done_bool:
                break
        print(f'Factory run {epoch} done, reward is:\n {rew}')
|
@ -1,115 +0,0 @@
|
||||
import time
|
||||
from collections import deque
|
||||
from typing import List, Union, NamedTuple
|
||||
import numpy as np
|
||||
|
||||
from environments.helpers import Constants as c
|
||||
from environments import helpers as h
|
||||
from environments.factory.base.base_factory import BaseFactory
|
||||
from environments.factory.base.objects import Agent, Action, Object, Slice, Entity
|
||||
from environments.factory.base.registers import Entities
|
||||
|
||||
from environments.factory.renderer import Renderer
|
||||
from environments.utility_classes import MovementProperties
|
||||
|
||||
|
||||
|
||||
ITEM = 'item'
|
||||
INVENTORY = 'inventory'
|
||||
PICK_UP = 'pick_up'
|
||||
DROP_DOWN = 'drop_down'
|
||||
ITEM_ACTION = 'item_action'
|
||||
NO_ITEM = 0
|
||||
ITEM_DROP_OFF = -1
|
||||
|
||||
|
||||
def inventory_slice_name(agent):
    """Build the name of the inventory slice belonging to ``agent``.

    The result is the agent's ``name`` followed by an underscore and the
    module-level ``INVENTORY`` constant.
    """
    return '{}_{}'.format(agent.name, INVENTORY)
|
||||
|
||||
|
||||
class DropOffLocation(Entity):
    """Entity that collects dropped items in a bounded storage."""

    def __init__(self, *args, storage_size_until_full: int = 5, **kwargs):
        """Forward ``args``/``kwargs`` to ``Entity`` and create the storage.

        :param storage_size_until_full: capacity of the internal deque.
        """
        super().__init__(*args, **kwargs)
        self.storage = deque(maxlen=storage_size_until_full)

    def place_item(self, item):
        """Store ``item`` and report success; always returns ``True``.

        NOTE(review): a bounded deque that is already full discards an entry
        from the opposite end on append, so this never refuses an item.
        """
        self.storage.append(item)
        return True

    @property
    def is_full(self):
        """``True`` once the storage holds as many items as its capacity."""
        return len(self.storage) == self.storage.maxlen
|
||||
|
||||
|
||||
class ItemProperties(NamedTuple):
    """Immutable settings of the item task; every field has a default."""
    n_items: int = 1  # How many items are there at the same time
    spawn_frequency: int = 5  # Spawn Frequency in Steps
    max_dropoff_storage_size: int = 5  # How many items are needed until the drop off is full
    max_agent_storage_size: int = 5  # How many items are needed until the agent inventory is full
|
||||
|
||||
|
||||
# noinspection PyAttributeOutsideInit
class ItemFactory(BaseFactory):
    """Factory environment with items, per-agent inventories and a drop-off.

    NOTE(review): this class looks like work in progress. ``spawn_items`` is
    called but not defined here, and ``calculate_reward`` / ``render`` are
    empty stubs. Confirm against ``BaseFactory`` before relying on it.
    """

    def __init__(self, item_properties: ItemProperties, *args, **kwargs):
        """Store the item settings, seed the item RNG and init the base env.

        :param item_properties: settings of the item task.
        :param args: forwarded to ``BaseFactory``.
        :param kwargs: forwarded to ``BaseFactory``; an optional ``seed``
            entry seeds the item RNG, otherwise the current time is used.
        """
        self.item_properties = item_properties
        # dict.get() takes its fallback positionally only; passing it as
        # ``default=...`` raises TypeError.
        self._item_rng = np.random.default_rng(kwargs.get('seed', time.time_ns()))
        super(ItemFactory, self).__init__(*args, **kwargs)

    @property
    def additional_actions(self) -> Union[str, List[str]]:
        """Names of the actions this environment adds."""
        return [ITEM_ACTION]

    @property
    def additional_entities(self) -> Union[Entities, List[Entities]]:
        """This environment adds no entity registers."""
        return []

    @property
    def additional_slices(self) -> Union[Slice, List[Slice]]:
        """One zero-filled item slice plus one inventory slice per agent."""
        return [Slice(ITEM, np.zeros(self._level_shape))] + [
            Slice(inventory_slice_name(agent), np.zeros(self._level_shape)) for agent in self._agents]

    def _is_item_action(self, action):
        """Tell whether ``action`` (a name or an index) is the item action."""
        if isinstance(action, str):
            action = self._actions.by_name(action)
        return self._actions[action].name == ITEM_ACTION

    def do_item_action(self, agent):
        """Interact with whatever lies on the agent's cell in the item slice.

        Returns ``True`` if the cell held something (item or drop-off marker)
        and ``False`` for an empty cell.
        """
        item_slice = self._slices.by_name(ITEM).slice
        inventory_slice = self._slices.by_name(inventory_slice_name(agent)).slice

        if item := item_slice[agent.pos]:
            if item == ITEM_DROP_OFF:
                # The drop-off's return value is not used here.
                self._item_drop_off.place_item(inventory_slice.sum())
            # NOTE(review): nesting reconstructed from a flattened listing;
            # as written the cell is cleared for drop-off markers as well.
            item_slice[agent.pos] = NO_ITEM
            return True
        else:
            return False

    def do_additional_actions(self, agent: Agent, action: int) -> bool:
        """Run the item action for ``agent``.

        :raises RuntimeError: if ``action`` is not the item action.
        """
        if self._is_item_action(action):
            valid = self.do_item_action(agent)
            return valid
        else:
            raise RuntimeError('This should not happen!!!')

    def do_additional_reset(self) -> None:
        """Spawn the drop-off and the initial items, then arm the countdown."""
        # The item count lives on the properties tuple, not on the env itself.
        n_items = self.item_properties.n_items
        self.spawn_drop_off_location()
        self.spawn_items(n_items)
        if n_items > 1:
            self._next_item_spawn = self.item_properties.spawn_frequency

    def spawn_drop_off_location(self):
        """Create the drop-off location object.

        NOTE(review): the selected empty tile is neither passed to the
        drop-off nor marked in the item slice yet.
        """
        single_empty_tile = self._tiles.empty_tiles[0]
        self._item_drop_off = DropOffLocation(storage_size_until_full=self.item_properties.max_dropoff_storage_size)

    def calculate_reward(self) -> (int, dict):
        """Not implemented yet."""
        pass

    def render(self, mode='human'):
        """Not implemented yet."""
        pass
|
||||
|
||||
|
@ -8,7 +8,7 @@ from typing import NamedTuple, Any
|
||||
import time
|
||||
|
||||
|
||||
class Entity(NamedTuple):
|
||||
class RenderEntity(NamedTuple):
|
||||
name: str
|
||||
pos: np.array
|
||||
value: float = 1
|
||||
@ -108,7 +108,7 @@ class Renderer:
|
||||
blits.extendleft(vis_rects)
|
||||
if entity.state != 'blank':
|
||||
agent_state_blits = self.blit_params(
|
||||
Entity(entity.state, (entity.pos[0]+0.12, entity.pos[1]), 0.48, 'scale')
|
||||
RenderEntity(entity.state, (entity.pos[0] + 0.12, entity.pos[1]), 0.48, 'scale')
|
||||
)
|
||||
textsurface = self.font.render(str(entity.id), False, (0, 0, 0))
|
||||
text_blit = dict(source=textsurface, dest=(bp['dest'].center[0]-.07*self.cell_size,
|
||||
@ -125,6 +125,6 @@ class Renderer:
|
||||
if __name__ == '__main__':
|
||||
renderer = Renderer(fps=2, cell_size=40)
|
||||
for i in range(15):
|
||||
entity_1 = Entity('agent', [5, i], 1, 'idle', 'idle')
|
||||
entity_1 = RenderEntity('agent', [5, i], 1, 'idle', 'idle')
|
||||
renderer.render([entity_1])
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
import time
|
||||
from enum import Enum
|
||||
from typing import List, Union, NamedTuple
|
||||
import random
|
||||
|
||||
@ -7,24 +8,32 @@ import numpy as np
|
||||
from environments.helpers import Constants as c
|
||||
from environments import helpers as h
|
||||
from environments.factory.base.base_factory import BaseFactory
|
||||
from environments.factory.base.objects import Agent, Action, Object, Slice
|
||||
from environments.factory.base.objects import Agent, Action, Slice
|
||||
from environments.factory.base.registers import Entities
|
||||
|
||||
from environments.factory.renderer import Renderer, Entity
|
||||
from environments.factory.renderer import RenderEntity
|
||||
from environments.utility_classes import MovementProperties
|
||||
|
||||
DIRT = "dirt"
|
||||
CLEAN_UP_ACTION = 'clean_up'
|
||||
|
||||
CLEAN_UP_ACTION = h.EnvActions.CLEAN_UP
|
||||
|
||||
|
||||
class ObsSlice(Enum):
    """Selector naming an observation slice.

    ``LEVEL`` and ``AGENT`` reuse the values of the matching ``Constants``
    members; ``OWN`` is the sentinel ``-1``.
    """
    OWN = -1
    LEVEL = c.LEVEL.value
    AGENT = c.AGENT.value
|
||||
|
||||
|
||||
class DirtProperties(NamedTuple):
|
||||
clean_amount: int = 1 # How much does the robot clean with one actions.
|
||||
max_spawn_ratio: float = 0.2 # On max how much tiles does the dirt spawn in percent.
|
||||
gain_amount: float = 0.3 # How much dirt does spawn per tile
|
||||
spawn_frequency: int = 5 # Spawn Frequency in Steps
|
||||
gain_amount: float = 0.3 # How much dirt does spawn per tile.
|
||||
spawn_frequency: int = 5 # Spawn Frequency in Steps.
|
||||
max_local_amount: int = 2 # Max dirt amount per tile.
|
||||
max_global_amount: int = 20 # Max dirt amount in the whole environment.
|
||||
dirt_smear_amount: float = 0.2 # Agents smear dirt, when not cleaning up in place
|
||||
dirt_smear_amount: float = 0.2 # Agents smear dirt, when not cleaning up in place.
|
||||
agent_can_interact: bool = True # Whether the agents can interact with the dirt in this environment.
|
||||
on_obs_slice: Enum = ObsSlice.LEVEL
|
||||
|
||||
|
||||
def softmax(x):
|
||||
@ -41,69 +50,50 @@ def entropy(x):
|
||||
class SimpleFactory(BaseFactory):
|
||||
|
||||
@property
|
||||
def additional_actions(self) -> List[Object]:
|
||||
return [Action(CLEAN_UP_ACTION)]
|
||||
def additional_actions(self) -> Union[Action, List[Action]]:
|
||||
super_actions = super(SimpleFactory, self).additional_actions
|
||||
if self.dirt_properties.agent_can_interact:
|
||||
super_actions.append(Action(CLEAN_UP_ACTION))
|
||||
return super_actions
|
||||
|
||||
@property
|
||||
def additional_entities(self) -> Union[Entities, List[Entities]]:
|
||||
return []
|
||||
super_entities = super(SimpleFactory, self).additional_entities
|
||||
return super_entities
|
||||
|
||||
@property
|
||||
def additional_slices(self) -> List[Slice]:
|
||||
return [Slice('dirt', np.zeros(self._level_shape))]
|
||||
super_slices = super(SimpleFactory, self).additional_slices
|
||||
super_slices.extend([Slice(c.DIRT, np.zeros(self._level_shape))])
|
||||
return super_slices
|
||||
|
||||
def _is_clean_up_action(self, action: Union[str, int]):
|
||||
if isinstance(action, str):
|
||||
action = self._actions.by_name(action)
|
||||
return self._actions[action].name == CLEAN_UP_ACTION
|
||||
def _is_clean_up_action(self, action: Union[str, Action, int]):
|
||||
if isinstance(action, int):
|
||||
action = self._actions[action]
|
||||
if isinstance(action, Action):
|
||||
action = action.name
|
||||
return action == CLEAN_UP_ACTION.name
|
||||
|
||||
def __init__(self, *args, dirt_properties: DirtProperties = DirtProperties(), **kwargs):
|
||||
def __init__(self, *args, dirt_properties: DirtProperties = DirtProperties(), env_seed=time.time_ns(), **kwargs):
|
||||
self.dirt_properties = dirt_properties
|
||||
self._renderer = None # expensive - don't use it when not required !
|
||||
self._dirt_rng = np.random.default_rng(kwargs.get('seed', default=time.time_ns()))
|
||||
self._dirt_rng = np.random.default_rng(env_seed)
|
||||
kwargs.update(env_seed=env_seed)
|
||||
super(SimpleFactory, self).__init__(*args, **kwargs)
|
||||
|
||||
def _flush_state(self):
|
||||
super(SimpleFactory, self)._flush_state()
|
||||
self._obs_cube[self._slices.get_idx_by_name(DIRT)] = self._slices.by_name(DIRT).slice
|
||||
self._obs_cube[self._slices.get_idx(c.DIRT)] = self._slices.by_enum(c.DIRT).slice
|
||||
|
||||
def render(self, mode='human'):
|
||||
|
||||
if not self._renderer: # lazy init
|
||||
height, width = self._obs_cube.shape[1:]
|
||||
self._renderer = Renderer(width, height, view_radius=self.pomdp_r, fps=5)
|
||||
dirt_slice = self._slices.by_name(DIRT).slice
|
||||
dirt = [Entity('dirt', tile.pos, min(0.15 + dirt_slice[tile.pos], 1.5), 'scale')
|
||||
def render_additional_assets(self, mode='human'):
|
||||
additional_assets = super(SimpleFactory, self).render_additional_assets()
|
||||
dirt_slice = self._slices.by_enum(c.DIRT).slice
|
||||
dirt = [RenderEntity('dirt', tile.pos, min(0.15 + dirt_slice[tile.pos], 1.5), 'scale')
|
||||
for tile in [tile for tile in self._tiles if dirt_slice[tile.pos]]]
|
||||
walls = [Entity('wall', pos)
|
||||
for pos in np.argwhere(self._slices.by_enum(c.LEVEL).slice == c.OCCUPIED_CELL.value)]
|
||||
|
||||
def asset_str(agent):
|
||||
# What does this abonimation do?
|
||||
# if any([x is None for x in [self._slices[j] for j in agent.collisions]]):
|
||||
# print('error')
|
||||
col_names = [x.name for x in agent.temp_collisions]
|
||||
if c.AGENT.value in col_names:
|
||||
return 'agent_collision', 'blank'
|
||||
elif not agent.temp_valid or c.LEVEL.name in col_names or c.AGENT.name in col_names:
|
||||
return c.AGENT.value, 'invalid'
|
||||
elif self._is_clean_up_action(agent.temp_action):
|
||||
return c.AGENT.value, 'valid'
|
||||
else:
|
||||
return c.AGENT.value, 'idle'
|
||||
agents = []
|
||||
for i, agent in enumerate(self._agents):
|
||||
name, state = asset_str(agent)
|
||||
agents.append(Entity(name, agent.pos, 1, 'none', state, i+1, agent.temp_light_map))
|
||||
doors = []
|
||||
if self.parse_doors:
|
||||
for i, door in enumerate(self._doors):
|
||||
name, state = 'door_open' if door.is_open else 'door_closed', 'blank'
|
||||
agents.append(Entity(name, door.pos, 1, 'none', state, i+1))
|
||||
self._renderer.render(dirt+walls+agents+doors)
|
||||
additional_assets.extend(dirt)
|
||||
return additional_assets
|
||||
|
||||
def spawn_dirt(self) -> None:
|
||||
dirt_slice = self._slices.by_name(DIRT).slice
|
||||
dirt_slice = self._slices.by_enum(c.DIRT).slice
|
||||
# dirty_tiles = [tile for tile in self._tiles if dirt_slice[tile.pos]]
|
||||
curr_dirt_amount = dirt_slice.sum()
|
||||
if not curr_dirt_amount > self.dirt_properties.max_global_amount:
|
||||
@ -119,7 +109,7 @@ class SimpleFactory(BaseFactory):
|
||||
pass
|
||||
|
||||
def clean_up(self, agent: Agent) -> bool:
|
||||
dirt_slice = self._slices.by_name(DIRT).slice
|
||||
dirt_slice = self._slices.by_enum(c.DIRT).slice
|
||||
if old_dirt_amount := dirt_slice[agent.pos]:
|
||||
new_dirt_amount = old_dirt_amount - self.dirt_properties.clean_amount
|
||||
dirt_slice[agent.pos] = max(new_dirt_amount, c.FREE_CELL.value)
|
||||
@ -128,10 +118,11 @@ class SimpleFactory(BaseFactory):
|
||||
return False
|
||||
|
||||
def do_additional_step(self) -> dict:
|
||||
info_dict = super(SimpleFactory, self).do_additional_step()
|
||||
if smear_amount := self.dirt_properties.dirt_smear_amount:
|
||||
dirt_slice = self._slices.by_name(DIRT).slice
|
||||
dirt_slice = self._slices.by_enum(c.DIRT).slice
|
||||
for agent in self._agents:
|
||||
if agent.temp_valid and agent.last_pos != h.NO_POS:
|
||||
if agent.temp_valid and agent.last_pos != c.NO_POS:
|
||||
if dirt := dirt_slice[agent.last_pos]:
|
||||
if smeared_dirt := round(dirt * smear_amount, 2):
|
||||
dirt_slice[agent.last_pos] = max(0, dirt_slice[agent.last_pos]-smeared_dirt)
|
||||
@ -144,23 +135,30 @@ class SimpleFactory(BaseFactory):
|
||||
self._next_dirt_spawn = self.dirt_properties.spawn_frequency
|
||||
else:
|
||||
self._next_dirt_spawn -= 1
|
||||
return {}
|
||||
return info_dict
|
||||
|
||||
def do_additional_actions(self, agent: Agent, action: int) -> bool:
|
||||
if self._is_clean_up_action(action):
|
||||
valid = self.clean_up(agent)
|
||||
return valid
|
||||
def do_additional_actions(self, agent: Agent, action: int) -> Union[None, bool]:
|
||||
valid = super(SimpleFactory, self).do_additional_actions(agent, action)
|
||||
if valid is None:
|
||||
if self._is_clean_up_action(action):
|
||||
if self.dirt_properties.agent_can_interact:
|
||||
valid = self.clean_up(agent)
|
||||
return valid
|
||||
else:
|
||||
return False
|
||||
else:
|
||||
return None
|
||||
else:
|
||||
return c.NOT_VALID.value
|
||||
return valid
|
||||
|
||||
def do_additional_reset(self) -> None:
|
||||
super(SimpleFactory, self).do_additional_reset()
|
||||
self.spawn_dirt()
|
||||
self._next_dirt_spawn = self.dirt_properties.spawn_frequency
|
||||
|
||||
def calculate_reward(self) -> (int, dict):
|
||||
info_dict = dict()
|
||||
|
||||
dirt_slice = self._slices.by_name(DIRT).slice
|
||||
def calculate_additional_reward(self, agent: Agent) -> (int, dict):
|
||||
reward, info_dict = super(SimpleFactory, self).calculate_additional_reward(agent)
|
||||
dirt_slice = self._slices.by_enum(c.DIRT).slice
|
||||
dirty_tiles = [dirt_slice[tile.pos] for tile in self._tiles if dirt_slice[tile.pos]]
|
||||
current_dirt_amount = sum(dirty_tiles)
|
||||
dirty_tile_count = len(dirty_tiles)
|
||||
@ -173,56 +171,21 @@ class SimpleFactory(BaseFactory):
|
||||
info_dict.update(dirty_tile_count=dirty_tile_count)
|
||||
info_dict.update(dirt_distribution_score=dirt_distribution_score)
|
||||
|
||||
try:
|
||||
# penalty = current_dirt_amount
|
||||
reward = 0
|
||||
except (ZeroDivisionError, RuntimeWarning):
|
||||
reward = 0
|
||||
|
||||
for agent in self._agents:
|
||||
if agent.temp_collisions:
|
||||
self.print(f't = {self._steps}\t{agent.name} has collisions with {agent.temp_collisions}')
|
||||
|
||||
if self._is_clean_up_action(agent.temp_action):
|
||||
if agent.temp_valid:
|
||||
reward += 0.5
|
||||
self.print(f'{agent.name} did just clean up some dirt at {agent.pos}.')
|
||||
info_dict.update(dirt_cleaned=1)
|
||||
else:
|
||||
reward -= 0.01
|
||||
self.print(f'{agent.name} just tried to clean up some dirt at {agent.pos}, but failed.')
|
||||
info_dict.update({f'{agent.name}_failed_action': 1})
|
||||
info_dict.update({f'{agent.name}_failed_action': 1})
|
||||
info_dict.update({f'{agent.name}_failed_dirt_cleanup': 1})
|
||||
|
||||
elif self._actions.is_moving_action(agent.temp_action):
|
||||
if agent.temp_valid:
|
||||
# info_dict.update(movement=1)
|
||||
reward -= 0.00
|
||||
else:
|
||||
# self.print('collision')
|
||||
reward -= 0.01
|
||||
self.print(f'{agent.name} just hit the wall at {agent.pos}.')
|
||||
info_dict.update({f'{agent.name}_vs_LEVEL': 1})
|
||||
|
||||
elif self._actions.is_door_usage(agent.temp_action):
|
||||
if agent.temp_valid:
|
||||
self.print(f'{agent.name} did just use the door at {agent.pos}.')
|
||||
info_dict.update(door_used=1)
|
||||
else:
|
||||
reward -= 0.01
|
||||
self.print(f'{agent.name} just tried to use a door at {agent.pos}, but failed.')
|
||||
info_dict.update({f'{agent.name}_failed_action': 1})
|
||||
info_dict.update({f'{agent.name}_failed_door_open': 1})
|
||||
if agent.temp_collisions:
|
||||
self.print(f't = {self._steps}\t{agent.name} has collisions with {agent.temp_collisions}')
|
||||
|
||||
if self._is_clean_up_action(agent.temp_action):
|
||||
if agent.temp_valid:
|
||||
reward += 0.5
|
||||
self.print(f'{agent.name} did just clean up some dirt at {agent.pos}.')
|
||||
info_dict.update(dirt_cleaned=1)
|
||||
else:
|
||||
info_dict.update(no_op=1)
|
||||
reward -= 0.00
|
||||
reward -= 0.01
|
||||
self.print(f'{agent.name} just tried to clean up some dirt at {agent.pos}, but failed.')
|
||||
info_dict.update({f'{agent.name}_failed_action': 1})
|
||||
info_dict.update({f'{agent.name}_failed_action': 1})
|
||||
info_dict.update({f'{agent.name}_failed_dirt_cleanup': 1})
|
||||
|
||||
for other_agent in agent.temp_collisions:
|
||||
info_dict.update({f'{agent.name}_vs_{other_agent.name}': 1})
|
||||
|
||||
self.print(f"reward is {reward}")
|
||||
# Potential based rewards ->
|
||||
# track the last reward , minus the current reward = potential
|
||||
return reward, info_dict
|
||||
|
@ -5,58 +5,76 @@ from typing import Tuple, Union
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Constants
|
||||
class Constants(Enum):
|
||||
WALL = '#'
|
||||
DOOR = 'D'
|
||||
DANGER_ZONE = 'x'
|
||||
LEVEL = 'level'
|
||||
AGENT = 'Agent'
|
||||
FREE_CELL = 0
|
||||
OCCUPIED_CELL = 1
|
||||
|
||||
DOORS = 'doors'
|
||||
CLOSED_DOOR = 1
|
||||
OPEN_DOOR = -1
|
||||
|
||||
ACTION = auto()
|
||||
COLLISIONS = auto()
|
||||
VALID = True
|
||||
NOT_VALID = False
|
||||
|
||||
def __bool__(self):
|
||||
return bool(self.value)
|
||||
|
||||
|
||||
LEVELS_DIR = 'levels'
|
||||
|
||||
TO_BE_AVERAGED = ['dirt_amount', 'dirty_tiles']
|
||||
IGNORED_DF_COLUMNS = ['Episode', 'Run', 'train_step', 'step', 'index', 'dirt_amount',
|
||||
'dirty_tile_count', 'terminal_observation', 'episode']
|
||||
|
||||
MANHATTAN_MOVES = ['north', 'east', 'south', 'west']
|
||||
DIAGONAL_MOVES = ['north_east', 'south_east', 'south_west', 'north_west']
|
||||
|
||||
NO_POS = (-9999, -9999)
|
||||
# Constants
|
||||
class Constants(Enum):
|
||||
WALL = '#'
|
||||
DOOR = 'D'
|
||||
DANGER_ZONE = 'x'
|
||||
LEVEL = 'level'
|
||||
AGENT = 'Agent'
|
||||
FREE_CELL = 0
|
||||
OCCUPIED_CELL = 1
|
||||
NO_POS = (-9999, -9999)
|
||||
|
||||
ACTIONMAP = defaultdict(lambda: (0, 0), dict(north=(-1, 0), east=(0, 1),
|
||||
south=(1, 0), west=(0, -1),
|
||||
north_east=(-1, +1), south_east=(1, 1),
|
||||
south_west=(+1, -1), north_west=(-1, -1)
|
||||
)
|
||||
DOORS = 'doors'
|
||||
CLOSED_DOOR = 1
|
||||
OPEN_DOOR = -1
|
||||
|
||||
ACTION = auto()
|
||||
COLLISIONS = auto()
|
||||
VALID = True
|
||||
NOT_VALID = False
|
||||
|
||||
# Dirt Env
|
||||
DIRT = 'dirt'
|
||||
|
||||
# Item Env
|
||||
ITEM = 'item'
|
||||
INVENTORY = 'inventory'
|
||||
|
||||
def __bool__(self):
|
||||
return bool(self.value)
|
||||
|
||||
|
||||
class ManhattanMoves(Enum):
    """The four non-diagonal move names: north, east, south and west."""
    NORTH = 'north'
    EAST = 'east'
    SOUTH = 'south'
    WEST = 'west'
|
||||
|
||||
|
||||
class DiagonalMoves(Enum):
    """The four diagonal move names."""
    NORTHEAST = 'north_east'
    SOUTHEAST = 'south_east'
    SOUTHWEST = 'south_west'
    NORTHWEST = 'north_west'
|
||||
|
||||
|
||||
class EnvActions(Enum):
    """Action identifiers: no-op, door usage, clean up and item action."""
    NOOP = 'no_op'
    USE_DOOR = 'use_door'
    CLEAN_UP = 'clean_up'
    ITEM_ACTION = 'item_action'
|
||||
|
||||
|
||||
d = DiagonalMoves
|
||||
m = ManhattanMoves
|
||||
c = Constants
|
||||
|
||||
ACTIONMAP = defaultdict(lambda: (0, 0), {m.NORTH.name: (-1, 0), d.NORTHEAST.name: (-1, +1),
|
||||
m.EAST.name: (0, 1), d.SOUTHEAST.name: (1, 1),
|
||||
m.SOUTH.name: (1, 0), d.SOUTHWEST.name: (+1, -1),
|
||||
m.WEST.name: (0, -1), d.NORTHWEST.name: (-1, -1)
|
||||
}
|
||||
)
|
||||
|
||||
HORIZONTAL_DOOR_MAP = np.asarray([[0, 0, 0], [1, 0, 1], [0, 0, 0]])
|
||||
VERTICAL_DOOR_MAP = np.asarray([[0, 1, 0], [0, 0, 0], [0, 1, 0]])
|
||||
|
||||
HORIZONTAL_DOOR_ZONE_1 = np.asarray([[1, 1, 1], [0, 0, 0], [0, 0, 0]])
|
||||
HORIZONTAL_DOOR_ZONE_2 = np.asarray([[0, 0, 0], [0, 0, 0], [1, 1, 1]])
|
||||
VERTICAL_DOOR_ZONE_1 = np.asarray([[1, 0, 0], [0, 0, 0], [0, 0, 1]])
|
||||
VERTICAL_DOOR_ZONE_2 = np.asarray([[1, 0, 0], [0, 0, 0], [0, 0, 1]])
|
||||
|
||||
|
||||
|
||||
|
||||
# Utility functions
|
||||
def parse_level(path):
|
||||
@ -67,13 +85,13 @@ def parse_level(path):
|
||||
return level
|
||||
|
||||
|
||||
def one_hot_level(level, wall_char: Union[Constants, str] = Constants.WALL):
|
||||
def one_hot_level(level, wall_char: Union[c, str] = c.WALL):
|
||||
grid = np.array(level)
|
||||
binary_grid = np.zeros(grid.shape, dtype=np.int8)
|
||||
if wall_char in Constants:
|
||||
binary_grid[grid == wall_char.value] = Constants.OCCUPIED_CELL.value
|
||||
if wall_char in c:
|
||||
binary_grid[grid == wall_char.value] = c.OCCUPIED_CELL.value
|
||||
else:
|
||||
binary_grid[grid == wall_char] = Constants.OCCUPIED_CELL.value
|
||||
binary_grid[grid == wall_char] = c.OCCUPIED_CELL.value
|
||||
return binary_grid
|
||||
|
||||
|
||||
@ -89,7 +107,22 @@ def check_position(slice_to_check_against: np.ndarray, position_to_check: Tuple[
|
||||
|
||||
# Check for collision with level walls
|
||||
valid = valid and not slice_to_check_against[x_pos, y_pos]
|
||||
return Constants.VALID if valid else Constants.NOT_VALID
|
||||
return c.VALID if valid else c.NOT_VALID
|
||||
|
||||
|
||||
def asset_str(agent):
    """Pick the render asset name and state string for ``agent``.

    Returns a ``(name, state)`` tuple derived from the names of the agent's
    current collisions and from ``agent.temp_valid``.
    """
    collision_names = [collider.name for collider in agent.temp_collisions]

    if c.AGENT.value in collision_names:
        return 'agent_collision', 'blank'

    if not agent.temp_valid or c.LEVEL.name in collision_names or c.AGENT.name in collision_names:
        state = 'invalid'
    elif agent.temp_valid:
        state = 'valid'
    else:
        # NOTE(review): unreachable as written -- the two branches above
        # already cover both truth values of ``agent.temp_valid``.
        state = 'idle'
    return c.AGENT.value, state
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
18
main.py
18
main.py
@ -9,6 +9,7 @@ import pandas as pd
|
||||
|
||||
from stable_baselines3.common.callbacks import CallbackList
|
||||
|
||||
from environments.factory.double_task_factory import DoubleTaskFactory, ItemProperties
|
||||
from environments.factory.simple_factory import DirtProperties, SimpleFactory
|
||||
from environments.helpers import IGNORED_DF_COLUMNS
|
||||
from environments.logging.monitor import MonitorCallback
|
||||
@ -94,11 +95,12 @@ if __name__ == '__main__':
|
||||
|
||||
dirt_props = DirtProperties(clean_amount=1, gain_amount=0.1, max_global_amount=20,
|
||||
max_local_amount=1, spawn_frequency=5, max_spawn_ratio=0.05,
|
||||
dirt_smear_amount=0.0)
|
||||
dirt_smear_amount=0.0, agent_can_interact=False)
|
||||
item_props = ItemProperties(n_items=5, agent_can_interact=True)
|
||||
move_props = MovementProperties(allow_diagonal_movement=True,
|
||||
allow_square_movement=True,
|
||||
allow_no_op=False)
|
||||
train_steps = 2.5e6
|
||||
train_steps = 6e5
|
||||
time_stamp = int(time.time())
|
||||
|
||||
out_path = None
|
||||
@ -106,11 +108,13 @@ if __name__ == '__main__':
|
||||
for modeL_type in [A2C, PPO, DQN]: # ,RegDQN, QRDQN]:
|
||||
for seed in range(3):
|
||||
|
||||
with SimpleFactory(n_agents=1, dirt_properties=dirt_props, pomdp_radius=2, max_steps=400, parse_doors=True,
|
||||
movement_properties=move_props, level_name='rooms', frames_to_stack=3,
|
||||
omit_agent_slice_in_obs=True, combin_agent_slices_in_obs=True, record_episodes=False,
|
||||
cast_shadows=True, doors_have_area=False, seed=seed
|
||||
) as env:
|
||||
with DoubleTaskFactory(n_agents=1, with_dirt=False,
|
||||
item_properties=item_props, dirt_properties=None, movement_properties=move_props,
|
||||
pomdp_radius=2, max_steps=500, parse_doors=True,
|
||||
level_name='rooms', frames_to_stack=3,
|
||||
omit_agent_slice_in_obs=True, combin_agent_slices_in_obs=True, record_episodes=False,
|
||||
cast_shadows=True, doors_have_area=False, seed=seed
|
||||
) as env:
|
||||
|
||||
if modeL_type.__name__ in ["PPO", "A2C"]:
|
||||
kwargs = dict(ent_coef=0.01)
|
||||
|
@ -28,3 +28,5 @@ PyYAML~=5.3.1
|
||||
pyglet~=1.5.0
|
||||
optuna~=2.7.0
|
||||
natsort~=7.1.1
|
||||
tqdm~=4.60.0
|
||||
networkx~=2.6.1
|
Reference in New Issue
Block a user