Items and combination of item and dirt

This commit is contained in:
Steffen Illium
2021-08-23 09:51:35 +02:00
parent 244d4eed68
commit d5e4d44823
12 changed files with 647 additions and 445 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.0 KiB

View File

@ -1,3 +1,4 @@
import abc
import time import time
from pathlib import Path from pathlib import Path
from typing import List, Union, Iterable from typing import List, Union, Iterable
@ -10,6 +11,7 @@ import yaml
from gym.wrappers import FrameStack from gym.wrappers import FrameStack
from environments.factory.base.shadow_casting import Map from environments.factory.base.shadow_casting import Map
from environments.factory.renderer import Renderer, RenderEntity
from environments.helpers import Constants as c, Constants from environments.helpers import Constants as c, Constants
from environments import helpers as h from environments import helpers as h
from environments.factory.base.objects import Slice, Agent, Tile, Action from environments.factory.base.objects import Slice, Agent, Tile, Action
@ -28,20 +30,7 @@ class BaseFactory(gym.Env):
@property @property
def observation_space(self): def observation_space(self):
if self.combin_agent_slices_in_obs and self.omit_agent_slice_in_obs: slices = self._slices.n_observable_slices
if self.n_agents > 1:
slices = self._slices.n - (self._agents.n - 1)
else:
slices = self._slices.n - 1
elif self.combin_agent_slices_in_obs and not self.omit_agent_slice_in_obs:
slices = self._slices.n - (self._agents.n - 1)
elif not self.combin_agent_slices_in_obs and self.omit_agent_slice_in_obs:
slices = self._slices.n - self._agents.n
elif not self.combin_agent_slices_in_obs and not self.omit_agent_slice_in_obs:
slices = self._slices.n
else:
raise RuntimeError('This should not happen!')
level_shape = (self.pomdp_r * 2 + 1, self.pomdp_r * 2 + 1) if self.pomdp_r else self._level_shape level_shape = (self.pomdp_r * 2 + 1, self.pomdp_r * 2 + 1) if self.pomdp_r else self._level_shape
space = spaces.Box(low=0, high=1, shape=(slices, *level_shape), dtype=np.float32) space = spaces.Box(low=0, high=1, shape=(slices, *level_shape), dtype=np.float32)
return space return space
@ -54,36 +43,6 @@ class BaseFactory(gym.Env):
def movement_actions(self): def movement_actions(self):
return self._actions.movement_actions return self._actions.movement_actions
@property
def additional_actions(self) -> Union[str, List[str]]:
"""
When heriting from this Base Class, you musst implement this methode!!!
:return: A list of Actions-object holding all additional actions.
:rtype: List[Action]
"""
raise NotImplementedError('Please register additional actions ')
@property
def additional_entities(self) -> Union[Entities, List[Entities]]:
"""
When heriting from this Base Class, you musst implement this methode!!!
:return: A single Entites collection or a list of such.
:rtype: Union[Entities, List[Entities]]
"""
raise NotImplementedError('Please register additional entities.')
@property
def additional_slices(self) -> Union[Slice, List[Slice]]:
"""
When heriting from this Base Class, you musst implement this methode!!!
:return: A list of Slice-objects.
:rtype: List[Slice]
"""
raise NotImplementedError('Please register additional slices.')
def __enter__(self): def __enter__(self):
return self if self.frames_to_stack == 0 else FrameStack(self, self.frames_to_stack) return self if self.frames_to_stack == 0 else FrameStack(self, self.frames_to_stack)
@ -94,17 +53,20 @@ class BaseFactory(gym.Env):
movement_properties: MovementProperties = MovementProperties(), parse_doors=False, movement_properties: MovementProperties = MovementProperties(), parse_doors=False,
combin_agent_slices_in_obs: bool = False, frames_to_stack=0, record_episodes=False, combin_agent_slices_in_obs: bool = False, frames_to_stack=0, record_episodes=False,
omit_agent_slice_in_obs=False, done_at_collision=False, cast_shadows=True, omit_agent_slice_in_obs=False, done_at_collision=False, cast_shadows=True,
verbose=False, doors_have_area=True, **kwargs): verbose=False, doors_have_area=True, env_seed=time.time_ns(), **kwargs):
assert frames_to_stack != 1 and frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1." assert frames_to_stack != 1 and frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1."
# Attribute Assignment # Attribute Assignment
self._base_rng = np.random.default_rng(kwargs.get('seed', default=time.time_ns())) self.env_seed = env_seed
self._base_rng = np.random.default_rng(self.env_seed)
self.movement_properties = movement_properties self.movement_properties = movement_properties
self.level_name = level_name self.level_name = level_name
self._level_shape = None self._level_shape = None
self.verbose = verbose self.verbose = verbose
self._renderer = None # expensive - don't use it when not required !
self.n_agents = n_agents self.n_agents = n_agents
self.max_steps = max_steps self.max_steps = max_steps
self.pomdp_r = pomdp_r self.pomdp_r = pomdp_r
self.combin_agent_slices_in_obs = combin_agent_slices_in_obs self.combin_agent_slices_in_obs = combin_agent_slices_in_obs
@ -132,25 +94,37 @@ class BaseFactory(gym.Env):
# Level # Level
level_filepath = Path(__file__).parent.parent / h.LEVELS_DIR / f'{self.level_name}.txt' level_filepath = Path(__file__).parent.parent / h.LEVELS_DIR / f'{self.level_name}.txt'
parsed_level = h.parse_level(level_filepath) parsed_level = h.parse_level(level_filepath)
level = [Slice(c.LEVEL.name, h.one_hot_level(parsed_level), is_blocking_light=True)] level = [Slice(c.LEVEL, h.one_hot_level(parsed_level), is_blocking_light=True)]
self._level_shape = level[0].shape self._level_shape = level[0].shape
# Doors # Doors
parsed_doors = h.one_hot_level(parsed_level, c.DOOR) parsed_doors = h.one_hot_level(parsed_level, c.DOOR)
if parsed_doors.any(): if parsed_doors.any():
doors = [Slice(c.DOORS.name, parsed_doors, is_blocking_light=True)] doors = [Slice(c.DOORS, parsed_doors, is_blocking_light=True)]
else: else:
doors = [] doors = []
# Agents # Agents
agents = [] agents = []
for i in range(self.n_agents): agent_names = [f'{c.AGENT.value}#{i}' for i in range(self.n_agents)]
agents.append(Slice(f'{c.AGENT.name}#{i}', np.zeros_like(level[0].slice, dtype=np.float32)))
state_slices.register_additional_items(level+doors+agents)
# Additional Slices from SubDomains if self.combin_agent_slices_in_obs and self.omit_agent_slice_in_obs:
if additional_slices := self.additional_slices: if self.n_agents == 1:
state_slices.register_additional_items(additional_slices) observables = [False]
else:
observables = [True] + ([False] * (self.n_agents - 1))
elif self.combin_agent_slices_in_obs and not self.omit_agent_slice_in_obs:
observables = [True] + ([False] * (self.n_agents - 1))
elif not self.combin_agent_slices_in_obs and self.omit_agent_slice_in_obs:
observables = [False] + ([True] * (self.n_agents - 1))
elif not self.combin_agent_slices_in_obs and not self.omit_agent_slice_in_obs:
observables = [True] * self.n_agents
else:
raise RuntimeError('This should not happen!')
for observable, agent_name in zip(observables, agent_names):
agents.append(Slice(agent_name, np.zeros_like(level[0].slice, dtype=np.float32), is_observable=observable))
state_slices.register_additional_items(level+doors+agents+self.additional_slices)
return state_slices return state_slices
def _init_obs_cube(self) -> np.ndarray: def _init_obs_cube(self) -> np.ndarray:
@ -198,18 +172,6 @@ class BaseFactory(gym.Env):
obs = self._get_observations() obs = self._get_observations()
return obs return obs
def pre_step(self) -> None:
pass
def do_additional_reset(self) -> None:
pass
def do_additional_step(self) -> dict:
return {}
def post_step(self) -> dict:
return {}
def step(self, actions): def step(self, actions):
actions = [actions] if isinstance(actions, int) or np.isscalar(actions) else actions actions = [actions] if isinstance(actions, int) or np.isscalar(actions) else actions
assert isinstance(actions, Iterable), f'"actions" has to be in [{int, list}]' assert isinstance(actions, Iterable), f'"actions" has to be in [{int, list}]'
@ -217,31 +179,22 @@ class BaseFactory(gym.Env):
done = False done = False
# Pre step Hook for later use # Pre step Hook for later use
self.pre_step() self.hook_pre_step()
# Move this in a seperate function? # Move this in a seperate function?
for action, agent in zip(actions, self._agents): for action, agent in zip(actions, self._agents):
agent.clear_temp_sate() agent.clear_temp_sate()
action_name = self._actions[action] action_obj = self._actions[action]
if self._actions.is_moving_action(action): if self._actions.is_moving_action(action_obj):
valid = self._move_or_colide(agent, action_name) valid = self._move_or_colide(agent, action_obj)
elif self._actions.is_no_op(action): elif self._actions.is_no_op(action_obj):
valid = c.VALID.value valid = c.VALID.value
elif self._actions.is_door_usage(action): elif self._actions.is_door_usage(action_obj):
# Check if agent really is standing on a door: valid = self._handle_door_interaction(agent)
if self.doors_have_area:
door = self._doors.get_near_position(agent.pos)
else:
door = self._doors.by_pos(agent.pos)
if door is not None:
door.use()
valid = c.VALID.value
# When he doesn't...
else:
valid = c.NOT_VALID.value
else: else:
valid = self.do_additional_actions(agent, action) valid = self.do_additional_actions(agent, action_obj)
agent.temp_action = action assert valid is not None, 'This should not happen, every Action musst be detected correctly!'
agent.temp_action = action_obj
agent.temp_valid = valid agent.temp_valid = valid
# In-between step Hook for later use # In-between step Hook for later use
@ -275,12 +228,25 @@ class BaseFactory(gym.Env):
info.update(self._summarize_state()) info.update(self._summarize_state())
# Post step Hook for later use # Post step Hook for later use
info.update(self.post_step()) info.update(self.hook_post_step())
obs = self._get_observations() obs = self._get_observations()
return obs, reward, done, info return obs, reward, done, info
def _handle_door_interaction(self, agent):
    """Let ``agent`` use a door reachable from its current position.

    The door is looked up with ``self._doors.get_near_position`` when
    ``self.doors_have_area`` is truthy and with ``self._doors.by_pos``
    otherwise; in both cases the agent's position is the lookup key.

    :param agent: The agent that issued the door action.
    :return: ``c.VALID.value`` when a door was found and used,
             ``c.NOT_VALID.value`` when the lookup returned ``None``.
    """
    find_door = self._doors.get_near_position if self.doors_have_area else self._doors.by_pos
    door = find_door(agent.pos)
    if door is None:
        # Nothing to interact with at (or near) the agent's position.
        return c.NOT_VALID.value
    door.use()
    return c.VALID.value
def _flush_state(self): def _flush_state(self):
self._obs_cube[np.arange(len(self._slices)) != self._slices.get_idx(c.LEVEL)] = c.FREE_CELL.value self._obs_cube[np.arange(len(self._slices)) != self._slices.get_idx(c.LEVEL)] = c.FREE_CELL.value
if self.parse_doors: if self.parse_doors:
@ -291,7 +257,7 @@ class BaseFactory(gym.Env):
self._obs_cube[self._slices.get_idx(c.DOORS)][door.pos] = c.CLOSED_DOOR.value self._obs_cube[self._slices.get_idx(c.DOORS)][door.pos] = c.CLOSED_DOOR.value
for agent in self._agents: for agent in self._agents:
self._obs_cube[self._slices.get_idx_by_name(agent.name)][agent.pos] = c.OCCUPIED_CELL.value self._obs_cube[self._slices.get_idx_by_name(agent.name)][agent.pos] = c.OCCUPIED_CELL.value
if agent.last_pos != h.NO_POS: if agent.last_pos != c.NO_POS:
self._obs_cube[self._slices.get_idx_by_name(agent.name)][agent.last_pos] = c.FREE_CELL.value self._obs_cube[self._slices.get_idx_by_name(agent.name)][agent.last_pos] = c.FREE_CELL.value
def _get_observations(self) -> np.ndarray: def _get_observations(self) -> np.ndarray:
@ -318,8 +284,8 @@ class BaseFactory(gym.Env):
obs = self._obs_cube obs = self._obs_cube
if self.cast_shadows: if self.cast_shadows:
obs_block_light = [obs[idx] != c.OCCUPIED_CELL.value for idx, slice obs_block_light = [obs[idx] != c.OCCUPIED_CELL.value for idx, obs_slice
in enumerate(self._slices) if slice.is_blocking_light] in enumerate(self._slices) if obs_slice.is_blocking_light]
door_shadowing = False door_shadowing = False
if door := self._doors.by_pos(agent.pos): if door := self._doors.by_pos(agent.pos):
if door.is_closed: if door.is_closed:
@ -332,6 +298,7 @@ class BaseFactory(gym.Env):
xs, ys = zip(*blocking) xs, ys = zip(*blocking)
else: else:
xs, ys = zip(*group) xs, ys = zip(*group)
# noinspection PyTypeChecker
obs_block_light[self._slices.get_idx(c.LEVEL)][xs, ys] = False obs_block_light[self._slices.get_idx(c.LEVEL)][xs, ys] = False
light_block_map = Map((np.prod(obs_block_light, axis=0) != True).astype(int)) light_block_map = Map((np.prod(obs_block_light, axis=0) != True).astype(int))
@ -340,9 +307,14 @@ class BaseFactory(gym.Env):
else: else:
light_block_map = light_block_map.do_fov(*agent.pos, max(self._level_shape)) light_block_map = light_block_map.do_fov(*agent.pos, max(self._level_shape))
if door_shadowing: if door_shadowing:
# noinspection PyUnboundLocalVariable
light_block_map[xs, ys] = 0 light_block_map[xs, ys] = 0
agent.temp_light_map = light_block_map agent.temp_light_map = light_block_map
obs = (obs * light_block_map) - ((1 - light_block_map) * obs[self._slices.get_idx(c.LEVEL)]) for obs_idx in range(obs.shape[0]):
if self._slices[obs_idx].can_be_shadowed:
obs[obs_idx] = (obs[obs_idx] * light_block_map) - (
(1 - light_block_map) * obs[self._slices.get_idx(c.LEVEL)]
)
if self.combin_agent_slices_in_obs and self.n_agents > 1: if self.combin_agent_slices_in_obs and self.n_agents > 1:
agent_obs = np.sum(obs[[key for key, l_slice in self._slices.items() if c.AGENT.name in l_slice.name and agent_obs = np.sum(obs[[key for key, l_slice in self._slices.items() if c.AGENT.name in l_slice.name and
@ -357,9 +329,6 @@ class BaseFactory(gym.Env):
else: else:
return obs return obs
def do_additional_actions(self, agent: Agent, action: int) -> bool:
raise NotImplementedError
def get_all_tiles_with_collisions(self) -> List[Tile]: def get_all_tiles_with_collisions(self) -> List[Tile]:
tiles_with_collisions = list() tiles_with_collisions = list()
for tile in self._tiles: for tile in self._tiles:
@ -392,7 +361,7 @@ class BaseFactory(gym.Env):
valid = c.VALID valid = c.VALID
return tile, valid return tile, valid
if self.parse_doors and agent.last_pos != h.NO_POS: if self.parse_doors and agent.last_pos != c.NO_POS:
if door := self._doors.by_pos(new_tile.pos): if door := self._doors.by_pos(new_tile.pos):
if door.can_collide: if door.can_collide:
return agent.tile, c.NOT_VALID return agent.tile, c.NOT_VALID
@ -416,10 +385,63 @@ class BaseFactory(gym.Env):
def calculate_reward(self) -> (int, dict): def calculate_reward(self) -> (int, dict):
# Returns: Reward, Info # Returns: Reward, Info
raise NotImplementedError info_dict = dict()
reward = 0
for agent in self._agents:
if self._actions.is_moving_action(agent.temp_action):
if agent.temp_valid:
# info_dict.update(movement=1)
reward -= 0.00
else:
# self.print('collision')
reward -= 0.01
self.print(f'{agent.name} just hit the wall at {agent.pos}.')
info_dict.update({f'{agent.name}_vs_LEVEL': 1})
elif self._actions.is_door_usage(agent.temp_action):
if agent.temp_valid:
self.print(f'{agent.name} did just use the door at {agent.pos}.')
info_dict.update(door_used=1)
else:
reward -= 0.01
self.print(f'{agent.name} just tried to use a door at {agent.pos}, but failed.')
info_dict.update({f'{agent.name}_failed_action': 1})
info_dict.update({f'{agent.name}_failed_door_open': 1})
elif self._actions.is_no_op(agent.temp_action):
info_dict.update(no_op=1)
reward -= 0.00
additional_reward, additional_info_dict = self.calculate_additional_reward(agent)
reward += additional_reward
info_dict.update(additional_info_dict)
for other_agent in agent.temp_collisions:
info_dict.update({f'{agent.name}_vs_{other_agent.name}': 1})
self.print(f"reward is {reward}")
return reward, info_dict
def render(self, mode='human'): def render(self, mode='human'):
raise NotImplementedError if not self._renderer: # lazy init
height, width = self._obs_cube.shape[1:]
self._renderer = Renderer(width, height, view_radius=self.pomdp_r, fps=5)
walls = [RenderEntity('wall', pos)
for pos in np.argwhere(self._slices.by_enum(c.LEVEL).slice == c.OCCUPIED_CELL.value)]
agents = []
for i, agent in enumerate(self._agents):
name, state = h.asset_str(agent)
agents.append(RenderEntity(name, agent.pos, 1, 'none', state, i + 1, agent.temp_light_map))
doors = []
if self.parse_doors:
for i, door in enumerate(self._doors):
name, state = 'door_open' if door.is_open else 'door_closed', 'blank'
doors.append(RenderEntity(name, door.pos, 1, 'none', state, i + 1))
additional_assets = self.render_additional_assets()
self._renderer.render(walls + doors + additional_assets + agents)
def save_params(self, filepath: Path): def save_params(self, filepath: Path):
# noinspection PyProtectedMember # noinspection PyProtectedMember
@ -440,3 +462,66 @@ class BaseFactory(gym.Env):
def print(self, string): def print(self, string):
if self.verbose: if self.verbose:
print(string) print(string)
# Properties which are called by the base class to extend beyond attributes of the base class
@property
def additional_actions(self) -> Union[Action, List[Action]]:
"""
Extension point: extra actions a subclass contributes on top of the base actions.

The base implementation contributes nothing and returns an empty list;
override this property in a subclass to add actions.
:return: A list of Action objects (empty in the base class).
:rtype: List[Action]
"""
return []
@property
def additional_entities(self) -> Union[Entities, List[Entities]]:
"""
Extension point: extra entity collections a subclass contributes.

The base implementation contributes nothing and returns an empty list;
override this property in a subclass to add entities.
:return: A single Entities collection or a list of such (empty list in the base class).
:rtype: Union[Entities, List[Entities]]
"""
return []
@property
def additional_slices(self) -> Union[Slice, List[Slice]]:
"""
Extension point: extra state slices a subclass contributes.

The base implementation contributes nothing and returns an empty list;
override this property in a subclass to add slices.
:return: A list of Slice objects (empty in the base class).
:rtype: List[Slice]
"""
return []
# Functions which provide additions to functions of the base class
# Always call super!!!!!!
@abc.abstractmethod
def do_additional_reset(self) -> None:
"""
Extension point for subclass-specific reset logic; the base implementation does nothing.

NOTE(review): the call site is not part of this excerpt - presumably invoked during reset; confirm.
NOTE(review): abc.abstractmethod is only enforced when the class uses an ABCMeta metaclass - confirm.
"""
pass
@abc.abstractmethod
def do_additional_step(self) -> dict:
"""
Extension point for subclass-specific per-step logic; the base implementation returns an empty dict.

NOTE(review): the call site is not part of this excerpt - presumably the returned dict is merged
into the step info; confirm.
:return: A dict (empty in the base class).
"""
return {}
@abc.abstractmethod
def do_additional_actions(self, agent: Agent, action: int) -> Union[None, bool]:
"""
Extension point for actions the base class does not handle itself.

step() falls back to this method for every action that is neither a movement, a no-op nor a
door usage, and asserts that the result is not None. The base implementation returns None,
i.e. it recognises no additional action.
NOTE(review): step() passes the resolved action object, while the annotation says int - confirm.
:return: The validity of the action, or None if the action was not recognised.
"""
return None
@abc.abstractmethod
def calculate_additional_reward(self, agent: Agent) -> (int, dict):
"""
Extension point for subclass-specific reward terms.

calculate_reward() calls this once per agent, adds the returned reward to the running total
and merges the returned dict into the info dict. The base implementation contributes nothing.
:return: Tuple of (reward, info dict); (0, {}) in the base class.
"""
return 0, {}
@abc.abstractmethod
def render_additional_assets(self):
"""
Extension point for subclass-specific render entities.

render() concatenates the returned list with its wall, door and agent entity lists before
handing them to the renderer, so the result has to be a list. Empty in the base class.
:return: A list of render entities.
"""
return []
# Hooks for in between operations.
# Always call super!!!!!!
@abc.abstractmethod
def hook_pre_step(self) -> None:
"""
Hook called by step() before the agents' actions are processed; does nothing in the base class.
"""
pass
@abc.abstractmethod
def hook_post_step(self) -> dict:
"""
Hook called by step() after the state summary was added to the info dict.

The returned dict is merged into the step's info dict; the base implementation adds nothing.
:return: A dict (empty in the base class).
"""
return {}

View File

@ -1,8 +1,5 @@
import itertools
import networkx as nx import networkx as nx
import numpy as np import numpy as np
from environments import helpers as h
from environments.helpers import Constants as c from environments.helpers import Constants as c
import itertools import itertools
@ -16,35 +13,32 @@ class Object:
def __bool__(self): def __bool__(self):
return True return True
@property
def i(self):
return self._identifier
@property @property
def name(self): def name(self):
return self._identifier return self._name
def __init__(self, identifier, **kwargs): def __init__(self, name, name_is_identifier=False, **kwargs):
self._identifier = identifier name = name.name if hasattr(name, 'name') else name
self._name = f'{self.__class__.__name__}#{name}' if name_is_identifier else name
if kwargs: if kwargs:
print(f'Following kwargs were passed, but ignored: {kwargs}') print(f'Following kwargs were passed, but ignored: {kwargs}')
def __repr__(self): def __repr__(self):
return f'{self.__class__.__name__}({self._identifier})' return f'{self.__class__.__name__}({self.name})'
class Action(Object): class Action(Object):
@property
def name(self):
return self.i
def __init__(self, *args): def __init__(self, *args):
super(Action, self).__init__(*args) super(Action, self).__init__(*args)
class Slice(Object): class Slice(Object):
@property
def is_observable(self):
"""Read-only access to the ``is_observable`` flag this slice was constructed with."""
return self._is_observable
@property @property
def shape(self): def shape(self):
return self.slice.shape return self.slice.shape
@ -57,10 +51,16 @@ class Slice(Object):
def free_tiles(self): def free_tiles(self):
return np.argwhere(self.slice == c.FREE_CELL.value) return np.argwhere(self.slice == c.FREE_CELL.value)
def __init__(self, identifier, arrayslice, is_blocking_light=False): def __init__(self, identifier, arrayslice, is_blocking_light=False, can_be_shadowed=True, is_observable=True):
super(Slice, self).__init__(identifier) super(Slice, self).__init__(identifier)
self.slice = arrayslice self.slice = arrayslice
self.is_blocking_light = is_blocking_light self.is_blocking_light = is_blocking_light
self.can_be_shadowed = can_be_shadowed
self._is_observable = is_observable
def set_slice(self, new_slice: np.ndarray):
"""
Replace the wrapped array with ``new_slice``.

:param new_slice: The replacement array; its shape has to equal the shape of the current array.
:raises AssertionError: If the shapes differ (note: asserts are skipped when Python runs with -O).
"""
assert self.slice.shape == new_slice.shape
self.slice = new_slice
class Wall(Object): class Wall(Object):
@ -89,8 +89,8 @@ class Tile(Object):
def pos(self): def pos(self):
return self._pos return self._pos
def __init__(self, i, pos): def __init__(self, i, pos, **kwargs):
super(Tile, self).__init__(i) super(Tile, self).__init__(i, **kwargs)
self._guests = dict() self._guests = dict()
self._pos = tuple(pos) self._pos = tuple(pos)
@ -164,7 +164,7 @@ class MoveableEntity(Entity):
if self._last_tile: if self._last_tile:
return self._last_tile.pos return self._last_tile.pos
else: else:
return h.NO_POS return c.NO_POS
@property @property
def direction_of_view(self): def direction_of_view(self):
@ -206,8 +206,8 @@ class Door(Entity):
return [node for node in self.connectivity.nodes return [node for node in self.connectivity.nodes
if node not in range(len(self.connectivity_subgroups)) and node != self.pos] if node not in range(len(self.connectivity_subgroups)) and node != self.pos]
def __init__(self, *args, context, closed_on_init=True, auto_close_interval=10, has_area=False): def __init__(self, *args, context, closed_on_init=True, auto_close_interval=10, has_area=False, **kwargs):
super(Door, self).__init__(*args) super(Door, self).__init__(*args, **kwargs)
self._state = c.CLOSED_DOOR self._state = c.CLOSED_DOOR
self.has_area = has_area self.has_area = has_area
self.auto_close_interval = auto_close_interval self.auto_close_interval = auto_close_interval
@ -270,8 +270,8 @@ class Door(Entity):
class Agent(MoveableEntity): class Agent(MoveableEntity):
def __init__(self, *args): def __init__(self, *args, **kwargs):
super(Agent, self).__init__(*args) super(Agent, self).__init__(*args, **kwargs)
self.clear_temp_sate() self.clear_temp_sate()
# noinspection PyAttributeOutsideInit # noinspection PyAttributeOutsideInit
@ -280,5 +280,5 @@ class Agent(MoveableEntity):
# if attr.startswith('temp'): # if attr.startswith('temp'):
self.temp_collisions = [] self.temp_collisions = []
self.temp_valid = None self.temp_valid = None
self.temp_action = -1 self.temp_action = None
self.temp_light_map = None self.temp_light_map = None

View File

@ -1,9 +1,7 @@
import itertools
import random import random
from enum import Enum from enum import Enum
from typing import List, Union from typing import List, Union
import networkx as nx
import numpy as np import numpy as np
from environments.factory.base.objects import Entity, Tile, Agent, Door, Slice, Action from environments.factory.base.objects import Entity, Tile, Agent, Door, Slice, Action
@ -16,11 +14,8 @@ class Register:
_accepted_objects = Entity _accepted_objects = Entity
@classmethod @classmethod
def from_argwhere_coordinates(cls, positions: (int, int), tiles): def from_argwhere_coordinates(cls, positions: [(int, int)], tiles):
entities = [cls._accepted_objects(i, tiles.by_pos(position)) for i, position in enumerate(positions)] return cls.from_tiles([tiles.by_pos(position) for position in positions])
registered_obj = cls()
registered_obj.register_additional_items(entities)
return registered_obj
@property @property
def name(self): def name(self):
@ -72,8 +67,8 @@ class Register:
def by_name(self, item): def by_name(self, item):
return self[self._names[item]] return self[self._names[item]]
def by_enum(self, enum: Enum): def by_enum(self, enum_obj: Enum):
return self[self._names[enum.name]] return self[self._names[enum_obj.name]]
def __repr__(self): def __repr__(self):
return f'{self.__class__.__name__}({self._register})' return f'{self.__class__.__name__}({self._register})'
@ -84,13 +79,13 @@ class Register:
def get_idx_by_name(self, item): def get_idx_by_name(self, item):
return self._names[item] return self._names[item]
def get_idx(self, enum: Enum): def get_idx(self, enum_obj: Enum):
return self._names[enum.name] return self._names[enum_obj.name]
@classmethod @classmethod
def from_tiles(cls, tiles, **kwargs): def from_tiles(cls, tiles, **kwargs):
entities = [cls._accepted_objects(f'{cls._accepted_objects.__name__.upper()}#{i}', tile, **kwargs) # objects_name = cls._accepted_objects.__name__
for i, tile in enumerate(tiles)] entities = [cls._accepted_objects(i, tile, name_is_identifier=True, **kwargs) for i, tile in enumerate(tiles)]
registered_obj = cls() registered_obj = cls()
registered_obj.register_additional_items(entities) registered_obj.register_additional_items(entities)
return registered_obj return registered_obj
@ -98,14 +93,6 @@ class Register:
class EntityRegister(Register): class EntityRegister(Register):
@classmethod
def from_argwhere_coordinates(cls, argwhere_coordinates, **kwargs):
tiles = cls()
tiles.register_additional_items(
[cls._accepted_objects(i, pos, **kwargs) for i, pos in enumerate(argwhere_coordinates)]
)
return tiles
def __init__(self): def __init__(self):
super(EntityRegister, self).__init__() super(EntityRegister, self).__init__()
self._tiles = dict() self._tiles = dict()
@ -141,6 +128,15 @@ class Entities(Register):
class FloorTiles(EntityRegister): class FloorTiles(EntityRegister):
_accepted_objects = Tile _accepted_objects = Tile
@classmethod
def from_argwhere_coordinates(cls, argwhere_coordinates):
    """Alternate constructor: create one object per coordinate and register them all.

    Each object is built as ``cls._accepted_objects(index, position, name_is_identifier=True)``,
    where ``index`` is the position's running number in ``argwhere_coordinates``.

    :param argwhere_coordinates: Iterable of positions.
    :return: A new register of this class holding the created objects.
    """
    register = cls()
    # noinspection PyTypeChecker
    created = [
        cls._accepted_objects(idx, position, name_is_identifier=True)
        for idx, position in enumerate(argwhere_coordinates)
    ]
    register.register_additional_items(created)
    return register
@property @property
def occupied_tiles(self): def occupied_tiles(self):
tiles = [tile for tile in self if tile.is_occupied()] tiles = [tile for tile in self if tile.is_occupied()]
@ -148,7 +144,7 @@ class FloorTiles(EntityRegister):
return tiles return tiles
@property @property
def empty_tiles(self): def empty_tiles(self) -> List[Tile]:
tiles = [tile for tile in self if tile.is_empty()] tiles = [tile for tile in self if tile.is_empty()]
random.shuffle(tiles) random.shuffle(tiles)
return tiles return tiles
@ -185,6 +181,7 @@ class Actions(Register):
def movement_actions(self): def movement_actions(self):
return self._movement_actions return self._movement_actions
# noinspection PyTypeChecker
def __init__(self, movement_properties: MovementProperties, can_use_doors=False): def __init__(self, movement_properties: MovementProperties, can_use_doors=False):
self.allow_no_op = movement_properties.allow_no_op self.allow_no_op = movement_properties.allow_no_op
self.allow_diagonal_movement = movement_properties.allow_diagonal_movement self.allow_diagonal_movement = movement_properties.allow_diagonal_movement
@ -193,43 +190,47 @@ class Actions(Register):
super(Actions, self).__init__() super(Actions, self).__init__()
if self.allow_square_movement: if self.allow_square_movement:
self.register_additional_items([self._accepted_objects(direction) for direction in h.MANHATTAN_MOVES]) self.register_additional_items([self._accepted_objects(direction) for direction in h.ManhattanMoves])
if self.allow_diagonal_movement: if self.allow_diagonal_movement:
self.register_additional_items([self._accepted_objects(direction) for direction in h.DIAGONAL_MOVES]) self.register_additional_items([self._accepted_objects(direction) for direction in h.DiagonalMoves])
self._movement_actions = self._register.copy() self._movement_actions = self._register.copy()
if self.can_use_doors: if self.can_use_doors:
self.register_additional_items([self._accepted_objects('use_door')]) self.register_additional_items([self._accepted_objects(h.EnvActions.USE_DOOR)])
if self.allow_no_op: if self.allow_no_op:
self.register_additional_items([self._accepted_objects('no-op')]) self.register_additional_items([self._accepted_objects(h.EnvActions.NOOP)])
def is_moving_action(self, action: Union[int]): def is_moving_action(self, action: Union[int]):
#if isinstance(action, Action): return action in self.movement_actions.values()
# return (action.name in h.MANHATTAN_MOVES and self.allow_square_movement) or \
# (action.name in h.DIAGONAL_MOVES and self.allow_diagonal_movement)
#else:
return action in self.movement_actions.keys()
def is_no_op(self, action: Union[str, int]): def is_no_op(self, action: Union[str, Action, int]):
if isinstance(action, str): if isinstance(action, int):
action = self.by_name(action) action = self[action]
return self[action].name == 'no-op' if isinstance(action, Action):
action = action.name
return action == h.EnvActions.NOOP.name
def is_door_usage(self, action: Union[str, int]): def is_door_usage(self, action: Union[str, int]):
if isinstance(action, str): if isinstance(action, int):
action = self.by_name(action) action = self[action]
return self[action].name == 'use_door' if isinstance(action, Action):
action = action.name
return action == h.EnvActions.USE_DOOR.name
class StateSlices(Register): class StateSlices(Register):
_accepted_objects = Slice _accepted_objects = Slice
@property
def n_observable_slices(self):
    """Number of registered slices whose ``is_observable`` flag is truthy."""
    return sum(1 for state_slice in self if state_slice.is_observable)
@property @property
def AGENTSTARTIDX(self): def AGENTSTARTIDX(self):
if self._agent_start_idx: if self._agent_start_idx:
return self._agent_start_idx return self._agent_start_idx
else: else:
self._agent_start_idx = min([idx for idx, x in self.items() if c.AGENT.name in x.name]) self._agent_start_idx = min([idx for idx, x in self.items() if c.AGENT.value in x.name])
return self._agent_start_idx return self._agent_start_idx
def __init__(self): def __init__(self):

View File

@ -0,0 +1,229 @@
import time
from collections import deque
from enum import Enum
from typing import List, Union, NamedTuple
import numpy as np
from environments.factory.simple_factory import SimpleFactory
from environments.helpers import Constants as c
from environments import helpers as h
from environments.factory.base.objects import Agent, Slice, Entity, Action
from environments.factory.base.registers import Entities
from environments.factory.renderer import RenderEntity
PICK_UP = 'pick_up'
DROP_OFF = 'drop_off'
NO_ITEM = 0
ITEM_DROP_OFF = -1
def inventory_slice_name(agent_i):
    """Return the name of the inventory slice that belongs to an agent.

    :param agent_i: Either the agent's integer index, which is expanded to
                    ``<c.AGENT.value>#<index>``, or any other value (e.g. an agent
                    name), which is used as the owner part unchanged.
    :return: ``<c.INVENTORY.name>_<owner>``.
    """
    owner = f'{c.AGENT.value}#{agent_i}' if isinstance(agent_i, int) else agent_i
    return f'{c.INVENTORY.name}_{owner}'
class DropOffLocation(Entity):
    """Entity that accepts items and keeps them in a bounded storage."""

    def __init__(self, *args, storage_size_until_full: int = 5, **kwargs):
        """Create the drop off; ``DROP_OFF`` is passed to the parent as first positional argument.

        :param storage_size_until_full: ``maxlen`` of the storage deque.
        """
        super().__init__(DROP_OFF, *args, **kwargs)
        # NOTE(review): a deque with maxlen silently discards its oldest entry when
        # appended to while full, so place_item() never rejects an item - confirm intended.
        self.storage = deque(maxlen=storage_size_until_full)

    def place_item(self, item):
        """Append ``item`` to the storage; always returns ``True``."""
        self.storage.append(item)
        return True

    @property
    def is_full(self):
        """Whether the storage currently holds exactly ``maxlen`` items."""
        return len(self.storage) == self.storage.maxlen
class ItemProperties(NamedTuple):
"""Immutable bundle of the settings that configure the item task of the factory."""
n_items: int = 1 # How many items are there at the same time
spawn_frequency: int = 5 # Spawn Frequency in Steps
max_dropoff_storage_size: int = 5 # How many items are needed until the drop off is full
max_agent_storage_size: int = 5 # How many items are needed until the agent inventory is full
agent_can_interact: bool = True # Whether agents have the possibility to interact with the domain items
# noinspection PyAttributeOutsideInit,PyUnresolvedReferences
class DoubleTaskFactory(SimpleFactory):
# noinspection PyMissingConstructor
def __init__(self, item_properties: ItemProperties, *args, with_dirt=False, env_seed=None, **kwargs):
    """Set up the item task and delegate the remaining set-up to a parent class.

    :param item_properties: Settings of the item task.
    :param with_dirt: Selects where parent lookups start. If truthy, lookups start
                      right after ``DoubleTaskFactory`` in the MRO (so ``SimpleFactory``
                      takes part); otherwise they start after ``SimpleFactory``, i.e.
                      its implementations are bypassed. The choice is stored in
                      ``self._super`` and reused by the other methods of this class.
    :param env_seed: Seed for the item RNG, also forwarded to the parent as
                     ``env_seed``. ``None`` (default) draws a fresh time based seed
                     for every instance.
    :param kwargs: Forwarded to the parent constructor; ``pomdp_r`` is inspected here.
    :raises AssertionError: If a view radius is given and ``n_items`` is not
                            smaller than ``pomdp_r ** 2``.
    """
    # A `time.time_ns()` default in the signature is evaluated only once, when the
    # function is defined, which would give every instance the very same seed.
    if env_seed is None:
        env_seed = time.time_ns()
    self.item_properties = item_properties
    kwargs.update(env_seed=env_seed)
    self._item_rng = np.random.default_rng(env_seed)

    # `pomdp_r` may be absent, 0 or explicitly None; all of them mean "no view radius".
    pomdp_r = kwargs.get('pomdp_r') or 0
    assert not pomdp_r or item_properties.n_items < pomdp_r ** 2, \
        "'n_items' has to be smaller than 'pomdp_r' ** 2."

    # Name the class explicitly: `self.__class__` would resolve to the subclass for
    # subclass instances and make `super(self._super, self)` call back into this
    # very method forever.
    self._super = DoubleTaskFactory if with_dirt else SimpleFactory
    super(self._super, self).__init__(*args, **kwargs)
@property
def additional_actions(self) -> Union[Action, List[Action]]:
    """Actions of the parent selected by ``self._super`` plus the item action.

    :return: The parent's action list, extended in place by one
             ``Action(h.EnvActions.ITEM_ACTION)``.
    """
    actions = super(self._super, self).additional_actions
    actions.append(Action(h.EnvActions.ITEM_ACTION))
    return actions
@property
def additional_entities(self) -> Union[Entities, List[Entities]]:
super_entities = super(self._super, self).additional_entities
return super_entities
@property
def additional_slices(self) -> Union[Slice, List[Slice]]:
super_slices = super(self._super, self).additional_slices
super_slices.append(Slice(c.ITEM, np.zeros(self._level_shape)))
super_slices.extend([Slice(inventory_slice_name(agent_i), np.zeros(self._level_shape), can_be_shadowed=False)
for agent_i in range(self.n_agents)])
return super_slices
def _flush_state(self):
super(self._super, self)._flush_state()
# Flush environmental item state
slice_idx = self._slices.get_idx(c.ITEM)
self._obs_cube[slice_idx] = self._slices[slice_idx].slice
# Flush per agent inventory state
for agent in self._agents:
agent_slice_idx = self._slices.get_idx_by_name(inventory_slice_name(agent.name))
self._slices[agent_slice_idx].slice[:] = 0
if len(agent.inventory) > 0:
max_x = self.pomdp_r if self.pomdp_r else self._level_shape[0]
x, y = (0, 0) if not self.pomdp_r else (max(agent.x - max_x, 0), max(agent.y - max_x, 0))
for item in agent.inventory:
x_diff, y_diff = divmod(item, max_x)
self._slices[agent_slice_idx].slice[int(x+x_diff), int(y+y_diff)] = item
self._obs_cube[agent_slice_idx] = self._slices[agent_slice_idx].slice
def _is_item_action(self, action):
if isinstance(action, int):
action = self._actions[action]
if isinstance(action, Action):
action = action.name
return action == h.EnvActions.ITEM_ACTION.name
def do_item_action(self, agent: Agent):
item_slice = self._slices.by_enum(c.ITEM).slice
if item := item_slice[agent.pos]:
if item == ITEM_DROP_OFF:
if agent.inventory:
valid = self._item_drop_off.place_item(agent.inventory.pop(0))
return valid
else:
return c.NOT_VALID
elif item != NO_ITEM:
if len(agent.inventory) < self.item_properties.max_agent_storage_size:
agent.inventory.append(item_slice[agent.pos])
item_slice[agent.pos] = NO_ITEM
else:
return c.NOT_VALID
return c.VALID
else:
return c.NOT_VALID
def do_additional_actions(self, agent: Agent, action: int) -> Union[None, bool]:
valid = super(self._super, self).do_additional_actions(agent, action)
if valid is None:
if self._is_item_action(action):
if self.item_properties.agent_can_interact:
valid = self.do_item_action(agent)
return bool(valid)
else:
return False
else:
return None
else:
return valid
def do_additional_reset(self) -> None:
super(self._super, self).do_additional_reset()
self.spawn_drop_off_location()
self.spawn_items(self.item_properties.n_items)
self._next_item_spawn = self.item_properties.spawn_frequency
for agent in self._agents:
agent.inventory = list()
def do_additional_step(self) -> dict:
info_dict = super(self._super, self).do_additional_step()
if not self._next_item_spawn:
if item_to_spawn := (self.item_properties.n_items -
(np.sum(self._slices.by_enum(c.ITEM).slice.astype(bool)) - 1)):
self.spawn_items(item_to_spawn)
self._next_item_spawn = self.item_properties.spawn_frequency
else:
self.print('No Items are spawning, limit is reached.')
else:
self._next_item_spawn -= 1
return info_dict
def spawn_drop_off_location(self):
single_empty_tile = self._tiles.empty_tiles[0]
self._item_drop_off = DropOffLocation(single_empty_tile,
storage_size_until_full=self.item_properties.max_dropoff_storage_size)
single_empty_tile.enter(self._item_drop_off)
self._slices.by_enum(c.ITEM).slice[single_empty_tile.pos] = ITEM_DROP_OFF
def calculate_additional_reward(self, agent: Agent) -> (int, dict):
reward, info_dict = super(self._super, self).calculate_additional_reward(agent)
if self._is_item_action(agent.temp_action):
if agent.temp_valid:
if agent.pos == self._item_drop_off.pos:
info_dict.update({f'{agent.name}_item_dropoff': 1})
reward += 1
else:
info_dict.update({f'{agent.name}_item_pickup': 1})
reward += 0.1
else:
info_dict.update({f'{agent.name}_failed_item_action': 1})
reward -= 0.1
return reward, info_dict
def render_additional_assets(self, mode='human'):
additional_assets = super(self._super, self).render_additional_assets()
item_slice = self._slices.by_enum(c.ITEM).slice
items = [RenderEntity(DROP_OFF if item_slice[tile.pos] == ITEM_DROP_OFF else c.ITEM.value, tile.pos)
for tile in [tile for tile in self._tiles if item_slice[tile.pos] != NO_ITEM]]
additional_assets.extend(items)
return additional_assets
def spawn_items(self, n_items):
tiles = self._tiles.empty_tiles[:n_items]
item_slice = self._slices.by_enum(c.ITEM).slice
for idx, tile in enumerate(tiles, start=1):
item_slice[tile.pos] = idx
pass
if __name__ == '__main__':
    # Smoke test: drive a single-agent environment with uniformly random actions.
    import random

    render = True

    item_props = ItemProperties()
    factory_config = dict(n_agents=1, done_at_collision=False, frames_to_stack=0,
                          level_name='rooms', max_steps=400,
                          omit_agent_slice_in_obs=True, parse_doors=True, pomdp_r=3,
                          record_episodes=False, verbose=False)
    factory = DoubleTaskFactory(item_props, **factory_config)

    # ``random.randint`` includes its upper bound, hence the highest valid index.
    highest_action_idx = factory.action_space.n - 1
    # The property is read once; its value is not used afterwards.
    _ = factory.observation_space

    for epoch in range(100):
        # One list of per-agent actions for each of the 200 planned steps.
        action_plan = []
        for step_idx in range(200):
            action_plan.append([random.randint(0, highest_action_idx) for _ in range(factory.n_agents)])

        observation = factory.reset()
        rew = 0
        for joint_action in action_plan:
            observation, step_reward, is_done, step_info = factory.step(joint_action)
            rew += step_reward
            if render:
                factory.render()
            if is_done:
                break
        print(f'Factory run {epoch} done, reward is:\n {rew}')

View File

@ -1,115 +0,0 @@
import time
from collections import deque
from typing import List, Union, NamedTuple
import numpy as np
from environments.helpers import Constants as c
from environments import helpers as h
from environments.factory.base.base_factory import BaseFactory
from environments.factory.base.objects import Agent, Action, Object, Slice, Entity
from environments.factory.base.registers import Entities
from environments.factory.renderer import Renderer
from environments.utility_classes import MovementProperties
# Name of the slice that holds the items, and the suffix of the per-agent
# inventory slice names.
ITEM = 'item'
INVENTORY = 'inventory'
# Not referenced anywhere in the visible part of this module.
PICK_UP = 'pick_up'
DROP_DOWN = 'drop_down'
# Name of the single action this module adds to the environment.
ITEM_ACTION = 'item_action'
# Marker values of the item slice: 0 is written back to a cell when it is
# cleared, -1 is the value a cell is compared against to detect the drop-off.
NO_ITEM = 0
ITEM_DROP_OFF = -1
def inventory_slice_name(agent):
    """Return the name of the inventory slice that belongs to ``agent``.

    :param agent: Object exposing a ``name`` attribute.
    :return: The agent's name, an underscore and the ``INVENTORY`` constant.
    """
    owner = agent.name
    return f'{owner}_{INVENTORY}'
class DropOffLocation(Entity):
    """Entity that receives the items agents deliver."""

    def __init__(self, *args, storage_size_until_full: int = 5, **kwargs):
        """Create the drop-off entity with a bounded storage.

        :param storage_size_until_full: Capacity of the internal storage.
        All remaining arguments are forwarded to ``Entity`` unchanged.
        """
        super().__init__(*args, **kwargs)
        # A deque with ``maxlen`` silently discards its oldest element once
        # the capacity is exceeded.
        self.storage = deque(maxlen=storage_size_until_full)

    def place_item(self, item):
        """Store ``item`` and report success (always ``True``)."""
        self.storage.append(item)
        return True

    @property
    def is_full(self):
        """``True`` once the storage holds as many items as its capacity."""
        return len(self.storage) == self.storage.maxlen
class ItemProperties(NamedTuple):
    """Immutable configuration of the item task."""

    # Number of items that exist at the same time.
    n_items: int = 1
    # Spawn frequency, counted in environment steps.
    spawn_frequency: int = 5
    # Number of items needed until the drop-off location is full.
    max_dropoff_storage_size: int = 5
    # Number of items needed until an agent's inventory is full.
    max_agent_storage_size: int = 5
# noinspection PyAttributeOutsideInit
class ItemFactory(BaseFactory):
    """Factory environment skeleton for an item pick-up / drop-off task.

    NOTE(review): this class is unfinished - ``calculate_reward`` and ``render``
    are empty and ``spawn_items`` is called but not defined in this class.
    """

    def __init__(self, item_properties: ItemProperties, *args, **kwargs):
        """Store the item configuration, seed the item RNG and initialise the base factory.

        :param item_properties: Configuration of the item task.
        The RNG is seeded with the ``seed`` keyword argument if present, else
        with the current time in nanoseconds.
        """
        self.item_properties = item_properties
        # ``dict.get`` accepts its fallback only positionally; the former
        # ``default=`` keyword raised a TypeError on every construction.
        self._item_rng = np.random.default_rng(kwargs.get('seed', time.time_ns()))
        super(ItemFactory, self).__init__(*args, **kwargs)

    @property
    def additional_actions(self) -> Union[str, List[str]]:
        """Names of the actions added by this environment."""
        return [ITEM_ACTION]

    @property
    def additional_entities(self) -> Union[Entities, List[Entities]]:
        """This environment adds no entity registers."""
        return []

    @property
    def additional_slices(self) -> Union[Slice, List[Slice]]:
        """One item slice followed by one inventory slice per agent."""
        item_slice = Slice(ITEM, np.zeros(self._level_shape))
        inventory_slices = [Slice(inventory_slice_name(agent), np.zeros(self._level_shape))
                            for agent in self._agents]
        return [item_slice] + inventory_slices

    def _is_item_action(self, action):
        """Return whether ``action`` (given by name or as index) is the item action."""
        if isinstance(action, str):
            action = self._actions.by_name(action)
        return self._actions[action].name == ITEM_ACTION

    def do_item_action(self, agent):
        """Interact with whatever the item slice holds at the position of ``agent``.

        :return: ``True`` if the cell was not empty, else ``False``.
        """
        item_slice = self._slices.by_name(ITEM).slice
        inventory_slice = self._slices.by_name(inventory_slice_name(agent)).slice

        item = item_slice[agent.pos]
        if not item:
            return False
        if item == ITEM_DROP_OFF:
            self._item_drop_off.place_item(inventory_slice.sum())
        # NOTE(review): the nesting of the next two statements was reconstructed
        # from a flattened listing.  As written, the cell is cleared on the
        # drop-off tile as well, which removes the drop-off marker - confirm.
        item_slice[agent.pos] = NO_ITEM
        return True

    def do_additional_actions(self, agent: Agent, action: int) -> bool:
        """Perform the item action for ``agent``.

        :raises RuntimeError: If ``action`` is not the item action.
        """
        if not self._is_item_action(action):
            raise RuntimeError('This should not happen!!!')
        return self.do_item_action(agent)

    def do_additional_reset(self) -> None:
        """Place the drop-off location and the initial items."""
        # The item count is part of ``item_properties``; this class never sets
        # an ``n_items`` attribute of its own.
        n_items = self.item_properties.n_items
        self.spawn_drop_off_location()
        self.spawn_items(n_items)
        if n_items > 1:
            self._next_item_spawn = self.item_properties.spawn_frequency

    def spawn_drop_off_location(self):
        """Create the drop-off location.

        NOTE(review): the selected empty tile is not handed to the entity and
        not marked in the item slice - confirm whether that is still missing.
        """
        single_empty_tile = self._tiles.empty_tiles[0]
        self._item_drop_off = DropOffLocation(storage_size_until_full=self.item_properties.max_dropoff_storage_size)

    def calculate_reward(self) -> (int, dict):
        """Not implemented yet; returns ``None``."""
        pass

    def render(self, mode='human'):
        """Not implemented yet; does nothing."""
        pass

View File

@ -8,7 +8,7 @@ from typing import NamedTuple, Any
import time import time
class Entity(NamedTuple): class RenderEntity(NamedTuple):
name: str name: str
pos: np.array pos: np.array
value: float = 1 value: float = 1
@ -108,7 +108,7 @@ class Renderer:
blits.extendleft(vis_rects) blits.extendleft(vis_rects)
if entity.state != 'blank': if entity.state != 'blank':
agent_state_blits = self.blit_params( agent_state_blits = self.blit_params(
Entity(entity.state, (entity.pos[0]+0.12, entity.pos[1]), 0.48, 'scale') RenderEntity(entity.state, (entity.pos[0] + 0.12, entity.pos[1]), 0.48, 'scale')
) )
textsurface = self.font.render(str(entity.id), False, (0, 0, 0)) textsurface = self.font.render(str(entity.id), False, (0, 0, 0))
text_blit = dict(source=textsurface, dest=(bp['dest'].center[0]-.07*self.cell_size, text_blit = dict(source=textsurface, dest=(bp['dest'].center[0]-.07*self.cell_size,
@ -125,6 +125,6 @@ class Renderer:
if __name__ == '__main__': if __name__ == '__main__':
renderer = Renderer(fps=2, cell_size=40) renderer = Renderer(fps=2, cell_size=40)
for i in range(15): for i in range(15):
entity_1 = Entity('agent', [5, i], 1, 'idle', 'idle') entity_1 = RenderEntity('agent', [5, i], 1, 'idle', 'idle')
renderer.render([entity_1]) renderer.render([entity_1])

View File

@ -1,4 +1,5 @@
import time import time
from enum import Enum
from typing import List, Union, NamedTuple from typing import List, Union, NamedTuple
import random import random
@ -7,24 +8,32 @@ import numpy as np
from environments.helpers import Constants as c from environments.helpers import Constants as c
from environments import helpers as h from environments import helpers as h
from environments.factory.base.base_factory import BaseFactory from environments.factory.base.base_factory import BaseFactory
from environments.factory.base.objects import Agent, Action, Object, Slice from environments.factory.base.objects import Agent, Action, Slice
from environments.factory.base.registers import Entities from environments.factory.base.registers import Entities
from environments.factory.renderer import Renderer, Entity from environments.factory.renderer import RenderEntity
from environments.utility_classes import MovementProperties from environments.utility_classes import MovementProperties
DIRT = "dirt"
CLEAN_UP_ACTION = 'clean_up' CLEAN_UP_ACTION = h.EnvActions.CLEAN_UP
class ObsSlice(Enum):
    """Choices for ``DirtProperties.on_obs_slice``.

    ``LEVEL`` and ``AGENT`` carry the values of the corresponding ``Constants``
    members; ``OWN`` is -1 (presumably "a slice of its own" - TODO confirm).
    """

    OWN = -1
    LEVEL = c.LEVEL.value
    AGENT = c.AGENT.value
class DirtProperties(NamedTuple): class DirtProperties(NamedTuple):
clean_amount: int = 1 # How much does the robot clean with one actions. clean_amount: int = 1 # How much does the robot clean with one actions.
max_spawn_ratio: float = 0.2 # On max how much tiles does the dirt spawn in percent. max_spawn_ratio: float = 0.2 # On max how much tiles does the dirt spawn in percent.
gain_amount: float = 0.3 # How much dirt does spawn per tile gain_amount: float = 0.3 # How much dirt does spawn per tile.
spawn_frequency: int = 5 # Spawn Frequency in Steps spawn_frequency: int = 5 # Spawn Frequency in Steps.
max_local_amount: int = 2 # Max dirt amount per tile. max_local_amount: int = 2 # Max dirt amount per tile.
max_global_amount: int = 20 # Max dirt amount in the whole environment. max_global_amount: int = 20 # Max dirt amount in the whole environment.
dirt_smear_amount: float = 0.2 # Agents smear dirt, when not cleaning up in place dirt_smear_amount: float = 0.2 # Agents smear dirt, when not cleaning up in place.
agent_can_interact: bool = True # Whether the agents can interact with the dirt in this environment.
on_obs_slice: Enum = ObsSlice.LEVEL
def softmax(x): def softmax(x):
@ -41,69 +50,50 @@ def entropy(x):
class SimpleFactory(BaseFactory): class SimpleFactory(BaseFactory):
@property @property
def additional_actions(self) -> List[Object]: def additional_actions(self) -> Union[Action, List[Action]]:
return [Action(CLEAN_UP_ACTION)] super_actions = super(SimpleFactory, self).additional_actions
if self.dirt_properties.agent_can_interact:
super_actions.append(Action(CLEAN_UP_ACTION))
return super_actions
@property @property
def additional_entities(self) -> Union[Entities, List[Entities]]: def additional_entities(self) -> Union[Entities, List[Entities]]:
return [] super_entities = super(SimpleFactory, self).additional_entities
return super_entities
@property @property
def additional_slices(self) -> List[Slice]: def additional_slices(self) -> List[Slice]:
return [Slice('dirt', np.zeros(self._level_shape))] super_slices = super(SimpleFactory, self).additional_slices
super_slices.extend([Slice(c.DIRT, np.zeros(self._level_shape))])
return super_slices
def _is_clean_up_action(self, action: Union[str, int]): def _is_clean_up_action(self, action: Union[str, Action, int]):
if isinstance(action, str): if isinstance(action, int):
action = self._actions.by_name(action) action = self._actions[action]
return self._actions[action].name == CLEAN_UP_ACTION if isinstance(action, Action):
action = action.name
return action == CLEAN_UP_ACTION.name
def __init__(self, *args, dirt_properties: DirtProperties = DirtProperties(), **kwargs): def __init__(self, *args, dirt_properties: DirtProperties = DirtProperties(), env_seed=time.time_ns(), **kwargs):
self.dirt_properties = dirt_properties self.dirt_properties = dirt_properties
self._renderer = None # expensive - don't use it when not required ! self._dirt_rng = np.random.default_rng(env_seed)
self._dirt_rng = np.random.default_rng(kwargs.get('seed', default=time.time_ns())) kwargs.update(env_seed=env_seed)
super(SimpleFactory, self).__init__(*args, **kwargs) super(SimpleFactory, self).__init__(*args, **kwargs)
def _flush_state(self): def _flush_state(self):
super(SimpleFactory, self)._flush_state() super(SimpleFactory, self)._flush_state()
self._obs_cube[self._slices.get_idx_by_name(DIRT)] = self._slices.by_name(DIRT).slice self._obs_cube[self._slices.get_idx(c.DIRT)] = self._slices.by_enum(c.DIRT).slice
def render(self, mode='human'): def render_additional_assets(self, mode='human'):
additional_assets = super(SimpleFactory, self).render_additional_assets()
if not self._renderer: # lazy init dirt_slice = self._slices.by_enum(c.DIRT).slice
height, width = self._obs_cube.shape[1:] dirt = [RenderEntity('dirt', tile.pos, min(0.15 + dirt_slice[tile.pos], 1.5), 'scale')
self._renderer = Renderer(width, height, view_radius=self.pomdp_r, fps=5)
dirt_slice = self._slices.by_name(DIRT).slice
dirt = [Entity('dirt', tile.pos, min(0.15 + dirt_slice[tile.pos], 1.5), 'scale')
for tile in [tile for tile in self._tiles if dirt_slice[tile.pos]]] for tile in [tile for tile in self._tiles if dirt_slice[tile.pos]]]
walls = [Entity('wall', pos) additional_assets.extend(dirt)
for pos in np.argwhere(self._slices.by_enum(c.LEVEL).slice == c.OCCUPIED_CELL.value)] return additional_assets
def asset_str(agent):
# What does this abomination do?
# if any([x is None for x in [self._slices[j] for j in agent.collisions]]):
# print('error')
col_names = [x.name for x in agent.temp_collisions]
if c.AGENT.value in col_names:
return 'agent_collision', 'blank'
elif not agent.temp_valid or c.LEVEL.name in col_names or c.AGENT.name in col_names:
return c.AGENT.value, 'invalid'
elif self._is_clean_up_action(agent.temp_action):
return c.AGENT.value, 'valid'
else:
return c.AGENT.value, 'idle'
agents = []
for i, agent in enumerate(self._agents):
name, state = asset_str(agent)
agents.append(Entity(name, agent.pos, 1, 'none', state, i+1, agent.temp_light_map))
doors = []
if self.parse_doors:
for i, door in enumerate(self._doors):
name, state = 'door_open' if door.is_open else 'door_closed', 'blank'
agents.append(Entity(name, door.pos, 1, 'none', state, i+1))
self._renderer.render(dirt+walls+agents+doors)
def spawn_dirt(self) -> None: def spawn_dirt(self) -> None:
dirt_slice = self._slices.by_name(DIRT).slice dirt_slice = self._slices.by_enum(c.DIRT).slice
# dirty_tiles = [tile for tile in self._tiles if dirt_slice[tile.pos]] # dirty_tiles = [tile for tile in self._tiles if dirt_slice[tile.pos]]
curr_dirt_amount = dirt_slice.sum() curr_dirt_amount = dirt_slice.sum()
if not curr_dirt_amount > self.dirt_properties.max_global_amount: if not curr_dirt_amount > self.dirt_properties.max_global_amount:
@ -119,7 +109,7 @@ class SimpleFactory(BaseFactory):
pass pass
def clean_up(self, agent: Agent) -> bool: def clean_up(self, agent: Agent) -> bool:
dirt_slice = self._slices.by_name(DIRT).slice dirt_slice = self._slices.by_enum(c.DIRT).slice
if old_dirt_amount := dirt_slice[agent.pos]: if old_dirt_amount := dirt_slice[agent.pos]:
new_dirt_amount = old_dirt_amount - self.dirt_properties.clean_amount new_dirt_amount = old_dirt_amount - self.dirt_properties.clean_amount
dirt_slice[agent.pos] = max(new_dirt_amount, c.FREE_CELL.value) dirt_slice[agent.pos] = max(new_dirt_amount, c.FREE_CELL.value)
@ -128,10 +118,11 @@ class SimpleFactory(BaseFactory):
return False return False
def do_additional_step(self) -> dict: def do_additional_step(self) -> dict:
info_dict = super(SimpleFactory, self).do_additional_step()
if smear_amount := self.dirt_properties.dirt_smear_amount: if smear_amount := self.dirt_properties.dirt_smear_amount:
dirt_slice = self._slices.by_name(DIRT).slice dirt_slice = self._slices.by_enum(c.DIRT).slice
for agent in self._agents: for agent in self._agents:
if agent.temp_valid and agent.last_pos != h.NO_POS: if agent.temp_valid and agent.last_pos != c.NO_POS:
if dirt := dirt_slice[agent.last_pos]: if dirt := dirt_slice[agent.last_pos]:
if smeared_dirt := round(dirt * smear_amount, 2): if smeared_dirt := round(dirt * smear_amount, 2):
dirt_slice[agent.last_pos] = max(0, dirt_slice[agent.last_pos]-smeared_dirt) dirt_slice[agent.last_pos] = max(0, dirt_slice[agent.last_pos]-smeared_dirt)
@ -144,23 +135,30 @@ class SimpleFactory(BaseFactory):
self._next_dirt_spawn = self.dirt_properties.spawn_frequency self._next_dirt_spawn = self.dirt_properties.spawn_frequency
else: else:
self._next_dirt_spawn -= 1 self._next_dirt_spawn -= 1
return {} return info_dict
def do_additional_actions(self, agent: Agent, action: int) -> bool: def do_additional_actions(self, agent: Agent, action: int) -> Union[None, bool]:
if self._is_clean_up_action(action): valid = super(SimpleFactory, self).do_additional_actions(agent, action)
valid = self.clean_up(agent) if valid is None:
return valid if self._is_clean_up_action(action):
if self.dirt_properties.agent_can_interact:
valid = self.clean_up(agent)
return valid
else:
return False
else:
return None
else: else:
return c.NOT_VALID.value return valid
def do_additional_reset(self) -> None: def do_additional_reset(self) -> None:
super(SimpleFactory, self).do_additional_reset()
self.spawn_dirt() self.spawn_dirt()
self._next_dirt_spawn = self.dirt_properties.spawn_frequency self._next_dirt_spawn = self.dirt_properties.spawn_frequency
def calculate_reward(self) -> (int, dict): def calculate_additional_reward(self, agent: Agent) -> (int, dict):
info_dict = dict() reward, info_dict = super(SimpleFactory, self).calculate_additional_reward(agent)
dirt_slice = self._slices.by_enum(c.DIRT).slice
dirt_slice = self._slices.by_name(DIRT).slice
dirty_tiles = [dirt_slice[tile.pos] for tile in self._tiles if dirt_slice[tile.pos]] dirty_tiles = [dirt_slice[tile.pos] for tile in self._tiles if dirt_slice[tile.pos]]
current_dirt_amount = sum(dirty_tiles) current_dirt_amount = sum(dirty_tiles)
dirty_tile_count = len(dirty_tiles) dirty_tile_count = len(dirty_tiles)
@ -173,56 +171,21 @@ class SimpleFactory(BaseFactory):
info_dict.update(dirty_tile_count=dirty_tile_count) info_dict.update(dirty_tile_count=dirty_tile_count)
info_dict.update(dirt_distribution_score=dirt_distribution_score) info_dict.update(dirt_distribution_score=dirt_distribution_score)
try: if agent.temp_collisions:
# penalty = current_dirt_amount self.print(f't = {self._steps}\t{agent.name} has collisions with {agent.temp_collisions}')
reward = 0
except (ZeroDivisionError, RuntimeWarning):
reward = 0
for agent in self._agents:
if agent.temp_collisions:
self.print(f't = {self._steps}\t{agent.name} has collisions with {agent.temp_collisions}')
if self._is_clean_up_action(agent.temp_action):
if agent.temp_valid:
reward += 0.5
self.print(f'{agent.name} did just clean up some dirt at {agent.pos}.')
info_dict.update(dirt_cleaned=1)
else:
reward -= 0.01
self.print(f'{agent.name} just tried to clean up some dirt at {agent.pos}, but failed.')
info_dict.update({f'{agent.name}_failed_action': 1})
info_dict.update({f'{agent.name}_failed_action': 1})
info_dict.update({f'{agent.name}_failed_dirt_cleanup': 1})
elif self._actions.is_moving_action(agent.temp_action):
if agent.temp_valid:
# info_dict.update(movement=1)
reward -= 0.00
else:
# self.print('collision')
reward -= 0.01
self.print(f'{agent.name} just hit the wall at {agent.pos}.')
info_dict.update({f'{agent.name}_vs_LEVEL': 1})
elif self._actions.is_door_usage(agent.temp_action):
if agent.temp_valid:
self.print(f'{agent.name} did just use the door at {agent.pos}.')
info_dict.update(door_used=1)
else:
reward -= 0.01
self.print(f'{agent.name} just tried to use a door at {agent.pos}, but failed.')
info_dict.update({f'{agent.name}_failed_action': 1})
info_dict.update({f'{agent.name}_failed_door_open': 1})
if self._is_clean_up_action(agent.temp_action):
if agent.temp_valid:
reward += 0.5
self.print(f'{agent.name} did just clean up some dirt at {agent.pos}.')
info_dict.update(dirt_cleaned=1)
else: else:
info_dict.update(no_op=1) reward -= 0.01
reward -= 0.00 self.print(f'{agent.name} just tried to clean up some dirt at {agent.pos}, but failed.')
info_dict.update({f'{agent.name}_failed_action': 1})
info_dict.update({f'{agent.name}_failed_action': 1})
info_dict.update({f'{agent.name}_failed_dirt_cleanup': 1})
for other_agent in agent.temp_collisions:
info_dict.update({f'{agent.name}_vs_{other_agent.name}': 1})
self.print(f"reward is {reward}")
# Potential based rewards -> # Potential based rewards ->
# track the last reward , minus the current reward = potential # track the last reward , minus the current reward = potential
return reward, info_dict return reward, info_dict

View File

@ -5,58 +5,76 @@ from typing import Tuple, Union
import numpy as np import numpy as np
from pathlib import Path from pathlib import Path
# Constants
class Constants(Enum):
WALL = '#'
DOOR = 'D'
DANGER_ZONE = 'x'
LEVEL = 'level'
AGENT = 'Agent'
FREE_CELL = 0
OCCUPIED_CELL = 1
DOORS = 'doors'
CLOSED_DOOR = 1
OPEN_DOOR = -1
ACTION = auto()
COLLISIONS = auto()
VALID = True
NOT_VALID = False
def __bool__(self):
return bool(self.value)
LEVELS_DIR = 'levels' LEVELS_DIR = 'levels'
TO_BE_AVERAGED = ['dirt_amount', 'dirty_tiles'] TO_BE_AVERAGED = ['dirt_amount', 'dirty_tiles']
IGNORED_DF_COLUMNS = ['Episode', 'Run', 'train_step', 'step', 'index', 'dirt_amount', IGNORED_DF_COLUMNS = ['Episode', 'Run', 'train_step', 'step', 'index', 'dirt_amount',
'dirty_tile_count', 'terminal_observation', 'episode'] 'dirty_tile_count', 'terminal_observation', 'episode']
MANHATTAN_MOVES = ['north', 'east', 'south', 'west']
DIAGONAL_MOVES = ['north_east', 'south_east', 'south_west', 'north_west']
NO_POS = (-9999, -9999) # Constants
class Constants(Enum):
WALL = '#'
DOOR = 'D'
DANGER_ZONE = 'x'
LEVEL = 'level'
AGENT = 'Agent'
FREE_CELL = 0
OCCUPIED_CELL = 1
NO_POS = (-9999, -9999)
ACTIONMAP = defaultdict(lambda: (0, 0), dict(north=(-1, 0), east=(0, 1), DOORS = 'doors'
south=(1, 0), west=(0, -1), CLOSED_DOOR = 1
north_east=(-1, +1), south_east=(1, 1), OPEN_DOOR = -1
south_west=(+1, -1), north_west=(-1, -1)
) ACTION = auto()
COLLISIONS = auto()
VALID = True
NOT_VALID = False
# Dirt Env
DIRT = 'dirt'
# Item Env
ITEM = 'item'
INVENTORY = 'inventory'
def __bool__(self):
return bool(self.value)
class ManhattanMoves(Enum):
    """Names of the four axis-aligned movement directions."""

    NORTH = 'north'
    EAST = 'east'
    SOUTH = 'south'
    WEST = 'west'
class DiagonalMoves(Enum):
    """Names of the four diagonal movement directions."""

    NORTHEAST = 'north_east'
    SOUTHEAST = 'south_east'
    SOUTHWEST = 'south_west'
    NORTHWEST = 'north_west'
class EnvActions(Enum):
    """Identifiers of the non-movement actions: no-op, door usage, clean-up and item interaction."""

    NOOP = 'no_op'
    USE_DOOR = 'use_door'
    CLEAN_UP = 'clean_up'
    ITEM_ACTION = 'item_action'
d = DiagonalMoves
m = ManhattanMoves
c = Constants
ACTIONMAP = defaultdict(lambda: (0, 0), {m.NORTH.name: (-1, 0), d.NORTHEAST.name: (-1, +1),
m.EAST.name: (0, 1), d.SOUTHEAST.name: (1, 1),
m.SOUTH.name: (1, 0), d.SOUTHWEST.name: (+1, -1),
m.WEST.name: (0, -1), d.NORTHWEST.name: (-1, -1)
}
) )
HORIZONTAL_DOOR_MAP = np.asarray([[0, 0, 0], [1, 0, 1], [0, 0, 0]])
VERTICAL_DOOR_MAP = np.asarray([[0, 1, 0], [0, 0, 0], [0, 1, 0]])
HORIZONTAL_DOOR_ZONE_1 = np.asarray([[1, 1, 1], [0, 0, 0], [0, 0, 0]])
HORIZONTAL_DOOR_ZONE_2 = np.asarray([[0, 0, 0], [0, 0, 0], [1, 1, 1]])
VERTICAL_DOOR_ZONE_1 = np.asarray([[1, 0, 0], [0, 0, 0], [0, 0, 1]])
VERTICAL_DOOR_ZONE_2 = np.asarray([[1, 0, 0], [0, 0, 0], [0, 0, 1]])
# Utility functions # Utility functions
def parse_level(path): def parse_level(path):
@ -67,13 +85,13 @@ def parse_level(path):
return level return level
def one_hot_level(level, wall_char: Union[Constants, str] = Constants.WALL): def one_hot_level(level, wall_char: Union[c, str] = c.WALL):
grid = np.array(level) grid = np.array(level)
binary_grid = np.zeros(grid.shape, dtype=np.int8) binary_grid = np.zeros(grid.shape, dtype=np.int8)
if wall_char in Constants: if wall_char in c:
binary_grid[grid == wall_char.value] = Constants.OCCUPIED_CELL.value binary_grid[grid == wall_char.value] = c.OCCUPIED_CELL.value
else: else:
binary_grid[grid == wall_char] = Constants.OCCUPIED_CELL.value binary_grid[grid == wall_char] = c.OCCUPIED_CELL.value
return binary_grid return binary_grid
@ -89,7 +107,22 @@ def check_position(slice_to_check_against: np.ndarray, position_to_check: Tuple[
# Check for collision with level walls # Check for collision with level walls
valid = valid and not slice_to_check_against[x_pos, y_pos] valid = valid and not slice_to_check_against[x_pos, y_pos]
return Constants.VALID if valid else Constants.NOT_VALID return c.VALID if valid else c.NOT_VALID
def asset_str(agent):
    """Select the render asset name and the state of an agent.

    :param agent: Object providing ``temp_collisions`` (iterable of objects with
                  a ``name``) and ``temp_valid``.
    :return: Tuple ``(asset_name, state)``: ``('agent_collision', 'blank')`` if
             a collision partner is named like ``c.AGENT.value``; the agent
             asset with state ``'invalid'`` if the last action was not valid or
             a collision partner is named ``c.LEVEL.name`` / ``c.AGENT.name``;
             otherwise the agent asset with state ``'valid'``.
    """
    col_names = [x.name for x in agent.temp_collisions]
    if c.AGENT.value in col_names:
        return 'agent_collision', 'blank'
    if not agent.temp_valid or c.LEVEL.name in col_names or c.AGENT.name in col_names:
        return c.AGENT.value, 'invalid'
    # ``agent.temp_valid`` is truthy from here on, so the former trailing
    # ``else`` branch (state 'idle') could never be reached and was removed.
    # NOTE(review): if an 'idle' state is still wanted, it has to be derived
    # from the performed action - confirm the intended behaviour.
    return c.AGENT.value, 'valid'
if __name__ == '__main__': if __name__ == '__main__':

18
main.py
View File

@ -9,6 +9,7 @@ import pandas as pd
from stable_baselines3.common.callbacks import CallbackList from stable_baselines3.common.callbacks import CallbackList
from environments.factory.double_task_factory import DoubleTaskFactory, ItemProperties
from environments.factory.simple_factory import DirtProperties, SimpleFactory from environments.factory.simple_factory import DirtProperties, SimpleFactory
from environments.helpers import IGNORED_DF_COLUMNS from environments.helpers import IGNORED_DF_COLUMNS
from environments.logging.monitor import MonitorCallback from environments.logging.monitor import MonitorCallback
@ -94,11 +95,12 @@ if __name__ == '__main__':
dirt_props = DirtProperties(clean_amount=1, gain_amount=0.1, max_global_amount=20, dirt_props = DirtProperties(clean_amount=1, gain_amount=0.1, max_global_amount=20,
max_local_amount=1, spawn_frequency=5, max_spawn_ratio=0.05, max_local_amount=1, spawn_frequency=5, max_spawn_ratio=0.05,
dirt_smear_amount=0.0) dirt_smear_amount=0.0, agent_can_interact=False)
item_props = ItemProperties(n_items=5, agent_can_interact=True)
move_props = MovementProperties(allow_diagonal_movement=True, move_props = MovementProperties(allow_diagonal_movement=True,
allow_square_movement=True, allow_square_movement=True,
allow_no_op=False) allow_no_op=False)
train_steps = 2.5e6 train_steps = 6e5
time_stamp = int(time.time()) time_stamp = int(time.time())
out_path = None out_path = None
@ -106,11 +108,13 @@ if __name__ == '__main__':
for modeL_type in [A2C, PPO, DQN]: # ,RegDQN, QRDQN]: for modeL_type in [A2C, PPO, DQN]: # ,RegDQN, QRDQN]:
for seed in range(3): for seed in range(3):
with SimpleFactory(n_agents=1, dirt_properties=dirt_props, pomdp_radius=2, max_steps=400, parse_doors=True, with DoubleTaskFactory(n_agents=1, with_dirt=False,
movement_properties=move_props, level_name='rooms', frames_to_stack=3, item_properties=item_props, dirt_properties=None, movement_properties=move_props,
omit_agent_slice_in_obs=True, combin_agent_slices_in_obs=True, record_episodes=False, pomdp_radius=2, max_steps=500, parse_doors=True,
cast_shadows=True, doors_have_area=False, seed=seed level_name='rooms', frames_to_stack=3,
) as env: omit_agent_slice_in_obs=True, combin_agent_slices_in_obs=True, record_episodes=False,
cast_shadows=True, doors_have_area=False, seed=seed
) as env:
if modeL_type.__name__ in ["PPO", "A2C"]: if modeL_type.__name__ in ["PPO", "A2C"]:
kwargs = dict(ent_coef=0.01) kwargs = dict(ent_coef=0.01)

View File

@ -27,4 +27,6 @@ gym~=0.18.0
PyYAML~=5.3.1 PyYAML~=5.3.1
pyglet~=1.5.0 pyglet~=1.5.0
optuna~=2.7.0 optuna~=2.7.0
natsort~=7.1.1 natsort~=7.1.1
tqdm~=4.60.0
networkx~=2.6.1