Items and combination of item and dirt

This commit is contained in:
Steffen Illium
2021-08-23 09:51:35 +02:00
parent 244d4eed68
commit d5e4d44823
12 changed files with 647 additions and 445 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.0 KiB

View File

@ -1,3 +1,4 @@
import abc
import time
from pathlib import Path
from typing import List, Union, Iterable
@ -10,6 +11,7 @@ import yaml
from gym.wrappers import FrameStack
from environments.factory.base.shadow_casting import Map
from environments.factory.renderer import Renderer, RenderEntity
from environments.helpers import Constants as c, Constants
from environments import helpers as h
from environments.factory.base.objects import Slice, Agent, Tile, Action
@ -28,20 +30,7 @@ class BaseFactory(gym.Env):
@property
def observation_space(self):
if self.combin_agent_slices_in_obs and self.omit_agent_slice_in_obs:
if self.n_agents > 1:
slices = self._slices.n - (self._agents.n - 1)
else:
slices = self._slices.n - 1
elif self.combin_agent_slices_in_obs and not self.omit_agent_slice_in_obs:
slices = self._slices.n - (self._agents.n - 1)
elif not self.combin_agent_slices_in_obs and self.omit_agent_slice_in_obs:
slices = self._slices.n - self._agents.n
elif not self.combin_agent_slices_in_obs and not self.omit_agent_slice_in_obs:
slices = self._slices.n
else:
raise RuntimeError('This should not happen!')
slices = self._slices.n_observable_slices
level_shape = (self.pomdp_r * 2 + 1, self.pomdp_r * 2 + 1) if self.pomdp_r else self._level_shape
space = spaces.Box(low=0, high=1, shape=(slices, *level_shape), dtype=np.float32)
return space
@ -54,36 +43,6 @@ class BaseFactory(gym.Env):
def movement_actions(self):
return self._actions.movement_actions
@property
def additional_actions(self) -> Union[str, List[str]]:
"""
When heriting from this Base Class, you musst implement this methode!!!
:return: A list of Actions-object holding all additional actions.
:rtype: List[Action]
"""
raise NotImplementedError('Please register additional actions ')
@property
def additional_entities(self) -> Union[Entities, List[Entities]]:
"""
When heriting from this Base Class, you musst implement this methode!!!
:return: A single Entites collection or a list of such.
:rtype: Union[Entities, List[Entities]]
"""
raise NotImplementedError('Please register additional entities.')
@property
def additional_slices(self) -> Union[Slice, List[Slice]]:
"""
When heriting from this Base Class, you musst implement this methode!!!
:return: A list of Slice-objects.
:rtype: List[Slice]
"""
raise NotImplementedError('Please register additional slices.')
def __enter__(self):
    """Enter the context: return the env itself, or a FrameStack wrapper when frame stacking is enabled."""
    if self.frames_to_stack == 0:
        return self
    return FrameStack(self, self.frames_to_stack)
@ -94,17 +53,20 @@ class BaseFactory(gym.Env):
movement_properties: MovementProperties = MovementProperties(), parse_doors=False,
combin_agent_slices_in_obs: bool = False, frames_to_stack=0, record_episodes=False,
omit_agent_slice_in_obs=False, done_at_collision=False, cast_shadows=True,
verbose=False, doors_have_area=True, **kwargs):
verbose=False, doors_have_area=True, env_seed=time.time_ns(), **kwargs):
assert frames_to_stack != 1 and frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1."
# Attribute Assignment
self._base_rng = np.random.default_rng(kwargs.get('seed', default=time.time_ns()))
self.env_seed = env_seed
self._base_rng = np.random.default_rng(self.env_seed)
self.movement_properties = movement_properties
self.level_name = level_name
self._level_shape = None
self.verbose = verbose
self._renderer = None # expensive - don't use it when not required !
self.n_agents = n_agents
self.max_steps = max_steps
self.pomdp_r = pomdp_r
self.combin_agent_slices_in_obs = combin_agent_slices_in_obs
@ -132,25 +94,37 @@ class BaseFactory(gym.Env):
# Level
level_filepath = Path(__file__).parent.parent / h.LEVELS_DIR / f'{self.level_name}.txt'
parsed_level = h.parse_level(level_filepath)
level = [Slice(c.LEVEL.name, h.one_hot_level(parsed_level), is_blocking_light=True)]
level = [Slice(c.LEVEL, h.one_hot_level(parsed_level), is_blocking_light=True)]
self._level_shape = level[0].shape
# Doors
parsed_doors = h.one_hot_level(parsed_level, c.DOOR)
if parsed_doors.any():
doors = [Slice(c.DOORS.name, parsed_doors, is_blocking_light=True)]
doors = [Slice(c.DOORS, parsed_doors, is_blocking_light=True)]
else:
doors = []
# Agents
agents = []
for i in range(self.n_agents):
agents.append(Slice(f'{c.AGENT.name}#{i}', np.zeros_like(level[0].slice, dtype=np.float32)))
state_slices.register_additional_items(level+doors+agents)
agent_names = [f'{c.AGENT.value}#{i}' for i in range(self.n_agents)]
# Additional Slices from SubDomains
if additional_slices := self.additional_slices:
state_slices.register_additional_items(additional_slices)
if self.combin_agent_slices_in_obs and self.omit_agent_slice_in_obs:
if self.n_agents == 1:
observables = [False]
else:
observables = [True] + ([False] * (self.n_agents - 1))
elif self.combin_agent_slices_in_obs and not self.omit_agent_slice_in_obs:
observables = [True] + ([False] * (self.n_agents - 1))
elif not self.combin_agent_slices_in_obs and self.omit_agent_slice_in_obs:
observables = [False] + ([True] * (self.n_agents - 1))
elif not self.combin_agent_slices_in_obs and not self.omit_agent_slice_in_obs:
observables = [True] * self.n_agents
else:
raise RuntimeError('This should not happen!')
for observable, agent_name in zip(observables, agent_names):
agents.append(Slice(agent_name, np.zeros_like(level[0].slice, dtype=np.float32), is_observable=observable))
state_slices.register_additional_items(level+doors+agents+self.additional_slices)
return state_slices
def _init_obs_cube(self) -> np.ndarray:
@ -198,18 +172,6 @@ class BaseFactory(gym.Env):
obs = self._get_observations()
return obs
def pre_step(self) -> None:
pass
def do_additional_reset(self) -> None:
pass
def do_additional_step(self) -> dict:
return {}
def post_step(self) -> dict:
return {}
def step(self, actions):
actions = [actions] if isinstance(actions, int) or np.isscalar(actions) else actions
assert isinstance(actions, Iterable), f'"actions" has to be in [{int, list}]'
@ -217,31 +179,22 @@ class BaseFactory(gym.Env):
done = False
# Pre step Hook for later use
self.pre_step()
self.hook_pre_step()
# Move this into a separate function?
for action, agent in zip(actions, self._agents):
agent.clear_temp_sate()
action_name = self._actions[action]
if self._actions.is_moving_action(action):
valid = self._move_or_colide(agent, action_name)
elif self._actions.is_no_op(action):
action_obj = self._actions[action]
if self._actions.is_moving_action(action_obj):
valid = self._move_or_colide(agent, action_obj)
elif self._actions.is_no_op(action_obj):
valid = c.VALID.value
elif self._actions.is_door_usage(action):
# Check if agent really is standing on a door:
if self.doors_have_area:
door = self._doors.get_near_position(agent.pos)
else:
door = self._doors.by_pos(agent.pos)
if door is not None:
door.use()
valid = c.VALID.value
# When he doesn't...
else:
valid = c.NOT_VALID.value
elif self._actions.is_door_usage(action_obj):
valid = self._handle_door_interaction(agent)
else:
valid = self.do_additional_actions(agent, action)
agent.temp_action = action
valid = self.do_additional_actions(agent, action_obj)
assert valid is not None, 'This should not happen, every Action musst be detected correctly!'
agent.temp_action = action_obj
agent.temp_valid = valid
# In-between step Hook for later use
@ -275,12 +228,25 @@ class BaseFactory(gym.Env):
info.update(self._summarize_state())
# Post step Hook for later use
info.update(self.post_step())
info.update(self.hook_post_step())
obs = self._get_observations()
return obs, reward, done, info
def _handle_door_interaction(self, agent):
    """Let ``agent`` use a door looked up from its current position.

    :param agent: The agent that issued the door action.
    :return: ``c.VALID.value`` when a door was found and used, ``c.NOT_VALID.value`` otherwise.
    """
    # With ``doors_have_area`` the register's ``get_near_position`` lookup is used,
    # otherwise the door has to be found by the agent's exact position.
    find_door = self._doors.get_near_position if self.doors_have_area else self._doors.by_pos
    door = find_door(agent.pos)
    if door is None:
        # No door available for this agent position.
        return c.NOT_VALID.value
    door.use()
    return c.VALID.value
def _flush_state(self):
self._obs_cube[np.arange(len(self._slices)) != self._slices.get_idx(c.LEVEL)] = c.FREE_CELL.value
if self.parse_doors:
@ -291,7 +257,7 @@ class BaseFactory(gym.Env):
self._obs_cube[self._slices.get_idx(c.DOORS)][door.pos] = c.CLOSED_DOOR.value
for agent in self._agents:
self._obs_cube[self._slices.get_idx_by_name(agent.name)][agent.pos] = c.OCCUPIED_CELL.value
if agent.last_pos != h.NO_POS:
if agent.last_pos != c.NO_POS:
self._obs_cube[self._slices.get_idx_by_name(agent.name)][agent.last_pos] = c.FREE_CELL.value
def _get_observations(self) -> np.ndarray:
@ -318,8 +284,8 @@ class BaseFactory(gym.Env):
obs = self._obs_cube
if self.cast_shadows:
obs_block_light = [obs[idx] != c.OCCUPIED_CELL.value for idx, slice
in enumerate(self._slices) if slice.is_blocking_light]
obs_block_light = [obs[idx] != c.OCCUPIED_CELL.value for idx, obs_slice
in enumerate(self._slices) if obs_slice.is_blocking_light]
door_shadowing = False
if door := self._doors.by_pos(agent.pos):
if door.is_closed:
@ -332,6 +298,7 @@ class BaseFactory(gym.Env):
xs, ys = zip(*blocking)
else:
xs, ys = zip(*group)
# noinspection PyTypeChecker
obs_block_light[self._slices.get_idx(c.LEVEL)][xs, ys] = False
light_block_map = Map((np.prod(obs_block_light, axis=0) != True).astype(int))
@ -340,9 +307,14 @@ class BaseFactory(gym.Env):
else:
light_block_map = light_block_map.do_fov(*agent.pos, max(self._level_shape))
if door_shadowing:
# noinspection PyUnboundLocalVariable
light_block_map[xs, ys] = 0
agent.temp_light_map = light_block_map
obs = (obs * light_block_map) - ((1 - light_block_map) * obs[self._slices.get_idx(c.LEVEL)])
for obs_idx in range(obs.shape[0]):
if self._slices[obs_idx].can_be_shadowed:
obs[obs_idx] = (obs[obs_idx] * light_block_map) - (
(1 - light_block_map) * obs[self._slices.get_idx(c.LEVEL)]
)
if self.combin_agent_slices_in_obs and self.n_agents > 1:
agent_obs = np.sum(obs[[key for key, l_slice in self._slices.items() if c.AGENT.name in l_slice.name and
@ -357,9 +329,6 @@ class BaseFactory(gym.Env):
else:
return obs
def do_additional_actions(self, agent: Agent, action: int) -> bool:
raise NotImplementedError
def get_all_tiles_with_collisions(self) -> List[Tile]:
tiles_with_collisions = list()
for tile in self._tiles:
@ -392,7 +361,7 @@ class BaseFactory(gym.Env):
valid = c.VALID
return tile, valid
if self.parse_doors and agent.last_pos != h.NO_POS:
if self.parse_doors and agent.last_pos != c.NO_POS:
if door := self._doors.by_pos(new_tile.pos):
if door.can_collide:
return agent.tile, c.NOT_VALID
@ -416,10 +385,63 @@ class BaseFactory(gym.Env):
def calculate_reward(self) -> (int, dict):
# Returns: Reward, Info
raise NotImplementedError
info_dict = dict()
reward = 0
for agent in self._agents:
if self._actions.is_moving_action(agent.temp_action):
if agent.temp_valid:
# info_dict.update(movement=1)
reward -= 0.00
else:
# self.print('collision')
reward -= 0.01
self.print(f'{agent.name} just hit the wall at {agent.pos}.')
info_dict.update({f'{agent.name}_vs_LEVEL': 1})
elif self._actions.is_door_usage(agent.temp_action):
if agent.temp_valid:
self.print(f'{agent.name} did just use the door at {agent.pos}.')
info_dict.update(door_used=1)
else:
reward -= 0.01
self.print(f'{agent.name} just tried to use a door at {agent.pos}, but failed.')
info_dict.update({f'{agent.name}_failed_action': 1})
info_dict.update({f'{agent.name}_failed_door_open': 1})
elif self._actions.is_no_op(agent.temp_action):
info_dict.update(no_op=1)
reward -= 0.00
additional_reward, additional_info_dict = self.calculate_additional_reward(agent)
reward += additional_reward
info_dict.update(additional_info_dict)
for other_agent in agent.temp_collisions:
info_dict.update({f'{agent.name}_vs_{other_agent.name}': 1})
self.print(f"reward is {reward}")
return reward, info_dict
def render(self, mode='human'):
raise NotImplementedError
if not self._renderer: # lazy init
height, width = self._obs_cube.shape[1:]
self._renderer = Renderer(width, height, view_radius=self.pomdp_r, fps=5)
walls = [RenderEntity('wall', pos)
for pos in np.argwhere(self._slices.by_enum(c.LEVEL).slice == c.OCCUPIED_CELL.value)]
agents = []
for i, agent in enumerate(self._agents):
name, state = h.asset_str(agent)
agents.append(RenderEntity(name, agent.pos, 1, 'none', state, i + 1, agent.temp_light_map))
doors = []
if self.parse_doors:
for i, door in enumerate(self._doors):
name, state = 'door_open' if door.is_open else 'door_closed', 'blank'
doors.append(RenderEntity(name, door.pos, 1, 'none', state, i + 1))
additional_assets = self.render_additional_assets()
self._renderer.render(walls + doors + additional_assets + agents)
def save_params(self, filepath: Path):
# noinspection PyProtectedMember
@ -440,3 +462,66 @@ class BaseFactory(gym.Env):
def print(self, string):
    """Write ``string`` to stdout, but only when the environment was created with ``verbose``."""
    if not self.verbose:
        return
    print(string)
# Properties which are called by the base class to extend beyond attributes of the base class
@property
def additional_actions(self) -> Union[Action, List[Action]]:
    """
    Extension point for subclasses that want to register additional actions.

    The base implementation registers nothing.

    :return: A list of Action objects holding all additional actions (empty here).
    :rtype: List[Action]
    """
    return list()
@property
def additional_entities(self) -> Union[Entities, List[Entities]]:
    """
    Extension point for subclasses that want to register additional entity collections.

    The base implementation registers nothing.

    :return: A single Entities collection or a list of such (empty list here).
    :rtype: Union[Entities, List[Entities]]
    """
    return list()
@property
def additional_slices(self) -> Union[Slice, List[Slice]]:
    """
    Extension point for subclasses that want to register additional state slices.

    The base implementation registers nothing.

    :return: A list of Slice objects (empty here).
    :rtype: List[Slice]
    """
    return list()
# Functions which provide additions to functions of the base class
# Always call super!!!!!!
@abc.abstractmethod
def do_additional_reset(self) -> None:
    """Subclass extension for additional reset work; the base implementation does nothing."""
    return None
@abc.abstractmethod
def do_additional_step(self) -> dict:
    """Subclass extension for additional per-step work.

    :return: A dict of additional info entries; the base implementation contributes none.
    """
    return dict()
@abc.abstractmethod
def do_additional_actions(self, agent: Agent, action: int) -> Union[None, bool]:
    """Subclass extension for actions the base class does not handle itself.

    :param agent: The agent performing the action.
    :param action: The action to perform.
    :return: The validity of the action, or None when the action is not recognised.
             The base implementation recognises no additional action.
    """
    return
@abc.abstractmethod
def calculate_additional_reward(self, agent: Agent) -> (int, dict):
    """Subclass extension for additional per-agent reward.

    :param agent: The agent the reward is calculated for.
    :return: Tuple of (reward, info dict); the base implementation adds no reward and no info.
    """
    reward, info_dict = 0, {}
    return reward, info_dict
@abc.abstractmethod
def render_additional_assets(self):
    """Subclass extension for additional render entities; the base implementation provides none."""
    return list()
# Hooks for in between operations.
# Always call super!!!!!!
@abc.abstractmethod
def hook_pre_step(self) -> None:
    """Hook invoked at the start of a step; the base implementation does nothing."""
    return None
@abc.abstractmethod
def hook_post_step(self) -> dict:
    """Hook invoked at the end of a step.

    :return: A dict that is merged into the step info; empty in the base implementation.
    """
    return dict()

View File

@ -1,8 +1,5 @@
import itertools
import networkx as nx
import numpy as np
from environments import helpers as h
from environments.helpers import Constants as c
import itertools
@ -16,35 +13,32 @@ class Object:
def __bool__(self):
return True
@property
def i(self):
return self._identifier
@property
def name(self):
return self._identifier
return self._name
def __init__(self, identifier, **kwargs):
self._identifier = identifier
def __init__(self, name, name_is_identifier=False, **kwargs):
name = name.name if hasattr(name, 'name') else name
self._name = f'{self.__class__.__name__}#{name}' if name_is_identifier else name
if kwargs:
print(f'Following kwargs were passed, but ignored: {kwargs}')
def __repr__(self):
return f'{self.__class__.__name__}({self._identifier})'
return f'{self.__class__.__name__}({self.name})'
class Action(Object):
@property
def name(self):
return self.i
def __init__(self, *args):
super(Action, self).__init__(*args)
class Slice(Object):
@property
def is_observable(self):
return self._is_observable
@property
def shape(self):
return self.slice.shape
@ -57,10 +51,16 @@ class Slice(Object):
def free_tiles(self):
return np.argwhere(self.slice == c.FREE_CELL.value)
def __init__(self, identifier, arrayslice, is_blocking_light=False):
def __init__(self, identifier, arrayslice, is_blocking_light=False, can_be_shadowed=True, is_observable=True):
super(Slice, self).__init__(identifier)
self.slice = arrayslice
self.is_blocking_light = is_blocking_light
self.can_be_shadowed = can_be_shadowed
self._is_observable = is_observable
def set_slice(self, new_slice: np.ndarray):
    """Replace the stored array with ``new_slice``.

    :param new_slice: The replacement array; its shape has to equal the shape of the current one.
    """
    current_shape, incoming_shape = self.slice.shape, new_slice.shape
    assert current_shape == incoming_shape
    self.slice = new_slice
class Wall(Object):
@ -89,8 +89,8 @@ class Tile(Object):
def pos(self):
return self._pos
def __init__(self, i, pos):
super(Tile, self).__init__(i)
def __init__(self, i, pos, **kwargs):
super(Tile, self).__init__(i, **kwargs)
self._guests = dict()
self._pos = tuple(pos)
@ -164,7 +164,7 @@ class MoveableEntity(Entity):
if self._last_tile:
return self._last_tile.pos
else:
return h.NO_POS
return c.NO_POS
@property
def direction_of_view(self):
@ -206,8 +206,8 @@ class Door(Entity):
return [node for node in self.connectivity.nodes
if node not in range(len(self.connectivity_subgroups)) and node != self.pos]
def __init__(self, *args, context, closed_on_init=True, auto_close_interval=10, has_area=False):
super(Door, self).__init__(*args)
def __init__(self, *args, context, closed_on_init=True, auto_close_interval=10, has_area=False, **kwargs):
super(Door, self).__init__(*args, **kwargs)
self._state = c.CLOSED_DOOR
self.has_area = has_area
self.auto_close_interval = auto_close_interval
@ -270,8 +270,8 @@ class Door(Entity):
class Agent(MoveableEntity):
def __init__(self, *args):
super(Agent, self).__init__(*args)
def __init__(self, *args, **kwargs):
super(Agent, self).__init__(*args, **kwargs)
self.clear_temp_sate()
# noinspection PyAttributeOutsideInit
@ -280,5 +280,5 @@ class Agent(MoveableEntity):
# if attr.startswith('temp'):
self.temp_collisions = []
self.temp_valid = None
self.temp_action = -1
self.temp_action = None
self.temp_light_map = None

View File

@ -1,9 +1,7 @@
import itertools
import random
from enum import Enum
from typing import List, Union
import networkx as nx
import numpy as np
from environments.factory.base.objects import Entity, Tile, Agent, Door, Slice, Action
@ -16,11 +14,8 @@ class Register:
_accepted_objects = Entity
@classmethod
def from_argwhere_coordinates(cls, positions: (int, int), tiles):
entities = [cls._accepted_objects(i, tiles.by_pos(position)) for i, position in enumerate(positions)]
registered_obj = cls()
registered_obj.register_additional_items(entities)
return registered_obj
def from_argwhere_coordinates(cls, positions: [(int, int)], tiles):
return cls.from_tiles([tiles.by_pos(position) for position in positions])
@property
def name(self):
@ -72,8 +67,8 @@ class Register:
def by_name(self, item):
    """Return the registered object stored under the name ``item``."""
    register_idx = self._names[item]
    return self[register_idx]
def by_enum(self, enum: Enum):
return self[self._names[enum.name]]
def by_enum(self, enum_obj: Enum):
return self[self._names[enum_obj.name]]
def __repr__(self):
# Renders as "<class name>(<contents of self._register>)".
return f'{self.__class__.__name__}({self._register})'
@ -84,13 +79,13 @@ class Register:
def get_idx_by_name(self, item):
    """Return the register index stored under the name ``item``."""
    name_to_idx = self._names
    return name_to_idx[item]
def get_idx(self, enum: Enum):
return self._names[enum.name]
def get_idx(self, enum_obj: Enum):
return self._names[enum_obj.name]
@classmethod
def from_tiles(cls, tiles, **kwargs):
entities = [cls._accepted_objects(f'{cls._accepted_objects.__name__.upper()}#{i}', tile, **kwargs)
for i, tile in enumerate(tiles)]
# objects_name = cls._accepted_objects.__name__
entities = [cls._accepted_objects(i, tile, name_is_identifier=True, **kwargs) for i, tile in enumerate(tiles)]
registered_obj = cls()
registered_obj.register_additional_items(entities)
return registered_obj
@ -98,14 +93,6 @@ class Register:
class EntityRegister(Register):
@classmethod
def from_argwhere_coordinates(cls, argwhere_coordinates, **kwargs):
tiles = cls()
tiles.register_additional_items(
[cls._accepted_objects(i, pos, **kwargs) for i, pos in enumerate(argwhere_coordinates)]
)
return tiles
def __init__(self):
super(EntityRegister, self).__init__()
self._tiles = dict()
@ -141,6 +128,15 @@ class Entities(Register):
class FloorTiles(EntityRegister):
_accepted_objects = Tile
@classmethod
def from_argwhere_coordinates(cls, argwhere_coordinates):
    """Create a register holding one accepted object per coordinate.

    Each object is built from its running index and its position, with the index used as identifier.

    :param argwhere_coordinates: Iterable of positions.
    :return: The new, populated register.
    """
    register = cls()
    new_objects = []
    for idx, position in enumerate(argwhere_coordinates):
        new_objects.append(cls._accepted_objects(idx, position, name_is_identifier=True))
    # noinspection PyTypeChecker
    register.register_additional_items(new_objects)
    return register
@property
def occupied_tiles(self):
tiles = [tile for tile in self if tile.is_occupied()]
@ -148,7 +144,7 @@ class FloorTiles(EntityRegister):
return tiles
@property
def empty_tiles(self):
def empty_tiles(self) -> List[Tile]:
tiles = [tile for tile in self if tile.is_empty()]
random.shuffle(tiles)
return tiles
@ -185,6 +181,7 @@ class Actions(Register):
def movement_actions(self):
return self._movement_actions
# noinspection PyTypeChecker
def __init__(self, movement_properties: MovementProperties, can_use_doors=False):
self.allow_no_op = movement_properties.allow_no_op
self.allow_diagonal_movement = movement_properties.allow_diagonal_movement
@ -193,43 +190,47 @@ class Actions(Register):
super(Actions, self).__init__()
if self.allow_square_movement:
self.register_additional_items([self._accepted_objects(direction) for direction in h.MANHATTAN_MOVES])
self.register_additional_items([self._accepted_objects(direction) for direction in h.ManhattanMoves])
if self.allow_diagonal_movement:
self.register_additional_items([self._accepted_objects(direction) for direction in h.DIAGONAL_MOVES])
self.register_additional_items([self._accepted_objects(direction) for direction in h.DiagonalMoves])
self._movement_actions = self._register.copy()
if self.can_use_doors:
self.register_additional_items([self._accepted_objects('use_door')])
self.register_additional_items([self._accepted_objects(h.EnvActions.USE_DOOR)])
if self.allow_no_op:
self.register_additional_items([self._accepted_objects('no-op')])
self.register_additional_items([self._accepted_objects(h.EnvActions.NOOP)])
def is_moving_action(self, action: Union[int]):
#if isinstance(action, Action):
# return (action.name in h.MANHATTAN_MOVES and self.allow_square_movement) or \
# (action.name in h.DIAGONAL_MOVES and self.allow_diagonal_movement)
#else:
return action in self.movement_actions.keys()
return action in self.movement_actions.values()
def is_no_op(self, action: Union[str, int]):
if isinstance(action, str):
action = self.by_name(action)
return self[action].name == 'no-op'
def is_no_op(self, action: Union[str, Action, int]):
if isinstance(action, int):
action = self[action]
if isinstance(action, Action):
action = action.name
return action == h.EnvActions.NOOP.name
def is_door_usage(self, action: Union[str, int]):
if isinstance(action, str):
action = self.by_name(action)
return self[action].name == 'use_door'
if isinstance(action, int):
action = self[action]
if isinstance(action, Action):
action = action.name
return action == h.EnvActions.USE_DOOR.name
class StateSlices(Register):
_accepted_objects = Slice
@property
def n_observable_slices(self):
    """Number of registered slices whose ``is_observable`` flag is set."""
    return sum(1 for state_slice in self if state_slice.is_observable)
@property
def AGENTSTARTIDX(self):
if self._agent_start_idx:
return self._agent_start_idx
else:
self._agent_start_idx = min([idx for idx, x in self.items() if c.AGENT.name in x.name])
self._agent_start_idx = min([idx for idx, x in self.items() if c.AGENT.value in x.name])
return self._agent_start_idx
def __init__(self):

View File

@ -0,0 +1,229 @@
import time
from collections import deque
from enum import Enum
from typing import List, Union, NamedTuple
import numpy as np
from environments.factory.simple_factory import SimpleFactory
from environments.helpers import Constants as c
from environments import helpers as h
from environments.factory.base.objects import Agent, Slice, Entity, Action
from environments.factory.base.registers import Entities
from environments.factory.renderer import RenderEntity
PICK_UP = 'pick_up'
DROP_OFF = 'drop_off'
NO_ITEM = 0
ITEM_DROP_OFF = -1
def inventory_slice_name(agent_i):
    """Build the name of an agent's inventory slice.

    :param agent_i: Either the agent's integer index or an already formatted agent name.
    :return: The inventory slice name for that agent.
    """
    if not isinstance(agent_i, int):
        # Anything that is not an int is used as the agent part verbatim.
        return f'{c.INVENTORY.name}_{agent_i}'
    return f'{c.INVENTORY.name}_{c.AGENT.value}#{agent_i}'
class DropOffLocation(Entity):
    """Entity at which agents can drop off the items they carry."""

    def __init__(self, *args, storage_size_until_full: int = 5, **kwargs):
        """
        :param storage_size_until_full: Capacity of the storage; see ``is_full``.
        """
        super().__init__(DROP_OFF, *args, **kwargs)
        # NOTE: a deque with ``maxlen`` silently discards its oldest element once it is full.
        self.storage = deque(maxlen=storage_size_until_full)

    def place_item(self, item):
        """Store ``item`` and report success (always True)."""
        self.storage.append(item)
        return True

    @property
    def is_full(self):
        """True once the storage holds ``storage_size_until_full`` items."""
        return len(self.storage) == self.storage.maxlen
class ItemProperties(NamedTuple):
    """Configuration of the item task."""

    # How many items are there at the same time
    n_items: int = 1
    # Spawn Frequency in Steps
    spawn_frequency: int = 5
    # How many items are needed until the drop off is full
    max_dropoff_storage_size: int = 5
    # How many items are needed until the agent inventory is full
    max_agent_storage_size: int = 5
    # Whether agents have the possibility to interact with the domain items
    agent_can_interact: bool = True
# noinspection PyAttributeOutsideInit,PyUnresolvedReferences
class DoubleTaskFactory(SimpleFactory):
    """Factory environment with an item pick-up / drop-off task.

    Items and the drop-off location are stored in the ``c.ITEM`` slice (``NO_ITEM`` for free cells,
    ``ITEM_DROP_OFF`` for the drop-off, a positive number for an item). Every agent additionally owns
    an inventory slice that is never shadowed.

    With ``with_dirt=True`` the behaviour of ``SimpleFactory`` stays part of every ``super`` call; with
    ``with_dirt=False`` all ``super`` calls skip ``SimpleFactory`` and continue with its parent class.
    """

    # noinspection PyMissingConstructor
    def __init__(self, item_properties: ItemProperties, *args, with_dirt=False, env_seed=None, **kwargs):
        """
        :param item_properties: Configuration of the item task.
        :param with_dirt: Whether the ``SimpleFactory`` behaviour is kept in the call chain.
        :param env_seed: Seed for the item RNG; also forwarded to the parent constructor as ``env_seed``.
                         ``None`` draws a fresh time based seed for every instance.
        """
        # A ``time.time_ns()`` default in the signature would be evaluated only once, at definition
        # time, and every environment would silently share the same seed.
        if env_seed is None:
            env_seed = time.time_ns()
        self.item_properties = item_properties
        kwargs.update(env_seed=env_seed)
        self._item_rng = np.random.default_rng(env_seed)
        # Check for "no partial observability" first, so that ``pomdp_r=None`` does not fail on ``None ** 2``.
        pomdp_r = kwargs.get('pomdp_r', 0)
        assert not pomdp_r or item_properties.n_items < pomdp_r ** 2, \
            "'n_items' has to be smaller than 'pomdp_r' ** 2."
        # Start class for all ``super`` look-ups of this class. The class is named explicitly, as
        # ``self.__class__`` would lead to infinite recursion as soon as this class is subclassed.
        self._super = DoubleTaskFactory if with_dirt else SimpleFactory
        super(self._super, self).__init__(*args, **kwargs)

    @property
    def additional_actions(self) -> Union[Action, List[Action]]:
        """Actions of the parent plus the item action."""
        super_actions = super(self._super, self).additional_actions
        super_actions.append(Action(h.EnvActions.ITEM_ACTION))
        return super_actions

    @property
    def additional_entities(self) -> Union[Entities, List[Entities]]:
        """Entities of the parent; the item task adds none."""
        super_entities = super(self._super, self).additional_entities
        return super_entities

    @property
    def additional_slices(self) -> Union[Slice, List[Slice]]:
        """Slices of the parent plus the item slice and one unshadowed inventory slice per agent."""
        super_slices = super(self._super, self).additional_slices
        super_slices.append(Slice(c.ITEM, np.zeros(self._level_shape)))
        super_slices.extend(
            Slice(inventory_slice_name(agent_i), np.zeros(self._level_shape), can_be_shadowed=False)
            for agent_i in range(self.n_agents)
        )
        return super_slices

    def _flush_state(self):
        """Write the item slice and the per agent inventory slices into the observation cube."""
        super(self._super, self)._flush_state()

        # Flush environmental item state
        slice_idx = self._slices.get_idx(c.ITEM)
        self._obs_cube[slice_idx] = self._slices[slice_idx].slice

        # Flush per agent inventory state
        for agent in self._agents:
            agent_slice_idx = self._slices.get_idx_by_name(inventory_slice_name(agent.name))
            inventory_slice = self._slices[agent_slice_idx].slice
            inventory_slice[:] = 0
            if len(agent.inventory) > 0:
                max_x = self.pomdp_r if self.pomdp_r else self._level_shape[0]
                x, y = (0, 0) if not self.pomdp_r else (max(agent.x - max_x, 0), max(agent.y - max_x, 0))
                for item in agent.inventory:
                    # The item value determines the cell (relative to x, y) it is drawn in.
                    x_diff, y_diff = divmod(item, max_x)
                    inventory_slice[int(x + x_diff), int(y + y_diff)] = item
            self._obs_cube[agent_slice_idx] = inventory_slice

    def _is_item_action(self, action):
        """Whether ``action`` (index, Action object or action name) is the item action."""
        if isinstance(action, int):
            action = self._actions[action]
        if isinstance(action, Action):
            action = action.name
        return action == h.EnvActions.ITEM_ACTION.name

    def do_item_action(self, agent: Agent):
        """Drop off or pick up an item at the agent's position.

        On the drop-off location the first inventory item is handed over; on an item the item is
        moved into the inventory as long as there is room for it.

        :param agent: The acting agent.
        :return: ``c.VALID`` / ``c.NOT_VALID``, or the result of ``place_item`` for a drop off.
        """
        # NOTE(review): the caller wraps the result in ``bool()``; that only works if the Constants
        # enum defines its own truthiness - confirm.
        item_slice = self._slices.by_enum(c.ITEM).slice
        item = item_slice[agent.pos]
        if not item:
            # Nothing to interact with on this cell.
            return c.NOT_VALID
        if item == ITEM_DROP_OFF:
            if not agent.inventory:
                return c.NOT_VALID
            return self._item_drop_off.place_item(agent.inventory.pop(0))
        if len(agent.inventory) >= self.item_properties.max_agent_storage_size:
            # Inventory is full.
            return c.NOT_VALID
        agent.inventory.append(item)
        item_slice[agent.pos] = NO_ITEM
        return c.VALID

    def do_additional_actions(self, agent: Agent, action: int) -> Union[None, bool]:
        """Handle the item action, unless the parent already handled ``action``.

        :return: The validity of the action, or None when the action is unknown to this class and its parents.
        """
        valid = super(self._super, self).do_additional_actions(agent, action)
        if valid is not None:
            return valid
        if not self._is_item_action(action):
            return None
        if not self.item_properties.agent_can_interact:
            return False
        return bool(self.do_item_action(agent))

    def do_additional_reset(self) -> None:
        """Spawn drop-off location and items, restart the spawn countdown, empty all inventories."""
        super(self._super, self).do_additional_reset()
        self.spawn_drop_off_location()
        self.spawn_items(self.item_properties.n_items)
        self._next_item_spawn = self.item_properties.spawn_frequency
        for agent in self._agents:
            agent.inventory = list()

    def do_additional_step(self) -> dict:
        """Count down to the next item spawn and refill items up to ``n_items`` when it is due."""
        info_dict = super(self._super, self).do_additional_step()
        if self._next_item_spawn:
            self._next_item_spawn -= 1
            return info_dict
        # The drop-off location occupies a cell of the item slice as well, hence the "- 1".
        n_items_present = np.sum(self._slices.by_enum(c.ITEM).slice.astype(bool)) - 1
        item_to_spawn = self.item_properties.n_items - n_items_present
        # A negative count must not reach ``spawn_items``, where it would be used as slice end.
        if item_to_spawn > 0:
            self.spawn_items(item_to_spawn)
            self._next_item_spawn = self.item_properties.spawn_frequency
        else:
            self.print('No Items are spawning, limit is reached.')
        return info_dict

    def spawn_drop_off_location(self):
        """Place the drop-off location on an empty tile and mark it in the item slice."""
        single_empty_tile = self._tiles.empty_tiles[0]
        self._item_drop_off = DropOffLocation(single_empty_tile,
                                              storage_size_until_full=self.item_properties.max_dropoff_storage_size)
        single_empty_tile.enter(self._item_drop_off)
        self._slices.by_enum(c.ITEM).slice[single_empty_tile.pos] = ITEM_DROP_OFF

    def calculate_additional_reward(self, agent: Agent) -> (int, dict):
        """Reward item actions: +1 for a drop off, +0.1 for a pick up, -0.1 for a failed item action."""
        reward, info_dict = super(self._super, self).calculate_additional_reward(agent)
        if self._is_item_action(agent.temp_action):
            if agent.temp_valid:
                if agent.pos == self._item_drop_off.pos:
                    info_dict.update({f'{agent.name}_item_dropoff': 1})
                    reward += 1
                else:
                    info_dict.update({f'{agent.name}_item_pickup': 1})
                    reward += 0.1
            else:
                info_dict.update({f'{agent.name}_failed_item_action': 1})
                reward -= 0.1
        return reward, info_dict

    def render_additional_assets(self, mode='human'):
        """Assets of the parent plus one render entity per item and one for the drop-off location."""
        additional_assets = super(self._super, self).render_additional_assets()
        item_slice = self._slices.by_enum(c.ITEM).slice
        items = [RenderEntity(DROP_OFF if item_slice[tile.pos] == ITEM_DROP_OFF else c.ITEM.value, tile.pos)
                 for tile in self._tiles if item_slice[tile.pos] != NO_ITEM]
        additional_assets.extend(items)
        return additional_assets

    def spawn_items(self, n_items):
        """Place up to ``n_items`` items, numbered from 1, on empty tiles."""
        tiles = self._tiles.empty_tiles[:n_items]
        item_slice = self._slices.by_enum(c.ITEM).slice
        for idx, tile in enumerate(tiles, start=1):
            item_slice[tile.pos] = idx
if __name__ == '__main__':
    # Small demo: run random actions in a single agent environment and print the reward per run.
    import random

    render = True
    item_props = ItemProperties()

    factory = DoubleTaskFactory(
        item_props,
        n_agents=1,
        done_at_collision=False,
        frames_to_stack=0,
        level_name='rooms',
        max_steps=400,
        omit_agent_slice_in_obs=True,
        parse_doors=True,
        pomdp_r=3,
        record_episodes=False,
        verbose=False,
    )

    # ``random.randint`` includes its upper bound, so the highest valid action index is used.
    n_actions = factory.action_space.n - 1
    _ = factory.observation_space

    for epoch in range(100):
        # All actions of a run are drawn up front, one action per agent and step.
        random_actions = [[random.randint(0, n_actions) for _ in range(factory.n_agents)]
                          for _ in range(200)]
        env_state = factory.reset()
        rew = 0
        for agent_i_action in random_actions:
            env_state, step_r, done_bool, info_obj = factory.step(agent_i_action)
            rew += step_r
            if render:
                factory.render()
            if done_bool:
                break
        print(f'Factory run {epoch} done, reward is:\n    {rew}')

View File

@ -1,115 +0,0 @@
import time
from collections import deque
from typing import List, Union, NamedTuple
import numpy as np
from environments.helpers import Constants as c
from environments import helpers as h
from environments.factory.base.base_factory import BaseFactory
from environments.factory.base.objects import Agent, Action, Object, Slice, Entity
from environments.factory.base.registers import Entities
from environments.factory.renderer import Renderer
from environments.utility_classes import MovementProperties
ITEM = 'item'
INVENTORY = 'inventory'
PICK_UP = 'pick_up'
DROP_DOWN = 'drop_down'
ITEM_ACTION = 'item_action'
NO_ITEM = 0
ITEM_DROP_OFF = -1
def inventory_slice_name(agent):
return f'{agent.name}_{INVENTORY}'
class DropOffLocation(Entity):
def __init__(self, *args, storage_size_until_full: int = 5, **kwargs):
super(DropOffLocation, self).__init__(*args, **kwargs)
self.storage = deque(maxlen=storage_size_until_full)
def place_item(self, item):
self.storage.append(item)
return True
@property
def is_full(self):
return self.storage.maxlen == len(self.storage)
class ItemProperties(NamedTuple):
n_items: int = 1 # How many items are there at the same time
spawn_frequency: int = 5 # Spawn Frequency in Steps
max_dropoff_storage_size: int = 5 # How many items are needed until the drop off is full
max_agent_storage_size: int = 5 # How many items are needed until the agent inventory is full
# noinspection PyAttributeOutsideInit
class ItemFactory(BaseFactory):
    """Factory environment with an item slice, one inventory slice per agent and a drop-off location.

    NOTE(review): the reviewed text carried no indentation; the nesting inside ``do_item_action``
    is a reconstruction and should be confirmed against the intended behaviour.
    """

    def __init__(self, item_properties: ItemProperties, *args, **kwargs):
        """
        :param item_properties: Item related settings (item count, spawn frequency, storage sizes).
        :param args: Forwarded to ``BaseFactory``.
        :param kwargs: Forwarded to ``BaseFactory``; an optional ``seed`` entry seeds the item RNG,
                       otherwise the current time in nanoseconds is used.
        """
        self.item_properties = item_properties
        # dict.get() accepts its fallback positionally only; passing ``default=`` raises a TypeError.
        self._item_rng = np.random.default_rng(kwargs.get('seed', time.time_ns()))
        super(ItemFactory, self).__init__(*args, **kwargs)

    @property
    def additional_actions(self) -> Union[str, List[str]]:
        """Names of the actions this environment adds."""
        return [ITEM_ACTION]

    @property
    def additional_entities(self) -> Union[Entities, List[Entities]]:
        """This environment adds no entities."""
        return []

    @property
    def additional_slices(self) -> Union[Slice, List[Slice]]:
        """One zero-filled item slice followed by one zero-filled inventory slice per agent."""
        item_slice = Slice(ITEM, np.zeros(self._level_shape))
        inventory_slices = [Slice(inventory_slice_name(agent), np.zeros(self._level_shape))
                            for agent in self._agents]
        return [item_slice] + inventory_slices

    def _is_item_action(self, action):
        """Tell whether ``action`` (an action name or an action index) is the item action."""
        if isinstance(action, str):
            action = self._actions.by_name(action)
        return self._actions[action].name == ITEM_ACTION

    def do_item_action(self, agent):
        """Perform the item action for ``agent`` on the cell it currently occupies.

        :param agent: The acting agent; its ``pos`` indexes the item slice.
        :return: True if the cell held a non-zero item value (the cell is cleared), False otherwise.
        """
        item_slice = self._slices.by_name(ITEM).slice
        inventory_slice = self._slices.by_name(inventory_slice_name(agent)).slice
        if item := item_slice[agent.pos]:
            if item == ITEM_DROP_OFF:
                # Hand the summed inventory over to the drop-off location; the result is not used here.
                self._item_drop_off.place_item(inventory_slice.sum())
            item_slice[agent.pos] = NO_ITEM
            return True
        else:
            return False

    def do_additional_actions(self, agent: Agent, action: int) -> bool:
        """Execute the item action for ``agent``.

        :raises RuntimeError: If ``action`` is not the item action.
        """
        if self._is_item_action(action):
            return self.do_item_action(agent)
        else:
            raise RuntimeError('This should not happen!!!')

    def do_additional_reset(self) -> None:
        """Create the drop-off location and spawn the initial items."""
        self.spawn_drop_off_location()
        # NOTE(review): neither ``n_items`` nor ``spawn_items`` is defined in this class; they must come
        # from the base class, otherwise ``self.item_properties.n_items`` is presumably meant - confirm.
        self.spawn_items(self.n_items)
        if self.n_items > 1:
            self._next_item_spawn = self.item_properties.spawn_frequency

    def spawn_drop_off_location(self):
        """Create the drop-off location sized by ``item_properties.max_dropoff_storage_size``."""
        # NOTE(review): the tile is looked up but not yet handed to the drop-off location.
        single_empty_tile = self._tiles.empty_tiles[0]
        self._item_drop_off = DropOffLocation(storage_size_until_full=self.item_properties.max_dropoff_storage_size)

    def calculate_reward(self) -> (int, dict):
        """Not implemented yet; returns None."""
        pass

    def render(self, mode='human'):
        """Not implemented yet."""
        pass

View File

@ -8,7 +8,7 @@ from typing import NamedTuple, Any
import time
class Entity(NamedTuple):
class RenderEntity(NamedTuple):
name: str
pos: np.array
value: float = 1
@ -108,7 +108,7 @@ class Renderer:
blits.extendleft(vis_rects)
if entity.state != 'blank':
agent_state_blits = self.blit_params(
Entity(entity.state, (entity.pos[0]+0.12, entity.pos[1]), 0.48, 'scale')
RenderEntity(entity.state, (entity.pos[0] + 0.12, entity.pos[1]), 0.48, 'scale')
)
textsurface = self.font.render(str(entity.id), False, (0, 0, 0))
text_blit = dict(source=textsurface, dest=(bp['dest'].center[0]-.07*self.cell_size,
@ -125,6 +125,6 @@ class Renderer:
if __name__ == '__main__':
renderer = Renderer(fps=2, cell_size=40)
for i in range(15):
entity_1 = Entity('agent', [5, i], 1, 'idle', 'idle')
entity_1 = RenderEntity('agent', [5, i], 1, 'idle', 'idle')
renderer.render([entity_1])

View File

@ -1,4 +1,5 @@
import time
from enum import Enum
from typing import List, Union, NamedTuple
import random
@ -7,24 +8,32 @@ import numpy as np
from environments.helpers import Constants as c
from environments import helpers as h
from environments.factory.base.base_factory import BaseFactory
from environments.factory.base.objects import Agent, Action, Object, Slice
from environments.factory.base.objects import Agent, Action, Slice
from environments.factory.base.registers import Entities
from environments.factory.renderer import Renderer, Entity
from environments.factory.renderer import RenderEntity
from environments.utility_classes import MovementProperties
DIRT = "dirt"
CLEAN_UP_ACTION = 'clean_up'
CLEAN_UP_ACTION = h.EnvActions.CLEAN_UP
class ObsSlice(Enum):
    """Choices for ``DirtProperties.on_obs_slice``."""
    # Not derived from a ``Constants`` member.
    # NOTE(review): presumably "dirt gets a slice of its own" - confirm.
    OWN = -1
    # These two reuse the values of the equally named ``Constants`` members.
    LEVEL = c.LEVEL.value
    AGENT = c.AGENT.value
class DirtProperties(NamedTuple):
clean_amount: int = 1 # How much does the robot clean with one actions.
max_spawn_ratio: float = 0.2 # On max how much tiles does the dirt spawn in percent.
gain_amount: float = 0.3 # How much dirt does spawn per tile
spawn_frequency: int = 5 # Spawn Frequency in Steps
gain_amount: float = 0.3 # How much dirt does spawn per tile.
spawn_frequency: int = 5 # Spawn Frequency in Steps.
max_local_amount: int = 2 # Max dirt amount per tile.
max_global_amount: int = 20 # Max dirt amount in the whole environment.
dirt_smear_amount: float = 0.2 # Agents smear dirt, when not cleaning up in place
dirt_smear_amount: float = 0.2 # Agents smear dirt, when not cleaning up in place.
agent_can_interact: bool = True # Whether the agents can interact with the dirt in this environment.
on_obs_slice: Enum = ObsSlice.LEVEL
def softmax(x):
@ -41,69 +50,50 @@ def entropy(x):
class SimpleFactory(BaseFactory):
@property
def additional_actions(self) -> List[Object]:
return [Action(CLEAN_UP_ACTION)]
def additional_actions(self) -> Union[Action, List[Action]]:
super_actions = super(SimpleFactory, self).additional_actions
if self.dirt_properties.agent_can_interact:
super_actions.append(Action(CLEAN_UP_ACTION))
return super_actions
@property
def additional_entities(self) -> Union[Entities, List[Entities]]:
return []
super_entities = super(SimpleFactory, self).additional_entities
return super_entities
@property
def additional_slices(self) -> List[Slice]:
return [Slice('dirt', np.zeros(self._level_shape))]
super_slices = super(SimpleFactory, self).additional_slices
super_slices.extend([Slice(c.DIRT, np.zeros(self._level_shape))])
return super_slices
def _is_clean_up_action(self, action: Union[str, int]):
if isinstance(action, str):
action = self._actions.by_name(action)
return self._actions[action].name == CLEAN_UP_ACTION
def _is_clean_up_action(self, action: Union[str, Action, int]):
if isinstance(action, int):
action = self._actions[action]
if isinstance(action, Action):
action = action.name
return action == CLEAN_UP_ACTION.name
def __init__(self, *args, dirt_properties: DirtProperties = DirtProperties(), **kwargs):
def __init__(self, *args, dirt_properties: DirtProperties = DirtProperties(), env_seed=time.time_ns(), **kwargs):
self.dirt_properties = dirt_properties
self._renderer = None # expensive - don't use it when not required !
self._dirt_rng = np.random.default_rng(kwargs.get('seed', default=time.time_ns()))
self._dirt_rng = np.random.default_rng(env_seed)
kwargs.update(env_seed=env_seed)
super(SimpleFactory, self).__init__(*args, **kwargs)
def _flush_state(self):
super(SimpleFactory, self)._flush_state()
self._obs_cube[self._slices.get_idx_by_name(DIRT)] = self._slices.by_name(DIRT).slice
self._obs_cube[self._slices.get_idx(c.DIRT)] = self._slices.by_enum(c.DIRT).slice
def render(self, mode='human'):
if not self._renderer: # lazy init
height, width = self._obs_cube.shape[1:]
self._renderer = Renderer(width, height, view_radius=self.pomdp_r, fps=5)
dirt_slice = self._slices.by_name(DIRT).slice
dirt = [Entity('dirt', tile.pos, min(0.15 + dirt_slice[tile.pos], 1.5), 'scale')
def render_additional_assets(self, mode='human'):
additional_assets = super(SimpleFactory, self).render_additional_assets()
dirt_slice = self._slices.by_enum(c.DIRT).slice
dirt = [RenderEntity('dirt', tile.pos, min(0.15 + dirt_slice[tile.pos], 1.5), 'scale')
for tile in [tile for tile in self._tiles if dirt_slice[tile.pos]]]
walls = [Entity('wall', pos)
for pos in np.argwhere(self._slices.by_enum(c.LEVEL).slice == c.OCCUPIED_CELL.value)]
def asset_str(agent):
# What does this abonimation do?
# if any([x is None for x in [self._slices[j] for j in agent.collisions]]):
# print('error')
col_names = [x.name for x in agent.temp_collisions]
if c.AGENT.value in col_names:
return 'agent_collision', 'blank'
elif not agent.temp_valid or c.LEVEL.name in col_names or c.AGENT.name in col_names:
return c.AGENT.value, 'invalid'
elif self._is_clean_up_action(agent.temp_action):
return c.AGENT.value, 'valid'
else:
return c.AGENT.value, 'idle'
agents = []
for i, agent in enumerate(self._agents):
name, state = asset_str(agent)
agents.append(Entity(name, agent.pos, 1, 'none', state, i+1, agent.temp_light_map))
doors = []
if self.parse_doors:
for i, door in enumerate(self._doors):
name, state = 'door_open' if door.is_open else 'door_closed', 'blank'
agents.append(Entity(name, door.pos, 1, 'none', state, i+1))
self._renderer.render(dirt+walls+agents+doors)
additional_assets.extend(dirt)
return additional_assets
def spawn_dirt(self) -> None:
dirt_slice = self._slices.by_name(DIRT).slice
dirt_slice = self._slices.by_enum(c.DIRT).slice
# dirty_tiles = [tile for tile in self._tiles if dirt_slice[tile.pos]]
curr_dirt_amount = dirt_slice.sum()
if not curr_dirt_amount > self.dirt_properties.max_global_amount:
@ -119,7 +109,7 @@ class SimpleFactory(BaseFactory):
pass
def clean_up(self, agent: Agent) -> bool:
dirt_slice = self._slices.by_name(DIRT).slice
dirt_slice = self._slices.by_enum(c.DIRT).slice
if old_dirt_amount := dirt_slice[agent.pos]:
new_dirt_amount = old_dirt_amount - self.dirt_properties.clean_amount
dirt_slice[agent.pos] = max(new_dirt_amount, c.FREE_CELL.value)
@ -128,10 +118,11 @@ class SimpleFactory(BaseFactory):
return False
def do_additional_step(self) -> dict:
info_dict = super(SimpleFactory, self).do_additional_step()
if smear_amount := self.dirt_properties.dirt_smear_amount:
dirt_slice = self._slices.by_name(DIRT).slice
dirt_slice = self._slices.by_enum(c.DIRT).slice
for agent in self._agents:
if agent.temp_valid and agent.last_pos != h.NO_POS:
if agent.temp_valid and agent.last_pos != c.NO_POS:
if dirt := dirt_slice[agent.last_pos]:
if smeared_dirt := round(dirt * smear_amount, 2):
dirt_slice[agent.last_pos] = max(0, dirt_slice[agent.last_pos]-smeared_dirt)
@ -144,23 +135,30 @@ class SimpleFactory(BaseFactory):
self._next_dirt_spawn = self.dirt_properties.spawn_frequency
else:
self._next_dirt_spawn -= 1
return {}
return info_dict
def do_additional_actions(self, agent: Agent, action: int) -> bool:
if self._is_clean_up_action(action):
valid = self.clean_up(agent)
return valid
def do_additional_actions(self, agent: Agent, action: int) -> Union[None, bool]:
valid = super(SimpleFactory, self).do_additional_actions(agent, action)
if valid is None:
if self._is_clean_up_action(action):
if self.dirt_properties.agent_can_interact:
valid = self.clean_up(agent)
return valid
else:
return False
else:
return None
else:
return c.NOT_VALID.value
return valid
def do_additional_reset(self) -> None:
super(SimpleFactory, self).do_additional_reset()
self.spawn_dirt()
self._next_dirt_spawn = self.dirt_properties.spawn_frequency
def calculate_reward(self) -> (int, dict):
info_dict = dict()
dirt_slice = self._slices.by_name(DIRT).slice
def calculate_additional_reward(self, agent: Agent) -> (int, dict):
reward, info_dict = super(SimpleFactory, self).calculate_additional_reward(agent)
dirt_slice = self._slices.by_enum(c.DIRT).slice
dirty_tiles = [dirt_slice[tile.pos] for tile in self._tiles if dirt_slice[tile.pos]]
current_dirt_amount = sum(dirty_tiles)
dirty_tile_count = len(dirty_tiles)
@ -173,56 +171,21 @@ class SimpleFactory(BaseFactory):
info_dict.update(dirty_tile_count=dirty_tile_count)
info_dict.update(dirt_distribution_score=dirt_distribution_score)
try:
# penalty = current_dirt_amount
reward = 0
except (ZeroDivisionError, RuntimeWarning):
reward = 0
for agent in self._agents:
if agent.temp_collisions:
self.print(f't = {self._steps}\t{agent.name} has collisions with {agent.temp_collisions}')
if self._is_clean_up_action(agent.temp_action):
if agent.temp_valid:
reward += 0.5
self.print(f'{agent.name} did just clean up some dirt at {agent.pos}.')
info_dict.update(dirt_cleaned=1)
else:
reward -= 0.01
self.print(f'{agent.name} just tried to clean up some dirt at {agent.pos}, but failed.')
info_dict.update({f'{agent.name}_failed_action': 1})
info_dict.update({f'{agent.name}_failed_action': 1})
info_dict.update({f'{agent.name}_failed_dirt_cleanup': 1})
elif self._actions.is_moving_action(agent.temp_action):
if agent.temp_valid:
# info_dict.update(movement=1)
reward -= 0.00
else:
# self.print('collision')
reward -= 0.01
self.print(f'{agent.name} just hit the wall at {agent.pos}.')
info_dict.update({f'{agent.name}_vs_LEVEL': 1})
elif self._actions.is_door_usage(agent.temp_action):
if agent.temp_valid:
self.print(f'{agent.name} did just use the door at {agent.pos}.')
info_dict.update(door_used=1)
else:
reward -= 0.01
self.print(f'{agent.name} just tried to use a door at {agent.pos}, but failed.')
info_dict.update({f'{agent.name}_failed_action': 1})
info_dict.update({f'{agent.name}_failed_door_open': 1})
if agent.temp_collisions:
self.print(f't = {self._steps}\t{agent.name} has collisions with {agent.temp_collisions}')
if self._is_clean_up_action(agent.temp_action):
if agent.temp_valid:
reward += 0.5
self.print(f'{agent.name} did just clean up some dirt at {agent.pos}.')
info_dict.update(dirt_cleaned=1)
else:
info_dict.update(no_op=1)
reward -= 0.00
reward -= 0.01
self.print(f'{agent.name} just tried to clean up some dirt at {agent.pos}, but failed.')
info_dict.update({f'{agent.name}_failed_action': 1})
info_dict.update({f'{agent.name}_failed_action': 1})
info_dict.update({f'{agent.name}_failed_dirt_cleanup': 1})
for other_agent in agent.temp_collisions:
info_dict.update({f'{agent.name}_vs_{other_agent.name}': 1})
self.print(f"reward is {reward}")
# Potential based rewards ->
# track the last reward , minus the current reward = potential
return reward, info_dict

View File

@ -5,58 +5,76 @@ from typing import Tuple, Union
import numpy as np
from pathlib import Path
# Constants
# Symbolic constants shared by the environments.
# Member order matters here: it decides which member is canonical when values are equal
# and which values auto() produces.
class Constants(Enum):
# WALL is the level-grid character that one_hot_level() marks as occupied by default.
WALL = '#'
DOOR = 'D'
DANGER_ZONE = 'x'
LEVEL = 'level'
AGENT = 'Agent'
FREE_CELL = 0
OCCUPIED_CELL = 1
DOORS = 'doors'
# Enum treats members with equal values as aliases of the first one defined:
# CLOSED_DOOR (1) therefore resolves to OCCUPIED_CELL.
CLOSED_DOOR = 1
OPEN_DOOR = -1
# NOTE(review): auto() derives its value from the members defined before it; with the mixed
# str/int values above the outcome depends on the Python version - confirm that ACTION and
# COLLISIONS do not end up as aliases of FREE_CELL / OCCUPIED_CELL.
ACTION = auto()
COLLISIONS = auto()
# True == 1 and False == 0, so VALID is an alias of OCCUPIED_CELL and NOT_VALID an alias of
# FREE_CELL; their .value is the int, only the truthiness is what the names suggest.
VALID = True
NOT_VALID = False
# Truthiness of a member follows its value (e.g. FREE_CELL is falsy, WALL is truthy).
def __bool__(self):
return bool(self.value)
# NOTE(review): presumably the name of the directory that holds the level files - confirm.
LEVELS_DIR = 'levels'
# NOTE(review): presumably names of logged values that get averaged during evaluation - confirm.
TO_BE_AVERAGED = ['dirt_amount', 'dirty_tiles']
# Imported by main.py; NOTE(review): presumably data-frame columns to leave out when plotting - confirm.
IGNORED_DF_COLUMNS = ['Episode', 'Run', 'train_step', 'step', 'index', 'dirt_amount',
'dirty_tile_count', 'terminal_observation', 'episode']
# Movement names; they match the values of the ManhattanMoves / DiagonalMoves enums.
MANHATTAN_MOVES = ['north', 'east', 'south', 'west']
DIAGONAL_MOVES = ['north_east', 'south_east', 'south_west', 'north_west']
# Sentinel position; agent.last_pos is compared against it before dirt is smeared.
NO_POS = (-9999, -9999)
# Constants
class Constants(Enum):
WALL = '#'
DOOR = 'D'
DANGER_ZONE = 'x'
LEVEL = 'level'
AGENT = 'Agent'
FREE_CELL = 0
OCCUPIED_CELL = 1
NO_POS = (-9999, -9999)
ACTIONMAP = defaultdict(lambda: (0, 0), dict(north=(-1, 0), east=(0, 1),
south=(1, 0), west=(0, -1),
north_east=(-1, +1), south_east=(1, 1),
south_west=(+1, -1), north_west=(-1, -1)
)
DOORS = 'doors'
CLOSED_DOOR = 1
OPEN_DOOR = -1
ACTION = auto()
COLLISIONS = auto()
VALID = True
NOT_VALID = False
# Dirt Env
DIRT = 'dirt'
# Item Env
ITEM = 'item'
INVENTORY = 'inventory'
def __bool__(self):
return bool(self.value)
class ManhattanMoves(Enum):
    """The four axis-aligned movement directions; values are the lower-case direction names."""
    NORTH = 'north'
    EAST = 'east'
    SOUTH = 'south'
    WEST = 'west'
class DiagonalMoves(Enum):
    """The four diagonal movement directions; values are the snake-case direction names."""
    NORTHEAST = 'north_east'
    SOUTHEAST = 'south_east'
    SOUTHWEST = 'south_west'
    NORTHWEST = 'north_west'
class EnvActions(Enum):
    """Non-movement actions of the environments; values are the action names."""
    NOOP = 'no_op'
    USE_DOOR = 'use_door'
    CLEAN_UP = 'clean_up'
    ITEM_ACTION = 'item_action'
# Short aliases for the enums defined above.
d = DiagonalMoves
m = ManhattanMoves
c = Constants
# Maps a movement key to a 2-tuple offset; any unknown key falls back to (0, 0).
# The keys are the enum member *names* ('NORTH', 'NORTHEAST', ...), not their values
# ('north', 'north_east', ...).
# NOTE(review): confirm that every lookup uses the upper-case names; a lower-case key would
# silently yield (0, 0) and, because this is a defaultdict, also be inserted into the map.
ACTIONMAP = defaultdict(lambda: (0, 0), {m.NORTH.name: (-1, 0), d.NORTHEAST.name: (-1, +1),
m.EAST.name: (0, 1), d.SOUTHEAST.name: (1, 1),
m.SOUTH.name: (1, 0), d.SOUTHWEST.name: (+1, -1),
m.WEST.name: (0, -1), d.NORTHWEST.name: (-1, -1)
}
)
# 3x3 integer masks related to door handling (their usage is not visible in this part of the file).
HORIZONTAL_DOOR_MAP = np.asarray([[0, 0, 0], [1, 0, 1], [0, 0, 0]])
VERTICAL_DOOR_MAP = np.asarray([[0, 1, 0], [0, 0, 0], [0, 1, 0]])
# The two horizontal zones select the top row and the bottom row respectively.
HORIZONTAL_DOOR_ZONE_1 = np.asarray([[1, 1, 1], [0, 0, 0], [0, 0, 0]])
HORIZONTAL_DOOR_ZONE_2 = np.asarray([[0, 0, 0], [0, 0, 0], [1, 1, 1]])
# NOTE(review): both vertical zones are identical (two diagonal corners), unlike the horizontal
# pair - presumably the left and the right column were intended; confirm before relying on them.
VERTICAL_DOOR_ZONE_1 = np.asarray([[1, 0, 0], [0, 0, 0], [0, 0, 1]])
VERTICAL_DOOR_ZONE_2 = np.asarray([[1, 0, 0], [0, 0, 0], [0, 0, 1]])
# Utility functions
def parse_level(path):
@ -67,13 +85,13 @@ def parse_level(path):
return level
def one_hot_level(level, wall_char: Union[Constants, str] = Constants.WALL):
def one_hot_level(level, wall_char: Union[c, str] = c.WALL):
grid = np.array(level)
binary_grid = np.zeros(grid.shape, dtype=np.int8)
if wall_char in Constants:
binary_grid[grid == wall_char.value] = Constants.OCCUPIED_CELL.value
if wall_char in c:
binary_grid[grid == wall_char.value] = c.OCCUPIED_CELL.value
else:
binary_grid[grid == wall_char] = Constants.OCCUPIED_CELL.value
binary_grid[grid == wall_char] = c.OCCUPIED_CELL.value
return binary_grid
@ -89,7 +107,22 @@ def check_position(slice_to_check_against: np.ndarray, position_to_check: Tuple[
# Check for collision with level walls
valid = valid and not slice_to_check_against[x_pos, y_pos]
return Constants.VALID if valid else Constants.NOT_VALID
return c.VALID if valid else c.NOT_VALID
def asset_str(agent):
    """Pick the render asset and state for ``agent`` from the outcome of its last step.

    :param agent: Object exposing ``temp_collisions`` (objects with a ``name``) and ``temp_valid``.
    :return: Tuple ``(asset name, state)``.
    """
    collided_with = [collision.name for collision in agent.temp_collisions]

    # A collision partner named like the agent constant's value gets its own asset.
    if c.AGENT.value in collided_with:
        return 'agent_collision', 'blank'

    if not agent.temp_valid or c.LEVEL.name in collided_with or c.AGENT.name in collided_with:
        state = 'invalid'
    elif agent.temp_valid:
        state = 'valid'
    else:
        # NOTE(review): not reachable while ``temp_valid`` has a consistent truth value - the two
        # conditions above already cover both cases; confirm whether 'idle' is still wanted.
        state = 'idle'
    return c.AGENT.value, state
if __name__ == '__main__':

18
main.py
View File

@ -9,6 +9,7 @@ import pandas as pd
from stable_baselines3.common.callbacks import CallbackList
from environments.factory.double_task_factory import DoubleTaskFactory, ItemProperties
from environments.factory.simple_factory import DirtProperties, SimpleFactory
from environments.helpers import IGNORED_DF_COLUMNS
from environments.logging.monitor import MonitorCallback
@ -94,11 +95,12 @@ if __name__ == '__main__':
dirt_props = DirtProperties(clean_amount=1, gain_amount=0.1, max_global_amount=20,
max_local_amount=1, spawn_frequency=5, max_spawn_ratio=0.05,
dirt_smear_amount=0.0)
dirt_smear_amount=0.0, agent_can_interact=False)
item_props = ItemProperties(n_items=5, agent_can_interact=True)
move_props = MovementProperties(allow_diagonal_movement=True,
allow_square_movement=True,
allow_no_op=False)
train_steps = 2.5e6
train_steps = 6e5
time_stamp = int(time.time())
out_path = None
@ -106,11 +108,13 @@ if __name__ == '__main__':
for modeL_type in [A2C, PPO, DQN]: # ,RegDQN, QRDQN]:
for seed in range(3):
with SimpleFactory(n_agents=1, dirt_properties=dirt_props, pomdp_radius=2, max_steps=400, parse_doors=True,
movement_properties=move_props, level_name='rooms', frames_to_stack=3,
omit_agent_slice_in_obs=True, combin_agent_slices_in_obs=True, record_episodes=False,
cast_shadows=True, doors_have_area=False, seed=seed
) as env:
with DoubleTaskFactory(n_agents=1, with_dirt=False,
item_properties=item_props, dirt_properties=None, movement_properties=move_props,
pomdp_radius=2, max_steps=500, parse_doors=True,
level_name='rooms', frames_to_stack=3,
omit_agent_slice_in_obs=True, combin_agent_slices_in_obs=True, record_episodes=False,
cast_shadows=True, doors_have_area=False, seed=seed
) as env:
if modeL_type.__name__ in ["PPO", "A2C"]:
kwargs = dict(ent_coef=0.01)

View File

@ -28,3 +28,5 @@ PyYAML~=5.3.1
pyglet~=1.5.0
optuna~=2.7.0
natsort~=7.1.1
tqdm~=4.60.0
networkx~=2.6.1