Restructuring and Testing Done

This commit is contained in:
steffen-illium
2021-07-13 11:12:03 +02:00
parent eee4760e72
commit 35f5bdeed4
14 changed files with 1160 additions and 842 deletions

View File

@ -0,0 +1,370 @@
from pathlib import Path
from typing import List, Union, Iterable
import gym
import numpy as np
from gym import spaces
import yaml
from gym.wrappers import FrameStack
from environments.helpers import Constants as c, Constants
from environments import helpers as h
from environments.factory.base.objects import Slice, Agent, Tile, Action, MoveableEntity
from environments.factory.base.registers import StateSlices, Actions, Entities, Agents, Doors, FloorTiles
from environments.utility_classes import MovementProperties
REC_TAC = 'rec'


# noinspection PyAttributeOutsideInit
class BaseFactory(gym.Env):
    """Base class of the grid-world factory environments.

    The state is held twice: as registers of objects (tiles, agents, doors,
    additional entities) and as an observation cube with one 2D slice per
    registered state slice. Sub-classes have to provide
    ``additional_actions``, ``additional_entities``, ``additional_slices``,
    ``do_additional_actions``, ``calculate_reward`` and ``render``.
    """

    @property
    def action_space(self):
        """Discrete space with one entry per registered action."""
        return spaces.Discrete(self._actions.n)

    @property
    def observation_space(self):
        """Box space matching the per-agent observation shape.

        The first axis shrinks by ``n_agents`` when agent slices are omitted
        and by ``n_agents - 1`` when they are combined into a single slice.
        """
        agent_slice = self.n_agents if self.omit_agent_slice_in_obs else 0
        agent_slice = (self.n_agents - 1) if self.combin_agent_slices_in_obs else agent_slice
        if self.pomdp_radius:
            shape = (self._obs_cube.shape[0] - agent_slice, self.pomdp_diameter, self.pomdp_diameter)
        else:
            shape = [x - agent_slice if idx == 0 else x for idx, x in enumerate(self._obs_cube.shape)]
        return spaces.Box(low=0, high=1, shape=shape, dtype=np.float32)

    @property
    def pomdp_diameter(self):
        """Edge length of the partial observation window."""
        return self.pomdp_radius * 2 + 1

    @property
    def movement_actions(self):
        """The movement actions held by the action register."""
        return self._actions.movement_actions

    @property
    def additional_actions(self) -> Union[str, List[str]]:
        """
        When inheriting from this base class, you must implement this method!

        :return: A list of Action objects holding all additional actions.
        :rtype: List[Action]
        """
        raise NotImplementedError('Please register additional actions ')

    @property
    def additional_entities(self) -> Union[Entities, List[Entities]]:
        """
        When inheriting from this base class, you must implement this method!

        :return: A single Entities collection or a list of such.
        :rtype: Union[Entities, List[Entities]]
        """
        raise NotImplementedError('Please register additional entities.')

    @property
    def additional_slices(self) -> Union[Slice, List[Slice]]:
        """
        When inheriting from this base class, you must implement this method!

        :return: A list of Slice objects.
        :rtype: List[Slice]
        """
        raise NotImplementedError('Please register additional slices.')

    def __enter__(self):
        """Return the environment, wrapped in a FrameStack if frames are stacked."""
        return self if self.frames_to_stack == 0 else FrameStack(self, self.frames_to_stack)

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def __init__(self, level_name='simple', n_agents=1, max_steps=int(5e2), pomdp_radius: Union[None, int] = 0,
                 movement_properties: MovementProperties = MovementProperties(), parse_doors=False,
                 combin_agent_slices_in_obs: bool = False, frames_to_stack=0, record_episodes=False,
                 omit_agent_slice_in_obs=False, done_at_collision=False, **kwargs):
        """Store the configuration, build the action register and reset once.

        :param level_name: Stem of the level text file inside the levels directory.
        :param n_agents: Number of agents placed on the level.
        :param max_steps: Step count at which an episode is flagged as done.
        :param pomdp_radius: Radius of the per-agent view; a falsy value yields the full cube.
        :param movement_properties: Passed on to the action register.
        :param parse_doors: Whether doors are parsed and the door action is registered.
        :param combin_agent_slices_in_obs: Sum all agent slices into one slice.
        :param frames_to_stack: 0 for no stacking, otherwise >= 2.
        :param record_episodes: Add a state summary to the info dict.
        :param omit_agent_slice_in_obs: Drop all agent slices from the observation.
        :param done_at_collision: End the episode as soon as a collision occurs.
        """
        assert (combin_agent_slices_in_obs != omit_agent_slice_in_obs) or \
               (not combin_agent_slices_in_obs and not omit_agent_slice_in_obs), \
               'Both options are exclusive'
        assert frames_to_stack != 1 and frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1."

        # Attribute Assignment
        self.movement_properties = movement_properties
        self.level_name = level_name
        self._level_shape = None
        self.n_agents = n_agents
        self.max_steps = max_steps
        self.pomdp_radius = pomdp_radius
        self.combin_agent_slices_in_obs = combin_agent_slices_in_obs
        self.omit_agent_slice_in_obs = omit_agent_slice_in_obs
        self.frames_to_stack = frames_to_stack
        self.done_at_collision = done_at_collision
        self.record_episodes = record_episodes
        self.parse_doors = parse_doors

        # Actions
        self._actions = Actions(self.movement_properties, can_use_doors=self.parse_doors)
        if additional_actions := self.additional_actions:
            self._actions.register_additional_items(additional_actions)

        self.reset()

    def _init_state_slices(self) -> StateSlices:
        """Parse the level file and register level, door and agent slices."""
        state_slices = StateSlices()

        # Level
        level_filepath = Path(__file__).parent.parent / h.LEVELS_DIR / f'{self.level_name}.txt'
        parsed_level = h.parse_level(level_filepath)
        level = [Slice(c.LEVEL.name, h.one_hot_level(parsed_level))]
        self._level_shape = level[0].shape

        # Doors
        # Registered under the enum *name*, because every lookup in this class
        # (by_enum / get_idx) resolves slices by `enum.name`. The slice is also
        # registered for door-less levels, so those lookups cannot fail.
        parsed_doors = h.one_hot_level(parsed_level, c.DOOR)
        doors = [Slice(c.DOORS.name, parsed_doors)] if self.parse_doors else []

        # Agents
        agents = [Slice(f'{c.AGENT.name}#{i}', np.zeros_like(level[0].slice)) for i in range(self.n_agents)]
        state_slices.register_additional_items(level + doors + agents)

        # Additional Slices from SubDomains
        if additional_slices := self.additional_slices:
            state_slices.register_additional_items(additional_slices)
        return state_slices

    def _init_obs_cube(self) -> np.ndarray:
        """Allocate the observation cube and, for POMDP views, its padded twin."""
        x, y = self._slices.by_enum(c.LEVEL).shape
        state = np.zeros((len(self._slices), x, y))
        state[0] = self._slices.by_enum(c.LEVEL).slice
        if r := self.pomdp_radius:
            # Padding of width r: slice 0 is filled with the occupied value, all others with the free value.
            self._padded_obs_cube = np.full((len(self._slices), x + r*2, y + r*2), c.FREE_CELL.value)
            self._padded_obs_cube[0] = c.OCCUPIED_CELL.value
            self._padded_obs_cube[:, r:r+x, r:r+y] = state
        return state

    def _init_entities(self):
        """Create tiles, doors and agents and collect them in an Entities register."""
        # Tile Init
        self._tiles = FloorTiles.from_argwhere_coordinates(self._slices.by_enum(c.LEVEL).free_tiles)

        # Door Init
        if self.parse_doors:
            tiles = [self._tiles.by_pos(x) for x in self._slices.by_enum(c.DOORS).occupied_tiles]
            self._doors = Doors.from_tiles(tiles, context=self._tiles)

        # Agent Init on random positions; sampled without replacement so that
        # no two agents start on the same tile.
        self._agents = Agents.from_tiles(np.random.choice(self._tiles, self.n_agents, replace=False))
        entities = Entities()
        entities.register_additional_items([self._agents])

        if self.parse_doors:
            entities.register_additional_items([self._doors])

        if additional_entities := self.additional_entities:
            entities.register_additional_items([additional_entities])

        return entities

    def reset(self) -> (np.ndarray, int, bool, dict):
        """Rebuild slices, observation cube and entities and reset the step counter.

        :return: ``(None, None, None, info)``; ``info`` holds the state summary
                 when ``record_episodes`` is set, else it is empty.
        """
        # NOTE(review): no observation is returned here, although gym's reset()
        # contract expects one - confirm against the callers before changing.
        self._slices = self._init_state_slices()
        self._obs_cube = self._init_obs_cube()
        self._entitites = self._init_entities()
        self._flush_state()
        self._steps = 0

        info = self._summarize_state() if self.record_episodes else {}
        return None, None, None, info

    def pre_step(self) -> None:
        """Hook called at the beginning of every step; does nothing by default."""
        pass

    def post_step(self) -> dict:
        """Hook called at the end of every step.

        :return: Entries that are merged into the info dict; empty by default.
        """
        return dict()

    def step(self, actions):
        """Apply one action per agent and advance the environment by one step.

        :param actions: A single scalar action or an iterable with one action index per agent.
        :return: ``(obs, reward, done, info)``
        """
        actions = [actions] if isinstance(actions, int) or np.isscalar(actions) else actions
        assert isinstance(actions, Iterable), f'"actions" has to be in [{int, list}]'
        self._steps += 1
        done = False

        # Pre step Hook for later use
        self.pre_step()

        # Move this in a separate function?
        for action, agent in zip(actions, self._agents):
            agent.clear_temp_sate()
            action_obj = self._actions[action]
            if self._actions.is_moving_action(action):
                valid = self._move_or_colide(agent, action_obj)
            elif self._actions.is_no_op(action):
                valid = c.VALID.value
            elif self._actions.is_door_usage(action):
                # Check if the agent really stands on a door:
                if door := self._doors.by_pos(agent.pos):
                    door.use()
                    valid = c.VALID.value
                # When it doesn't...
                else:
                    valid = c.NOT_VALID.value
            else:
                valid = self.do_additional_actions(agent, action)
            agent.temp_action = action
            agent.temp_valid = valid

        self._flush_state()

        tiles_with_collisions = self.get_all_tiles_with_collisions()
        for tile in tiles_with_collisions:
            guests = tile.guests_that_can_collide
            for i, guest in enumerate(guests):
                # Every guest collides with all other guests on its tile.
                guest.temp_collisions = guests[:i] + guests[i + 1:]

        if self.done_at_collision and tiles_with_collisions:
            done = True

        # Step the door close interval
        if self.parse_doors:
            self._doors.tick_doors()

        # Finalize
        reward, info = self.calculate_reward()
        if self._steps >= self.max_steps:
            done = True
        info.update(step_reward=reward, step=self._steps)
        if self.record_episodes:
            info.update(self._summarize_state())

        # Post step Hook for later use; a hook that returns None is tolerated.
        info.update(self.post_step() or {})

        obs = self._get_observations()

        return obs, reward, done, info

    def _flush_state(self):
        """Write the current door and agent states into the observation cube."""
        # Clear every slice but the level slice.
        self._obs_cube[np.arange(len(self._slices)) != self._slices.get_idx(c.LEVEL)] = c.FREE_CELL.value
        if self.parse_doors:
            door_slice = self._obs_cube[self._slices.get_idx(c.DOORS)]
            for door in self._doors:
                door_slice[door.pos] = c.IS_OPEN_DOOR.value if door.is_open else c.IS_CLOSED_DOOR.value
        for agent in self._agents:
            agent_slice = self._obs_cube[self._slices.get_idx_by_name(agent.name)]
            agent_slice[agent.pos] = c.OCCUPIED_CELL.value
            if agent.last_pos != h.NO_POS:
                agent_slice[agent.last_pos] = c.FREE_CELL.value

    def _get_observations(self) -> np.ndarray:
        """Return the single agent's observation, or all observations stacked."""
        if self.n_agents == 1:
            obs = self._build_per_agent_obs(self._agents[0])
        elif self.n_agents >= 2:
            obs = np.stack([self._build_per_agent_obs(agent) for agent in self._agents])
        else:
            raise ValueError('n_agents cannot be smaller than 1!!')
        return obs

    def _build_per_agent_obs(self, agent: Agent) -> np.ndarray:
        """Build the observation of one agent.

        With a POMDP radius the window around the agent is cut from the padded
        cube, otherwise the full cube is used. Agent slices are then omitted
        or combined, depending on the configuration.
        """
        first_agent_slice = self._slices.AGENTSTARTIDX
        if r := self.pomdp_radius:
            x, y = self._level_shape
            self._padded_obs_cube[:, r:r + x, r:r + y] = self._obs_cube
            global_x, global_y = agent.pos
            # Shift into the coordinates of the padded cube.
            global_x += r
            global_y += r
            x0, x1 = max(0, global_x - r), global_x + r + 1
            y0, y1 = max(0, global_y - r), global_y + r + 1
            obs = self._padded_obs_cube[:, x0:x1, y0:y1]
        else:
            obs = self._obs_cube

        if self.omit_agent_slice_in_obs:
            # Agent slices are named '<AGENT.name>#<i>'; the register values are
            # Slice objects, so the test has to look at their names.
            keep = [idx for idx, state_slice in self._slices.items() if c.AGENT.name not in state_slice.name]
            return obs[keep]

        if self.combin_agent_slices_in_obs:
            agent_idxs = [idx for idx, state_slice in self._slices.items() if c.AGENT.name in state_slice.name]
            agent_obs = np.sum(obs[agent_idxs], axis=0, keepdims=True)
            obs = np.concatenate((obs[:first_agent_slice], agent_obs, obs[first_agent_slice+self.n_agents:]))
        return obs

    def do_additional_actions(self, agent_i: int, action: int) -> bool:
        """Perform an action that is neither a move, a no-op nor a door usage.

        Note: ``step`` passes the agent object (not an index) as ``agent_i``.

        :return: Whether the action was valid.
        """
        raise NotImplementedError

    def get_all_tiles_with_collisions(self) -> List[Tile]:
        """Return all tiles that hold at least two guests which can collide."""
        tiles_with_collisions = list()
        for tile in self._tiles:
            if tile.is_occupied() and len(tile.guests_that_can_collide) >= 2:
                tiles_with_collisions.append(tile)
        return tiles_with_collisions

    def _move_or_colide(self, agent: Agent, action: Action) -> Constants:
        """Move the agent if the move passes the check.

        :return: The result of ``agent.move`` or ``c.NOT_VALID``.
        """
        new_tile, valid = self._check_agent_move(agent, action)
        if valid:
            # Does not collide with level boundaries
            return agent.move(new_tile)
        else:
            # Agent seems to be trying to collide in this step
            return c.NOT_VALID

    def _check_agent_move(self, agent, action: Action) -> (Tile, bool):
        """Determine the target tile of a movement action.

        :return: ``(tile, valid)``; the agent's own tile is returned when the
                 target does not exist or a closed door blocks the passage.
        """
        # Actions
        x_diff, y_diff = h.ACTIONMAP[action.name]
        x_new = agent.x + x_diff
        y_new = agent.y + y_diff

        new_tile = self._tiles.by_pos((x_new, y_new))
        if not new_tile:
            # NOTE(review): a move off the level is reported as VALID here; the
            # caller then gets False from agent.move(own tile) - confirm intent.
            return agent.tile, c.VALID

        if self.parse_doors and agent.last_pos != h.NO_POS:
            door = self._doors.by_pos(agent.pos)
            # Standing on a closed door, the agent may only pass to tiles that
            # are linked to the tile it came from.
            if door and not door.is_open and not door.is_linked(agent.last_pos, new_tile.pos):
                return agent.tile, c.NOT_VALID

        return new_tile, c.VALID

    def calculate_reward(self) -> (int, dict):
        """Compute the reward of the current step.

        :return: ``(reward, info)``
        """
        raise NotImplementedError

    def render(self, mode='human'):
        raise NotImplementedError

    def save_params(self, filepath: Path):
        """Dump all public instance attributes as YAML to ``filepath``."""
        d = {key: val for key, val in self.__dict__.items() if not key.startswith('_')}
        filepath.parent.mkdir(parents=True, exist_ok=True)
        with filepath.open('w') as f:
            yaml.dump(d, f)

    def _summarize_state(self):
        """Collect the step counter and the summaries of all entities that provide one."""
        summary = {f'{REC_TAC}_step': self._steps}
        for entity in self._entitites:
            if hasattr(entity, 'summarize_state'):
                summary.update({f'{REC_TAC}_{entity.name}': entity.summarize_state()})
        return summary

View File

@ -0,0 +1,266 @@
import itertools
import networkx as nx
import numpy as np
from environments import helpers as h
from environments.helpers import Constants as c
import itertools
def sub(p, q):
    """Return the difference ``p - q``."""
    difference = p - q
    return difference
class Object:
    """Root of all environment objects; it only carries an identifier."""

    def __init__(self, identifier, **kwargs):
        """Store the identifier; extra keyword arguments are reported and dropped."""
        self._identifier = identifier
        if not kwargs:
            return
        print(f'Following kwargs were passed, but ignored: {kwargs}')

    def __bool__(self):
        # Always truthy, even for sub-classes that define __len__.
        return True

    def __repr__(self):
        return f'{type(self).__name__}({self._identifier})'

    @property
    def i(self):
        """The identifier given at construction."""
        return self._identifier

    @property
    def name(self):
        """The identifier given at construction (same value as ``i``)."""
        return self._identifier
class Action(Object):
    """A single action; its name is its identifier."""

    def __init__(self, *args):
        """Forward all positional arguments to ``Object``."""
        super().__init__(*args)

    @property
    def name(self):
        """The action's identifier."""
        return self.i
class Slice(Object):
    """A named array that forms one layer of the state."""

    def __init__(self, identifier, arrayslice):
        """Keep ``arrayslice`` under the name ``identifier``."""
        super().__init__(identifier)
        self.slice = arrayslice

    def _cells_with(self, value):
        # Index coordinates of every cell that equals `value`.
        return np.argwhere(self.slice == value)

    @property
    def shape(self):
        """Shape of the wrapped array."""
        return self.slice.shape

    @property
    def occupied_tiles(self):
        """Coordinates of all cells holding the occupied-cell value."""
        return self._cells_with(c.OCCUPIED_CELL.value)

    @property
    def free_tiles(self):
        """Coordinates of all cells holding the free-cell value."""
        return self._cells_with(c.FREE_CELL.value)
class Wall(Object):
    """Marker type for walls; adds nothing to ``Object``."""
class Tile(Object):
    """A floor position that keeps track of the guests standing on it."""

    def __init__(self, i, pos):
        """Create an empty tile with identifier ``i`` at position ``pos``."""
        super().__init__(i)
        self._guests = dict()
        self._pos = tuple(pos)

    def __len__(self):
        return len(self._guests)

    @property
    def pos(self):
        """Position of the tile as a tuple."""
        return self._pos

    @property
    def x(self):
        """First coordinate of the position."""
        return self.pos[0]

    @property
    def y(self):
        """Second coordinate of the position."""
        return self.pos[1]

    @property
    def guests(self):
        """View on all guests currently registered on this tile."""
        return self._guests.values()

    @property
    def guests_that_can_collide(self):
        """List of the guests whose ``can_collide`` is truthy."""
        return [guest for guest in self.guests if guest.can_collide]

    def is_empty(self):
        """True if no guest is registered."""
        return not len(self._guests)

    def is_occupied(self):
        """Number of registered guests (truthy if there is at least one)."""
        return len(self._guests)

    def enter(self, guest):
        """Register ``guest`` by its name; False if that name is already present."""
        if guest.name in self._guests:
            return False
        self._guests.update({guest.name: guest})
        return True

    def leave(self, guest):
        """Remove ``guest`` by its name; False if it was not registered."""
        try:
            del self._guests[guest.name]
        except (ValueError, KeyError):
            return False
        else:
            return True
class Entity(Object):
    """An object that is located on a tile."""

    def __init__(self, identifier, tile: Tile, **kwargs):
        """Create the entity on ``tile``; remaining kwargs go to ``Object``."""
        super().__init__(identifier, **kwargs)
        self._tile = tile

    @property
    def tile(self):
        """The tile the entity is located on."""
        return self._tile

    @property
    def pos(self):
        """Position of the entity's tile."""
        return self._tile.pos

    @property
    def x(self):
        """First coordinate of the position."""
        return self.pos[0]

    @property
    def y(self):
        """Second coordinate of the position."""
        return self.pos[1]

    @property
    def can_collide(self):
        """Entities can collide by default."""
        return True

    @property
    def encoding(self):
        """Encoding value of the entity, 1 by default."""
        return 1

    def summarize_state(self):
        """Return a shallow copy of the instance attributes."""
        return self.__dict__.copy()
class MoveableEntity(Entity):
    """An entity that can change its tile and remembers the previous one."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # No previous tile before the first successful move.
        self._last_tile = None

    @property
    def last_tile(self):
        """The tile occupied before the last move, or None."""
        return self._last_tile

    @property
    def last_pos(self):
        """Position of the previous tile, or ``h.NO_POS`` if there is none."""
        return self._last_tile.pos if self._last_tile else h.NO_POS

    @property
    def direction_of_view(self):
        """Component-wise difference ``last_pos - pos``."""
        last_x, last_y = self.last_pos
        curr_x, curr_y = self.pos
        delta_x = last_x - curr_x
        delta_y = last_y - curr_y
        return delta_x, delta_y

    def move(self, next_tile):
        """Change to ``next_tile``.

        :return: True if the tile changed, False if ``next_tile`` is the current tile.
        """
        origin = self.tile
        if origin == next_tile:
            return False
        next_tile.enter(self)
        origin.leave(self)
        self._tile = next_tile
        self._last_tile = origin
        return True
class Door(Entity):
    """A door that can be opened and closed and closes itself after a while.

    ``connectivity`` is a graph over the positions of the tiles around the
    door. While the door is open, the door position is linked to all of them;
    while it is closed, only neighbouring positions that touch each other are
    linked.
    """

    @property
    def can_collide(self):
        """Doors never take part in collisions."""
        return False

    @property
    def encoding(self):
        """1 for a closed door, -1 for an open one."""
        return 1 if self.is_closed else -1

    def __init__(self, *args, context, closed_on_init=True, auto_close_interval=500):
        """Create the door and the connectivity graph of its surroundings.

        :param args: Forwarded to ``Entity`` (identifier and tile).
        :param context: Register offering ``by_pos`` to look up neighbouring tiles.
        :param closed_on_init: Open the door right away if False.
        :param auto_close_interval: Value ``time_to_close`` is set to on opening.
        """
        super(Door, self).__init__(*args)
        self._state = c.IS_CLOSED_DOOR
        self.auto_close_interval = auto_close_interval
        self.time_to_close = -1
        # All offsets around the door; product() yields (0, 0) last, [:-1] drops it.
        neighbor_pos = list(itertools.product([-1, 1, 0], repeat=2))[:-1]
        neighbor_tiles = [context.by_pos(tuple([sum(x) for x in zip(self.pos, diff)])) for diff in neighbor_pos]
        neighbor_pos = [x.pos for x in neighbor_tiles if x]
        self.connectivity = nx.Graph()
        # Register every existing neighbour as a node, even without an edge, so
        # that isolated neighbours (e.g. both sides of a corridor door) are
        # known to the graph and get linked when the door opens.
        self.connectivity.add_nodes_from(neighbor_pos)
        for a, b in itertools.combinations(neighbor_pos, 2):
            # Link neighbours that are at most one step apart on both axes.
            if not max(abs(np.subtract(a, b))) > 1:
                self.connectivity.add_edge(a, b)
        if not closed_on_init:
            self._open()

    @property
    def is_closed(self):
        return self._state == c.IS_CLOSED_DOOR

    @property
    def is_open(self):
        return self._state == c.IS_OPEN_DOOR

    @property
    def status(self):
        """The current door state constant."""
        return self._state

    def use(self):
        """Toggle the door: close it if it is open, open it otherwise."""
        if self._state == c.IS_OPEN_DOOR:
            self._close()
        else:
            self._open()

    def tick(self):
        """Advance the auto-close countdown of an open door by one step."""
        # NOTE(review): the countdown only runs while len(self.tile) == 1 -
        # confirm which guests are expected to be registered on a door tile.
        if self.is_open and len(self.tile) == 1 and self.time_to_close:
            self.time_to_close -= 1
        elif self.is_open and not self.time_to_close and len(self.tile) == 1:
            self.use()

    def _open(self):
        """Link the door position to all known positions and restart the countdown."""
        self.connectivity.add_edges_from([(self.pos, x) for x in self.connectivity.nodes])
        self._state = c.IS_OPEN_DOOR
        self.time_to_close = self.auto_close_interval

    def _close(self):
        """Cut the door position out of the connectivity graph."""
        # The door position is missing if the graph was empty on opening.
        if self.pos in self.connectivity:
            self.connectivity.remove_node(self.pos)
        self._state = c.IS_CLOSED_DOOR

    def is_linked(self, old_pos, new_pos):
        """Tell whether a path from ``old_pos`` to ``new_pos`` exists in the graph.

        Positions unknown to the graph count as not linked.
        """
        try:
            _ = nx.shortest_path(self.connectivity, old_pos, new_pos)
            return True
        except (nx.exception.NetworkXNoPath, nx.exception.NodeNotFound):
            return False
class Agent(MoveableEntity):
    """A moveable entity that carries the temporary results of the last step."""

    def __init__(self, *args):
        super().__init__(*args)
        self.clear_temp_sate()

    # noinspection PyAttributeOutsideInit
    def clear_temp_sate(self):
        """Reset the per-step collision list, validity flag and action."""
        self.temp_collisions = list()
        self.temp_valid = None
        self.temp_action = -1

View File

@ -0,0 +1,292 @@
import itertools
import random
from enum import Enum
from typing import List, Union
import networkx as nx
import numpy as np
from environments.factory.base.objects import Entity, Tile, Agent, Door, Slice, Action
from environments.utility_classes import MovementProperties
from environments import helpers as h
from environments.helpers import Constants as c
class Register:
    """Ordered collection of objects, addressable by running index and by name."""

    _accepted_objects = Entity

    @classmethod
    def from_argwhere_coordinates(cls, positions: (int, int), tiles):
        """Create one object per position, located on the tile found at that position."""
        members = [cls._accepted_objects(idx, tiles.by_pos(position)) for idx, position in enumerate(positions)]
        register = cls()
        register.register_additional_items(members)
        return register

    @classmethod
    def from_tiles(cls, tiles, **kwargs):
        """Create one object per tile, named ``<ACCEPTEDCLASSNAME>#<index>``."""
        prefix = cls._accepted_objects.__name__.upper()
        members = [cls._accepted_objects(f'{prefix}#{idx}', tile, **kwargs) for idx, tile in enumerate(tiles)]
        register = cls()
        register.register_additional_items(members)
        return register

    def __init__(self):
        # index -> object
        self._register = dict()
        # object name -> index
        self._names = dict()

    @property
    def name(self):
        """Class name of the register."""
        return self.__class__.__name__

    @property
    def n(self):
        """Number of registered objects."""
        return len(self)

    def __len__(self):
        return len(self._register)

    def __iter__(self):
        return iter(self.values())

    def __repr__(self):
        return f'{self.__class__.__name__}({self._register})'

    def __add__(self, other: _accepted_objects):
        """Append ``other`` in place under the next free index and return the register."""
        assert isinstance(other, self._accepted_objects), \
            f'All item names have to be of type {self._accepted_objects}, but were {other.__class__}.,'
        next_idx = len(self._register)
        self._names[other.name] = next_idx
        self._register[next_idx] = other
        return self

    def register_additional_items(self, others: List[_accepted_objects]):
        """Append all ``others`` in order and return the register."""
        for other in others:
            self + other
        return self

    def keys(self):
        return self._register.keys()

    def values(self):
        return self._register.values()

    def items(self):
        return self._register.items()

    def __getitem__(self, item):
        """Return the object stored at index ``item``; unknown indices raise KeyError."""
        try:
            return self._register[item]
        except KeyError:
            print('NO')
            raise

    def by_name(self, item):
        """Return the object registered under the name ``item``."""
        return self[self._names[item]]

    def by_enum(self, enum: Enum):
        """Return the object registered under the name of ``enum``."""
        return self[self._names[enum.name]]

    def get_name(self, item):
        """Return the name of the object stored at index ``item``."""
        return self._register[item].name

    def get_idx_by_name(self, item):
        """Return the index of the object registered under the name ``item``."""
        return self._names[item]

    def get_idx(self, enum: Enum):
        """Return the index of the object registered under the name of ``enum``."""
        return self._names[enum.name]
class EntityRegister(Register):
    """Register that additionally indexes its members by their position."""

    @classmethod
    def from_argwhere_coordinates(cls, argwhere_coordinates):
        """Create one object per coordinate, identified by its running index."""
        tiles = cls()
        tiles.register_additional_items([cls._accepted_objects(i, pos) for i, pos in enumerate(argwhere_coordinates)])
        return tiles

    def __init__(self):
        super(EntityRegister, self).__init__()
        # position -> object
        self._tiles = dict()

    def __add__(self, other):
        """Register ``other`` by index, name and position and return the register."""
        super(EntityRegister, self).__add__(other)
        self._tiles[other.pos] = other
        # Same contract as Register.__add__: hand back the register itself.
        return self

    def by_pos(self, pos):
        """Return the object at ``pos`` or None if nothing is registered there."""
        if isinstance(pos, np.ndarray):
            # Arrays are not hashable; positions are stored as tuples.
            pos = tuple(pos)
        return self._tiles.get(pos)
class Entities(Register):
    """Register of registers; iterating yields the members of all of them."""

    _accepted_objects = Register

    def __init__(self):
        super().__init__()

    def __iter__(self):
        # Flatten eagerly, one registered register after the other.
        flattened = list(itertools.chain.from_iterable(self.values()))
        return iter(flattened)

    @classmethod
    def from_argwhere_coordinates(cls, positions):
        """Not available for this register."""
        raise AttributeError()
class FloorTiles(EntityRegister):
    """Register of all floor tiles."""

    _accepted_objects = Tile

    def _shuffled(self, predicate):
        # All tiles fulfilling `predicate`, in random order.
        selection = [tile for tile in self if predicate(tile)]
        random.shuffle(selection)
        return selection

    @property
    def occupied_tiles(self):
        """Shuffled list of all tiles with at least one guest."""
        return self._shuffled(lambda tile: tile.is_occupied())

    @property
    def empty_tiles(self):
        """Shuffled list of all tiles without guests."""
        return self._shuffled(lambda tile: tile.is_empty())
class Agents(Register):
    """Register of all agents."""

    _accepted_objects = Agent

    @property
    def positions(self):
        """Positions of all agents, in registration order."""
        return [member.pos for member in self]
class Doors(EntityRegister):
    """Register of all doors."""

    _accepted_objects = Door

    def tick_doors(self):
        """Call ``tick`` on every registered door."""
        for registered_door in self:
            registered_door.tick()
class Actions(Register):
    """Register of all actions an agent can choose from.

    Movement actions are registered first, followed by the optional door
    usage and the optional no-op.
    """

    _accepted_objects = Action

    @property
    def movement_actions(self):
        """Copy of the register (index -> Action) taken right after the moves were added."""
        return self._movement_actions

    def __init__(self, movement_properties: MovementProperties, can_use_doors=False):
        """Register the actions allowed by ``movement_properties``.

        :param movement_properties: Provides ``allow_no_op``, ``allow_diagonal_movement``
                                    and ``allow_square_movement``.
        :param can_use_doors: Register the 'use_door' action.
        """
        self.allow_no_op = movement_properties.allow_no_op
        self.allow_diagonal_movement = movement_properties.allow_diagonal_movement
        self.allow_square_movement = movement_properties.allow_square_movement
        self.can_use_doors = can_use_doors
        super(Actions, self).__init__()

        if self.allow_square_movement:
            self.register_additional_items([self._accepted_objects(direction) for direction in h.MANHATTAN_MOVES])
        if self.allow_diagonal_movement:
            self.register_additional_items([self._accepted_objects(direction) for direction in h.DIAGONAL_MOVES])
        # Everything registered up to here is a movement action.
        self._movement_actions = self._register.copy()
        if self.can_use_doors:
            self.register_additional_items([self._accepted_objects('use_door')])
        if self.allow_no_op:
            self.register_additional_items([self._accepted_objects('no-op')])

    def _is_named(self, action: Union[str, int], name: str) -> bool:
        """Tell whether ``action`` (an index or a name) refers to the registered action ``name``."""
        if isinstance(action, str):
            # by_name() would return the Action object, which is no valid
            # register key; a name can be compared directly.
            return action == name and action in self._names
        return self[action].name == name

    def is_moving_action(self, action: Union[int]):
        """Tell whether the action index belongs to a movement action."""
        return action in self.movement_actions.keys()

    def is_no_op(self, action: Union[str, int]):
        """Tell whether ``action`` is the registered no-op."""
        return self._is_named(action, 'no-op')

    def is_door_usage(self, action: Union[str, int]):
        """Tell whether ``action`` is the registered door usage."""
        return self._is_named(action, 'use_door')
class StateSlices(Register):
    """Register of the named slices that make up the state."""

    _accepted_objects = Slice

    @property
    def AGENTSTARTIDX(self):
        """Index of the first slice whose name contains the agent name (cached)."""
        # Compare against None: a cached index of 0 is a valid result.
        if self._agent_start_idx is None:
            self._agent_start_idx = min([idx for idx, x in self.items() if c.AGENT.name in x.name])
        return self._agent_start_idx

    def __init__(self):
        super(StateSlices, self).__init__()
        self._agent_start_idx = None

    def _gather_occupation(self, excluded_slices):
        """Sum the arrays of all slices whose index is not excluded."""
        exclusion = excluded_slices or []
        assert isinstance(exclusion, (int, list))
        exclusion = exclusion if isinstance(exclusion, list) else [exclusion]

        # The register holds Slice objects; the arrays live in their `slice` attribute.
        result = np.sum([x.slice for i, x in self.items() if i not in exclusion], axis=0)
        return result

    def free_cells(self, excluded_slices: Union[None, List[int], int] = None) -> np.array:
        """Return the shuffled coordinates of all cells whose summed occupation is the free value."""
        occupation = self._gather_occupation(excluded_slices)
        free_cells = np.argwhere(occupation == c.FREE_CELL.value)
        np.random.shuffle(free_cells)
        return free_cells

    def occupied_cells(self, excluded_slices: Union[None, List[int], int] = None) -> np.array:
        """Return the shuffled coordinates of all cells whose summed occupation is the occupied value."""
        occupation = self._gather_occupation(excluded_slices)
        occupied_cells = np.argwhere(occupation == c.OCCUPIED_CELL.value)
        np.random.shuffle(occupied_cells)
        return occupied_cells
class Zones(Register):
    """Register of level zones. Currently disabled: the constructor always raises."""

    def __init__(self, parsed_level):
        raise NotImplementedError('This needs a Rework')
        # Everything below is unreachable until the rework is done.
        super().__init__()
        slices = list()
        self._accounting_zones = list()
        self._danger_zones = list()
        for symbol in np.unique(parsed_level):
            if symbol == h.WALL:
                continue
            bucket = self._danger_zones if symbol == h.DANGER_ZONE else self._accounting_zones
            self + symbol
            slices.append(h.one_hot_level(parsed_level, symbol))
            bucket.append(symbol)

        self._zone_slices = np.stack(slices)

    @property
    def danger_zone(self):
        """The zone slice selected by the danger-zone enum."""
        return self._zone_slices[self.by_enum(c.DANGER_ZONE)]

    @property
    def accounting_zones(self):
        """All zone slices whose register entry differs from the danger-zone value."""
        return [self[idx] for idx, name in self.items() if name != c.DANGER_ZONE.value]

    def __getitem__(self, item):
        return self._zone_slices[item]

    def get_name(self, item):
        return self._register[item]

    def by_name(self, item):
        return self[super().by_name(item)]

    def register_additional_items(self, other: Union[str, List[str]]):
        """Zones are fixed; adding more at runtime is refused."""
        raise AttributeError('You are not allowed to add additional Zones in runtime.')

View File

@ -1,364 +0,0 @@
from pathlib import Path
from typing import List, Union, Iterable
import gym
import numpy as np
from gym import spaces
import yaml
from gym.wrappers import FrameStack
from environments import helpers as h
from environments.utility_classes import Actions, StateSlices, AgentState, MovementProperties, Zones, DoorState
# noinspection PyAttributeOutsideInit
class BaseFactory(gym.Env):
@property
def action_space(self):
return spaces.Discrete(self._actions.n)
@property
def observation_space(self):
agent_slice = self.n_agents if self.omit_agent_slice_in_obs else 0
agent_slice = (self.n_agents - 1) if self.combin_agent_slices_in_obs else agent_slice
if self.pomdp_radius:
shape = (self._state.shape[0] - agent_slice, self.pomdp_radius * 2 + 1, self.pomdp_radius * 2 + 1)
space = spaces.Box(low=0, high=1, shape=shape, dtype=np.float32)
return space
else:
shape = [x-agent_slice if idx == 0 else x for idx, x in enumerate(self._state.shape)]
space = spaces.Box(low=0, high=1, shape=shape, dtype=np.float32)
return space
@property
def movement_actions(self):
return self._actions.movement_actions
@property
def has_doors(self):
return hasattr(self, '_doors')
def __enter__(self):
return self if self.frames_to_stack == 0 else FrameStack(self, self.frames_to_stack)
def __exit__(self, exc_type, exc_val, exc_tb):
self.close()
def __init__(self, level_name='simple', n_agents=1, max_steps=int(5e2), pomdp_radius: Union[None, int] = 0,
movement_properties: MovementProperties = MovementProperties(), parse_doors=False,
combin_agent_slices_in_obs: bool = False, frames_to_stack=0,
omit_agent_slice_in_obs=False, **kwargs):
assert (combin_agent_slices_in_obs != omit_agent_slice_in_obs) or \
(not combin_agent_slices_in_obs and not omit_agent_slice_in_obs), \
'Both options are exclusive'
assert frames_to_stack != 1 and frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1."
self.movement_properties = movement_properties
self.level_name = level_name
self.n_agents = n_agents
self.max_steps = max_steps
self.pomdp_radius = pomdp_radius
self.combin_agent_slices_in_obs = combin_agent_slices_in_obs
self.omit_agent_slice_in_obs = omit_agent_slice_in_obs
self.frames_to_stack = frames_to_stack
self.done_at_collision = False
self._state_slices = StateSlices()
# Level
level_filepath = Path(__file__).parent / h.LEVELS_DIR / f'{self.level_name}.txt'
parsed_level = h.parse_level(level_filepath)
self._level = h.one_hot_level(parsed_level)
level_slices = [h.LEVEL]
# Doors
if parse_doors:
parsed_doors = h.one_hot_level(parsed_level, h.DOOR)
if parsed_doors.any():
self._doors = parsed_doors
level_slices.append(h.DOORS)
# Agents
offset = len(level_slices)
self._state_slices.register_additional_items([*level_slices,
*[f'agent#{i}' for i in range(offset, n_agents + offset)]])
# Additional Slices from SubDomains
if 'additional_slices' in kwargs:
self._state_slices.register_additional_items(kwargs.get('additional_slices'))
self._zones = Zones(parsed_level)
self._actions = Actions(self.movement_properties, can_use_doors=self.has_doors)
self._actions.register_additional_items(self.additional_actions)
self.reset()
@property
def additional_actions(self) -> Union[str, List[str]]:
"""
When heriting from this Base Class, you musst implement this methode!!!
Please return a dict with the given types -> {int: str}.
The int should start at 0.
:return: An Actions-object holding all actions with keys in range 0-n.
:rtype: Actions
"""
raise NotImplementedError('Please register additional actions ')
def reset(self) -> (np.ndarray, int, bool, dict):
slices = [np.expand_dims(self._level, 0)]
self._steps = 0
self._agent_states = list()
# Door Init
if self.has_doors:
self._door_states = [DoorState(i, tuple(pos)) for i, pos
in enumerate(np.argwhere(self._doors == h.IS_OCCUPIED_CELL))]
slices.append(np.expand_dims(self._doors, 0))
# Agent placement ...
floor_tiles = np.argwhere(self._level == h.IS_FREE_CELL)
# ... on random positions
np.random.shuffle(floor_tiles)
agents = np.zeros((self.n_agents, *self._level.shape), dtype=np.int8)
for i, (x, y) in enumerate(floor_tiles[:self.n_agents]):
agents[i, x, y] = h.IS_OCCUPIED_CELL
agent_state = AgentState(i, -1, pos=(x, y))
self._agent_states.append(agent_state)
slices.append(agents)
# GLOBAL STATE
self._state = np.concatenate(slices, axis=0)
return None
def _get_observations(self) -> np.ndarray:
if self.n_agents == 1:
obs = self._build_per_agent_obs(0)
elif self.n_agents >= 2:
obs = np.stack([self._build_per_agent_obs(agent_i) for agent_i in range(self.n_agents)])
else:
raise ValueError('n_agents cannot be smaller than 1!!')
return obs
def _build_per_agent_obs(self, agent_i: int) -> np.ndarray:
first_agent_slice = self._state_slices.AGENTSTARTIDX
# Todo: make this more efficient!
if self.pomdp_radius:
pomdp_diameter = self.pomdp_radius * 2 + 1
global_x, global_y = self._agent_states[agent_i].pos
x0, x1 = max(0, global_x - self.pomdp_radius), global_x + self.pomdp_radius + 1
y0, y1 = max(0, global_y - self.pomdp_radius), global_y + self.pomdp_radius + 1
obs = self._state[:, x0:x1, y0:y1]
if obs.shape[1] != pomdp_diameter or obs.shape[2] != pomdp_diameter:
obs_padded = np.full((obs.shape[0], pomdp_diameter, pomdp_diameter), h.IS_OCCUPIED_CELL)
local_x, local_y = np.argwhere(obs[first_agent_slice + agent_i] == h.IS_OCCUPIED_CELL)[0]
obs_padded[:,
abs(local_x-self.pomdp_radius):abs(local_x-self.pomdp_radius)+obs.shape[1],
abs(local_y-self.pomdp_radius):abs(local_y-self.pomdp_radius)+obs.shape[2]] = obs
obs = obs_padded
else:
obs = self._state
if self.omit_agent_slice_in_obs:
obs_new = obs[[key for key, val in self._state_slices.items() if h.AGENT not in val]]
return obs_new
else:
if self.combin_agent_slices_in_obs:
agent_obs = np.sum(obs[[key for key, val in self._state_slices.items() if 'agent' in val]],
axis=0, keepdims=True)
obs = np.concatenate((obs[:first_agent_slice], agent_obs, obs[first_agent_slice+self.n_agents:]))
return obs
else:
return obs
def do_additional_actions(self, agent_i: int, action: int) -> ((int, int), bool):
raise NotImplementedError
def step(self, actions):
actions = [actions] if isinstance(actions, int) or np.isscalar(actions) else actions
assert isinstance(actions, Iterable), f'"actions" has to be in [{int, list}]'
self._steps += 1
done = False
# Move this in a seperate function?
for agent_i, action in enumerate(actions):
agent = self._agent_states[agent_i]
if self._actions.is_moving_action(action):
pos, valid = self.move_or_colide(agent_i, action)
elif self._actions.is_no_op(action):
pos, valid = agent.pos, h.VALID
elif self._actions.is_door_usage(action):
# Check if agent raly stands on a door:
if self._state[self._state_slices.by_name(h.DOORS)][agent.pos] in [h.IS_OCCUPIED_CELL, ]:
door = [door for door in self._door_states if door.pos == self._agent_states[agent_i].pos][0]
door.use()
pos, valid = self._agent_states[agent_i].pos, h.VALID
# When he doesn't...
else:
pos, valid = self._agent_states[agent_i].pos, h.NOT_VALID
else:
pos, valid = self.do_additional_actions(agent_i, action)
# Update state accordingly
self._agent_states[agent_i].update(pos=pos, action_valid=valid, action=action)
for i, collision_vec in enumerate(self.check_all_collisions(self._agent_states, self._state.shape[0])):
self._agent_states[i].update(collision_vector=collision_vec)
if self.done_at_collision and collision_vec.any():
done = True
# Step the door close intervall
agents_pos = [agent.pos for agent in self._agent_states]
if self.has_doors:
for door_i, door in enumerate(self._door_states):
if door.is_open and door.time_to_close and door.pos not in agents_pos:
door.time_to_close -= 1
elif door.is_open and not door.time_to_close and door.pos not in agents_pos:
door.use()
self._state[self._state_slices.by_name(h.DOORS)] = 1 if door.is_closed else -1
reward, info = self.calculate_reward(self._agent_states)
if self._steps >= self.max_steps:
done = True
info.update(step_reward=reward, step=self._steps)
return None, reward, done, info
def check_all_collisions(self, agent_states: List[AgentState], collisions: int) -> np.ndarray:
    """
    Collect one collision vector per agent.

    :param agent_states: States of all agents; ``state.i`` selects the row that is written.
    :param collisions: Length of a single collision vector.
    :return: Array of shape ``(len(agent_states), collisions)``. Rows of agents whose last
             action was not a movement stay all-zero.
    :rtype: np.ndarray
    """
    result = np.zeros((len(agent_states), collisions))  # n_agents x n_slices
    # Register only collisions of moving agents
    moving_states = (state for state in agent_states if self._actions.is_moving_action(state.action))
    for state in moving_states:
        result[state.i] = self.check_collisions(state)
    return result
def check_collisions(self, agent_state: AgentState) -> np.ndarray:
    """
    Build the collision vector of a single agent.

    The vector is a copy of all state slices at the agent's position, with the agent's own
    slice and the doors slice cleared. If the agent's last action was invalid, the level
    entry is marked as occupied.

    :param agent_state: State of the agent to check; ``pos``, ``i`` and ``action_valid`` are read.
    :return: One value per state slice.
    :rtype: np.ndarray
    """
    pos_x, pos_y = agent_state.pos

    # FixMe: We need to find a way to spare out some dimensions, eg. an info dimension etc... a[?,]
    #  https://numpy.org/doc/stable/reference/arrays.indexing.html#boolean-array-indexing
    collisions_vec = self._state[:, pos_x, pos_y].copy()  # "vertical fiber" at position of agent i
    collisions_vec[self._state_slices.AGENTSTARTIDX + agent_state.i] = h.IS_FREE_CELL  # no self-collisions
    # The slice is registered under h.DOORS; testing for 'door' never matched,
    # so doors were reported as collisions.
    if h.DOORS in self._state_slices.values():
        collisions_vec[self._state_slices.by_name(h.DOORS)] = h.IS_FREE_CELL  # no door-collisions

    if not agent_state.action_valid:
        # Place a marker to indicate a collision with the level boundaries
        collisions_vec[self._state_slices.by_name(h.LEVEL)] = h.IS_OCCUPIED_CELL
    # else: all well, no collision. Place a function hook here if needed.
    return collisions_vec
def do_move(self, agent_i: int, old_pos: (int, int), new_pos: (int, int)) -> None:
    """
    Relocate an agent inside its own state slice.

    :param agent_i: Index of the agent.
    :param old_pos: Cell that is marked as free.
    :param new_pos: Cell that is marked as occupied.
    """
    old_x, old_y = old_pos
    new_x, new_y = new_pos
    agent_slice_idx = agent_i + self._state_slices.AGENTSTARTIDX
    self._state[agent_slice_idx, old_x, old_y] = h.IS_FREE_CELL
    self._state[agent_slice_idx, new_x, new_y] = h.IS_OCCUPIED_CELL
def move_or_colide(self, agent_i: int, action: int) -> ((int, int), bool):
    """
    Execute a movement action if the move check allows it.

    :param agent_i: Index of the moving agent.
    :param action: Index of the movement action.
    :return: The new position if the move was valid, otherwise the old position; plus the validity.
    :rtype: ((int, int), bool)
    """
    old_pos, new_pos, valid = self._check_agent_move(agent_i=agent_i, action=self._actions[action])
    if not valid:
        # Agent seems to be trying to collide in this step
        return old_pos, valid
    # Does not collide with level boundaries
    self.do_move(agent_i, old_pos, new_pos)
    return new_pos, valid
def _check_agent_move(self, agent_i, action: str):
    """
    Compute the target cell of a movement action and check whether the move is allowed.

    :param agent_i: Index of the moving agent.
    :param action: Action name; looked up in ``h.ACTIONMAP`` to get the position offset.
    :return: ``(old_pos, new_pos, valid)``. If a closed door blocks the move, both positions
             are the current position and ``valid`` is ``h.NOT_VALID``.
    :raises AssertionError: If more than one cell of the agent's slice equals 1.
    """
    agent_slice_idx = self._state_slices.AGENTSTARTIDX + agent_i
    agent_slice = self._state[agent_slice_idx]  # horizontal slice from state tensor
    agent_pos = np.argwhere(agent_slice == 1)
    if len(agent_pos) > 1:
        raise AssertionError('Only one agent per slice is allowed.')
    x, y = agent_pos[0]

    # Actions
    x_diff, y_diff = h.ACTIONMAP[action]
    x_new = x + x_diff
    y_new = y + y_diff

    # The door check is skipped while the agent's _last_pos still equals (-1, -1).
    if self.has_doors and self._agent_states[agent_i]._last_pos != (-1, -1):
        # Doors located at the agent's current position.
        door = [door for door in self._door_states if door.pos == (x, y)]
        if door:
            door = door[0]
            if door.is_open:
                pass
            else:  # door.is_closed:
                # Window of the level slice around the door (rows/cols pos-1 .. pos+1).
                # NOTE(review): assumes the door is not on the outermost row/column - confirm.
                local_door_map = self._state[self._state_slices.by_name(h.LEVEL)][door.pos[0]-1:door.pos[0]+2,
                                                                                  door.pos[1]-1:door.pos[1]+2]
                local_agent_map = np.zeros_like(local_door_map)
                # Mark where the agent came from and where it wants to go, as offsets relative
                # to the door. An offset of -1 indexes the last row/column (negative indexing).
                local_agent_map[tuple(np.subtract(door.pos, self._agent_states[agent_i]._last_pos))] += 1
                local_agent_map[tuple(np.subtract(door.pos, (x_new, y_new)))] += 1
                if np.all(local_door_map == h.HORIZONTAL_DOOR_MAP):
                    # This is a horizontal Door Configuration
                    if np.sum(local_agent_map[0]) >= 2 or np.sum(local_agent_map[-1]) >= 2:
                        # The Agent goes back to where he came from
                        pass
                    else:
                        # The Agent tries to go through a closed door
                        return (x, y), (x, y), h.NOT_VALID
                else:
                    # This is a vertical Door Configuration
                    if np.sum(local_agent_map[:, 0]) >= 2 or np.sum(local_agent_map[:, -1]) >= 2:
                        # The Agent goes back to where he came from
                        pass
                    else:
                        # The Agent tries to go through a closed door
                        return (x, y), (x, y), h.NOT_VALID
        else:
            pass
    else:
        pass

    # Final check of the target cell against the level slice.
    valid = h.check_position(self._state[self._state_slices.by_name(h.LEVEL)], (x_new, y_new))

    return (x, y), (x_new, y_new), valid
def agent_i_position(self, agent_i: int) -> (int, int):
    """
    Look up the position of an agent from its state slice.

    :param agent_i: Index of the agent.
    :return: Row and column of the single occupied cell in the agent's slice.
    :rtype: (int, int)
    """
    agent_slice = self._state[self._state_slices.AGENTSTARTIDX + agent_i]
    occupied = np.argwhere(agent_slice == h.IS_OCCUPIED_CELL)
    assert occupied.shape[0] == 1
    row, col = occupied[0]
    return row, col
def free_cells(self, excluded_slices: Union[None, List[int], int] = None) -> np.ndarray:
    """
    Return the positions of all cells that are free across the considered state slices.

    :param excluded_slices: Slice index or list of slice indices to ignore. ``None`` considers
                            all slices.
    :return: Array of ``[x, y]`` positions in random order (shuffled in place via ``np.random``).
    :rtype: np.ndarray
    """
    # Compare against None explicitly: ``excluded_slices or []`` would also discard
    # the valid slice index 0.
    if excluded_slices is None:
        excluded_slices = []
    assert isinstance(excluded_slices, (int, list))
    excluded_slices = excluded_slices if isinstance(excluded_slices, list) else [excluded_slices]

    state = self._state
    if excluded_slices:
        # Boolean mask over the slice axis; negative indices work as usual.
        bool_array = np.full(self._state.shape[0], True)
        bool_array[excluded_slices] = False
        state = self._state[bool_array]

    free_cells = np.argwhere(state.sum(0) == h.IS_FREE_CELL)
    np.random.shuffle(free_cells)
    return free_cells
def calculate_reward(self, agent_states: List[AgentState]) -> (int, dict):
    """
    Compute the reward of the current step; has to be implemented by subclasses.

    :param agent_states: States of all agents after the step.
    :return: Reward and info dictionary.
    :rtype: (int, dict)
    """
    raise NotImplementedError()
def render(self, mode='human'):
    """
    Render the environment; has to be implemented by subclasses.

    :param mode: Render mode, ``'human'`` by default.
    """
    raise NotImplementedError()
def save_params(self, filepath: Path):
    """
    Dump all public instance attributes as YAML.

    Attributes whose name starts with an underscore are skipped. Missing parent
    directories of ``filepath`` are created.

    :param filepath: Target file; it is opened in write mode.
    """
    public_attrs = dict()
    for key, val in self.__dict__.items():
        if key.startswith('_'):
            continue
        public_attrs[key] = val
    filepath.parent.mkdir(parents=True, exist_ok=True)
    with filepath.open('w') as f:
        yaml.dump(public_attrs, f)

View File

@ -53,7 +53,7 @@ class Renderer:
def blit_params(self, entity): def blit_params(self, entity):
r, c = entity.pos r, c = entity.pos
img = self.assets[entity.name] img = self.assets[entity.name.lower()]
if entity.value_operation == 'opacity': if entity.value_operation == 'opacity':
img.set_alpha(255*entity.value) img.set_alpha(255*entity.value)
elif entity.value_operation == 'scale': elif entity.value_operation == 'scale':

View File

@ -3,13 +3,17 @@ import random
import numpy as np import numpy as np
from environments.factory.base_factory import BaseFactory
from environments import helpers as h from environments import helpers as h
from environments.helpers import Constants as c
from environments.factory.base.base_factory import BaseFactory
from environments.factory.base.objects import Agent, Action, Object, Slice
from environments.factory.base.registers import Entities
from environments.factory.renderer import Renderer, Entity from environments.factory.renderer import Renderer, Entity
from environments.utility_classes import AgentState, MovementProperties from environments.utility_classes import MovementProperties
DIRT_INDEX = -1 DIRT = "dirt"
CLEAN_UP_ACTION = 'clean_up' CLEAN_UP_ACTION = 'clean_up'
@ -26,95 +30,104 @@ class DirtProperties(NamedTuple):
class SimpleFactory(BaseFactory): class SimpleFactory(BaseFactory):
@property @property
def additional_actions(self) -> List[str]: def additional_actions(self) -> List[Object]:
return [CLEAN_UP_ACTION] return [Action(CLEAN_UP_ACTION)]
@property
def additional_entities(self) -> Union[Entities, List[Entities]]:
return []
@property
def additional_slices(self) -> List[Slice]:
return [Slice('dirt', np.zeros(self._level_shape))]
def _is_clean_up_action(self, action: Union[str, int]): def _is_clean_up_action(self, action: Union[str, int]):
if isinstance(action, str): if isinstance(action, str):
action = self._actions.by_name(action) action = self._actions.by_name(action)
return self._actions[action] == CLEAN_UP_ACTION return self._actions[action].name == CLEAN_UP_ACTION
def __init__(self, *args, dirt_properties: DirtProperties = DirtProperties(), verbose=False, **kwargs): def __init__(self, *args, dirt_properties: DirtProperties = DirtProperties(), verbose=False, **kwargs):
self.dirt_properties = dirt_properties self.dirt_properties = dirt_properties
self.verbose = verbose self.verbose = verbose
self.max_dirt = 20
self._renderer = None # expensive - don't use it when not required ! self._renderer = None # expensive - don't use it when not required !
super(SimpleFactory, self).__init__(*args, additional_slices=['dirt'], **kwargs) super(SimpleFactory, self).__init__(*args, **kwargs)
def _flush_state(self):
super(SimpleFactory, self)._flush_state()
self._obs_cube[self._slices.get_idx_by_name(DIRT)] = self._slices.by_name(DIRT).slice
def render(self, mode='human'): def render(self, mode='human'):
if not self._renderer: # lazy init if not self._renderer: # lazy init
height, width = self._state.shape[1:] height, width = self._obs_cube.shape[1:]
self._renderer = Renderer(width, height, view_radius=self.pomdp_radius, fps=5) self._renderer = Renderer(width, height, view_radius=self.pomdp_radius, fps=5)
dirt_slice = self._slices.by_name(DIRT).slice
dirt = [Entity('dirt', [x, y], min(0.15 + self._state[DIRT_INDEX, x, y], 1.5), 'scale') dirt = [Entity('dirt', tile.pos, min(0.15 + dirt_slice[tile.pos], 1.5), 'scale')
for x, y in np.argwhere(self._state[DIRT_INDEX] > h.IS_FREE_CELL)] for tile in [tile for tile in self._tiles if dirt_slice[tile.pos]]]
walls = [Entity('wall', pos) walls = [Entity('wall', pos)
for pos in np.argwhere(self._state[self._state_slices.by_name(h.LEVEL)] > h.IS_FREE_CELL)] for pos in np.argwhere(self._slices.by_enum(c.LEVEL).slice == c.OCCUPIED_CELL.value)]
def asset_str(agent): def asset_str(agent):
if any([x is None for x in [self._state_slices[j] for j in agent.collisions]]): # What does this abonimation do?
print('error') # if any([x is None for x in [self._slices[j] for j in agent.collisions]]):
cols = ' '.join([self._state_slices[j] for j in agent.collisions]) # print('error')
if h.AGENT in cols: col_names = [x.name for x in agent.temp_collisions]
if c.AGENT.value in col_names:
return 'agent_collision', 'blank' return 'agent_collision', 'blank'
elif not agent.action_valid or 'level' in cols or h.AGENT in cols: elif not agent.temp_valid or c.LEVEL.name in col_names or c.AGENT.name in col_names:
return h.AGENT, 'invalid' return c.AGENT.value, 'invalid'
elif self._is_clean_up_action(agent.action): elif self._is_clean_up_action(agent.temp_action):
return h.AGENT, 'valid' return c.AGENT.value, 'valid'
else: else:
return h.AGENT, 'idle' return c.AGENT.value, 'idle'
agents = [] agents = []
for i, agent in enumerate(self._agent_states): for i, agent in enumerate(self._agents):
name, state = asset_str(agent) name, state = asset_str(agent)
agents.append(Entity(name, agent.pos, 1, 'none', state, i+1)) agents.append(Entity(name, agent.pos, 1, 'none', state, i+1))
doors = [] doors = []
if self.has_doors: if self.parse_doors:
for i, door in enumerate(self._door_states): for i, door in enumerate(self._doors):
name, state = 'door_open' if door.is_open else 'door_closed', 'blank' name, state = 'door_open' if door.is_open else 'door_closed', 'blank'
agents.append(Entity(name, door.pos, 1, 'none', state, i+1)) agents.append(Entity(name, door.pos, 1, 'none', state, i+1))
self._renderer.render(dirt+walls+agents+doors) self._renderer.render(dirt+walls+agents+doors)
def spawn_dirt(self) -> None: def spawn_dirt(self) -> None:
if not np.argwhere(self._state[DIRT_INDEX] != h.IS_FREE_CELL).shape[0] > self.dirt_properties.max_global_amount: dirt_slice = self._slices.by_name(DIRT).slice
free_for_dirt = self.free_cells(excluded_slices=DIRT_INDEX) # dirty_tiles = [tile for tile in self._tiles if dirt_slice[tile.pos]]
curr_dirt_amount = dirt_slice.sum()
if not curr_dirt_amount > self.dirt_properties.max_global_amount:
free_for_dirt = self._tiles.empty_tiles
# randomly distribute dirt across the grid # randomly distribute dirt across the grid
n_dirt_tiles = int(random.uniform(0, self.dirt_properties.max_spawn_ratio) * len(free_for_dirt)) n_dirt_tiles = int(random.uniform(0, self.dirt_properties.max_spawn_ratio) * len(free_for_dirt))
for x, y in free_for_dirt[:n_dirt_tiles]: for tile in free_for_dirt[:n_dirt_tiles]:
new_value = self._state[DIRT_INDEX, x, y] + self.dirt_properties.gain_amount new_value = dirt_slice[tile.pos] + self.dirt_properties.gain_amount
self._state[DIRT_INDEX, x, y] = max(new_value, self.dirt_properties.max_local_amount) dirt_slice[tile.pos] = min(new_value, self.dirt_properties.max_local_amount)
else: else:
pass pass
def clean_up(self, pos: (int, int)) -> ((int, int), bool): def clean_up(self, agent: Agent) -> bool:
new_dirt_amount = self._state[DIRT_INDEX][pos] - self.dirt_properties.clean_amount dirt_slice = self._slices.by_name(DIRT).slice
cleanup_was_sucessfull: bool if dirt_slice[agent.pos]:
if self._state[DIRT_INDEX][pos] == h.IS_FREE_CELL: new_dirt_amount = dirt_slice[agent.pos] - self.dirt_properties.clean_amount
cleanup_was_sucessfull = False dirt_slice[agent.pos] = max(new_dirt_amount, c.FREE_CELL.value)
return pos, cleanup_was_sucessfull return True
else: else:
cleanup_was_sucessfull = True return False
self._state[DIRT_INDEX][pos] = max(new_dirt_amount, h.IS_FREE_CELL)
return pos, cleanup_was_sucessfull
def step(self, actions): def post_step(self) -> dict:
_, reward, done, info = super(SimpleFactory, self).step(actions)
if not self._next_dirt_spawn: if not self._next_dirt_spawn:
self.spawn_dirt() self.spawn_dirt()
self._next_dirt_spawn = self.dirt_properties.spawn_frequency self._next_dirt_spawn = self.dirt_properties.spawn_frequency
else: else:
self._next_dirt_spawn -= 1 self._next_dirt_spawn -= 1
return {}
obs = self._get_observations() def do_additional_actions(self, agent: Agent, action: int) -> bool:
return obs, reward, done, info
def do_additional_actions(self, agent_i: int, action: int) -> ((int, int), bool):
if action != self._actions.is_moving_action(action): if action != self._actions.is_moving_action(action):
if self._is_clean_up_action(action): if self._is_clean_up_action(action):
agent_i_pos = self.agent_i_position(agent_i) valid = self.clean_up(agent)
_, valid = self.clean_up(agent_i_pos) return valid
return agent_i_pos, valid
else: else:
raise RuntimeError('This should not happen!!!') raise RuntimeError('This should not happen!!!')
else: else:
@ -122,19 +135,21 @@ class SimpleFactory(BaseFactory):
def reset(self) -> (np.ndarray, int, bool, dict): def reset(self) -> (np.ndarray, int, bool, dict):
_ = super().reset() # state, reward, done, info ... = _ = super().reset() # state, reward, done, info ... =
dirt_slice = np.zeros((1, *self._state.shape[1:]))
self._state = np.concatenate((self._state, dirt_slice)) # dirt is now the last slice
self.spawn_dirt() self.spawn_dirt()
self._next_dirt_spawn = self.dirt_properties.spawn_frequency self._next_dirt_spawn = self.dirt_properties.spawn_frequency
obs = self._get_observations() obs = self._get_observations()
return obs return obs
def calculate_reward(self, agent_states: List[AgentState]) -> (int, dict): def calculate_reward(self) -> (int, dict):
info_dict = dict() info_dict = dict()
current_dirt_amount = self._state[DIRT_INDEX].sum()
dirty_tiles = np.argwhere(self._state[DIRT_INDEX] != h.IS_FREE_CELL).shape[0] dirt_slice = self._slices.by_name(DIRT).slice
dirty_tiles = [dirt_slice[tile.pos] for tile in self._tiles if dirt_slice[tile.pos]]
current_dirt_amount = sum(dirty_tiles)
dirty_tile_count = len(dirty_tiles)
info_dict.update(dirt_amount=current_dirt_amount) info_dict.update(dirt_amount=current_dirt_amount)
info_dict.update(dirty_tile_count=dirty_tiles) info_dict.update(dirty_tile_count=dirty_tile_count)
try: try:
# penalty = current_dirt_amount # penalty = current_dirt_amount
@ -142,52 +157,47 @@ class SimpleFactory(BaseFactory):
except (ZeroDivisionError, RuntimeWarning): except (ZeroDivisionError, RuntimeWarning):
reward = 0 reward = 0
for agent_state in agent_states: for agent in self._agents:
agent_name = f'{h.AGENT.capitalize()} {agent_state.i}' if agent.temp_collisions:
cols = agent_state.collisions self.print(f't = {self._steps}\t{agent.name} has collisions with {agent.temp_collisions}')
list_of_collisions = [self._state_slices[entity] for entity in cols if self._is_clean_up_action(agent.temp_action):
if entity != self._state_slices.by_name('dirt')] if agent.temp_valid:
if list_of_collisions:
self.print(f't = {self._steps}\t{agent_name} has collisions with {list_of_collisions}')
if self._is_clean_up_action(agent_state.action):
if agent_state.action_valid:
reward += 1 reward += 1
self.print(f'{agent_name} did just clean up some dirt at {agent_state.pos}.') self.print(f'{agent.name} did just clean up some dirt at {agent.pos}.')
info_dict.update(dirt_cleaned=1) info_dict.update(dirt_cleaned=1)
else: else:
reward -= 0.01 reward -= 0.01
self.print(f'{agent_name} just tried to clean up some dirt at {agent_state.pos}, but failed.') self.print(f'{agent.name} just tried to clean up some dirt at {agent.pos}, but failed.')
info_dict.update({f'{h.AGENT}_{agent_state.i}_failed_action': 1}) info_dict.update({f'{agent.name}_failed_action': 1})
info_dict.update({f'{h.AGENT}_{agent_state.i}_failed_dirt_cleanup': 1}) info_dict.update({f'{agent.name}_failed_dirt_cleanup': 1})
elif self._actions.is_moving_action(agent_state.action): elif self._actions.is_moving_action(agent.temp_action):
if agent_state.action_valid: if agent.temp_valid:
# info_dict.update(movement=1) # info_dict.update(movement=1)
reward -= 0.00 reward -= 0.00
else: else:
# self.print('collision') # self.print('collision')
reward -= 0.01 reward -= 0.05
self.print(f'{agent.name} just hit the wall at {agent.pos}.')
info_dict.update({f'{agent.name}_vs_LEVEL': 1})
elif self._actions.is_door_usage(agent_state.action): elif self._actions.is_door_usage(agent.temp_action):
if agent_state.action_valid: if agent.temp_valid:
reward += 0.1 self.print(f'{agent.name} did just use the door at {agent.pos}.')
self.print(f'{agent_name} did just use the door at {agent_state.pos}.')
info_dict.update(door_used=1) info_dict.update(door_used=1)
else: else:
self.print(f'{agent_name} just tried to use a door at {agent_state.pos}, but failed.') reward -= 0.01
info_dict.update({f'{h.AGENT}_{agent_state.i}_failed_action': 1}) self.print(f'{agent.name} just tried to use a door at {agent.pos}, but failed.')
info_dict.update({f'{h.AGENT}_{agent_state.i}_failed_door_open': 1}) info_dict.update({f'{agent.name}_failed_action': 1})
info_dict.update({f'{agent.name}_failed_door_open': 1})
else: else:
info_dict.update(no_op=1) info_dict.update(no_op=1)
reward -= 0.00 reward -= 0.00
for entity in list_of_collisions: for other_agent in agent.temp_collisions:
entity = h.AGENT if h.AGENT in entity else entity info_dict.update({f'{agent.name}_vs_{other_agent.name}': 1})
info_dict.update({f'{h.AGENT}_{agent_state.i}_vs_{entity}': 1})
self.print(f"reward is {reward}") self.print(f"reward is {reward}")
# Potential based rewards -> # Potential based rewards ->
@ -205,13 +215,13 @@ if __name__ == '__main__':
move_props = MovementProperties(allow_diagonal_movement=True, allow_square_movement=True) move_props = MovementProperties(allow_diagonal_movement=True, allow_square_movement=True)
dirt_props = DirtProperties() dirt_props = DirtProperties()
factory = SimpleFactory(movement_properties=move_props, dirt_properties=dirt_props, n_agents=10, factory = SimpleFactory(movement_properties=move_props, dirt_properties=dirt_props, n_agents=10,
combin_agent_slices_in_obs=True, level_name='rooms', combin_agent_slices_in_obs=False, level_name='rooms', parse_doors=True,
pomdp_radius=3) pomdp_radius=3)
n_actions = factory.action_space.n - 1 n_actions = factory.action_space.n - 1
_ = factory.observation_space _ = factory.observation_space
for epoch in range(10000): for epoch in range(100):
random_actions = [[random.randint(0, n_actions) for _ in range(factory.n_agents)] for _ in range(200)] random_actions = [[random.randint(0, n_actions) for _ in range(factory.n_agents)] for _ in range(200)]
env_state = factory.reset() env_state = factory.reset()
r = 0 r = 0

View File

@ -1,27 +1,46 @@
from collections import defaultdict from collections import defaultdict
from typing import Tuple from enum import Enum, auto
from typing import Tuple, Union
import numpy as np import numpy as np
from pathlib import Path from pathlib import Path
# Constants # Constants
class Constants(Enum):
WALL = '#' WALL = '#'
DOOR = 'D' DOOR = 'D'
DANGER_ZONE = 'x' DANGER_ZONE = 'x'
LEVELS_DIR = 'levels'
LEVEL = 'level' LEVEL = 'level'
AGENT = 'agent' AGENT = 'Agent'
IS_FREE_CELL = 0 FREE_CELL = 0
IS_OCCUPIED_CELL = 1 OCCUPIED_CELL = 1
DOORS = 'doors' DOORS = 'doors'
IS_CLOSED_DOOR = IS_OCCUPIED_CELL IS_CLOSED_DOOR = 1
IS_OPEN_DOOR = -1 IS_OPEN_DOOR = -1
LEVEL_IDX = 0 LEVEL_IDX = 0
ACTION = auto()
COLLISIONS = auto()
VALID = True
NOT_VALID = False
def __bool__(self):
return bool(self.value)
LEVELS_DIR = 'levels'
TO_BE_AVERAGED = ['dirt_amount', 'dirty_tiles'] TO_BE_AVERAGED = ['dirt_amount', 'dirty_tiles']
IGNORED_DF_COLUMNS = ['Episode', 'Run', 'train_step', 'step', 'index', 'dirt_amount', 'dirty_tile_count'] IGNORED_DF_COLUMNS = ['Episode', 'Run', 'train_step', 'step', 'index', 'dirt_amount',
'dirty_tile_count', 'terminal_observation', 'episode']
MANHATTAN_MOVES = ['north', 'east', 'south', 'west']
DIAGONAL_MOVES = ['north_east', 'south_east', 'south_west', 'north_west']
NO_POS = (-9999, -9999)
ACTIONMAP = defaultdict(lambda: (0, 0), dict(north=(-1, 0), east=(0, 1), ACTIONMAP = defaultdict(lambda: (0, 0), dict(north=(-1, 0), east=(0, 1),
south=(1, 0), west=(0, -1), south=(1, 0), west=(0, -1),
@ -38,8 +57,7 @@ HORIZONTAL_DOOR_ZONE_2 = np.asarray([[0, 0, 0], [0, 0, 0], [1, 1, 1]])
VERTICAL_DOOR_ZONE_1 = np.asarray([[1, 0, 0], [0, 0, 0], [0, 0, 1]]) VERTICAL_DOOR_ZONE_1 = np.asarray([[1, 0, 0], [0, 0, 0], [0, 0, 1]])
VERTICAL_DOOR_ZONE_2 = np.asarray([[1, 0, 0], [0, 0, 0], [0, 0, 1]]) VERTICAL_DOOR_ZONE_2 = np.asarray([[1, 0, 0], [0, 0, 0], [0, 0, 1]])
NOT_VALID = False
VALID = True
# Utility functions # Utility functions
@ -51,10 +69,13 @@ def parse_level(path):
return level return level
def one_hot_level(level, wall_char=WALL): def one_hot_level(level, wall_char: Union[Constants, str] = Constants.WALL):
grid = np.array(level) grid = np.array(level)
binary_grid = np.zeros(grid.shape, dtype=np.int8) binary_grid = np.zeros(grid.shape, dtype=np.int8)
binary_grid[grid == wall_char] = 1 if wall_char in Constants:
binary_grid[grid == wall_char.value] = Constants.OCCUPIED_CELL.value
else:
binary_grid[grid == wall_char] = Constants.OCCUPIED_CELL.value
return binary_grid return binary_grid
@ -70,7 +91,7 @@ def check_position(slice_to_check_against: np.ndarray, position_to_check: Tuple[
# Check for collision with level walls # Check for collision with level walls
valid = valid and not slice_to_check_against[x_pos, y_pos] valid = valid and not slice_to_check_against[x_pos, y_pos]
return valid return Constants.VALID if valid else Constants.NOT_VALID
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -1,5 +1,6 @@
import pickle import pickle
from pathlib import Path from pathlib import Path
from typing import List, Dict
from stable_baselines3.common.callbacks import BaseCallback from stable_baselines3.common.callbacks import BaseCallback
@ -66,13 +67,15 @@ class MonitorCallback(BaseCallback):
print('Plotting done.') print('Plotting done.')
self.closed = True self.closed = True
def _on_step(self) -> bool: def _on_step(self, alt_infos: List[Dict] = None, alt_dones: List[bool] = None) -> bool:
for _, info in enumerate(self.locals.get('infos', [])): infos = alt_infos or self.locals.get('infos', [])
dones = alt_dones or self.locals.get('dones', None) or self.locals.get('done', [None])
for _, info in enumerate(infos):
self._monitor_dict[self.num_timesteps] = {key: val for key, val in info.items() self._monitor_dict[self.num_timesteps] = {key: val for key, val in info.items()
if key not in ['terminal_observation', 'episode']} if key not in ['terminal_observation', 'episode']
and not key.startswith('rec_')}
for env_idx, done in list(enumerate(self.locals.get('dones', []))) + \ for env_idx, done in enumerate(dones):
list(enumerate(self.locals.get('done', []))):
if done: if done:
env_monitor_df = pd.DataFrame.from_dict(self._monitor_dict, orient='index') env_monitor_df = pd.DataFrame.from_dict(self._monitor_dict, orient='index')
self._monitor_dict = dict() self._monitor_dict = dict()

View File

@ -0,0 +1,74 @@
import json
from pathlib import Path
from typing import Union
import pandas as pd
from stable_baselines3.common.callbacks import BaseCallback
from environments.factory.base.base_factory import REC_TAC
from environments.helpers import IGNORED_DF_COLUMNS
class RecorderCallback(BaseCallback):
    """
    Callback that collects the per-step ``info`` dicts, aggregates them once per finished
    episode and writes the result to a JSON file when training ends.

    It can also be used as a context manager: entering triggers the training-start hook,
    leaving triggers the training-end hook.
    """

    def __init__(self, filepath: Union[str, Path], occupation_map: bool = False, trajectory_map: bool = False):
        """
        :param filepath: Target JSON file; parent directories are created on start.
        :param occupation_map: If set, a notice about the occupation map is printed on end.
        :param trajectory_map: If set, a notice about the trajectory map is printed on end.
        """
        super(RecorderCallback, self).__init__()
        self.trajectory_map = trajectory_map
        self.occupation_map = occupation_map
        self.filepath = Path(filepath)
        self._recorder_dict = dict()
        self._recorder_df = pd.DataFrame()
        self.started = False
        self.closed = False

    def _on_step(self) -> bool:
        """Store the current infos and aggregate them whenever an episode is done."""
        for info in self.locals.get('infos', []):
            # NOTE(review): this *drops* the keys tagged with REC_TAC; for a recorder one
            #  would expect the opposite - confirm the intended filter.
            self._recorder_dict[self.num_timesteps] = {key: val for key, val in info.items()
                                                       if not key.startswith(f'{REC_TAC}_')}

        for env_idx, done in list(enumerate(self.locals.get('dones', []))) + \
                list(enumerate(self.locals.get('done', []))):
            if done:
                env_monitor_df = pd.DataFrame.from_dict(self._recorder_dict, orient='index')
                self._recorder_dict = dict()
                columns = [col for col in env_monitor_df.columns if col not in IGNORED_DF_COLUMNS]
                # Columns ending in 'ount' (e.g. amounts / counts) are averaged, the rest is summed.
                episode_summary = env_monitor_df.aggregate(
                    {col: 'mean' if col.endswith('ount') else 'sum' for col in columns}
                )
                episode_summary['episode'] = len(self._recorder_df)
                # DataFrame.append was removed in pandas 2.0; concat a one-row frame instead.
                self._recorder_df = pd.concat([self._recorder_df, pd.DataFrame([episode_summary])])
        return True

    def __enter__(self):
        self._on_training_start()
        # Return the callback so that ``with RecorderCallback(...) as cb`` binds it.
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self._on_training_end()

    def _on_training_start(self) -> None:
        """Create the output directory once."""
        if not self.started:
            self.filepath.parent.mkdir(exist_ok=True, parents=True)
            self.started = True

    def _on_training_end(self) -> None:
        """Write the collected episode summaries to ``self.filepath`` once."""
        if not self.closed:
            with self.filepath.open('w') as f:
                json_df = self._recorder_df.to_json(orient="table")
                parsed = json.loads(json_df)
                json.dump(parsed, f, indent=4)

            if self.occupation_map:
                print('Recorder files were dumped to disk, now plotting the occupation map...')

            if self.trajectory_map:
                print('Recorder files were dumped to disk, now plotting the trajectory map...')

            self.closed = True

View File

@ -1,68 +0,0 @@
from typing import List, Union
import gym
class Entities:
    """Placeholder for an entity container; it holds no state yet."""

    def __init__(self):
        return None
# noinspection PyAttributeOutsideInit
class BaseFactory(gym.Env):
    """
    Draft of the factory base environment.

    NOTE(review): several names used below (``FrameStack``, ``MovementProperties``, ``StateSlices``,
    ``Zones``, ``Actions``, ``Path``, ``h``) are not imported in the visible part of this
    file - confirm where they are supposed to come from.
    """

    def __enter__(self):
        # Without frame stacking the environment itself is returned, otherwise it is
        # wrapped in a FrameStack of ``frames_to_stack`` frames.
        return self if self.frames_to_stack == 0 else FrameStack(self, self.frames_to_stack)

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def __init__(self, level_name='simple', n_agents=1, max_steps=int(5e2), pomdp_radius: Union[None, int] = 0,
                 movement_properties: MovementProperties = MovementProperties(),
                 combin_agent_slices_in_obs: bool = False, frames_to_stack=0,
                 omit_agent_slice_in_obs=False, **kwargs):
        """
        :param level_name: Name of the level file (``<level_name>.txt``) inside the levels directory.
        :param n_agents: Number of agents; one state slice name is registered per agent.
        :param max_steps: Stored as ``self.max_steps``.
        :param pomdp_radius: Stored as ``self.pomdp_radius``.
        :param movement_properties: Passed on to ``Actions``.
        :param combin_agent_slices_in_obs: Mutually exclusive with ``omit_agent_slice_in_obs``.
        :param frames_to_stack: ``0`` or a value ``>= 2``.
        :param omit_agent_slice_in_obs: Mutually exclusive with ``combin_agent_slices_in_obs``.
        :param kwargs: ``additional_slices`` (list of slice names) is the only key read here.
        """
        # Both flags may be False, but they must not be True at the same time.
        assert (combin_agent_slices_in_obs != omit_agent_slice_in_obs) or \
               (not combin_agent_slices_in_obs and not omit_agent_slice_in_obs), \
            'Both options are exclusive'
        assert frames_to_stack != 1 and frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1."

        self.movement_properties = movement_properties
        self.level_name = level_name

        self.n_agents = n_agents
        self.max_steps = max_steps
        self.pomdp_radius = pomdp_radius
        self.combin_agent_slices_in_obs = combin_agent_slices_in_obs
        self.omit_agent_slice_in_obs = omit_agent_slice_in_obs
        self.frames_to_stack = frames_to_stack

        self.done_at_collision = False

        self._state_slices = StateSlices()

        # The level file is resolved relative to this module.
        level_filepath = Path(__file__).parent / h.LEVELS_DIR / f'{self.level_name}.txt'
        parsed_level = h.parse_level(level_filepath)
        self._level = h.one_hot_level(parsed_level)
        parsed_doors = h.one_hot_level(parsed_level, h.DOOR)
        # A doors slice (and the door action) only exists if the level contains a door.
        if parsed_doors.any():
            self._doors = parsed_doors
            level_slices = ['level', 'doors']
            can_use_doors = True
        else:
            level_slices = ['level']
            can_use_doors = False
        # Agent slice names are numbered starting after the level slices.
        offset = len(level_slices)
        self._state_slices.register_additional_items([*level_slices,
                                                      *[f'agent#{i}' for i in range(offset, n_agents + offset)]])
        if 'additional_slices' in kwargs:
            self._state_slices.register_additional_items(kwargs.get('additional_slices'))
        self._zones = Zones(parsed_level)

        self._actions = Actions(self.movement_properties, can_use_doors=can_use_doors)
        self._actions.register_additional_items(self.additional_actions)
        self.reset()

    def step(self, actions: Union[int, List[int]]):
        # A single action is wrapped into a list.
        actions = actions if isinstance(actions, list) else [actions]
        # NOTE(review): ``self.entities`` is not assigned in the visible ``__init__`` and the
        #  method returns nothing - looks unfinished, confirm.
        self.entities.step()
View File

@ -1,298 +1,7 @@
from typing import Union, List, NamedTuple, Tuple from typing import NamedTuple
import numpy as np
from environments import helpers as h
IS_CLOSED = 'CLOSED'
IS_OPEN = 'OPEN'
class MovementProperties(NamedTuple): class MovementProperties(NamedTuple):
allow_square_movement: bool = True allow_square_movement: bool = True
allow_diagonal_movement: bool = False allow_diagonal_movement: bool = False
allow_no_op: bool = False allow_no_op: bool = False
# Preparations for Entities (not used yet)
class Entity:
    """Base object with a read-only identifier and a read-only position."""

    def __init__(self, identifier, pos):
        self._identifier = identifier
        self._pos = pos

    @property
    def identifier(self):
        return self._identifier

    @property
    def pos(self):
        return self._pos
class Door(Entity):
    """Entity whose state toggles between ``IS_CLOSED`` and ``IS_OPEN``."""

    def __init__(self, *args, closed_on_init=True, **kwargs):
        super(Door, self).__init__(*args, **kwargs)
        if closed_on_init:
            self._state = IS_CLOSED
        else:
            self._state = IS_OPEN

    @property
    def status(self):
        return self._state

    @property
    def is_open(self):
        return self._state == IS_OPEN

    @property
    def is_closed(self):
        return self._state == IS_CLOSED

    def use(self):
        # Toggle: an open door gets closed, everything else gets opened.
        self._state = IS_OPEN if self._state != IS_OPEN else IS_CLOSED
class Agent(Entity):
    """Entity that can move and remembers the offset of its last move."""

    def __init__(self, *args, **kwargs):
        super(Agent, self).__init__(*args, **kwargs)
        self._direction_of_vision = (None, None)

    @property
    def direction_of_vision(self):
        return self._direction_of_vision

    def move(self, new_pos: Tuple[int, int]):
        """Set the new position, store ``old - new`` as direction of vision and return the position."""
        prev_x, prev_y = self.pos
        self._pos = new_pos
        next_x, next_y = new_pos
        self._direction_of_vision = (prev_x - next_x, prev_y - next_y)
        return self.pos
class AgentState:
    """Mutable per-agent record: last action, its validity, position and collision vector."""

    @property
    def collisions(self):
        # Indices of all non-zero entries of the collision vector.
        return np.argwhere(self.collision_vector != 0).flatten()

    @property
    def direction_of_view(self):
        # Offset from the current to the last position (last - current).
        last_x, last_y = self._last_pos
        curr_x, curr_y = self.pos
        return last_x-curr_x, last_y-curr_y

    def __init__(self, i: int, action: int, pos=None):
        """
        :param i: Index of the agent.
        :param action: Initial action.
        :param pos: Initial position, ``None`` by default.
        """
        self.i = i
        self.action = action

        self.collision_vector = None
        self.action_valid = None
        self.pos = pos
        self._last_pos = (-1, -1)  # marker for "no previous position yet"

    def update(self, **kwargs):             # is this hacky?? o.0
        """
        Set existing attributes by keyword.

        ``_last_pos`` is only advanced when the action was valid and the position changed.

        :raises AttributeError: If a key does not name an existing attribute.
        """
        last_pos = self.pos
        for key, value in kwargs.items():
            if hasattr(self, key):
                self.__setattr__(key, value)
            else:
                # Instances have no ``__name__``; take the name from the class so that the
                # intended message is actually raised.
                raise AttributeError(
                    f'"{key}" cannot be updated, this attr is not a part of {type(self).__name__}'
                )
        if self.action_valid and last_pos != self.pos:
            self._last_pos = last_pos

    def reset(self):
        """Re-initialise the state, keeping only the index and the current action."""
        self.__init__(self.i, self.action)
class DoorState:
    """State record of a single door, including its auto-close countdown."""

    def __init__(self, i: int, pos: Tuple[int, int], closed_on_init=True, auto_close_interval=10):
        self.i = i
        self.pos = pos
        if closed_on_init:
            self._state = IS_CLOSED
        else:
            self._state = IS_OPEN
        self.auto_close_interval = auto_close_interval
        self.time_to_close = -1

    @property
    def status(self):
        return self._state

    @property
    def is_open(self):
        return self._state == IS_OPEN

    @property
    def is_closed(self):
        return self._state == IS_CLOSED

    def use(self):
        """Toggle the door; opening it restarts the auto-close countdown."""
        if self._state != IS_OPEN:
            self._state = IS_OPEN
            self.time_to_close = self.auto_close_interval
        else:
            self._state = IS_CLOSED
class Register:
    """Registry that maps consecutive integer indices to item names."""

    def __init__(self):
        self._register = dict()

    @property
    def n(self):
        return len(self)

    def __len__(self):
        return len(self._register)

    def __repr__(self):
        return f'{self.__class__.__name__}({self._register})'

    def __add__(self, other: str):
        # Registers ``other`` under the next free index; note that this mutates the register.
        assert isinstance(other, str), f'All item names have to be of type {str}'
        next_idx = len(self._register)
        self._register[next_idx] = other
        return self

    def register_additional_items(self, others: List[str]):
        for name in others:
            # Go through __add__ so that overrides in subclasses are honoured.
            self.__add__(name)
        return self

    def keys(self):
        return self._register.keys()

    def values(self):
        return self._register.values()

    def items(self):
        return self._register.items()

    def __getitem__(self, item):
        try:
            return self._register[item]
        except KeyError:
            print('NO')
            raise

    def by_name(self, item):
        # Reverse lookup name -> index; raises ValueError for unknown names.
        names = list(self._register.values())
        position = names.index(item)
        return list(self._register.keys())[position]
class Agents(Register):
    """
    Register of agent names that additionally holds one ``Agent`` object per name.

    Indexing returns the ``Agent`` object; use ``get_name`` for the registered name.
    """

    def __init__(self, n_agents):
        """
        :param n_agents: Number of agents to create; they are named ``agent#0``, ``agent#1``, ...
        """
        super(Agents, self).__init__()
        # Must exist before the first name is registered: registration runs through
        # ``__add__``, which appends the matching Agent object to this list.
        self._agents = []
        self.register_additional_items([f'agent#{i}' for i in range(n_agents)])

    def __getitem__(self, item):
        """
        :param item: Index of the agent.
        :return: The ``Agent`` object stored at that index.
        """
        return self._agents[item]

    def get_name(self, item):
        """
        :param item: Index of the agent.
        :return: The name registered under that index.
        """
        return self._register[item]

    def by_name(self, item):
        """
        :param item: Registered agent name.
        :return: The ``Agent`` object that belongs to this name.
        """
        return self[super(Agents, self).by_name(item)]

    def __add__(self, other):
        """
        Register a further agent name and create its ``Agent`` object.

        :param other: Name of the new agent.
        :return: This register, matching the base class behaviour.
        """
        super(Agents, self).__add__(other)
        # The name is already registered here, so the new index is len(self) - 1.
        self._agents.append(Agent(len(self) - 1, (-1, -1)))
        return self
class Actions(Register):
    """
    Register of the action names available to agents.

    Movement names are registered first, followed by the optional ``use_door``
    and ``no-op`` entries.
    """

    def __init__(self, movement_properties: MovementProperties, can_use_doors=False):
        """
        :param movement_properties: Supplies ``allow_no_op``, ``allow_diagonal_movement``
                                    and ``allow_square_movement``.
        :param can_use_doors: Whether the ``use_door`` action is registered.
        """
        super(Actions, self).__init__()
        self.can_use_doors = can_use_doors
        self.allow_square_movement = movement_properties.allow_square_movement
        self.allow_diagonal_movement = movement_properties.allow_diagonal_movement
        self.allow_no_op = movement_properties.allow_no_op

        movement_names = []
        if self.allow_square_movement:
            movement_names += ['north', 'east', 'south', 'west']
        if self.allow_diagonal_movement:
            movement_names += ['north_east', 'south_east', 'south_west', 'north_west']
        self.register_additional_items(movement_names)
        # Snapshot taken before any non-movement action is added.
        self._movement_actions = self._register.copy()

        further_names = []
        if self.can_use_doors:
            further_names.append('use_door')
        if self.allow_no_op:
            further_names.append('no-op')
        self.register_additional_items(further_names)

    @property
    def movement_actions(self):
        """:return: Copy of the register taken right after the movement actions were added."""
        return self._movement_actions

    def is_moving_action(self, action: Union[str, int]):
        """
        :param action: Action name or action index.
        :return: Whether the action is one of the movement actions.
        """
        name = action if isinstance(action, str) else self[action]
        return name in self.movement_actions.values()

    def is_no_op(self, action: Union[str, int]):
        """
        :param action: Action name or action index; a name is resolved to its index first.
        :return: Whether the action is the ``no-op`` action.
        """
        idx = self.by_name(action) if isinstance(action, str) else action
        return self[idx] == 'no-op'

    def is_door_usage(self, action: Union[str, int]):
        """
        :param action: Action name or action index; a name is resolved to its index first.
        :return: Whether the action is the ``use_door`` action.
        """
        idx = self.by_name(action) if isinstance(action, str) else action
        return self[idx] == 'use_door'
class StateSlices(Register):
    """Register of state slice names with a cached lookup of the first agent slice."""

    def __init__(self):
        super(StateSlices, self).__init__()
        # ``None`` means "not computed yet"; see AGENTSTARTIDX.
        self._agent_start_idx = None

    @property
    def AGENTSTARTIDX(self):
        """
        Smallest index whose registered name contains ``h.AGENT``.

        The value is computed on first access and cached afterwards; the cache
        is not refreshed when further names are registered later.

        :return: Index of the first agent slice.
        :raises ValueError: If no registered name contains ``h.AGENT``.
        """
        # Compare against None explicitly: a cached index of 0 is falsy and would
        # otherwise be recomputed on every access.
        if self._agent_start_idx is None:
            self._agent_start_idx = min(idx for idx, x in self.items() if h.AGENT in x)
        return self._agent_start_idx
class Zones(Register):
    """
    Register of the zone symbols found in a parsed level, plus one slice per zone.

    Indexing and ``by_name`` return the zone slice; use ``get_name`` for the symbol.
    """

    def __init__(self, parsed_level):
        """
        :param parsed_level: Level representation whose unique symbols (except ``h.WALL``)
                             each become a zone. Presumably an array of symbols - confirm
                             against the caller.
        """
        super(Zones, self).__init__()
        slices = list()
        self._accounting_zones = list()
        self._danger_zones = list()
        for symbol in np.unique(parsed_level):
            if symbol == h.WALL:
                continue
            self.__add__(symbol)
            slices.append(h.one_hot_level(parsed_level, symbol))
            # Danger zones and accounting zones are tracked in separate symbol lists.
            if symbol == h.DANGER_ZONE:
                self._danger_zones.append(symbol)
            else:
                self._accounting_zones.append(symbol)
        self._zone_slices = np.stack(slices)

    @property
    def danger_zone(self):
        """:return: The slice registered for ``h.DANGER_ZONE``."""
        # ``by_name`` of this class already resolves the name to its slice; using that
        # result as an index into ``_zone_slices`` again would be wrong.
        return self.by_name(h.DANGER_ZONE)

    @property
    def accounting_zones(self):
        """:return: List of the slices of all zones other than ``h.DANGER_ZONE``."""
        return [self[idx] for idx, name in self.items() if name != h.DANGER_ZONE]

    def __getitem__(self, item):
        """
        :param item: Index of the zone.
        :return: The slice stored at that index.
        """
        return self._zone_slices[item]

    def get_name(self, item):
        """
        :param item: Index of the zone.
        :return: The symbol registered under that index.
        """
        return self._register[item]

    def by_name(self, item):
        """
        :param item: Registered zone symbol.
        :return: The slice that belongs to this symbol.
        """
        return self[super(Zones, self).by_name(item)]

    def register_additional_items(self, other: Union[str, List[str]]):
        """
        Zones are fixed once the level has been parsed.

        :raises AttributeError: Always.
        """
        raise AttributeError('You are not allowed to add additional Zones in runtime.')

19
main.py
View File

@ -9,11 +9,12 @@ import pandas as pd
from stable_baselines3.common.callbacks import CallbackList from stable_baselines3.common.callbacks import CallbackList
from environments.factory.base_factory import MovementProperties
from environments.factory.simple_factory import DirtProperties, SimpleFactory from environments.factory.simple_factory import DirtProperties, SimpleFactory
from environments.helpers import IGNORED_DF_COLUMNS from environments.helpers import IGNORED_DF_COLUMNS
from environments.logging.monitor import MonitorCallback from environments.logging.monitor import MonitorCallback
from environments.logging.plotting import prepare_plot from environments.logging.plotting import prepare_plot
from environments.logging.recorder import RecorderCallback
from environments.utility_classes import MovementProperties
warnings.filterwarnings('ignore', category=FutureWarning) warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning) warnings.filterwarnings('ignore', category=UserWarning)
@ -91,8 +92,8 @@ if __name__ == '__main__':
from algorithms.reg_dqn import RegDQN from algorithms.reg_dqn import RegDQN
# from sb3_contrib import QRDQN # from sb3_contrib import QRDQN
dirt_props = DirtProperties(clean_amount=3, gain_amount=0.2, max_global_amount=30, dirt_props = DirtProperties(clean_amount=3, gain_amount=1, max_global_amount=30,
max_local_amount=5, spawn_frequency=1, max_spawn_ratio=0.05) max_local_amount=5, spawn_frequency=3, max_spawn_ratio=0.05)
move_props = MovementProperties(allow_diagonal_movement=True, move_props = MovementProperties(allow_diagonal_movement=True,
allow_square_movement=True, allow_square_movement=True,
allow_no_op=False) allow_no_op=False)
@ -103,9 +104,10 @@ if __name__ == '__main__':
for modeL_type in [A2C, PPO, RegDQN, DQN]: # , QRDQN]: for modeL_type in [A2C, PPO, RegDQN, DQN]: # , QRDQN]:
for seed in range(3): for seed in range(3):
with SimpleFactory(n_agents=1, dirt_properties=dirt_props, pomdp_radius=2, max_steps=400, with SimpleFactory(n_agents=1, dirt_properties=dirt_props, pomdp_radius=2, max_steps=400, parse_doors=False,
movement_properties=move_props, level_name='rooms', frames_to_stack=4, movement_properties=move_props, level_name='rooms', frames_to_stack=4,
omit_agent_slice_in_obs=False, combin_agent_slices_in_obs=True) as env: omit_agent_slice_in_obs=False, combin_agent_slices_in_obs=True, record_episodes=False
) as env:
if modeL_type.__name__ in ["PPO", "A2C"]: if modeL_type.__name__ in ["PPO", "A2C"]:
kwargs = dict(ent_coef=0.01) kwargs = dict(ent_coef=0.01)
@ -127,10 +129,13 @@ if __name__ == '__main__':
out_path /= identifier out_path /= identifier
callbacks = CallbackList( callbacks = CallbackList(
[MonitorCallback(filepath=out_path / f'monitor_{identifier}.pick', plotting=False)] [MonitorCallback(filepath=out_path / f'monitor_{identifier}.pick', plotting=False),
RecorderCallback(filepath=out_path / f'recorder_{identifier}.json', occupation_map=False,
trajectory_map=False
)]
) )
model.learn(total_timesteps=int(1e5), callback=callbacks) model.learn(total_timesteps=int(5e5), callback=callbacks)
save_path = out_path / f'model_{identifier}.zip' save_path = out_path / f'model_{identifier}.zip'
save_path.parent.mkdir(parents=True, exist_ok=True) save_path.parent.mkdir(parents=True, exist_ok=True)

View File

@ -14,7 +14,7 @@ warnings.filterwarnings('ignore', category=UserWarning)
if __name__ == '__main__': if __name__ == '__main__':
model_name = 'A2C_1623923982' model_name = 'PPO_1626075586'
run_id = 0 run_id = 0
out_path = Path(__file__).parent / 'debug_out' out_path = Path(__file__).parent / 'debug_out'
model_path = out_path / model_name model_path = out_path / model_name