mirror of
https://github.com/illiumst/marl-factory-grid.git
synced 2025-07-06 17:41:36 +02:00
Restructuring and Testing Done
This commit is contained in:
370
environments/factory/base/base_factory.py
Normal file
370
environments/factory/base/base_factory.py
Normal file
@ -0,0 +1,370 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
from typing import List, Union, Iterable
|
||||||
|
|
||||||
|
import gym
|
||||||
|
import numpy as np
|
||||||
|
from gym import spaces
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
from gym.wrappers import FrameStack
|
||||||
|
|
||||||
|
from environments.helpers import Constants as c, Constants
|
||||||
|
from environments import helpers as h
|
||||||
|
from environments.factory.base.objects import Slice, Agent, Tile, Action, MoveableEntity
|
||||||
|
from environments.factory.base.registers import StateSlices, Actions, Entities, Agents, Doors, FloorTiles
|
||||||
|
from environments.utility_classes import MovementProperties
|
||||||
|
|
||||||
|
# Prefix of the keys written into the recorded state summary (e.g. 'rec_step').
REC_TAC = 'rec'
|
||||||
|
|
||||||
|
|
||||||
|
# noinspection PyAttributeOutsideInit
|
||||||
|
class BaseFactory(gym.Env):
    """Base class of the grid-world factory environments.

    The state is kept as a stack of 2D slices (level, optional doors, one slice per agent and
    whatever slices a subclass registers) and mirrored into ``self._obs_cube``.

    Subclasses have to implement ``additional_actions``, ``additional_entities``,
    ``additional_slices``, ``do_additional_actions``, ``calculate_reward`` and ``render``.
    """

    @property
    def action_space(self):
        """Discrete space with one entry per registered action."""
        return spaces.Discrete(self._actions.n)

    @property
    def observation_space(self):
        """Box space describing a single agent's observation.

        The first axis is reduced by the agent slices that are dropped from the observation:
        all of them when they are omitted, all but one when they are combined.
        """
        agent_slice = self.n_agents if self.omit_agent_slice_in_obs else 0
        agent_slice = (self.n_agents - 1) if self.combin_agent_slices_in_obs else agent_slice
        if self.pomdp_radius:
            shape = (self._obs_cube.shape[0] - agent_slice, self.pomdp_radius * 2 + 1, self.pomdp_radius * 2 + 1)
        else:
            shape = [x - agent_slice if idx == 0 else x for idx, x in enumerate(self._obs_cube.shape)]
        return spaces.Box(low=0, high=1, shape=shape, dtype=np.float32)

    @property
    def pomdp_diameter(self):
        """Edge length of the partial observation window (``2 * radius + 1``)."""
        return self.pomdp_radius * 2 + 1

    @property
    def movement_actions(self):
        """The movement actions held by the action register."""
        return self._actions.movement_actions

    @property
    def additional_actions(self) -> Union[str, List[str]]:
        """
        When inheriting from this base class, you must implement this property.

        :return: A list of Action objects holding all additional actions.
        :rtype: List[Action]
        """
        raise NotImplementedError('Please register additional actions ')

    @property
    def additional_entities(self) -> Union[Entities, List[Entities]]:
        """
        When inheriting from this base class, you must implement this property.

        :return: A single Entities collection or a list of such.
        :rtype: Union[Entities, List[Entities]]
        """
        raise NotImplementedError('Please register additional entities.')

    @property
    def additional_slices(self) -> Union[Slice, List[Slice]]:
        """
        When inheriting from this base class, you must implement this property.

        :return: A list of Slice objects.
        :rtype: List[Slice]
        """
        raise NotImplementedError('Please register additional slices.')

    def __enter__(self):
        """Return the environment itself, or a ``FrameStack`` wrapper when frames are stacked."""
        return self if self.frames_to_stack == 0 else FrameStack(self, self.frames_to_stack)

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the environment when the ``with`` block is left."""
        self.close()

    def __init__(self, level_name='simple', n_agents=1, max_steps=int(5e2), pomdp_radius: Union[None, int] = 0,
                 movement_properties: MovementProperties = MovementProperties(), parse_doors=False,
                 combin_agent_slices_in_obs: bool = False, frames_to_stack=0, record_episodes=False,
                 omit_agent_slice_in_obs=False, done_at_collision=False, **kwargs):
        """Store the configuration, build the action register and reset the environment.

        :param level_name: Name (without ``.txt``) of the level file to load.
        :param n_agents: Number of agents placed in the level.
        :param max_steps: Step count at which an episode is flagged as done.
        :param pomdp_radius: Radius of the partial observation window; falsy for full observations.
        :param movement_properties: Which movement actions (and no-op) are available.
        :param parse_doors: Whether doors are parsed from the level and usable by agents.
        :param combin_agent_slices_in_obs: Sum all agent slices into a single observation slice.
        :param frames_to_stack: Number of frames stacked by ``__enter__``; must be 0 or >= 2.
        :param record_episodes: Add a state summary to the ``info`` dict.
        :param omit_agent_slice_in_obs: Drop the agent slices from the observation.
        :param done_at_collision: End the episode as soon as a collision occurs.
        """
        # Both flags may be off, but they must never be on at the same time.
        assert not (combin_agent_slices_in_obs and omit_agent_slice_in_obs), \
            'Both options are exclusive'
        assert frames_to_stack != 1 and frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1."

        # Attribute Assignment
        self.movement_properties = movement_properties
        self.level_name = level_name
        self._level_shape = None

        self.n_agents = n_agents
        self.max_steps = max_steps
        self.pomdp_radius = pomdp_radius
        self.combin_agent_slices_in_obs = combin_agent_slices_in_obs
        self.omit_agent_slice_in_obs = omit_agent_slice_in_obs
        self.frames_to_stack = frames_to_stack

        self.done_at_collision = done_at_collision
        self.record_episodes = record_episodes
        self.parse_doors = parse_doors

        # Actions
        self._actions = Actions(self.movement_properties, can_use_doors=self.parse_doors)
        if additional_actions := self.additional_actions:
            self._actions.register_additional_items(additional_actions)

        self.reset()

    def _init_state_slices(self) -> StateSlices:
        """Build the slice register: level, optional doors, one slice per agent, subclass slices."""
        state_slices = StateSlices()

        # Level
        level_filepath = Path(__file__).parent.parent / h.LEVELS_DIR / f'{self.level_name}.txt'
        parsed_level = h.parse_level(level_filepath)
        level = [Slice(c.LEVEL.name, h.one_hot_level(parsed_level))]
        self._level_shape = level[0].shape

        # Doors
        # Registered under the enum *name*, because every lookup (by_enum / get_idx) uses enum.name.
        parsed_doors = h.one_hot_level(parsed_level, c.DOOR)
        doors = [Slice(c.DOORS.name, parsed_doors)] if parsed_doors.any() and self.parse_doors else []

        # Agents
        agents = [Slice(f'{c.AGENT.name}#{i}', np.zeros_like(level[0].slice)) for i in range(self.n_agents)]
        state_slices.register_additional_items(level + doors + agents)

        # Additional Slices from SubDomains
        if additional_slices := self.additional_slices:
            state_slices.register_additional_items(additional_slices)
        return state_slices

    def _init_obs_cube(self) -> np.ndarray:
        """Allocate the observation cube and, for partial observations, its padded twin.

        The padded cube is filled with the free-cell value, except for slice 0, which is filled
        with the occupied-cell value; the state is then copied into its centre.
        """
        x, y = self._slices.by_enum(c.LEVEL).shape
        state = np.zeros((len(self._slices), x, y))
        state[0] = self._slices.by_enum(c.LEVEL).slice
        if r := self.pomdp_radius:
            self._padded_obs_cube = np.full((len(self._slices), x + r * 2, y + r * 2), c.FREE_CELL.value)
            self._padded_obs_cube[0] = c.OCCUPIED_CELL.value
            self._padded_obs_cube[:, r:r + x, r:r + y] = state
        return state

    def _init_entities(self):
        """Create tiles, doors (optional) and agents and collect them in an ``Entities`` register."""
        # Tile Init
        self._tiles = FloorTiles.from_argwhere_coordinates(self._slices.by_enum(c.LEVEL).free_tiles)

        # Door Init
        if self.parse_doors:
            tiles = [self._tiles.by_pos(x) for x in self._slices.by_enum(c.DOORS).occupied_tiles]
            self._doors = Doors.from_tiles(tiles, context=self._tiles)

        # Agent Init on random positions
        # NOTE(review): np.random.choice samples with replacement by default, so two agents may
        # start on the same tile -- confirm whether that is intended.
        self._agents = Agents.from_tiles(np.random.choice(self._tiles, self.n_agents))
        entities = Entities()
        entities.register_additional_items([self._agents])

        if self.parse_doors:
            entities.register_additional_items([self._doors])

        if additional_entities := self.additional_entities:
            entities.register_additional_items([additional_entities])

        return entities

    def reset(self) -> (np.ndarray, int, bool, dict):
        """Rebuild slices, observation cube and entities and restart the step counter.

        :return: ``(None, None, None, info)``; ``info`` holds the state summary when episodes are
                 recorded and is empty otherwise.
        """
        self._slices = self._init_state_slices()
        self._obs_cube = self._init_obs_cube()
        self._entitites = self._init_entities()
        self._flush_state()
        self._steps = 0

        info = self._summarize_state() if self.record_episodes else {}
        return None, None, None, info

    def pre_step(self) -> None:
        """Hook called at the beginning of every step; does nothing by default."""
        pass

    def post_step(self) -> dict:
        """Hook called at the end of every step.

        :return: Entries to merge into the step's ``info`` dict; empty by default. ``step`` feeds
                 the result to ``dict.update``, so it must not be ``None``.
        """
        return {}

    def step(self, actions):
        """Apply one action per agent and advance the environment by one step.

        :param actions: A single action index or an iterable with one index per agent.
        :return: ``(observation, reward, done, info)``.
        """
        actions = [actions] if isinstance(actions, int) or np.isscalar(actions) else actions
        assert isinstance(actions, Iterable), f'"actions" has to be in [{int, list}]'
        self._steps += 1
        done = False

        # Pre step Hook for later use
        self.pre_step()

        # Move this into a separate function?
        for action, agent in zip(actions, self._agents):
            agent.clear_temp_sate()
            action_obj = self._actions[action]
            if self._actions.is_moving_action(action):
                valid = self._move_or_colide(agent, action_obj)
            elif self._actions.is_no_op(action):
                valid = c.VALID.value
            elif self._actions.is_door_usage(action):
                # Check if the agent really stands on a door:
                if door := self._doors.by_pos(agent.pos):
                    door.use()
                    valid = c.VALID.value
                # When it doesn't...
                else:
                    valid = c.NOT_VALID.value
            else:
                valid = self.do_additional_actions(agent, action)
            agent.temp_action = action
            agent.temp_valid = valid

        self._flush_state()

        tiles_with_collisions = self.get_all_tiles_with_collisions()
        for tile in tiles_with_collisions:
            guests = tile.guests_that_can_collide
            for i, guest in enumerate(guests):
                # Every guest collides with all *other* guests of the tile.
                this_collisions = guests[:]
                del this_collisions[i]
                guest.temp_collisions = this_collisions

        if self.done_at_collision and tiles_with_collisions:
            done = True

        # Step the door close interval
        if self.parse_doors:
            self._doors.tick_doors()

        # Finalize
        reward, info = self.calculate_reward()
        if self._steps >= self.max_steps:
            done = True
        info.update(step_reward=reward, step=self._steps)
        if self.record_episodes:
            info.update(self._summarize_state())

        # Post step Hook for later use; tolerate overrides that return nothing.
        info.update(self.post_step() or {})

        obs = self._get_observations()

        return obs, reward, done, info

    def _flush_state(self):
        """Re-draw all non-level slices of the observation cube from doors and agents."""
        self._obs_cube[np.arange(len(self._slices)) != self._slices.get_idx(c.LEVEL)] = c.FREE_CELL.value
        if self.parse_doors:
            for door in self._doors:
                if door.is_open:
                    self._obs_cube[self._slices.get_idx(c.DOORS)][door.pos] = c.IS_OPEN_DOOR.value
                else:
                    self._obs_cube[self._slices.get_idx(c.DOORS)][door.pos] = c.IS_CLOSED_DOOR.value
        for agent in self._agents:
            self._obs_cube[self._slices.get_idx_by_name(agent.name)][agent.pos] = c.OCCUPIED_CELL.value
            if agent.last_pos != h.NO_POS:
                self._obs_cube[self._slices.get_idx_by_name(agent.name)][agent.last_pos] = c.FREE_CELL.value

    def _get_observations(self) -> np.ndarray:
        """Return the single agent's observation, or the stacked observations of all agents."""
        if self.n_agents == 1:
            obs = self._build_per_agent_obs(self._agents[0])
        elif self.n_agents >= 2:
            obs = np.stack([self._build_per_agent_obs(agent) for agent in self._agents])
        else:
            raise ValueError('n_agents cannot be smaller than 1!!')
        return obs

    def _build_per_agent_obs(self, agent: Agent) -> np.ndarray:
        """Build the observation of one agent.

        With a ``pomdp_radius`` the observation is the window around the agent cut from the padded
        cube, otherwise the full cube. Agent slices are then omitted or summed into one slice,
        depending on the configuration.
        """
        first_agent_slice = self._slices.AGENTSTARTIDX
        if r := self.pomdp_radius:
            x, y = self._level_shape
            self._padded_obs_cube[:, r:r + x, r:r + y] = self._obs_cube
            global_x, global_y = agent.pos
            # Shift into the coordinate system of the padded cube.
            global_x += r
            global_y += r
            x0, x1 = max(0, global_x - r), global_x + r + 1
            y0, y1 = max(0, global_y - r), global_y + r + 1
            obs = self._padded_obs_cube[:, x0:x1, y0:y1]
        else:
            obs = self._obs_cube

        if self.omit_agent_slice_in_obs:
            # Agent slices are identified by their name, exactly as in the combine branch below.
            keep = [idx for idx, state_slice in self._slices.items() if c.AGENT.name not in state_slice.name]
            return obs[keep]

        if self.combin_agent_slices_in_obs:
            agent_idxs = [idx for idx, state_slice in self._slices.items() if c.AGENT.name in state_slice.name]
            agent_obs = np.sum(obs[agent_idxs], axis=0, keepdims=True)
            obs = np.concatenate((obs[:first_agent_slice], agent_obs, obs[first_agent_slice + self.n_agents:]))
        return obs

    def do_additional_actions(self, agent_i: int, action: int) -> bool:
        """Perform a subclass-specific action; must be implemented by subclasses.

        NOTE(review): ``step`` passes the agent object (not an index) as ``agent_i``.
        """
        raise NotImplementedError

    def get_all_tiles_with_collisions(self) -> List[Tile]:
        """Return every tile that holds at least two guests which can collide."""
        tiles_with_collisions = list()
        for tile in self._tiles:
            if tile.is_occupied():
                guests = [guest for guest in tile.guests if guest.can_collide]
                if len(guests) >= 2:
                    tiles_with_collisions.append(tile)
        return tiles_with_collisions

    def _move_or_colide(self, agent: Agent, action: Action) -> Constants:
        """Move the agent if the move is valid.

        :return: The result of ``agent.move`` for a valid move, ``c.NOT_VALID`` otherwise.
        """
        new_tile, valid = self._check_agent_move(agent, action)
        if valid:
            # Does not collide with level boundaries
            return agent.move(new_tile)
        else:
            # Agent seems to be trying to collide in this step
            return c.NOT_VALID

    def _check_agent_move(self, agent, action: Action) -> (Tile, bool):
        """Determine the tile a movement action leads to and whether the move is allowed.

        :return: ``(target_tile, c.VALID)`` for an allowed move, ``(agent.tile, c.NOT_VALID)`` when
                 the target is no floor tile or a closed door blocks the way.
        """
        x_diff, y_diff = h.ACTIONMAP[action.name]
        new_tile = self._tiles.by_pos((agent.x + x_diff, agent.y + y_diff))

        if not new_tile:
            # No floor tile at the target position: the agent stays where it is.
            return agent.tile, c.NOT_VALID

        if self.parse_doors and agent.last_pos != h.NO_POS:
            door = self._doors.by_pos(agent.pos)
            # Standing in a closed door, an agent may only move to tiles linked to where it came from.
            if door and not door.is_open and not door.is_linked(agent.last_pos, new_tile.pos):
                return agent.tile, c.NOT_VALID

        return new_tile, c.VALID

    def calculate_reward(self) -> (int, dict):
        """Compute the step reward; must be implemented by subclasses.

        :return: ``(reward, info)``
        """
        raise NotImplementedError

    def render(self, mode='human'):
        """Render the environment; must be implemented by subclasses."""
        raise NotImplementedError

    def save_params(self, filepath: Path):
        """Dump all public instance attributes as YAML to ``filepath``, creating parent folders."""
        # A leading double underscore is already covered by the single-underscore test.
        d = {key: val for key, val in self.__dict__.items() if not key.startswith('_')}
        filepath.parent.mkdir(parents=True, exist_ok=True)
        with filepath.open('w') as f:
            yaml.dump(d, f)

    def _summarize_state(self):
        """Collect the step count and the state summary of every entity that provides one."""
        summary = {f'{REC_TAC}_step': self._steps}
        for entity in self._entitites:
            if hasattr(entity, 'summarize_state'):
                summary.update({f'{REC_TAC}_{entity.name}': entity.summarize_state()})
        return summary
|
266
environments/factory/base/objects.py
Normal file
266
environments/factory/base/objects.py
Normal file
@ -0,0 +1,266 @@
|
|||||||
|
import itertools
|
||||||
|
|
||||||
|
import networkx as nx
|
||||||
|
import numpy as np
|
||||||
|
from environments import helpers as h
|
||||||
|
from environments.helpers import Constants as c
|
||||||
|
import itertools
|
||||||
|
|
||||||
|
|
||||||
|
def sub(p, q):
    """Return the difference ``p - q``."""
    difference = p - q
    return difference
|
||||||
|
|
||||||
|
|
||||||
|
class Object:
    """Base of all environment objects; carries nothing but an identifier."""

    def __init__(self, identifier, **kwargs):
        """Store ``identifier``; any further keyword arguments are reported and ignored."""
        self._identifier = identifier
        if kwargs:
            print(f'Following kwargs were passed, but ignored: {kwargs}')

    def __repr__(self):
        cls_name = self.__class__.__name__
        return f'{cls_name}({self._identifier})'

    def __bool__(self):
        # Objects are always truthy, even if a subclass defines __len__.
        return True

    @property
    def name(self):
        """The identifier of this object."""
        return self._identifier

    @property
    def i(self):
        """Short alias for the identifier."""
        return self._identifier
|
||||||
|
|
||||||
|
|
||||||
|
class Action(Object):
    """An action an agent can choose; its name is its identifier."""

    def __init__(self, *args):
        super(Action, self).__init__(*args)

    @property
    def name(self):
        """The action's identifier."""
        identifier = self.i
        return identifier
|
||||||
|
|
||||||
|
|
||||||
|
class Slice(Object):
    """A named 2D array forming one layer of the environment state."""

    def __init__(self, identifier, arrayslice):
        super(Slice, self).__init__(identifier)
        self.slice = arrayslice

    @property
    def shape(self):
        """Shape of the wrapped array."""
        return self.slice.shape

    @property
    def free_tiles(self):
        """Coordinates of all cells holding the free-cell value."""
        is_free = self.slice == c.FREE_CELL.value
        return np.argwhere(is_free)

    @property
    def occupied_tiles(self):
        """Coordinates of all cells holding the occupied-cell value."""
        is_occupied = self.slice == c.OCCUPIED_CELL.value
        return np.argwhere(is_occupied)
|
||||||
|
|
||||||
|
|
||||||
|
class Wall(Object):
    """Marker type for walls; adds nothing to ``Object``."""
|
||||||
|
|
||||||
|
|
||||||
|
class Tile(Object):
    """A floor position that keeps track of the guests currently standing on it."""

    def __init__(self, i, pos):
        super(Tile, self).__init__(i)
        self._guests = dict()
        self._pos = tuple(pos)

    def __len__(self):
        """Number of guests on this tile."""
        return len(self._guests)

    @property
    def pos(self):
        """Position of the tile as a tuple."""
        return self._pos

    @property
    def x(self):
        """First coordinate of the position."""
        return self.pos[0]

    @property
    def y(self):
        """Second coordinate of the position."""
        return self.pos[1]

    @property
    def guests(self):
        """View on all guests of this tile."""
        return self._guests.values()

    @property
    def guests_that_can_collide(self):
        """List of the guests whose ``can_collide`` flag is set."""
        return [guest for guest in self.guests if guest.can_collide]

    def is_empty(self):
        """True if no guest is on the tile."""
        return not len(self._guests)

    def is_occupied(self):
        """Number of guests on the tile (truthy when occupied)."""
        return len(self._guests)

    def enter(self, guest):
        """Add ``guest`` under its name; returns False if that name is already present."""
        if guest.name in self._guests:
            return False
        self._guests.update({guest.name: guest})
        return True

    def leave(self, guest):
        """Remove ``guest``; returns False if it was not on the tile."""
        try:
            del self._guests[guest.name]
        except (ValueError, KeyError):
            return False
        else:
            return True
|
||||||
|
|
||||||
|
|
||||||
|
class Entity(Object):
    """An object that is located on a tile."""

    @property
    def can_collide(self):
        """Whether this entity takes part in collisions; True by default."""
        return True

    @property
    def encoding(self):
        """Value representing this entity; 1 by default."""
        return 1

    @property
    def x(self):
        """First coordinate of the entity's position."""
        return self.pos[0]

    @property
    def y(self):
        """Second coordinate of the entity's position."""
        return self.pos[1]

    @property
    def pos(self):
        """Position of the tile the entity is on."""
        return self._tile.pos

    @property
    def tile(self):
        """The tile the entity is on."""
        return self._tile

    def __init__(self, identifier, tile: Tile, **kwargs):
        """Place the entity on ``tile``.

        :param identifier: Identifier (name) of the entity.
        :param tile: The tile the entity starts on.
        :param kwargs: Forwarded to ``Object``, which reports and ignores them.
        """
        super(Entity, self).__init__(identifier, **kwargs)
        self._tile = tile
        # Register as guest right away; otherwise the tile only learns about the entity after
        # its first move, and guest-based checks (collisions, len(tile)) miss it until then.
        tile.enter(self)

    def summarize_state(self):
        """Return a shallow copy of the instance's attribute dict."""
        return self.__dict__.copy()
|
||||||
|
|
||||||
|
|
||||||
|
class MoveableEntity(Entity):
    """An entity that can change tiles and remembers the tile it came from."""

    def __init__(self, *args, **kwargs):
        super(MoveableEntity, self).__init__(*args, **kwargs)
        self._last_tile = None

    @property
    def last_tile(self):
        """The previously occupied tile, or None before the first move."""
        return self._last_tile

    @property
    def last_pos(self):
        """Position of the previous tile, or ``h.NO_POS`` before the first move."""
        return self._last_tile.pos if self._last_tile else h.NO_POS

    @property
    def direction_of_view(self):
        """Component-wise difference between the last and the current position."""
        (prev_x, prev_y), (now_x, now_y) = self.last_pos, self.pos
        return prev_x - now_x, prev_y - now_y

    def move(self, next_tile):
        """Relocate to ``next_tile``; returns False (and does nothing) if already there."""
        origin = self.tile
        if origin == next_tile:
            return False
        next_tile.enter(self)
        origin.leave(self)
        self._tile = next_tile
        self._last_tile = origin
        return True
|
||||||
|
|
||||||
|
|
||||||
|
class Door(Entity):
    """A door that can be opened and closed and that closes itself after a while.

    ``connectivity`` is a graph over the floor tiles surrounding the door. Neighbouring positions
    that are at most one step apart are connected directly; while the door is open, the door's own
    position is connected to all of them as well.
    """

    @property
    def can_collide(self):
        """Doors never take part in collisions."""
        return False

    @property
    def encoding(self):
        """1 while closed, -1 while open."""
        return 1 if self.is_closed else -1

    def __init__(self, *args, context, closed_on_init=True, auto_close_interval=500):
        """Create the door and the connectivity graph of its neighbourhood.

        :param args: Forwarded to ``Entity`` (identifier and tile).
        :param context: Tile register used to look up the neighbouring floor tiles via ``by_pos``.
        :param closed_on_init: Open the door right away when False.
        :param auto_close_interval: Value ``time_to_close`` is reset to whenever the door opens.
        """
        super(Door, self).__init__(*args)
        self._state = c.IS_CLOSED_DOOR
        self.auto_close_interval = auto_close_interval
        self.time_to_close = -1
        # All eight offsets around the door; the product's last element (0, 0) is the door itself.
        neighbor_pos = list(itertools.product([-1, 1, 0], repeat=2))[:-1]
        neighbor_tiles = [context.by_pos(tuple([sum(x) for x in zip(self.pos, diff)])) for diff in neighbor_pos]
        neighbor_pos = [x.pos for x in neighbor_tiles if x]
        self.connectivity = nx.Graph()
        # Every neighbour is a node, even without an edge; otherwise path queries for isolated
        # neighbours (the usual case for a door in a wall) fail with NodeNotFound.
        self.connectivity.add_nodes_from(neighbor_pos)
        for a, b in itertools.combinations(neighbor_pos, 2):
            if not max(abs(np.subtract(a, b))) > 1:
                self.connectivity.add_edge(a, b)
        if not closed_on_init:
            self._open()

    @property
    def is_closed(self):
        """True while the door is closed."""
        return self._state == c.IS_CLOSED_DOOR

    @property
    def is_open(self):
        """True while the door is open."""
        return self._state == c.IS_OPEN_DOOR

    @property
    def status(self):
        """The current door state constant."""
        return self._state

    def use(self):
        """Toggle the door: close it when open, open it otherwise."""
        if self._state == c.IS_OPEN_DOOR:
            self._close()
        else:
            self._open()

    def tick(self):
        """Advance the auto-close timer while the door is open and its tile holds one guest."""
        if self.is_open and len(self.tile) == 1 and self.time_to_close:
            self.time_to_close -= 1
        elif self.is_open and not self.time_to_close and len(self.tile) == 1:
            self.use()

    def _open(self):
        """Connect the door position to all neighbours and restart the auto-close timer."""
        neighbors = [x for x in self.connectivity.nodes if x != self.pos]
        # Add the node explicitly so that _close can remove it even without any neighbour.
        self.connectivity.add_node(self.pos)
        self.connectivity.add_edges_from([(self.pos, x) for x in neighbors])
        self._state = c.IS_OPEN_DOOR
        self.time_to_close = self.auto_close_interval

    def _close(self):
        """Disconnect the door position from its neighbours."""
        if self.pos in self.connectivity:
            self.connectivity.remove_node(self.pos)
        self._state = c.IS_CLOSED_DOOR

    def is_linked(self, old_pos, new_pos):
        """Return True if a path between ``old_pos`` and ``new_pos`` exists in the graph.

        Positions that are not part of the graph are treated as not linked.
        """
        try:
            _ = nx.shortest_path(self.connectivity, old_pos, new_pos)
            return True
        except (nx.exception.NetworkXNoPath, nx.exception.NodeNotFound):
            return False
|
||||||
|
|
||||||
|
|
||||||
|
class Agent(MoveableEntity):
    """A moveable entity carrying per-step bookkeeping (action, validity, collisions)."""

    def __init__(self, *args):
        super(Agent, self).__init__(*args)
        self.clear_temp_sate()

    # noinspection PyAttributeOutsideInit
    def clear_temp_sate(self):
        """Reset the per-step attributes to their initial values."""
        self.temp_collisions = list()
        self.temp_valid = None
        self.temp_action = -1
|
292
environments/factory/base/registers.py
Normal file
292
environments/factory/base/registers.py
Normal file
@ -0,0 +1,292 @@
|
|||||||
|
import itertools
|
||||||
|
import random
|
||||||
|
from enum import Enum
|
||||||
|
from typing import List, Union
|
||||||
|
|
||||||
|
import networkx as nx
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from environments.factory.base.objects import Entity, Tile, Agent, Door, Slice, Action
|
||||||
|
from environments.utility_classes import MovementProperties
|
||||||
|
from environments import helpers as h
|
||||||
|
from environments.helpers import Constants as c
|
||||||
|
|
||||||
|
|
||||||
|
class Register:
    """Insertion-ordered collection of objects, addressable by running index and by name.

    ``_accepted_objects`` is the only type that may be added.
    """

    _accepted_objects = Entity

    def __init__(self):
        self._register = dict()
        self._names = dict()

    # ---- alternative constructors -------------------------------------------------------------

    @classmethod
    def from_argwhere_coordinates(cls, positions: (int, int), tiles):
        """Create one object per position, placed on the tile found at that position."""
        created = [cls._accepted_objects(idx, tiles.by_pos(pos)) for idx, pos in enumerate(positions)]
        new_register = cls()
        new_register.register_additional_items(created)
        return new_register

    @classmethod
    def from_tiles(cls, tiles, **kwargs):
        """Create one object per tile, named ``<TYPENAME>#<index>``; kwargs go to each object."""
        created = []
        for i, tile in enumerate(tiles):
            created.append(cls._accepted_objects(f'{cls._accepted_objects.__name__.upper()}#{i}', tile, **kwargs))
        new_register = cls()
        new_register.register_additional_items(created)
        return new_register

    # ---- container protocol -------------------------------------------------------------------

    @property
    def name(self):
        """The class name of this register."""
        return self.__class__.__name__

    @property
    def n(self):
        """Number of registered items."""
        return len(self)

    def __len__(self):
        return len(self._register)

    def __iter__(self):
        return iter(self.values())

    def __repr__(self):
        return f'{self.__class__.__name__}({self._register})'

    def __getitem__(self, item):
        try:
            return self._register[item]
        except KeyError:
            print('NO')
            raise

    def keys(self):
        """Indices of the registered items."""
        return self._register.keys()

    def values(self):
        """The registered items."""
        return self._register.values()

    def items(self):
        """``(index, item)`` pairs."""
        return self._register.items()

    # ---- registration -------------------------------------------------------------------------

    def __add__(self, other: _accepted_objects):
        """Append ``other`` under the next free index and return the register itself."""
        assert isinstance(other, self._accepted_objects), f'All item names have to be of type ' \
                                                          f'{self._accepted_objects}, ' \
                                                          f'but were {other.__class__}.,'
        next_idx = len(self._register)
        self._names[other.name] = next_idx
        self._register[next_idx] = other
        return self

    def register_additional_items(self, others: List[_accepted_objects]):
        """Add all ``others`` one after another and return the register itself."""
        for other in others:
            self + other
        return self

    # ---- lookups ------------------------------------------------------------------------------

    def by_name(self, item):
        """The item registered under the name ``item``."""
        return self[self._names[item]]

    def by_enum(self, enum: Enum):
        """The item registered under ``enum.name``."""
        return self[self._names[enum.name]]

    def get_name(self, item):
        """The name of the item at index ``item``."""
        return self._register[item].name

    def get_idx_by_name(self, item):
        """The index of the item named ``item``."""
        return self._names[item]

    def get_idx(self, enum: Enum):
        """The index of the item named ``enum.name``."""
        return self._names[enum.name]
|
||||||
|
|
||||||
|
|
||||||
|
class EntityRegister(Register):
    """Register whose items can additionally be looked up by their position."""

    @classmethod
    def from_argwhere_coordinates(cls, argwhere_coordinates):
        """Create one accepted object per coordinate, identified by its running index."""
        tiles = cls()
        tiles.register_additional_items([cls._accepted_objects(i, pos) for i, pos in enumerate(argwhere_coordinates)])
        return tiles

    def __init__(self):
        super(EntityRegister, self).__init__()
        # Maps position -> item.
        self._tiles = dict()

    def __add__(self, other):
        """Register ``other`` and index it by its position.

        :return: The register itself, like ``Register.__add__``.
        """
        super(EntityRegister, self).__add__(other)
        self._tiles[other.pos] = other
        return self

    def by_pos(self, pos):
        """Return the item at ``pos`` or None if there is none.

        :param pos: Position as tuple or numpy array.
        """
        if isinstance(pos, np.ndarray):
            # Arrays are not hashable; convert to the tuple form used as key.
            pos = tuple(pos)
        try:
            return self._tiles[pos]
        except KeyError:
            return None
|
||||||
|
|
||||||
|
|
||||||
|
class Entities(Register):
    """Register of registers; iterating yields the members of all contained registers."""

    _accepted_objects = Register

    @classmethod
    def from_argwhere_coordinates(cls, positions):
        # Not meaningful for a collection of registers.
        raise AttributeError()

    def __iter__(self):
        flattened = []
        for sub_register in self.values():
            flattened.extend(sub_register)
        return iter(flattened)
|
||||||
|
|
||||||
|
|
||||||
|
class FloorTiles(EntityRegister):
    """Position-indexed register of all floor tiles."""

    _accepted_objects = Tile

    @property
    def empty_tiles(self):
        """All tiles without guests, in random order."""
        candidates = [floor_tile for floor_tile in self if floor_tile.is_empty()]
        random.shuffle(candidates)
        return candidates

    @property
    def occupied_tiles(self):
        """All tiles with at least one guest, in random order."""
        candidates = [floor_tile for floor_tile in self if floor_tile.is_occupied()]
        random.shuffle(candidates)
        return candidates
|
||||||
|
|
||||||
|
|
||||||
|
class Agents(Register):
    """Register of all agents."""

    _accepted_objects = Agent

    @property
    def positions(self):
        """Positions of all agents, in registration order."""
        collected = []
        for agent in self:
            collected.append(agent.pos)
        return collected
|
||||||
|
|
||||||
|
|
||||||
|
class Doors(EntityRegister):
    """Position-indexed register of all doors."""

    _accepted_objects = Door

    def tick_doors(self):
        """Call ``tick`` on every registered door."""
        for single_door in self.values():
            single_door.tick()
|
||||||
|
|
||||||
|
|
||||||
|
class Actions(Register):
    """Register of all actions available to the agents.

    Movement actions are registered first, followed by the optional 'use_door' and 'no-op' actions.
    """

    _accepted_objects = Action

    @property
    def movement_actions(self):
        """Mapping ``index -> Action`` of the movement actions only."""
        return self._movement_actions

    def __init__(self, movement_properties: MovementProperties, can_use_doors=False):
        """Register the actions enabled by ``movement_properties`` and ``can_use_doors``.

        :param movement_properties: Flags for square movement, diagonal movement and no-op.
        :param can_use_doors: Register the 'use_door' action.
        """
        self.allow_no_op = movement_properties.allow_no_op
        self.allow_diagonal_movement = movement_properties.allow_diagonal_movement
        self.allow_square_movement = movement_properties.allow_square_movement
        self.can_use_doors = can_use_doors
        super(Actions, self).__init__()

        if self.allow_square_movement:
            self.register_additional_items([self._accepted_objects(direction) for direction in h.MANHATTAN_MOVES])
        if self.allow_diagonal_movement:
            self.register_additional_items([self._accepted_objects(direction) for direction in h.DIAGONAL_MOVES])
        # Snapshot taken before the non-movement actions are added.
        self._movement_actions = self._register.copy()
        if self.can_use_doors:
            self.register_additional_items([self._accepted_objects('use_door')])
        if self.allow_no_op:
            self.register_additional_items([self._accepted_objects('no-op')])

    def is_moving_action(self, action: int):
        """True if the action index ``action`` belongs to a movement action."""
        return action in self.movement_actions.keys()

    def is_no_op(self, action: Union[str, int]):
        """True if ``action`` (index or name) is the 'no-op' action."""
        if isinstance(action, str):
            # Resolve the name to its index; the register itself is keyed by index.
            action = self.get_idx_by_name(action)
        return self[action].name == 'no-op'

    def is_door_usage(self, action: Union[str, int]):
        """True if ``action`` (index or name) is the 'use_door' action."""
        if isinstance(action, str):
            # Resolve the name to its index; the register itself is keyed by index.
            action = self.get_idx_by_name(action)
        return self[action].name == 'use_door'
|
||||||
|
|
||||||
|
|
||||||
|
class StateSlices(Register):
    """Register of the state slices (level, doors, agents, ...)."""

    _accepted_objects = Slice

    @property
    def AGENTSTARTIDX(self):
        """Smallest index of a slice whose name contains the agent name; cached after first use."""
        # Compare against None so that a cached index of 0 is not recomputed.
        if self._agent_start_idx is None:
            self._agent_start_idx = min([idx for idx, x in self.items() if c.AGENT.name in x.name])
        return self._agent_start_idx

    def __init__(self):
        super(StateSlices, self).__init__()
        self._agent_start_idx = None

    def _gather_occupation(self, excluded_slices):
        """Sum the arrays of all slices whose index is not excluded.

        :param excluded_slices: None, a single slice index or a list of slice indices.
        """
        # `excluded_slices or []` would silently drop the valid index 0.
        exclusion = [] if excluded_slices is None else excluded_slices
        assert isinstance(exclusion, (int, list))
        exclusion = exclusion if isinstance(exclusion, list) else [exclusion]

        # Sum the wrapped arrays; Slice objects themselves do not support addition.
        result = np.sum([x.slice for i, x in self.items() if i not in exclusion], axis=0)
        return result

    def free_cells(self, excluded_slices: Union[None, List[int], int] = None) -> np.array:
        """Shuffled coordinates of all cells whose summed occupation equals the free-cell value."""
        occupation = self._gather_occupation(excluded_slices)
        free_cells = np.argwhere(occupation == c.FREE_CELL.value)
        np.random.shuffle(free_cells)
        return free_cells

    def occupied_cells(self, excluded_slices: Union[None, List[int], int] = None) -> np.array:
        """Shuffled coordinates of all cells whose summed occupation equals the occupied-cell value."""
        occupation = self._gather_occupation(excluded_slices)
        occupied_cells = np.argwhere(occupation == c.OCCUPIED_CELL.value)
        np.random.shuffle(occupied_cells)
        return occupied_cells
|
||||||
|
|
||||||
|
|
||||||
|
class Zones(Register):
    """Register of level zones backed by a stacked array of one-hot zone slices.

    NOTE: ``__init__`` currently raises ``NotImplementedError`` as its first
    statement, so this class cannot be instantiated; everything below the
    ``raise`` in ``__init__`` is unreachable draft code.
    """

    @property
    def danger_zone(self):
        """The zone slice looked up via ``self.by_enum(c.DANGER_ZONE)``."""
        return self._zone_slices[self.by_enum(c.DANGER_ZONE)]

    @property
    def accounting_zones(self):
        """List of zone slices for every item not equal to ``c.DANGER_ZONE.value``."""
        # self[idx] goes through the overridden __getitem__, i.e. it indexes _zone_slices.
        return [self[idx] for idx, name in self.items() if name != c.DANGER_ZONE.value]

    def __init__(self, parsed_level):
        # Deliberately disabled: the class is flagged for a rework.
        raise NotImplementedError('This needs a Rework')
        # --- unreachable from here on; kept as reference for the rework ---
        super(Zones, self).__init__()
        slices = list()
        self._accounting_zones = list()
        self._danger_zones = list()
        for symbol in np.unique(parsed_level):
            if symbol == h.WALL:
                # Walls are not treated as a zone.
                continue
            elif symbol == h.DANGER_ZONE:
                # presumably registers the symbol via Register.__add__ - verify against Register
                self + symbol
                slices.append(h.one_hot_level(parsed_level, symbol))
                self._danger_zones.append(symbol)
            else:
                # presumably registers the symbol via Register.__add__ - verify against Register
                self + symbol
                slices.append(h.one_hot_level(parsed_level, symbol))
                self._accounting_zones.append(symbol)

        self._zone_slices = np.stack(slices)

    def __getitem__(self, item):
        """Index into the stacked zone slices (not into the register itself)."""
        return self._zone_slices[item]

    def get_name(self, item):
        """Return the register entry stored under ``item``."""
        return self._register[item]

    def by_name(self, item):
        """Return the zone slice at the position the base class' ``by_name`` yields for ``item``."""
        return self[super(Zones, self).by_name(item)]

    def register_additional_items(self, other: Union[str, List[str]]):
        """Always refuse: zones cannot be added after construction.

        :raises AttributeError: On every call.
        """
        raise AttributeError('You are not allowed to add additional Zones in runtime.')
|
@ -1,364 +0,0 @@
|
|||||||
from pathlib import Path
|
|
||||||
from typing import List, Union, Iterable
|
|
||||||
|
|
||||||
import gym
|
|
||||||
import numpy as np
|
|
||||||
from gym import spaces
|
|
||||||
|
|
||||||
import yaml
|
|
||||||
from gym.wrappers import FrameStack
|
|
||||||
|
|
||||||
from environments import helpers as h
|
|
||||||
from environments.utility_classes import Actions, StateSlices, AgentState, MovementProperties, Zones, DoorState
|
|
||||||
|
|
||||||
|
|
||||||
# noinspection PyAttributeOutsideInit
|
|
||||||
class BaseFactory(gym.Env):
    """Grid-world base environment holding level, door and agent slices in one state array.

    Subclasses have to provide ``additional_actions``, ``do_additional_actions``,
    ``calculate_reward`` and ``render``.
    """

    @property
    def action_space(self):
        """Discrete space with one entry per registered action."""
        return spaces.Discrete(self._actions.n)

    @property
    def observation_space(self):
        """Box space matching the per-agent observation shape.

        The number of slices is reduced by ``n_agents`` when agent slices are
        omitted, or by ``n_agents - 1`` when they are combined into one slice.
        """
        agent_slice = self.n_agents if self.omit_agent_slice_in_obs else 0
        agent_slice = (self.n_agents - 1) if self.combin_agent_slices_in_obs else agent_slice
        if self.pomdp_radius:
            shape = (self._state.shape[0] - agent_slice, self.pomdp_radius * 2 + 1, self.pomdp_radius * 2 + 1)
        else:
            shape = [x-agent_slice if idx == 0 else x for idx, x in enumerate(self._state.shape)]
        return spaces.Box(low=0, high=1, shape=shape, dtype=np.float32)

    @property
    def movement_actions(self):
        """The movement actions of the action register."""
        return self._actions.movement_actions

    @property
    def has_doors(self):
        """``True`` once a ``_doors`` attribute has been set in ``__init__``."""
        return hasattr(self, '_doors')

    def __enter__(self):
        return self if self.frames_to_stack == 0 else FrameStack(self, self.frames_to_stack)

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def __init__(self, level_name='simple', n_agents=1, max_steps=int(5e2), pomdp_radius: Union[None, int] = 0,
                 movement_properties: MovementProperties = MovementProperties(), parse_doors=False,
                 combin_agent_slices_in_obs: bool = False, frames_to_stack=0,
                 omit_agent_slice_in_obs=False, **kwargs):
        """Parse the level, register state slices and actions, then reset.

        :param level_name: Stem of the level text file inside ``h.LEVELS_DIR``.
        :param n_agents: Number of agents to place.
        :param max_steps: Step count at which ``step`` reports ``done``.
        :param pomdp_radius: View radius of the partial observation; falsy for full view.
        :param movement_properties: Passed on to the ``Actions`` register.
        :param parse_doors: If set, door cells are parsed from the level.
        :param combin_agent_slices_in_obs: Sum all agent slices into one observation slice.
        :param frames_to_stack: Number of frames for ``FrameStack`` (0 disables, 1 is rejected).
        :param omit_agent_slice_in_obs: Drop agent slices from the observation.
        :param kwargs: May contain ``additional_slices`` to register further state slices.
        """
        # The two observation options must not be enabled at the same time.
        assert not (combin_agent_slices_in_obs and omit_agent_slice_in_obs), \
            'Both options are exclusive'
        assert frames_to_stack != 1 and frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1."

        self.movement_properties = movement_properties
        self.level_name = level_name

        self.n_agents = n_agents
        self.max_steps = max_steps
        self.pomdp_radius = pomdp_radius
        self.combin_agent_slices_in_obs = combin_agent_slices_in_obs
        self.omit_agent_slice_in_obs = omit_agent_slice_in_obs
        self.frames_to_stack = frames_to_stack

        self.done_at_collision = False

        self._state_slices = StateSlices()

        # Level
        level_filepath = Path(__file__).parent / h.LEVELS_DIR / f'{self.level_name}.txt'
        parsed_level = h.parse_level(level_filepath)
        self._level = h.one_hot_level(parsed_level)
        level_slices = [h.LEVEL]

        # Doors
        if parse_doors:
            parsed_doors = h.one_hot_level(parsed_level, h.DOOR)
            if parsed_doors.any():
                self._doors = parsed_doors
                level_slices.append(h.DOORS)

        # Agents
        offset = len(level_slices)
        self._state_slices.register_additional_items([*level_slices,
                                                      *[f'agent#{i}' for i in range(offset, n_agents + offset)]])

        # Additional Slices from SubDomains
        if 'additional_slices' in kwargs:
            self._state_slices.register_additional_items(kwargs.get('additional_slices'))
        self._zones = Zones(parsed_level)

        self._actions = Actions(self.movement_properties, can_use_doors=self.has_doors)
        self._actions.register_additional_items(self.additional_actions)
        self.reset()

    @property
    def additional_actions(self) -> Union[str, List[str]]:
        """
        When inheriting from this base class, you must implement this property!
        Return the name, or the list of names, of the additional actions; the
        result is handed to ``Actions.register_additional_items``.

        :return: The additional action name(s) to register.
        :rtype: Union[str, List[str]]
        """
        raise NotImplementedError('Please register additional actions ')

    def reset(self) -> (np.ndarray, int, bool, dict):
        """Rebuild the global state: level, optional doors and randomly placed agents.

        :return: ``None``; subclasses are expected to build the observation.
        """
        slices = [np.expand_dims(self._level, 0)]
        self._steps = 0
        self._agent_states = list()

        # Door Init
        if self.has_doors:
            self._door_states = [DoorState(i, tuple(pos)) for i, pos
                                 in enumerate(np.argwhere(self._doors == h.IS_OCCUPIED_CELL))]
            slices.append(np.expand_dims(self._doors, 0))

        # Agent placement ...
        floor_tiles = np.argwhere(self._level == h.IS_FREE_CELL)
        # ... on random positions
        np.random.shuffle(floor_tiles)
        agents = np.zeros((self.n_agents, *self._level.shape), dtype=np.int8)
        for i, (x, y) in enumerate(floor_tiles[:self.n_agents]):
            agents[i, x, y] = h.IS_OCCUPIED_CELL
            agent_state = AgentState(i, -1, pos=(x, y))
            self._agent_states.append(agent_state)
        slices.append(agents)

        # GLOBAL STATE
        self._state = np.concatenate(slices, axis=0)
        return None

    def _get_observations(self) -> np.ndarray:
        """Return one observation for a single agent, or a stack of per-agent observations.

        :raises ValueError: If ``n_agents`` is smaller than 1.
        """
        if self.n_agents == 1:
            obs = self._build_per_agent_obs(0)
        elif self.n_agents >= 2:
            obs = np.stack([self._build_per_agent_obs(agent_i) for agent_i in range(self.n_agents)])
        else:
            raise ValueError('n_agents cannot be smaller than 1!!')
        return obs

    def _build_per_agent_obs(self, agent_i: int) -> np.ndarray:
        """Cut (and pad, if needed) the view of agent ``agent_i`` out of the global state.

        :param agent_i: Index of the agent the observation is built for.
        :return: The observation, with agent slices omitted or combined if configured.
        """
        first_agent_slice = self._state_slices.AGENTSTARTIDX
        # Todo: make this more efficient!
        if self.pomdp_radius:
            pomdp_diameter = self.pomdp_radius * 2 + 1
            global_x, global_y = self._agent_states[agent_i].pos
            x0, x1 = max(0, global_x - self.pomdp_radius), global_x + self.pomdp_radius + 1
            y0, y1 = max(0, global_y - self.pomdp_radius), global_y + self.pomdp_radius + 1
            obs = self._state[:, x0:x1, y0:y1]
            if obs.shape[1] != pomdp_diameter or obs.shape[2] != pomdp_diameter:
                # The view is clipped by the level border: pad with occupied cells.
                obs_padded = np.full((obs.shape[0], pomdp_diameter, pomdp_diameter), h.IS_OCCUPIED_CELL)
                local_x, local_y = np.argwhere(obs[first_agent_slice + agent_i] == h.IS_OCCUPIED_CELL)[0]
                obs_padded[:,
                           abs(local_x-self.pomdp_radius):abs(local_x-self.pomdp_radius)+obs.shape[1],
                           abs(local_y-self.pomdp_radius):abs(local_y-self.pomdp_radius)+obs.shape[2]] = obs
                obs = obs_padded
        else:
            obs = self._state

        if self.omit_agent_slice_in_obs:
            obs_new = obs[[key for key, val in self._state_slices.items() if h.AGENT not in val]]
            return obs_new
        if self.combin_agent_slices_in_obs:
            agent_obs = np.sum(obs[[key for key, val in self._state_slices.items() if 'agent' in val]],
                               axis=0, keepdims=True)
            obs = np.concatenate((obs[:first_agent_slice], agent_obs, obs[first_agent_slice+self.n_agents:]))
        return obs

    def do_additional_actions(self, agent_i: int, action: int) -> ((int, int), bool):
        """Hook for subclass specific actions; must return ``(position, valid)``."""
        raise NotImplementedError

    def step(self, actions):
        """Apply one action per agent, update collisions and doors, compute the reward.

        :param actions: A single scalar action or an iterable with one action per agent.
        :return: ``(None, reward, done, info)``; no observation is built here.
        """
        actions = [actions] if isinstance(actions, int) or np.isscalar(actions) else actions
        assert isinstance(actions, Iterable), f'"actions" has to be in [{int, list}]'
        self._steps += 1
        done = False

        # Move this in a separate function?
        for agent_i, action in enumerate(actions):
            agent = self._agent_states[agent_i]
            if self._actions.is_moving_action(action):
                pos, valid = self.move_or_colide(agent_i, action)
            elif self._actions.is_no_op(action):
                pos, valid = agent.pos, h.VALID
            elif self._actions.is_door_usage(action):
                # Check if the agent really stands on a door:
                if self._state[self._state_slices.by_name(h.DOORS)][agent.pos] == h.IS_OCCUPIED_CELL:
                    door = [door for door in self._door_states if door.pos == agent.pos][0]
                    door.use()
                    pos, valid = agent.pos, h.VALID
                # When he doesn't...
                else:
                    pos, valid = agent.pos, h.NOT_VALID
            else:
                pos, valid = self.do_additional_actions(agent_i, action)
            # Update state accordingly
            agent.update(pos=pos, action_valid=valid, action=action)

        for i, collision_vec in enumerate(self.check_all_collisions(self._agent_states, self._state.shape[0])):
            self._agent_states[i].update(collision_vector=collision_vec)
            if self.done_at_collision and collision_vec.any():
                done = True

        # Step the door close interval
        if self.has_doors:
            agents_pos = [agent.pos for agent in self._agent_states]
            doors_idx = self._state_slices.by_name(h.DOORS)
            for door in self._door_states:
                if door.is_open and door.pos not in agents_pos:
                    if door.time_to_close:
                        door.time_to_close -= 1
                    else:
                        door.use()
                # Write the door state into the door's own cell only; assigning
                # the scalar to the whole slice would turn every cell into a door.
                self._state[doors_idx][door.pos] = 1 if door.is_closed else -1

        reward, info = self.calculate_reward(self._agent_states)

        if self._steps >= self.max_steps:
            done = True

        info.update(step_reward=reward, step=self._steps)

        return None, reward, done, info

    def check_all_collisions(self, agent_states: List[AgentState], collisions: int) -> np.ndarray:
        """Collect one collision vector per agent; rows of non-moving agents stay zero.

        :param agent_states: The agent states to check.
        :param collisions: Length of a single collision vector (number of state slices).
        :return: Array of shape ``(len(agent_states), collisions)``.
        """
        collision_vecs = np.zeros((len(agent_states), collisions))  # n_agents x n_slices
        for agent_state in agent_states:
            # Register only collisions of moving agents
            if self._actions.is_moving_action(agent_state.action):
                collision_vecs[agent_state.i] = self.check_collisions(agent_state)
        return collision_vecs

    def check_collisions(self, agent_state: AgentState) -> np.ndarray:
        """Return the state values at the agent position, without self and door entries.

        For an invalid action the level entry is additionally marked as occupied.
        """
        pos_x, pos_y = agent_state.pos
        # FixMe: We need to find a way to spare out some dimensions, eg. an info dimension etc... a[?,]
        #  https://numpy.org/doc/stable/reference/arrays.indexing.html#boolean-array-indexing
        collisions_vec = self._state[:, pos_x, pos_y].copy()  # "vertical fiber" at position of agent i
        collisions_vec[self._state_slices.AGENTSTARTIDX + agent_state.i] = h.IS_FREE_CELL  # no self-collisions
        # Use the same slice name for membership test and lookup (was 'door' vs. 'doors').
        if h.DOORS in self._state_slices.values():
            collisions_vec[self._state_slices.by_name(h.DOORS)] = h.IS_FREE_CELL  # no door-collisions

        if not agent_state.action_valid:
            # Place a marker to indicate a collision with the level boundaries
            collisions_vec[self._state_slices.by_name(h.LEVEL)] = h.IS_OCCUPIED_CELL
        return collisions_vec

    def do_move(self, agent_i: int, old_pos: (int, int), new_pos: (int, int)) -> None:
        """Move the marker of agent ``agent_i`` from ``old_pos`` to ``new_pos`` in its slice."""
        (x, y), (x_new, y_new) = old_pos, new_pos
        agent_slice_idx = agent_i + self._state_slices.AGENTSTARTIDX
        self._state[agent_slice_idx, x, y] = h.IS_FREE_CELL
        self._state[agent_slice_idx, x_new, y_new] = h.IS_OCCUPIED_CELL

    def move_or_colide(self, agent_i: int, action: int) -> ((int, int), bool):
        """Execute the move if it is valid.

        :return: ``(new_pos, valid)`` for a valid move, ``(old_pos, valid)`` otherwise.
        """
        old_pos, new_pos, valid = self._check_agent_move(agent_i=agent_i, action=self._actions[action])
        if valid:
            # Does not collide with level boundaries
            self.do_move(agent_i, old_pos, new_pos)
            return new_pos, valid
        # Agent seems to be trying to collide in this step
        return old_pos, valid

    def _check_agent_move(self, agent_i, action: str):
        """Compute the target cell of a move and whether the move is allowed.

        :param agent_i: Index of the moving agent.
        :param action: Key into ``h.ACTIONMAP`` giving the ``(x, y)`` offset.
        :return: ``(old_pos, new_pos, valid)``.
        :raises AssertionError: If the agent slice holds more than one marker.
        """
        agent_slice_idx = self._state_slices.AGENTSTARTIDX + agent_i
        agent_slice = self._state[agent_slice_idx]  # horizontal slice from state tensor
        agent_pos = np.argwhere(agent_slice == 1)
        if len(agent_pos) > 1:
            raise AssertionError('Only one agent per slice is allowed.')
        x, y = agent_pos[0]

        # Actions
        x_diff, y_diff = h.ACTIONMAP[action]
        x_new = x + x_diff
        y_new = y + y_diff

        last_pos = self._agent_states[agent_i]._last_pos
        if self.has_doors and last_pos != (-1, -1):
            doors_here = [door for door in self._door_states if door.pos == (x, y)]
            if doors_here and not doors_here[0].is_open:
                # The agent stands in a closed door: it may only go back to the
                # side it came from.
                door = doors_here[0]
                local_door_map = self._state[self._state_slices.by_name(h.LEVEL)][door.pos[0]-1:door.pos[0]+2,
                                                                                   door.pos[1]-1:door.pos[1]+2]
                local_agent_map = np.zeros_like(local_door_map)
                local_agent_map[tuple(np.subtract(door.pos, last_pos))] += 1
                local_agent_map[tuple(np.subtract(door.pos, (x_new, y_new)))] += 1
                if np.all(local_door_map == h.HORIZONTAL_DOOR_MAP):
                    # This is a horizontal Door Configuration
                    goes_back = np.sum(local_agent_map[0]) >= 2 or np.sum(local_agent_map[-1]) >= 2
                else:
                    # This is a vertical Door Configuration
                    goes_back = np.sum(local_agent_map[:, 0]) >= 2 or np.sum(local_agent_map[:, -1]) >= 2
                if not goes_back:
                    # The Agent tries to go through a closed door
                    return (x, y), (x, y), h.NOT_VALID

        valid = h.check_position(self._state[self._state_slices.by_name(h.LEVEL)], (x_new, y_new))

        return (x, y), (x_new, y_new), valid

    def agent_i_position(self, agent_i: int) -> (int, int):
        """Read the position of agent ``agent_i`` from its state slice."""
        positions = np.argwhere(self._state[self._state_slices.AGENTSTARTIDX + agent_i] == h.IS_OCCUPIED_CELL)
        assert positions.shape[0] == 1
        pos_x, pos_y = positions[0]  # a.flatten()
        return pos_x, pos_y

    def free_cells(self, excluded_slices: Union[None, List[int], int] = None) -> np.array:
        """Return the shuffled coordinates of cells that are free over all kept slices.

        :param excluded_slices: ``None``, one slice index or a list of slice
                                indices to ignore when summing the state.
        :return: Rows of ``np.argwhere`` where the summed state equals ``h.IS_FREE_CELL``.
        """
        # Only None means "exclude nothing"; `or []` would also drop index 0.
        excluded_slices = [] if excluded_slices is None else excluded_slices
        assert isinstance(excluded_slices, (int, list))
        excluded_slices = excluded_slices if isinstance(excluded_slices, list) else [excluded_slices]

        state = self._state
        if excluded_slices:
            # A boolean mask handles negative indices as well.
            bool_array = np.full(self._state.shape[0], True)
            bool_array[excluded_slices] = False
            state = self._state[bool_array]

        free_cells = np.argwhere(state.sum(0) == h.IS_FREE_CELL)
        np.random.shuffle(free_cells)
        return free_cells

    def calculate_reward(self, agent_states: List[AgentState]) -> (int, dict):
        """Hook for subclasses; must return ``(reward, info)``."""
        # Returns: Reward, Info
        raise NotImplementedError

    def render(self, mode='human'):
        raise NotImplementedError

    def save_params(self, filepath: Path):
        """Dump all public instance attributes as YAML to ``filepath``.

        Parent directories are created if missing.
        """
        # startswith('_') already covers names starting with '__'.
        d = {key: val for key, val in self.__dict__.items() if not key.startswith('_')}
        filepath.parent.mkdir(parents=True, exist_ok=True)
        with filepath.open('w') as f:
            yaml.dump(d, f)
|
|
@ -53,7 +53,7 @@ class Renderer:
|
|||||||
|
|
||||||
def blit_params(self, entity):
|
def blit_params(self, entity):
|
||||||
r, c = entity.pos
|
r, c = entity.pos
|
||||||
img = self.assets[entity.name]
|
img = self.assets[entity.name.lower()]
|
||||||
if entity.value_operation == 'opacity':
|
if entity.value_operation == 'opacity':
|
||||||
img.set_alpha(255*entity.value)
|
img.set_alpha(255*entity.value)
|
||||||
elif entity.value_operation == 'scale':
|
elif entity.value_operation == 'scale':
|
||||||
|
@ -3,13 +3,17 @@ import random
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from environments.factory.base_factory import BaseFactory
|
|
||||||
from environments import helpers as h
|
from environments import helpers as h
|
||||||
|
from environments.helpers import Constants as c
|
||||||
|
from environments.factory.base.base_factory import BaseFactory
|
||||||
|
from environments.factory.base.objects import Agent, Action, Object, Slice
|
||||||
|
from environments.factory.base.registers import Entities
|
||||||
|
|
||||||
from environments.factory.renderer import Renderer, Entity
|
from environments.factory.renderer import Renderer, Entity
|
||||||
from environments.utility_classes import AgentState, MovementProperties
|
from environments.utility_classes import MovementProperties
|
||||||
|
|
||||||
DIRT_INDEX = -1
|
DIRT = "dirt"
|
||||||
CLEAN_UP_ACTION = 'clean_up'
|
CLEAN_UP_ACTION = 'clean_up'
|
||||||
|
|
||||||
|
|
||||||
@ -26,95 +30,104 @@ class DirtProperties(NamedTuple):
|
|||||||
class SimpleFactory(BaseFactory):
|
class SimpleFactory(BaseFactory):
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def additional_actions(self) -> List[str]:
|
def additional_actions(self) -> List[Object]:
|
||||||
return [CLEAN_UP_ACTION]
|
return [Action(CLEAN_UP_ACTION)]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def additional_entities(self) -> Union[Entities, List[Entities]]:
|
||||||
|
return []
|
||||||
|
|
||||||
|
@property
|
||||||
|
def additional_slices(self) -> List[Slice]:
|
||||||
|
return [Slice('dirt', np.zeros(self._level_shape))]
|
||||||
|
|
||||||
def _is_clean_up_action(self, action: Union[str, int]):
|
def _is_clean_up_action(self, action: Union[str, int]):
|
||||||
if isinstance(action, str):
|
if isinstance(action, str):
|
||||||
action = self._actions.by_name(action)
|
action = self._actions.by_name(action)
|
||||||
return self._actions[action] == CLEAN_UP_ACTION
|
return self._actions[action].name == CLEAN_UP_ACTION
|
||||||
|
|
||||||
def __init__(self, *args, dirt_properties: DirtProperties = DirtProperties(), verbose=False, **kwargs):
|
def __init__(self, *args, dirt_properties: DirtProperties = DirtProperties(), verbose=False, **kwargs):
|
||||||
self.dirt_properties = dirt_properties
|
self.dirt_properties = dirt_properties
|
||||||
self.verbose = verbose
|
self.verbose = verbose
|
||||||
self.max_dirt = 20
|
|
||||||
self._renderer = None # expensive - don't use it when not required !
|
self._renderer = None # expensive - don't use it when not required !
|
||||||
super(SimpleFactory, self).__init__(*args, additional_slices=['dirt'], **kwargs)
|
super(SimpleFactory, self).__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
def _flush_state(self):
|
||||||
|
super(SimpleFactory, self)._flush_state()
|
||||||
|
self._obs_cube[self._slices.get_idx_by_name(DIRT)] = self._slices.by_name(DIRT).slice
|
||||||
|
|
||||||
def render(self, mode='human'):
|
def render(self, mode='human'):
|
||||||
|
|
||||||
if not self._renderer: # lazy init
|
if not self._renderer: # lazy init
|
||||||
height, width = self._state.shape[1:]
|
height, width = self._obs_cube.shape[1:]
|
||||||
self._renderer = Renderer(width, height, view_radius=self.pomdp_radius, fps=5)
|
self._renderer = Renderer(width, height, view_radius=self.pomdp_radius, fps=5)
|
||||||
|
dirt_slice = self._slices.by_name(DIRT).slice
|
||||||
dirt = [Entity('dirt', [x, y], min(0.15 + self._state[DIRT_INDEX, x, y], 1.5), 'scale')
|
dirt = [Entity('dirt', tile.pos, min(0.15 + dirt_slice[tile.pos], 1.5), 'scale')
|
||||||
for x, y in np.argwhere(self._state[DIRT_INDEX] > h.IS_FREE_CELL)]
|
for tile in [tile for tile in self._tiles if dirt_slice[tile.pos]]]
|
||||||
walls = [Entity('wall', pos)
|
walls = [Entity('wall', pos)
|
||||||
for pos in np.argwhere(self._state[self._state_slices.by_name(h.LEVEL)] > h.IS_FREE_CELL)]
|
for pos in np.argwhere(self._slices.by_enum(c.LEVEL).slice == c.OCCUPIED_CELL.value)]
|
||||||
|
|
||||||
def asset_str(agent):
|
def asset_str(agent):
|
||||||
if any([x is None for x in [self._state_slices[j] for j in agent.collisions]]):
|
# What does this abonimation do?
|
||||||
print('error')
|
# if any([x is None for x in [self._slices[j] for j in agent.collisions]]):
|
||||||
cols = ' '.join([self._state_slices[j] for j in agent.collisions])
|
# print('error')
|
||||||
if h.AGENT in cols:
|
col_names = [x.name for x in agent.temp_collisions]
|
||||||
|
if c.AGENT.value in col_names:
|
||||||
return 'agent_collision', 'blank'
|
return 'agent_collision', 'blank'
|
||||||
elif not agent.action_valid or 'level' in cols or h.AGENT in cols:
|
elif not agent.temp_valid or c.LEVEL.name in col_names or c.AGENT.name in col_names:
|
||||||
return h.AGENT, 'invalid'
|
return c.AGENT.value, 'invalid'
|
||||||
elif self._is_clean_up_action(agent.action):
|
elif self._is_clean_up_action(agent.temp_action):
|
||||||
return h.AGENT, 'valid'
|
return c.AGENT.value, 'valid'
|
||||||
else:
|
else:
|
||||||
return h.AGENT, 'idle'
|
return c.AGENT.value, 'idle'
|
||||||
agents = []
|
agents = []
|
||||||
for i, agent in enumerate(self._agent_states):
|
for i, agent in enumerate(self._agents):
|
||||||
name, state = asset_str(agent)
|
name, state = asset_str(agent)
|
||||||
agents.append(Entity(name, agent.pos, 1, 'none', state, i+1))
|
agents.append(Entity(name, agent.pos, 1, 'none', state, i+1))
|
||||||
doors = []
|
doors = []
|
||||||
if self.has_doors:
|
if self.parse_doors:
|
||||||
for i, door in enumerate(self._door_states):
|
for i, door in enumerate(self._doors):
|
||||||
name, state = 'door_open' if door.is_open else 'door_closed', 'blank'
|
name, state = 'door_open' if door.is_open else 'door_closed', 'blank'
|
||||||
agents.append(Entity(name, door.pos, 1, 'none', state, i+1))
|
agents.append(Entity(name, door.pos, 1, 'none', state, i+1))
|
||||||
self._renderer.render(dirt+walls+agents+doors)
|
self._renderer.render(dirt+walls+agents+doors)
|
||||||
|
|
||||||
def spawn_dirt(self) -> None:
|
def spawn_dirt(self) -> None:
|
||||||
if not np.argwhere(self._state[DIRT_INDEX] != h.IS_FREE_CELL).shape[0] > self.dirt_properties.max_global_amount:
|
dirt_slice = self._slices.by_name(DIRT).slice
|
||||||
free_for_dirt = self.free_cells(excluded_slices=DIRT_INDEX)
|
# dirty_tiles = [tile for tile in self._tiles if dirt_slice[tile.pos]]
|
||||||
|
curr_dirt_amount = dirt_slice.sum()
|
||||||
|
if not curr_dirt_amount > self.dirt_properties.max_global_amount:
|
||||||
|
free_for_dirt = self._tiles.empty_tiles
|
||||||
|
|
||||||
# randomly distribute dirt across the grid
|
# randomly distribute dirt across the grid
|
||||||
n_dirt_tiles = int(random.uniform(0, self.dirt_properties.max_spawn_ratio) * len(free_for_dirt))
|
n_dirt_tiles = int(random.uniform(0, self.dirt_properties.max_spawn_ratio) * len(free_for_dirt))
|
||||||
for x, y in free_for_dirt[:n_dirt_tiles]:
|
for tile in free_for_dirt[:n_dirt_tiles]:
|
||||||
new_value = self._state[DIRT_INDEX, x, y] + self.dirt_properties.gain_amount
|
new_value = dirt_slice[tile.pos] + self.dirt_properties.gain_amount
|
||||||
self._state[DIRT_INDEX, x, y] = max(new_value, self.dirt_properties.max_local_amount)
|
dirt_slice[tile.pos] = min(new_value, self.dirt_properties.max_local_amount)
|
||||||
else:
|
else:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def clean_up(self, pos: (int, int)) -> ((int, int), bool):
|
def clean_up(self, agent: Agent) -> bool:
|
||||||
new_dirt_amount = self._state[DIRT_INDEX][pos] - self.dirt_properties.clean_amount
|
dirt_slice = self._slices.by_name(DIRT).slice
|
||||||
cleanup_was_sucessfull: bool
|
if dirt_slice[agent.pos]:
|
||||||
if self._state[DIRT_INDEX][pos] == h.IS_FREE_CELL:
|
new_dirt_amount = dirt_slice[agent.pos] - self.dirt_properties.clean_amount
|
||||||
cleanup_was_sucessfull = False
|
dirt_slice[agent.pos] = max(new_dirt_amount, c.FREE_CELL.value)
|
||||||
return pos, cleanup_was_sucessfull
|
return True
|
||||||
else:
|
else:
|
||||||
cleanup_was_sucessfull = True
|
return False
|
||||||
self._state[DIRT_INDEX][pos] = max(new_dirt_amount, h.IS_FREE_CELL)
|
|
||||||
return pos, cleanup_was_sucessfull
|
|
||||||
|
|
||||||
def step(self, actions):
|
def post_step(self) -> dict:
|
||||||
_, reward, done, info = super(SimpleFactory, self).step(actions)
|
|
||||||
if not self._next_dirt_spawn:
|
if not self._next_dirt_spawn:
|
||||||
self.spawn_dirt()
|
self.spawn_dirt()
|
||||||
self._next_dirt_spawn = self.dirt_properties.spawn_frequency
|
self._next_dirt_spawn = self.dirt_properties.spawn_frequency
|
||||||
else:
|
else:
|
||||||
self._next_dirt_spawn -= 1
|
self._next_dirt_spawn -= 1
|
||||||
|
return {}
|
||||||
|
|
||||||
obs = self._get_observations()
|
def do_additional_actions(self, agent: Agent, action: int) -> bool:
|
||||||
return obs, reward, done, info
|
|
||||||
|
|
||||||
def do_additional_actions(self, agent_i: int, action: int) -> ((int, int), bool):
|
|
||||||
if action != self._actions.is_moving_action(action):
|
if action != self._actions.is_moving_action(action):
|
||||||
if self._is_clean_up_action(action):
|
if self._is_clean_up_action(action):
|
||||||
agent_i_pos = self.agent_i_position(agent_i)
|
valid = self.clean_up(agent)
|
||||||
_, valid = self.clean_up(agent_i_pos)
|
return valid
|
||||||
return agent_i_pos, valid
|
|
||||||
else:
|
else:
|
||||||
raise RuntimeError('This should not happen!!!')
|
raise RuntimeError('This should not happen!!!')
|
||||||
else:
|
else:
|
||||||
@ -122,19 +135,21 @@ class SimpleFactory(BaseFactory):
|
|||||||
|
|
||||||
def reset(self) -> (np.ndarray, int, bool, dict):
|
def reset(self) -> (np.ndarray, int, bool, dict):
|
||||||
_ = super().reset() # state, reward, done, info ... =
|
_ = super().reset() # state, reward, done, info ... =
|
||||||
dirt_slice = np.zeros((1, *self._state.shape[1:]))
|
|
||||||
self._state = np.concatenate((self._state, dirt_slice)) # dirt is now the last slice
|
|
||||||
self.spawn_dirt()
|
self.spawn_dirt()
|
||||||
self._next_dirt_spawn = self.dirt_properties.spawn_frequency
|
self._next_dirt_spawn = self.dirt_properties.spawn_frequency
|
||||||
obs = self._get_observations()
|
obs = self._get_observations()
|
||||||
return obs
|
return obs
|
||||||
|
|
||||||
def calculate_reward(self, agent_states: List[AgentState]) -> (int, dict):
|
def calculate_reward(self) -> (int, dict):
|
||||||
info_dict = dict()
|
info_dict = dict()
|
||||||
current_dirt_amount = self._state[DIRT_INDEX].sum()
|
|
||||||
dirty_tiles = np.argwhere(self._state[DIRT_INDEX] != h.IS_FREE_CELL).shape[0]
|
dirt_slice = self._slices.by_name(DIRT).slice
|
||||||
|
dirty_tiles = [dirt_slice[tile.pos] for tile in self._tiles if dirt_slice[tile.pos]]
|
||||||
|
current_dirt_amount = sum(dirty_tiles)
|
||||||
|
dirty_tile_count = len(dirty_tiles)
|
||||||
|
|
||||||
info_dict.update(dirt_amount=current_dirt_amount)
|
info_dict.update(dirt_amount=current_dirt_amount)
|
||||||
info_dict.update(dirty_tile_count=dirty_tiles)
|
info_dict.update(dirty_tile_count=dirty_tile_count)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# penalty = current_dirt_amount
|
# penalty = current_dirt_amount
|
||||||
@ -142,52 +157,47 @@ class SimpleFactory(BaseFactory):
|
|||||||
except (ZeroDivisionError, RuntimeWarning):
|
except (ZeroDivisionError, RuntimeWarning):
|
||||||
reward = 0
|
reward = 0
|
||||||
|
|
||||||
for agent_state in agent_states:
|
for agent in self._agents:
|
||||||
agent_name = f'{h.AGENT.capitalize()} {agent_state.i}'
|
if agent.temp_collisions:
|
||||||
cols = agent_state.collisions
|
self.print(f't = {self._steps}\t{agent.name} has collisions with {agent.temp_collisions}')
|
||||||
|
|
||||||
list_of_collisions = [self._state_slices[entity] for entity in cols
|
if self._is_clean_up_action(agent.temp_action):
|
||||||
if entity != self._state_slices.by_name('dirt')]
|
if agent.temp_valid:
|
||||||
|
|
||||||
if list_of_collisions:
|
|
||||||
self.print(f't = {self._steps}\t{agent_name} has collisions with {list_of_collisions}')
|
|
||||||
|
|
||||||
if self._is_clean_up_action(agent_state.action):
|
|
||||||
if agent_state.action_valid:
|
|
||||||
reward += 1
|
reward += 1
|
||||||
self.print(f'{agent_name} did just clean up some dirt at {agent_state.pos}.')
|
self.print(f'{agent.name} did just clean up some dirt at {agent.pos}.')
|
||||||
info_dict.update(dirt_cleaned=1)
|
info_dict.update(dirt_cleaned=1)
|
||||||
else:
|
else:
|
||||||
reward -= 0.01
|
reward -= 0.01
|
||||||
self.print(f'{agent_name} just tried to clean up some dirt at {agent_state.pos}, but failed.')
|
self.print(f'{agent.name} just tried to clean up some dirt at {agent.pos}, but failed.')
|
||||||
info_dict.update({f'{h.AGENT}_{agent_state.i}_failed_action': 1})
|
info_dict.update({f'{agent.name}_failed_action': 1})
|
||||||
info_dict.update({f'{h.AGENT}_{agent_state.i}_failed_dirt_cleanup': 1})
|
info_dict.update({f'{agent.name}_failed_dirt_cleanup': 1})
|
||||||
|
|
||||||
elif self._actions.is_moving_action(agent_state.action):
|
elif self._actions.is_moving_action(agent.temp_action):
|
||||||
if agent_state.action_valid:
|
if agent.temp_valid:
|
||||||
# info_dict.update(movement=1)
|
# info_dict.update(movement=1)
|
||||||
reward -= 0.00
|
reward -= 0.00
|
||||||
else:
|
else:
|
||||||
# self.print('collision')
|
# self.print('collision')
|
||||||
reward -= 0.01
|
reward -= 0.05
|
||||||
|
self.print(f'{agent.name} just hit the wall at {agent.pos}.')
|
||||||
|
info_dict.update({f'{agent.name}_vs_LEVEL': 1})
|
||||||
|
|
||||||
elif self._actions.is_door_usage(agent_state.action):
|
elif self._actions.is_door_usage(agent.temp_action):
|
||||||
if agent_state.action_valid:
|
if agent.temp_valid:
|
||||||
reward += 0.1
|
self.print(f'{agent.name} did just use the door at {agent.pos}.')
|
||||||
self.print(f'{agent_name} did just use the door at {agent_state.pos}.')
|
|
||||||
info_dict.update(door_used=1)
|
info_dict.update(door_used=1)
|
||||||
else:
|
else:
|
||||||
self.print(f'{agent_name} just tried to use a door at {agent_state.pos}, but failed.')
|
reward -= 0.01
|
||||||
info_dict.update({f'{h.AGENT}_{agent_state.i}_failed_action': 1})
|
self.print(f'{agent.name} just tried to use a door at {agent.pos}, but failed.')
|
||||||
info_dict.update({f'{h.AGENT}_{agent_state.i}_failed_door_open': 1})
|
info_dict.update({f'{agent.name}_failed_action': 1})
|
||||||
|
info_dict.update({f'{agent.name}_failed_door_open': 1})
|
||||||
|
|
||||||
else:
|
else:
|
||||||
info_dict.update(no_op=1)
|
info_dict.update(no_op=1)
|
||||||
reward -= 0.00
|
reward -= 0.00
|
||||||
|
|
||||||
for entity in list_of_collisions:
|
for other_agent in agent.temp_collisions:
|
||||||
entity = h.AGENT if h.AGENT in entity else entity
|
info_dict.update({f'{agent.name}_vs_{other_agent.name}': 1})
|
||||||
info_dict.update({f'{h.AGENT}_{agent_state.i}_vs_{entity}': 1})
|
|
||||||
|
|
||||||
self.print(f"reward is {reward}")
|
self.print(f"reward is {reward}")
|
||||||
# Potential based rewards ->
|
# Potential based rewards ->
|
||||||
@ -205,13 +215,13 @@ if __name__ == '__main__':
|
|||||||
move_props = MovementProperties(allow_diagonal_movement=True, allow_square_movement=True)
|
move_props = MovementProperties(allow_diagonal_movement=True, allow_square_movement=True)
|
||||||
dirt_props = DirtProperties()
|
dirt_props = DirtProperties()
|
||||||
factory = SimpleFactory(movement_properties=move_props, dirt_properties=dirt_props, n_agents=10,
|
factory = SimpleFactory(movement_properties=move_props, dirt_properties=dirt_props, n_agents=10,
|
||||||
combin_agent_slices_in_obs=True, level_name='rooms',
|
combin_agent_slices_in_obs=False, level_name='rooms', parse_doors=True,
|
||||||
pomdp_radius=3)
|
pomdp_radius=3)
|
||||||
|
|
||||||
n_actions = factory.action_space.n - 1
|
n_actions = factory.action_space.n - 1
|
||||||
_ = factory.observation_space
|
_ = factory.observation_space
|
||||||
|
|
||||||
for epoch in range(10000):
|
for epoch in range(100):
|
||||||
random_actions = [[random.randint(0, n_actions) for _ in range(factory.n_agents)] for _ in range(200)]
|
random_actions = [[random.randint(0, n_actions) for _ in range(factory.n_agents)] for _ in range(200)]
|
||||||
env_state = factory.reset()
|
env_state = factory.reset()
|
||||||
r = 0
|
r = 0
|
||||||
|
@ -1,27 +1,46 @@
|
|||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from typing import Tuple
|
from enum import Enum, auto
|
||||||
|
from typing import Tuple, Union
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
# Constants
|
# Constants
|
||||||
WALL = '#'
|
class Constants(Enum):
|
||||||
DOOR = 'D'
|
WALL = '#'
|
||||||
DANGER_ZONE = 'x'
|
DOOR = 'D'
|
||||||
|
DANGER_ZONE = 'x'
|
||||||
|
LEVEL = 'level'
|
||||||
|
AGENT = 'Agent'
|
||||||
|
FREE_CELL = 0
|
||||||
|
OCCUPIED_CELL = 1
|
||||||
|
|
||||||
|
DOORS = 'doors'
|
||||||
|
IS_CLOSED_DOOR = 1
|
||||||
|
IS_OPEN_DOOR = -1
|
||||||
|
|
||||||
|
LEVEL_IDX = 0
|
||||||
|
|
||||||
|
ACTION = auto()
|
||||||
|
COLLISIONS = auto()
|
||||||
|
VALID = True
|
||||||
|
NOT_VALID = False
|
||||||
|
|
||||||
|
def __bool__(self):
|
||||||
|
return bool(self.value)
|
||||||
|
|
||||||
|
|
||||||
LEVELS_DIR = 'levels'
|
LEVELS_DIR = 'levels'
|
||||||
LEVEL = 'level'
|
|
||||||
AGENT = 'agent'
|
|
||||||
IS_FREE_CELL = 0
|
|
||||||
IS_OCCUPIED_CELL = 1
|
|
||||||
|
|
||||||
DOORS = 'doors'
|
|
||||||
IS_CLOSED_DOOR = IS_OCCUPIED_CELL
|
|
||||||
IS_OPEN_DOOR = -1
|
|
||||||
|
|
||||||
LEVEL_IDX = 0
|
|
||||||
|
|
||||||
TO_BE_AVERAGED = ['dirt_amount', 'dirty_tiles']
|
TO_BE_AVERAGED = ['dirt_amount', 'dirty_tiles']
|
||||||
IGNORED_DF_COLUMNS = ['Episode', 'Run', 'train_step', 'step', 'index', 'dirt_amount', 'dirty_tile_count']
|
IGNORED_DF_COLUMNS = ['Episode', 'Run', 'train_step', 'step', 'index', 'dirt_amount',
|
||||||
|
'dirty_tile_count', 'terminal_observation', 'episode']
|
||||||
|
|
||||||
|
MANHATTAN_MOVES = ['north', 'east', 'south', 'west']
|
||||||
|
DIAGONAL_MOVES = ['north_east', 'south_east', 'south_west', 'north_west']
|
||||||
|
|
||||||
|
NO_POS = (-9999, -9999)
|
||||||
|
|
||||||
ACTIONMAP = defaultdict(lambda: (0, 0), dict(north=(-1, 0), east=(0, 1),
|
ACTIONMAP = defaultdict(lambda: (0, 0), dict(north=(-1, 0), east=(0, 1),
|
||||||
south=(1, 0), west=(0, -1),
|
south=(1, 0), west=(0, -1),
|
||||||
@ -38,8 +57,7 @@ HORIZONTAL_DOOR_ZONE_2 = np.asarray([[0, 0, 0], [0, 0, 0], [1, 1, 1]])
|
|||||||
VERTICAL_DOOR_ZONE_1 = np.asarray([[1, 0, 0], [0, 0, 0], [0, 0, 1]])
|
VERTICAL_DOOR_ZONE_1 = np.asarray([[1, 0, 0], [0, 0, 0], [0, 0, 1]])
|
||||||
VERTICAL_DOOR_ZONE_2 = np.asarray([[1, 0, 0], [0, 0, 0], [0, 0, 1]])
|
VERTICAL_DOOR_ZONE_2 = np.asarray([[1, 0, 0], [0, 0, 0], [0, 0, 1]])
|
||||||
|
|
||||||
NOT_VALID = False
|
|
||||||
VALID = True
|
|
||||||
|
|
||||||
|
|
||||||
# Utility functions
|
# Utility functions
|
||||||
@ -51,10 +69,13 @@ def parse_level(path):
|
|||||||
return level
|
return level
|
||||||
|
|
||||||
|
|
||||||
def one_hot_level(level, wall_char=WALL):
|
def one_hot_level(level, wall_char: Union[Constants, str] = Constants.WALL):
|
||||||
grid = np.array(level)
|
grid = np.array(level)
|
||||||
binary_grid = np.zeros(grid.shape, dtype=np.int8)
|
binary_grid = np.zeros(grid.shape, dtype=np.int8)
|
||||||
binary_grid[grid == wall_char] = 1
|
if wall_char in Constants:
|
||||||
|
binary_grid[grid == wall_char.value] = Constants.OCCUPIED_CELL.value
|
||||||
|
else:
|
||||||
|
binary_grid[grid == wall_char] = Constants.OCCUPIED_CELL.value
|
||||||
return binary_grid
|
return binary_grid
|
||||||
|
|
||||||
|
|
||||||
@ -70,7 +91,7 @@ def check_position(slice_to_check_against: np.ndarray, position_to_check: Tuple[
|
|||||||
|
|
||||||
# Check for collision with level walls
|
# Check for collision with level walls
|
||||||
valid = valid and not slice_to_check_against[x_pos, y_pos]
|
valid = valid and not slice_to_check_against[x_pos, y_pos]
|
||||||
return valid
|
return Constants.VALID if valid else Constants.NOT_VALID
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
import pickle
|
import pickle
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import List, Dict
|
||||||
|
|
||||||
from stable_baselines3.common.callbacks import BaseCallback
|
from stable_baselines3.common.callbacks import BaseCallback
|
||||||
|
|
||||||
@ -66,13 +67,15 @@ class MonitorCallback(BaseCallback):
|
|||||||
print('Plotting done.')
|
print('Plotting done.')
|
||||||
self.closed = True
|
self.closed = True
|
||||||
|
|
||||||
def _on_step(self) -> bool:
|
def _on_step(self, alt_infos: List[Dict] = None, alt_dones: List[bool] = None) -> bool:
|
||||||
for _, info in enumerate(self.locals.get('infos', [])):
|
infos = alt_infos or self.locals.get('infos', [])
|
||||||
|
dones = alt_dones or self.locals.get('dones', None) or self.locals.get('done', [None])
|
||||||
|
for _, info in enumerate(infos):
|
||||||
self._monitor_dict[self.num_timesteps] = {key: val for key, val in info.items()
|
self._monitor_dict[self.num_timesteps] = {key: val for key, val in info.items()
|
||||||
if key not in ['terminal_observation', 'episode']}
|
if key not in ['terminal_observation', 'episode']
|
||||||
|
and not key.startswith('rec_')}
|
||||||
|
|
||||||
for env_idx, done in list(enumerate(self.locals.get('dones', []))) + \
|
for env_idx, done in enumerate(dones):
|
||||||
list(enumerate(self.locals.get('done', []))):
|
|
||||||
if done:
|
if done:
|
||||||
env_monitor_df = pd.DataFrame.from_dict(self._monitor_dict, orient='index')
|
env_monitor_df = pd.DataFrame.from_dict(self._monitor_dict, orient='index')
|
||||||
self._monitor_dict = dict()
|
self._monitor_dict = dict()
|
||||||
|
74
environments/logging/recorder.py
Normal file
74
environments/logging/recorder.py
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Union
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
from stable_baselines3.common.callbacks import BaseCallback
|
||||||
|
|
||||||
|
from environments.factory.base.base_factory import REC_TAC
|
||||||
|
from environments.helpers import IGNORED_DF_COLUMNS
|
||||||
|
|
||||||
|
|
||||||
|
class RecorderCallback(BaseCallback):
|
||||||
|
|
||||||
|
def __init__(self, filepath: Union[str, Path], occupation_map: bool = False, trajectory_map: bool = False):
|
||||||
|
super(RecorderCallback, self).__init__()
|
||||||
|
self.trajectory_map = trajectory_map
|
||||||
|
self.occupation_map = occupation_map
|
||||||
|
self.filepath = Path(filepath)
|
||||||
|
self._recorder_dict = dict()
|
||||||
|
self._recorder_df = pd.DataFrame()
|
||||||
|
self.started = False
|
||||||
|
self.closed = False
|
||||||
|
|
||||||
|
def _on_step(self) -> bool:
|
||||||
|
for _, info in enumerate(self.locals.get('infos', [])):
|
||||||
|
self._recorder_dict[self.num_timesteps] = {key: val for key, val in info.items()
|
||||||
|
if not key.startswith(f'{REC_TAC}_')}
|
||||||
|
|
||||||
|
for env_idx, done in list(enumerate(self.locals.get('dones', []))) + \
|
||||||
|
list(enumerate(self.locals.get('done', []))):
|
||||||
|
if done:
|
||||||
|
env_monitor_df = pd.DataFrame.from_dict(self._recorder_dict, orient='index')
|
||||||
|
self._recorder_dict = dict()
|
||||||
|
columns = [col for col in env_monitor_df.columns if col not in IGNORED_DF_COLUMNS]
|
||||||
|
env_monitor_df = env_monitor_df.aggregate(
|
||||||
|
{col: 'mean' if col.endswith('ount') else 'sum' for col in columns}
|
||||||
|
)
|
||||||
|
env_monitor_df['episode'] = len(self._recorder_df)
|
||||||
|
self._recorder_df = self._recorder_df.append([env_monitor_df])
|
||||||
|
else:
|
||||||
|
pass
|
||||||
|
return True
|
||||||
|
|
||||||
|
def __enter__(self):
|
||||||
|
self._on_training_start()
|
||||||
|
|
||||||
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||||
|
self._on_training_end()
|
||||||
|
|
||||||
|
def _on_training_start(self) -> None:
|
||||||
|
if self.started:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
self.filepath.parent.mkdir(exist_ok=True, parents=True)
|
||||||
|
self.started = True
|
||||||
|
pass
|
||||||
|
|
||||||
|
def _on_training_end(self) -> None:
|
||||||
|
if self.closed:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
# self.out_file.unlink(missing_ok=True)
|
||||||
|
with self.filepath.open('w') as f:
|
||||||
|
json_df = self._recorder_df.to_json(orient="table")
|
||||||
|
parsed = json.loads(json_df)
|
||||||
|
json.dump(parsed, f, indent=4)
|
||||||
|
|
||||||
|
if self.occupation_map:
|
||||||
|
print('Recorder files were dumped to disk, now plotting the occupation map...')
|
||||||
|
|
||||||
|
if self.trajectory_map:
|
||||||
|
print('Recorder files were dumped to disk, now plotting the occupation map...')
|
||||||
|
|
||||||
|
self.closed = True
|
@ -1,68 +0,0 @@
|
|||||||
from typing import List, Union
|
|
||||||
|
|
||||||
import gym
|
|
||||||
|
|
||||||
|
|
||||||
class Entities():
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
# noinspection PyAttributeOutsideInit
|
|
||||||
class BaseFactory(gym.Env):
|
|
||||||
|
|
||||||
def __enter__(self):
|
|
||||||
return self if self.frames_to_stack == 0 else FrameStack(self, self.frames_to_stack)
|
|
||||||
|
|
||||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
||||||
self.close()
|
|
||||||
|
|
||||||
def __init__(self, level_name='simple', n_agents=1, max_steps=int(5e2), pomdp_radius: Union[None, int] = 0,
|
|
||||||
movement_properties: MovementProperties = MovementProperties(),
|
|
||||||
combin_agent_slices_in_obs: bool = False, frames_to_stack=0,
|
|
||||||
omit_agent_slice_in_obs=False, **kwargs):
|
|
||||||
assert (combin_agent_slices_in_obs != omit_agent_slice_in_obs) or \
|
|
||||||
(not combin_agent_slices_in_obs and not omit_agent_slice_in_obs), \
|
|
||||||
'Both options are exclusive'
|
|
||||||
assert frames_to_stack != 1 and frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1."
|
|
||||||
|
|
||||||
self.movement_properties = movement_properties
|
|
||||||
self.level_name = level_name
|
|
||||||
|
|
||||||
self.n_agents = n_agents
|
|
||||||
self.max_steps = max_steps
|
|
||||||
self.pomdp_radius = pomdp_radius
|
|
||||||
self.combin_agent_slices_in_obs = combin_agent_slices_in_obs
|
|
||||||
self.omit_agent_slice_in_obs = omit_agent_slice_in_obs
|
|
||||||
self.frames_to_stack = frames_to_stack
|
|
||||||
|
|
||||||
self.done_at_collision = False
|
|
||||||
|
|
||||||
self._state_slices = StateSlices()
|
|
||||||
level_filepath = Path(__file__).parent / h.LEVELS_DIR / f'{self.level_name}.txt'
|
|
||||||
parsed_level = h.parse_level(level_filepath)
|
|
||||||
self._level = h.one_hot_level(parsed_level)
|
|
||||||
parsed_doors = h.one_hot_level(parsed_level, h.DOOR)
|
|
||||||
if parsed_doors.any():
|
|
||||||
self._doors = parsed_doors
|
|
||||||
level_slices = ['level', 'doors']
|
|
||||||
can_use_doors = True
|
|
||||||
else:
|
|
||||||
level_slices = ['level']
|
|
||||||
can_use_doors = False
|
|
||||||
offset = len(level_slices)
|
|
||||||
self._state_slices.register_additional_items([*level_slices,
|
|
||||||
*[f'agent#{i}' for i in range(offset, n_agents + offset)]])
|
|
||||||
if 'additional_slices' in kwargs:
|
|
||||||
self._state_slices.register_additional_items(kwargs.get('additional_slices'))
|
|
||||||
self._zones = Zones(parsed_level)
|
|
||||||
|
|
||||||
self._actions = Actions(self.movement_properties, can_use_doors=can_use_doors)
|
|
||||||
self._actions.register_additional_items(self.additional_actions)
|
|
||||||
self.reset()
|
|
||||||
|
|
||||||
|
|
||||||
def step(self, actions: Union[int, List[int]]):
|
|
||||||
actions = actions if isinstance(actions, list) else [actions]
|
|
||||||
self.entities.step()
|
|
@ -1,298 +1,7 @@
|
|||||||
from typing import Union, List, NamedTuple, Tuple
|
from typing import NamedTuple
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
from environments import helpers as h
|
|
||||||
|
|
||||||
|
|
||||||
IS_CLOSED = 'CLOSED'
|
|
||||||
IS_OPEN = 'OPEN'
|
|
||||||
|
|
||||||
|
|
||||||
class MovementProperties(NamedTuple):
|
class MovementProperties(NamedTuple):
|
||||||
allow_square_movement: bool = True
|
allow_square_movement: bool = True
|
||||||
allow_diagonal_movement: bool = False
|
allow_diagonal_movement: bool = False
|
||||||
allow_no_op: bool = False
|
allow_no_op: bool = False
|
||||||
|
|
||||||
|
|
||||||
# Preperations for Entities (not used yet)
|
|
||||||
class Entity:
|
|
||||||
|
|
||||||
@property
|
|
||||||
def pos(self):
|
|
||||||
return self._pos
|
|
||||||
|
|
||||||
@property
|
|
||||||
def identifier(self):
|
|
||||||
return self._identifier
|
|
||||||
|
|
||||||
def __init__(self, identifier, pos):
|
|
||||||
self._pos = pos
|
|
||||||
self._identifier = identifier
|
|
||||||
|
|
||||||
|
|
||||||
class Door(Entity):
|
|
||||||
|
|
||||||
@property
|
|
||||||
def is_closed(self):
|
|
||||||
return self._state == IS_CLOSED
|
|
||||||
|
|
||||||
@property
|
|
||||||
def is_open(self):
|
|
||||||
return self._state == IS_OPEN
|
|
||||||
|
|
||||||
@property
|
|
||||||
def status(self):
|
|
||||||
return self._state
|
|
||||||
|
|
||||||
def __init__(self, *args, closed_on_init=True, **kwargs):
|
|
||||||
super(Door, self).__init__(*args, **kwargs)
|
|
||||||
self._state = IS_CLOSED if closed_on_init else IS_OPEN
|
|
||||||
|
|
||||||
def use(self):
|
|
||||||
self._state: str = IS_CLOSED if self._state == IS_OPEN else IS_OPEN
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class Agent(Entity):
|
|
||||||
|
|
||||||
@property
|
|
||||||
def direction_of_vision(self):
|
|
||||||
return self._direction_of_vision
|
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
super(Agent, self).__init__(*args, **kwargs)
|
|
||||||
self._direction_of_vision = (None, None)
|
|
||||||
|
|
||||||
def move(self, new_pos: Tuple[int, int]):
|
|
||||||
x_old, y_old = self.pos
|
|
||||||
self._pos = new_pos
|
|
||||||
x_new, y_new = new_pos
|
|
||||||
self._direction_of_vision = (x_old-x_new, y_old-y_new)
|
|
||||||
return self.pos
|
|
||||||
|
|
||||||
|
|
||||||
class AgentState:
|
|
||||||
|
|
||||||
@property
|
|
||||||
def collisions(self):
|
|
||||||
return np.argwhere(self.collision_vector != 0).flatten()
|
|
||||||
|
|
||||||
@property
|
|
||||||
def direction_of_view(self):
|
|
||||||
last_x, last_y = self._last_pos
|
|
||||||
curr_x, curr_y = self.pos
|
|
||||||
return last_x-curr_x, last_y-curr_y
|
|
||||||
|
|
||||||
def __init__(self, i: int, action: int, pos=None):
|
|
||||||
self.i = i
|
|
||||||
self.action = action
|
|
||||||
|
|
||||||
self.collision_vector = None
|
|
||||||
self.action_valid = None
|
|
||||||
self.pos = pos
|
|
||||||
self._last_pos = (-1, -1)
|
|
||||||
|
|
||||||
def update(self, **kwargs): # is this hacky?? o.0
|
|
||||||
last_pos = self.pos
|
|
||||||
for key, value in kwargs.items():
|
|
||||||
if hasattr(self, key):
|
|
||||||
self.__setattr__(key, value)
|
|
||||||
else:
|
|
||||||
raise AttributeError(f'"{key}" cannot be updated, this attr is not a part of {self.__name__}')
|
|
||||||
if self.action_valid and last_pos != self.pos:
|
|
||||||
self._last_pos = last_pos
|
|
||||||
|
|
||||||
def reset(self):
|
|
||||||
self.__init__(self.i, self.action)
|
|
||||||
|
|
||||||
|
|
||||||
class DoorState:
|
|
||||||
|
|
||||||
def __init__(self, i: int, pos: Tuple[int, int], closed_on_init=True, auto_close_interval=10):
|
|
||||||
self.i = i
|
|
||||||
self.pos = pos
|
|
||||||
self._state = self._state = IS_CLOSED if closed_on_init else IS_OPEN
|
|
||||||
self.auto_close_interval = auto_close_interval
|
|
||||||
self.time_to_close = -1
|
|
||||||
|
|
||||||
@property
|
|
||||||
def is_closed(self):
|
|
||||||
return self._state == IS_CLOSED
|
|
||||||
|
|
||||||
@property
|
|
||||||
def is_open(self):
|
|
||||||
return self._state == IS_OPEN
|
|
||||||
|
|
||||||
@property
|
|
||||||
def status(self):
|
|
||||||
return self._state
|
|
||||||
|
|
||||||
def use(self):
|
|
||||||
if self._state == IS_OPEN:
|
|
||||||
self._state = IS_CLOSED
|
|
||||||
else:
|
|
||||||
self._state = IS_OPEN
|
|
||||||
self.time_to_close = self.auto_close_interval
|
|
||||||
|
|
||||||
class Register:
|
|
||||||
|
|
||||||
@property
|
|
||||||
def n(self):
|
|
||||||
return len(self)
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
self._register = dict()
|
|
||||||
|
|
||||||
def __len__(self):
|
|
||||||
return len(self._register)
|
|
||||||
|
|
||||||
def __add__(self, other: str):
|
|
||||||
assert isinstance(other, str), f'All item names have to be of type {str}'
|
|
||||||
self._register.update({len(self._register): other})
|
|
||||||
return self
|
|
||||||
|
|
||||||
def register_additional_items(self, others: List[str]):
|
|
||||||
for other in others:
|
|
||||||
self + other
|
|
||||||
return self
|
|
||||||
|
|
||||||
def keys(self):
|
|
||||||
return self._register.keys()
|
|
||||||
|
|
||||||
def values(self):
|
|
||||||
return self._register.values()
|
|
||||||
|
|
||||||
def items(self):
|
|
||||||
return self._register.items()
|
|
||||||
|
|
||||||
def __getitem__(self, item):
|
|
||||||
try:
|
|
||||||
return self._register[item]
|
|
||||||
except KeyError:
|
|
||||||
print('NO')
|
|
||||||
raise
|
|
||||||
|
|
||||||
def by_name(self, item):
|
|
||||||
return list(self._register.keys())[list(self._register.values()).index(item)]
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return f'{self.__class__.__name__}({self._register})'
|
|
||||||
|
|
||||||
|
|
||||||
class Agents(Register):
|
|
||||||
|
|
||||||
def __init__(self, n_agents):
|
|
||||||
super(Agents, self).__init__()
|
|
||||||
self.register_additional_items([f'agent#{i}' for i in range(n_agents)])
|
|
||||||
self._agents = [Agent(x, (-1, -1)) for x in self.keys()]
|
|
||||||
pass
|
|
||||||
|
|
||||||
def __getitem__(self, item):
|
|
||||||
return self._agents[item]
|
|
||||||
|
|
||||||
def get_name(self, item):
|
|
||||||
return self._register[item]
|
|
||||||
|
|
||||||
def by_name(self, item):
|
|
||||||
return self[super(Agents, self).by_name(item)]
|
|
||||||
|
|
||||||
def __add__(self, other):
|
|
||||||
super(Agents, self).__add__(other)
|
|
||||||
self._agents.append(Agent(len(self)+1, (-1, -1)))
|
|
||||||
|
|
||||||
|
|
||||||
class Actions(Register):
|
|
||||||
|
|
||||||
@property
|
|
||||||
def movement_actions(self):
|
|
||||||
return self._movement_actions
|
|
||||||
|
|
||||||
def __init__(self, movement_properties: MovementProperties, can_use_doors=False):
|
|
||||||
self.allow_no_op = movement_properties.allow_no_op
|
|
||||||
self.allow_diagonal_movement = movement_properties.allow_diagonal_movement
|
|
||||||
self.allow_square_movement = movement_properties.allow_square_movement
|
|
||||||
self.can_use_doors = can_use_doors
|
|
||||||
super(Actions, self).__init__()
|
|
||||||
|
|
||||||
if self.allow_square_movement:
|
|
||||||
self.register_additional_items(['north', 'east', 'south', 'west'])
|
|
||||||
if self.allow_diagonal_movement:
|
|
||||||
self.register_additional_items(['north_east', 'south_east', 'south_west', 'north_west'])
|
|
||||||
self._movement_actions = self._register.copy()
|
|
||||||
if self.can_use_doors:
|
|
||||||
self.register_additional_items(['use_door'])
|
|
||||||
if self.allow_no_op:
|
|
||||||
self.register_additional_items(['no-op'])
|
|
||||||
|
|
||||||
def is_moving_action(self, action: Union[str, int]):
|
|
||||||
if isinstance(action, str):
|
|
||||||
return action in self.movement_actions.values()
|
|
||||||
else:
|
|
||||||
return self[action] in self.movement_actions.values()
|
|
||||||
|
|
||||||
def is_no_op(self, action: Union[str, int]):
|
|
||||||
if isinstance(action, str):
|
|
||||||
action = self.by_name(action)
|
|
||||||
return self[action] == 'no-op'
|
|
||||||
|
|
||||||
def is_door_usage(self, action: Union[str, int]):
|
|
||||||
if isinstance(action, str):
|
|
||||||
action = self.by_name(action)
|
|
||||||
return self[action] == 'use_door'
|
|
||||||
|
|
||||||
|
|
||||||
class StateSlices(Register):
|
|
||||||
|
|
||||||
@property
|
|
||||||
def AGENTSTARTIDX(self):
|
|
||||||
if self._agent_start_idx:
|
|
||||||
return self._agent_start_idx
|
|
||||||
else:
|
|
||||||
self._agent_start_idx = min([idx for idx, x in self.items() if h.AGENT in x])
|
|
||||||
return self._agent_start_idx
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
super(StateSlices, self).__init__()
|
|
||||||
self._agent_start_idx = None
|
|
||||||
|
|
||||||
|
|
||||||
class Zones(Register):
|
|
||||||
|
|
||||||
@property
|
|
||||||
def danger_zone(self):
|
|
||||||
return self._zone_slices[self.by_name(h.DANGER_ZONE)]
|
|
||||||
|
|
||||||
@property
|
|
||||||
def accounting_zones(self):
|
|
||||||
return [self[idx] for idx, name in self.items() if name != h.DANGER_ZONE]
|
|
||||||
|
|
||||||
def __init__(self, parsed_level):
|
|
||||||
super(Zones, self).__init__()
|
|
||||||
slices = list()
|
|
||||||
self._accounting_zones = list()
|
|
||||||
self._danger_zones = list()
|
|
||||||
for symbol in np.unique(parsed_level):
|
|
||||||
if symbol == h.WALL:
|
|
||||||
continue
|
|
||||||
elif symbol == h.DANGER_ZONE:
|
|
||||||
self + symbol
|
|
||||||
slices.append(h.one_hot_level(parsed_level, symbol))
|
|
||||||
self._danger_zones.append(symbol)
|
|
||||||
else:
|
|
||||||
self + symbol
|
|
||||||
slices.append(h.one_hot_level(parsed_level, symbol))
|
|
||||||
self._accounting_zones.append(symbol)
|
|
||||||
|
|
||||||
self._zone_slices = np.stack(slices)
|
|
||||||
|
|
||||||
def __getitem__(self, item):
|
|
||||||
return self._zone_slices[item]
|
|
||||||
|
|
||||||
def get_name(self, item):
|
|
||||||
return self._register[item]
|
|
||||||
|
|
||||||
def by_name(self, item):
|
|
||||||
return self[super(Zones, self).by_name(item)]
|
|
||||||
|
|
||||||
def register_additional_items(self, other: Union[str, List[str]]):
|
|
||||||
raise AttributeError('You are not allowed to add additional Zones in runtime.')
|
|
||||||
|
19
main.py
19
main.py
@ -9,11 +9,12 @@ import pandas as pd
|
|||||||
|
|
||||||
from stable_baselines3.common.callbacks import CallbackList
|
from stable_baselines3.common.callbacks import CallbackList
|
||||||
|
|
||||||
from environments.factory.base_factory import MovementProperties
|
|
||||||
from environments.factory.simple_factory import DirtProperties, SimpleFactory
|
from environments.factory.simple_factory import DirtProperties, SimpleFactory
|
||||||
from environments.helpers import IGNORED_DF_COLUMNS
|
from environments.helpers import IGNORED_DF_COLUMNS
|
||||||
from environments.logging.monitor import MonitorCallback
|
from environments.logging.monitor import MonitorCallback
|
||||||
from environments.logging.plotting import prepare_plot
|
from environments.logging.plotting import prepare_plot
|
||||||
|
from environments.logging.recorder import RecorderCallback
|
||||||
|
from environments.utility_classes import MovementProperties
|
||||||
|
|
||||||
warnings.filterwarnings('ignore', category=FutureWarning)
|
warnings.filterwarnings('ignore', category=FutureWarning)
|
||||||
warnings.filterwarnings('ignore', category=UserWarning)
|
warnings.filterwarnings('ignore', category=UserWarning)
|
||||||
@ -91,8 +92,8 @@ if __name__ == '__main__':
|
|||||||
from algorithms.reg_dqn import RegDQN
|
from algorithms.reg_dqn import RegDQN
|
||||||
# from sb3_contrib import QRDQN
|
# from sb3_contrib import QRDQN
|
||||||
|
|
||||||
dirt_props = DirtProperties(clean_amount=3, gain_amount=0.2, max_global_amount=30,
|
dirt_props = DirtProperties(clean_amount=3, gain_amount=1, max_global_amount=30,
|
||||||
max_local_amount=5, spawn_frequency=1, max_spawn_ratio=0.05)
|
max_local_amount=5, spawn_frequency=3, max_spawn_ratio=0.05)
|
||||||
move_props = MovementProperties(allow_diagonal_movement=True,
|
move_props = MovementProperties(allow_diagonal_movement=True,
|
||||||
allow_square_movement=True,
|
allow_square_movement=True,
|
||||||
allow_no_op=False)
|
allow_no_op=False)
|
||||||
@ -103,9 +104,10 @@ if __name__ == '__main__':
|
|||||||
for modeL_type in [A2C, PPO, RegDQN, DQN]: # , QRDQN]:
|
for modeL_type in [A2C, PPO, RegDQN, DQN]: # , QRDQN]:
|
||||||
for seed in range(3):
|
for seed in range(3):
|
||||||
|
|
||||||
with SimpleFactory(n_agents=1, dirt_properties=dirt_props, pomdp_radius=2, max_steps=400,
|
with SimpleFactory(n_agents=1, dirt_properties=dirt_props, pomdp_radius=2, max_steps=400, parse_doors=False,
|
||||||
movement_properties=move_props, level_name='rooms', frames_to_stack=4,
|
movement_properties=move_props, level_name='rooms', frames_to_stack=4,
|
||||||
omit_agent_slice_in_obs=False, combin_agent_slices_in_obs=True) as env:
|
omit_agent_slice_in_obs=False, combin_agent_slices_in_obs=True, record_episodes=False
|
||||||
|
) as env:
|
||||||
|
|
||||||
if modeL_type.__name__ in ["PPO", "A2C"]:
|
if modeL_type.__name__ in ["PPO", "A2C"]:
|
||||||
kwargs = dict(ent_coef=0.01)
|
kwargs = dict(ent_coef=0.01)
|
||||||
@ -127,10 +129,13 @@ if __name__ == '__main__':
|
|||||||
out_path /= identifier
|
out_path /= identifier
|
||||||
|
|
||||||
callbacks = CallbackList(
|
callbacks = CallbackList(
|
||||||
[MonitorCallback(filepath=out_path / f'monitor_{identifier}.pick', plotting=False)]
|
[MonitorCallback(filepath=out_path / f'monitor_{identifier}.pick', plotting=False),
|
||||||
|
RecorderCallback(filepath=out_path / f'recorder_{identifier}.json', occupation_map=False,
|
||||||
|
trajectory_map=False
|
||||||
|
)]
|
||||||
)
|
)
|
||||||
|
|
||||||
model.learn(total_timesteps=int(1e5), callback=callbacks)
|
model.learn(total_timesteps=int(5e5), callback=callbacks)
|
||||||
|
|
||||||
save_path = out_path / f'model_{identifier}.zip'
|
save_path = out_path / f'model_{identifier}.zip'
|
||||||
save_path.parent.mkdir(parents=True, exist_ok=True)
|
save_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
@ -14,7 +14,7 @@ warnings.filterwarnings('ignore', category=UserWarning)
|
|||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
model_name = 'A2C_1623923982'
|
model_name = 'PPO_1626075586'
|
||||||
run_id = 0
|
run_id = 0
|
||||||
out_path = Path(__file__).parent / 'debug_out'
|
out_path = Path(__file__).parent / 'debug_out'
|
||||||
model_path = out_path / model_name
|
model_path = out_path / model_name
|
||||||
|
Reference in New Issue
Block a user