2021-07-13 11:12:03 +02:00

371 lines
14 KiB
Python

from pathlib import Path
from typing import List, Union, Iterable
import gym
import numpy as np
from gym import spaces
import yaml
from gym.wrappers import FrameStack
from environments.helpers import Constants as c, Constants
from environments import helpers as h
from environments.factory.base.objects import Slice, Agent, Tile, Action, MoveableEntity
from environments.factory.base.registers import StateSlices, Actions, Entities, Agents, Doors, FloorTiles
from environments.utility_classes import MovementProperties
# Key prefix for the entries of the state summary built in `_summarize_state`.
REC_TAC = 'rec'
# Base class of the factory environments: sub classes provide the additional actions,
# entities and slices as well as `calculate_reward`, `render` and `do_additional_actions`.
# noinspection PyAttributeOutsideInit
class BaseFactory(gym.Env):
@property
def action_space(self):
    """Discrete gym space with one entry per action held in ``self._actions``."""
    n_actions = self._actions.n
    return spaces.Discrete(n_actions)
@property
def observation_space(self):
    """
    Box space (values in [0, 1], float32) describing a single agent observation.

    The first axis is reduced by the agent slices that are omitted or merged; with a
    ``pomdp_radius`` the two spatial axes are ``2 * pomdp_radius + 1`` wide.
    """
    # Number of agent slices that do not show up in the observation.
    if self.combin_agent_slices_in_obs:
        # All agent slices are merged into a single one.
        dropped = self.n_agents - 1
    elif self.omit_agent_slice_in_obs:
        dropped = self.n_agents
    else:
        dropped = 0

    if self.pomdp_radius:
        window = self.pomdp_radius * 2 + 1
        shape = (self._obs_cube.shape[0] - dropped, window, window)
    else:
        shape = [dim - dropped if axis == 0 else dim for axis, dim in enumerate(self._obs_cube.shape)]
    return spaces.Box(low=0, high=1, shape=shape, dtype=np.float32)
@property
def pomdp_diameter(self):
    """Edge length of the partial observation window: ``2 * pomdp_radius + 1``."""
    radius = self.pomdp_radius
    return radius * 2 + 1
@property
def movement_actions(self):
    """Forward the ``movement_actions`` of the action register."""
    action_register = self._actions
    return action_register.movement_actions
@property
def additional_actions(self) -> Union[str, List[str]]:
    """
    Hook that every sub class has to implement.

    :return: The actions to add to the movement actions; a truthy result is handed to
             ``self._actions.register_additional_items`` in ``__init__``.
    :raises NotImplementedError: Always, as long as the property is not overridden.
    """
    message = 'Please register additional actions '
    raise NotImplementedError(message)
@property
def additional_entities(self) -> Union[Entities, List[Entities]]:
    """
    Hook that every sub class has to implement.

    :return: The entity collection(s) of the sub domain; a truthy result is registered
             next to agents (and doors) in ``_init_entities``.
    :raises NotImplementedError: Always, as long as the property is not overridden.
    """
    message = 'Please register additional entities.'
    raise NotImplementedError(message)
@property
def additional_slices(self) -> Union[Slice, List[Slice]]:
    """
    Hook that every sub class has to implement.

    :return: The state slices of the sub domain; a truthy result is registered after the
             level, door and agent slices in ``_init_state_slices``.
    :raises NotImplementedError: Always, as long as the property is not overridden.
    """
    message = 'Please register additional slices.'
    raise NotImplementedError(message)
def __enter__(self):
    """
    Enter the context manager.

    :return: The environment itself, or the environment wrapped in a ``FrameStack`` when
             ``frames_to_stack`` is not 0.
    """
    if self.frames_to_stack == 0:
        return self
    return FrameStack(self, self.frames_to_stack)
def __exit__(self, exc_type, exc_val, exc_tb):
    """Close the environment when the ``with`` block is left; exceptions are not suppressed."""
    self.close()
    return None
def __init__(self, level_name='simple', n_agents=1, max_steps=int(5e2), pomdp_radius: Union[None, int] = 0,
             movement_properties: MovementProperties = MovementProperties(), parse_doors=False,
             combin_agent_slices_in_obs: bool = False, frames_to_stack=0, record_episodes=False,
             omit_agent_slice_in_obs=False, done_at_collision=False, **kwargs):
    """
    Store the configuration, set up the action register and reset the environment.

    :param level_name: Name of the level file (``<level_name>.txt``) read in ``_init_state_slices``.
    :param n_agents: Number of agents to place in the level.
    :param max_steps: Step count at which ``step`` reports ``done``.
    :param pomdp_radius: Radius of the partial observation window; falsy for the full observation.
    :param movement_properties: Passed on to the ``Actions`` register.
    :param parse_doors: Whether doors are read from the level and door usage is registered as action.
    :param combin_agent_slices_in_obs: Sum all agent slices into one slice of the observation.
    :param frames_to_stack: Number of frames for the ``FrameStack`` wrapper (see ``__enter__``); 0 disables it.
    :param record_episodes: Add the state summary to the ``info`` dict.
    :param omit_agent_slice_in_obs: Leave the agent slices out of the observation.
    :param done_at_collision: End the episode as soon as a collision occurs.
    :param kwargs: Accepted, but not used by this class.
    """
    # At most one of the two agent-slice options may be active.
    assert (combin_agent_slices_in_obs != omit_agent_slice_in_obs) or \
           not (combin_agent_slices_in_obs or omit_agent_slice_in_obs), \
           'Both options are exclusive'
    assert frames_to_stack >= 0 and frames_to_stack != 1, "'frames_to_stack' cannot be negative or 1."

    # Level and episode configuration
    self.level_name = level_name
    self._level_shape = None
    self.n_agents = n_agents
    self.max_steps = max_steps
    self.done_at_collision = done_at_collision
    self.record_episodes = record_episodes
    self.parse_doors = parse_doors
    self.movement_properties = movement_properties

    # Observation configuration
    self.pomdp_radius = pomdp_radius
    self.combin_agent_slices_in_obs = combin_agent_slices_in_obs
    self.omit_agent_slice_in_obs = omit_agent_slice_in_obs
    self.frames_to_stack = frames_to_stack

    # Actions: movement (and door usage) first, then whatever the sub class contributes.
    self._actions = Actions(self.movement_properties, can_use_doors=self.parse_doors)
    extra_actions = self.additional_actions
    if extra_actions:
        self._actions.register_additional_items(extra_actions)

    self.reset()
def _init_state_slices(self) -> StateSlices:
    """
    Read the level file and build the register of state slices.

    The slices are registered in the order level, doors (only when ``parse_doors`` is set
    and the level contains doors), one slice per agent, then the additional slices of the
    sub class. ``self._level_shape`` is set to the shape of the level slice.

    :return: The populated ``StateSlices`` register.
    """
    slices = StateSlices()

    # Level
    level_file = Path(__file__).parent.parent / h.LEVELS_DIR / f'{self.level_name}.txt'
    parsed_level = h.parse_level(level_file)
    level_slice = Slice(c.LEVEL.name, h.one_hot_level(parsed_level))
    self._level_shape = level_slice.shape

    # Doors
    door_map = h.one_hot_level(parsed_level, c.DOOR)
    door_slices = [Slice(c.DOORS.value, door_map)] if door_map.any() and self.parse_doors else []

    # Agents: one empty slice each, named "<AGENT>#<index>"
    agent_slices = [Slice(f'{c.AGENT.name}#{idx}', np.zeros_like(level_slice.slice))
                    for idx in range(self.n_agents)]

    slices.register_additional_items([level_slice] + door_slices + agent_slices)

    # Additional slices from sub domains
    extra_slices = self.additional_slices
    if extra_slices:
        slices.register_additional_items(extra_slices)
    return slices
def _init_obs_cube(self) -> np.ndarray:
    """
    Allocate the observation cube with one layer per state slice.

    Layer 0 receives the level slice, all other layers start as zeros. With a
    ``pomdp_radius`` a second cube, padded by the radius on each side, is stored in
    ``self._padded_obs_cube``; its layer 0 padding holds ``OCCUPIED_CELL``.

    :return: The (unpadded) observation cube.
    """
    level_slice = self._slices.by_enum(c.LEVEL)
    x, y = level_slice.shape
    n_layers = len(self._slices)

    cube = np.zeros((n_layers, x, y))
    cube[0] = level_slice.slice

    radius = self.pomdp_radius
    if radius:
        padded = np.full((n_layers, x + radius * 2, y + radius * 2), c.FREE_CELL.value)
        # Everything outside of the level counts as occupied on the level layer.
        padded[0] = c.OCCUPIED_CELL.value
        padded[:, radius:radius + x, radius:radius + y] = cube
        self._padded_obs_cube = padded
    return cube
def _init_entities(self):
    """
    Create floor tiles, doors and agents and collect them in an ``Entities`` register.

    Sets ``self._tiles``, ``self._agents`` and, when ``parse_doors`` is set, ``self._doors``.

    :return: Register holding the agents, the doors (if parsed) and the additional
             entities of the sub class.
    :raises ValueError: Raised by ``np.random.choice`` when there are fewer tiles than agents.
    """
    # Tile Init
    self._tiles = FloorTiles.from_argwhere_coordinates(self._slices.by_enum(c.LEVEL).free_tiles)

    # Door Init
    if self.parse_doors:
        tiles = [self._tiles.by_pos(x) for x in self._slices.by_enum(c.DOORS).occupied_tiles]
        self._doors = Doors.from_tiles(tiles, context=self._tiles)

    # Agent Init on random positions.
    # replace=False draws distinct tiles; the default (with replacement) could put
    # several agents onto the very same start tile.
    start_tiles = np.random.choice(self._tiles, self.n_agents, replace=False)
    self._agents = Agents.from_tiles(start_tiles)

    entities = Entities()
    entities.register_additional_items([self._agents])

    if self.parse_doors:
        entities.register_additional_items([self._doors])

    if additional_entities := self.additional_entities:
        entities.register_additional_items([additional_entities])

    return entities
def reset(self) -> (np.ndarray, int, bool, dict):
    """
    Rebuild slices, observation cube and entities and set the step counter to 0.

    NOTE(review): the first three elements of the result are ``None``; no observation is
    returned here - confirm that callers (e.g. a ``FrameStack`` wrapper) can cope with that.

    :return: ``(None, None, None, info)``; ``info`` is the state summary when
             ``record_episodes`` is set, an empty dict otherwise.
    """
    # The order matters: the cube is built from the slices, entities from both.
    self._slices = self._init_state_slices()
    self._obs_cube = self._init_obs_cube()
    self._entitites = self._init_entities()
    self._flush_state()
    self._steps = 0

    if self.record_episodes:
        info = self._summarize_state()
    else:
        info = {}
    return None, None, None, info
def pre_step(self) -> None:
    """Hook called by ``step`` before any action is processed; does nothing by default."""
    return None
def post_step(self) -> dict:
    """
    Hook called by ``step`` after the reward has been calculated.

    ``step`` merges the result into its ``info`` dict, therefore the default has to be a
    dict as well (a bare ``pass`` returned ``None``, which ``dict.update`` rejects).

    :return: Additional ``info`` entries; empty by default.
    """
    return {}
def step(self, actions):
    """
    Advance the environment by one time step.

    :param actions: One action index per agent; a scalar is wrapped into a list. Actions
                    and agents are paired with ``zip``, surplus entries on either side are
                    ignored.
    :return: Tuple ``(obs, reward, done, info)``. ``done`` is set when ``max_steps`` is
             reached or, with ``done_at_collision``, when any tile holds a collision.
    """
    # np.isscalar covers plain Python ints as well as numpy scalars.
    actions = [actions] if np.isscalar(actions) else actions
    assert isinstance(actions, Iterable), f'"actions" has to be in [{int, list}]'
    self._steps += 1
    done = False

    # Pre step Hook for later use
    self.pre_step()

    for action, agent in zip(actions, self._agents):
        agent.clear_temp_sate()
        action_name = self._actions[action]
        if self._actions.is_moving_action(action):
            valid = self._move_or_colide(agent, action_name)
        elif self._actions.is_no_op(action):
            valid = c.VALID.value
        elif self._actions.is_door_usage(action):
            # Doors can only be used by an agent that stands on one.
            if door := self._doors.by_pos(agent.pos):
                door.use()
                valid = c.VALID.value
            else:
                valid = c.NOT_VALID.value
        else:
            valid = self.do_additional_actions(agent, action)
        agent.temp_action = action
        agent.temp_valid = valid

    self._flush_state()

    # Tell every colliding guest which other guests share its tile.
    tiles_with_collisions = self.get_all_tiles_with_collisions()
    for tile in tiles_with_collisions:
        guests = tile.guests_that_can_collide
        for i, guest in enumerate(guests):
            this_collisions = guests[:]
            del this_collisions[i]
            guest.temp_collisions = this_collisions

    if self.done_at_collision and tiles_with_collisions:
        done = True

    # Step the door close interval
    if self.parse_doors:
        self._doors.tick_doors()

    # Finalize
    reward, info = self.calculate_reward()
    if self._steps >= self.max_steps:
        done = True
    info.update(step_reward=reward, step=self._steps)
    if self.record_episodes:
        info.update(self._summarize_state())

    # Post step Hook for later use; a hook without a result (None) must not break the step.
    post_step_info = self.post_step()
    if post_step_info:
        info.update(post_step_info)

    obs = self._get_observations()
    return obs, reward, done, info
def _flush_state(self):
    """
    Write the current door and agent state into the observation cube.

    All layers except the level layer are cleared to ``FREE_CELL`` first; afterwards the
    door states and the agent positions are drawn into their layers.
    """
    level_idx = self._slices.get_idx(c.LEVEL)
    non_level_layers = np.arange(len(self._slices)) != level_idx
    self._obs_cube[non_level_layers] = c.FREE_CELL.value

    if self.parse_doors:
        for door in self._doors:
            door_state = c.IS_OPEN_DOOR if door.is_open else c.IS_CLOSED_DOOR
            self._obs_cube[self._slices.get_idx(c.DOORS)][door.pos] = door_state.value

    for agent in self._agents:
        # Indexing with a single int yields a view, so writes end up in the cube.
        agent_layer = self._obs_cube[self._slices.get_idx_by_name(agent.name)]
        agent_layer[agent.pos] = c.OCCUPIED_CELL.value
        if agent.last_pos != h.NO_POS:
            agent_layer[agent.last_pos] = c.FREE_CELL.value
def _get_observations(self) -> np.ndarray:
    """
    Collect the observations of all agents.

    :return: The observation of the only agent, or the per-agent observations stacked
             along a new first axis when there are two or more agents.
    :raises ValueError: If ``n_agents`` is smaller than 1.
    """
    if self.n_agents == 1:
        return self._build_per_agent_obs(self._agents[0])
    if self.n_agents >= 2:
        per_agent = [self._build_per_agent_obs(agent) for agent in self._agents]
        return np.stack(per_agent)
    raise ValueError('n_agents cannot be smaller than 1!!')
def _build_per_agent_obs(self, agent: Agent) -> np.ndarray:
    """
    Build the observation of a single agent from the observation cube.

    With a ``pomdp_radius`` the observation is the window of the padded cube centred on
    the agent, otherwise the whole cube. Agent slices are recognised by ``c.AGENT.name``
    being part of the slice name (this is how ``_init_state_slices`` names them).

    NOTE(review): without omit/combine the result is a view onto the internal cube and
    changes with the next flush - confirm that consumers copy if they keep it.

    :param agent: The agent the observation is built for.
    :return: The cube/window without agent slices (``omit_agent_slice_in_obs``), with all
             agent slices summed into one (``combin_agent_slices_in_obs``) or unchanged.
    """
    first_agent_slice = self._slices.AGENTSTARTIDX

    if r := self.pomdp_radius:
        # Refresh the centre of the padded cube, then cut the window around the agent.
        x, y = self._level_shape
        self._padded_obs_cube[:, r:r + x, r:r + y] = self._obs_cube
        global_x, global_y = agent.pos
        global_x += r
        global_y += r
        x0, x1 = max(0, global_x - r), global_x + r + 1
        y0, y1 = max(0, global_y - r), global_y + r + 1
        obs = self._padded_obs_cube[:, x0:x1, y0:y1]
    else:
        obs = self._obs_cube

    if self.omit_agent_slice_in_obs:
        # Same name based test as in the combine branch below; the former
        # "c.AGENT.value not in <slice object>" did not look at the slice name at all.
        kept = [key for key, state_slice in self._slices.items() if c.AGENT.name not in state_slice.name]
        return obs[kept]

    if self.combin_agent_slices_in_obs:
        agent_keys = [key for key, state_slice in self._slices.items() if c.AGENT.name in state_slice.name]
        agent_obs = np.sum(obs[agent_keys], axis=0, keepdims=True)
        return np.concatenate((obs[:first_agent_slice], agent_obs, obs[first_agent_slice + self.n_agents:]))

    return obs
def do_additional_actions(self, agent_i: int, action: int) -> bool:
    """
    Hook for actions that are neither movement, no-op nor door usage.

    ``step`` calls this with the acting agent and the chosen action and stores the result
    as the validity of the action.

    :raises NotImplementedError: Always, as long as the method is not overridden.
    """
    raise NotImplementedError()
def get_all_tiles_with_collisions(self) -> List[Tile]:
    """
    Find all tiles on which a collision takes place.

    :return: The occupied tiles that host at least two guests with ``can_collide`` set.
    """
    def collides(tile) -> bool:
        # Unoccupied tiles are skipped without looking at their guests.
        if not tile.is_occupied():
            return False
        colliders = [guest for guest in tile.guests if guest.can_collide]
        return len(colliders) >= 2

    return [tile for tile in self._tiles if collides(tile)]
def _move_or_colide(self, agent: Agent, action: Action) -> Constants:
    """
    Move the agent if the requested move passes ``_check_agent_move``.

    :param agent: The agent to move.
    :param action: The movement action to perform.
    :return: The result of ``agent.move`` for an accepted move, ``c.NOT_VALID`` otherwise.
    """
    target_tile, is_valid = self._check_agent_move(agent, action)
    if not is_valid:
        # The move was rejected, the agent stays where it is.
        return c.NOT_VALID
    return agent.move(target_tile)
def _check_agent_move(self, agent, action: Action) -> (Tile, bool):
    """
    Determine the tile a movement action leads to and whether the move is allowed.

    :param agent: The agent that wants to move.
    :param action: Movement action; its name is looked up in ``h.ACTIONMAP``.
    :return: ``(tile, valid)`` - the target tile, or the current tile of the agent when
             there is no target tile or a closed door blocks the move.
    """
    x_diff, y_diff = h.ACTIONMAP[action.name]
    target_pos = (agent.x + x_diff, agent.y + y_diff)
    new_tile = self._tiles.by_pos(target_pos)

    if not new_tile:
        # NOTE(review): a move without a target tile is reported as VALID together with
        # the agent's own tile - confirm that this is intended.
        return agent.tile, c.VALID

    if self.parse_doors and agent.last_pos != h.NO_POS:
        door = self._doors.by_pos(agent.pos)
        # Standing in a closed door, the agent may only continue along the door's link.
        if door and not door.is_open and not door.is_linked(agent.last_pos, new_tile.pos):
            return agent.tile, c.NOT_VALID

    return new_tile, c.VALID
def calculate_reward(self) -> (int, dict):
    """
    Hook that computes the reward of the current step; used by ``step``.

    :return: Tuple of reward and info dict.
    :raises NotImplementedError: Always, as long as the method is not overridden.
    """
    raise NotImplementedError()
def render(self, mode='human'):
    """
    Hook for rendering the environment.

    :param mode: Render mode, ``'human'`` by default.
    :raises NotImplementedError: Always, as long as the method is not overridden.
    """
    raise NotImplementedError()
def save_params(self, filepath: Path):
    """
    Dump all public instance attributes as YAML.

    Attributes whose name starts with an underscore are left out. Missing parent
    directories of ``filepath`` are created.

    :param filepath: Target file; an existing file is overwritten.
    """
    public_attributes = {}
    for key, val in self.__dict__.items():
        # A leading underscore also covers names with a double underscore.
        if key.startswith('_'):
            continue
        public_attributes[key] = val

    filepath.parent.mkdir(parents=True, exist_ok=True)
    with filepath.open('w') as f:
        yaml.dump(public_attributes, f)
def _summarize_state(self):
    """
    Summarise the current step and the state of all entity collections.

    :return: Dict with the step counter under ``<REC_TAC>_step`` and one
             ``<REC_TAC>_<entity name>`` entry per entity that offers ``summarize_state``.
    """
    summary = {f'{REC_TAC}_step': self._steps}
    summary.update({
        f'{REC_TAC}_{entity.name}': entity.summarize_state()
        for entity in self._entitites
        if hasattr(entity, 'summarize_state')
    })
    return summary