major redesign of observations and entities

This commit is contained in:
Steffen Illium 2023-06-09 14:04:17 +02:00
parent 901fbcbc32
commit c552c35f66
161 changed files with 4458 additions and 4163 deletions

View File

@ -1,100 +0,0 @@
import numpy as np
from networkx.algorithms.approximation import traveling_salesman as tsp
from environments.factory.base.objects import Agent
from environments.helpers import points_to_graph
from environments import helpers as h
from environments.helpers import Constants as BaseConstants
from environments.helpers import EnvActions as BaseActions
class Constants(BaseConstants):
    """Base environment constants, extended by the identifier of the dirt piles."""

    DIRT = 'DirtPile'
class Actions(BaseActions):
    """Base environment actions, extended by the name of the clean-up action."""

    CLEAN_UP = 'do_cleanup_action'
# Short aliases for the action and constant namespaces defined above.
a = Actions
c = Constants
# NOTE(review): not referenced in the visible code, which hard-codes the literal 7 instead - confirm intent.
future_planning = 7
class TSPDirtAgent(Agent):
    """Scripted agent that follows a greedy TSP route over the dirt piles and cleans them up."""

    def __init__(self, env, *args,
                 static_problem: bool = True, **kwargs):
        """Store the environment and build the graph of floor tiles used for routing.

        :param env: Environment the agent acts in; queried for floor tiles, dirt piles and actions.
        :param static_problem: If True a computed route is kept in full, otherwise only its
                               first seven entries are kept before a new route is computed.
        """
        super().__init__(*args, **kwargs)
        self.static_problem = static_problem
        self.local_optimization = True
        self._env = env
        floor_positions = self._env[c.FLOOR].positions
        self._floortile_graph = points_to_graph(
            floor_positions,
            allow_euclidean_connections=self._env._actions.allow_diagonal_movement,
            allow_manhattan_connections=self._env._actions.allow_square_movement,
        )
        self._static_route = None

    def predict(self, *_, **__):
        """Pick the next scripted action and return its entry of the named action space."""
        if self._env[c.DIRT].by_pos(self.pos) is not None:
            chosen = a.CLEAN_UP
        elif any('door' in guest.name.lower() for guest in self.tile.guests):
            door = next(guest for guest in self.tile.guests if 'door' in guest.name.lower())
            # A closed door on the own tile is used first, an open one is simply passed.
            chosen = h.EnvActions.USE_DOOR if door.is_closed else self._predict_move()
        else:
            chosen = self._predict_move()
        # Translate the action name to the value stored in the named action space,
        # to have the same output as any other model.
        return next(value for name, value in self._env.named_action_space.items() if name == chosen)

    def _predict_move(self):
        """Return the move towards the next route position, or a random action index without dirt."""
        if len(self._env[c.DIRT]) < 1:
            return int(np.random.randint(self._env.action_space.n))

        if not self._static_route:
            route = self.calculate_tsp_route()
            self._static_route = route if self.static_problem else route[:7]

        # Skip route entries that equal the current position.
        next_pos = self._static_route.pop(0)
        while next_pos == self.pos:
            next_pos = self._static_route.pop(0)

        diff = np.subtract(next_pos, self.pos)
        # Retrieve the action whose position offset leads to next_pos.
        try:
            action = next(name for name, pos_diff in h.ACTIONMAP.items()
                          if (diff == pos_diff).all())
        except StopIteration:
            # `action` stays unbound here, so the return below raises UnboundLocalError.
            print('This Should not happen!')
        # noinspection PyUnboundLocalVariable
        return action

    def calculate_tsp_route(self):
        """Return a greedy TSP cycle through the agent position and the selected dirt positions."""
        if self.local_optimization:
            nearby = [x for x in self._env[c.DIRT].positions if max(abs(np.subtract(x, self.pos))) < 3]
            nodes = [self.pos] + nearby
            try:
                # Fill up to seven nodes with further dirt positions.
                while len(nodes) < 7:
                    nodes.append(next(x for x in self._env[c.DIRT].positions if x not in nodes))
            except StopIteration:
                # Not enough distinct positions available: use all of them.
                nodes = [self.pos] + self._env[c.DIRT].positions
        else:
            nodes = [self.pos] + self._env[c.DIRT].positions
        return tsp.traveling_salesman_problem(self._floortile_graph,
                                              nodes=nodes, cycle=True, method=tsp.greedy_tsp)

View File

@ -1,5 +1,5 @@
import torch
from typing import Union, List
from typing import Union, List, Dict
import numpy as np
from torch.distributions import Categorical
from algorithms.marl.memory import MARLActorCriticMemory
@ -74,7 +74,7 @@ class BaseActorCritic:
actions = [Categorical(logits=logits).sample().item() for logits in out[nms.LOGITS]]
return actions
def init_hidden(self) -> dict[ListOrTensor]:
def init_hidden(self) -> Dict[str, ListOrTensor]:
pass
def forward(self,
@ -82,7 +82,7 @@ class BaseActorCritic:
actions: ListOrTensor,
hidden_actor: ListOrTensor,
hidden_critic: ListOrTensor
) -> dict[ListOrTensor]:
) -> Dict[str, ListOrTensor]:
pass
@torch.no_grad()

View File

@ -39,7 +39,7 @@ class LoopIAC(BaseActorCritic):
def forward(self, observations, actions, hidden_actor, hidden_critic):
outputs = [
net(
self._as_torch(observations[ag_i]).unsqueeze(0).unsqueeze(0), # agents x time
self._as_torch(observations[ag_i]).unsqueeze(0).unsqueeze(0), # agent x time
self._as_torch(actions[ag_i]).unsqueeze(0),
hidden_actor[ag_i],
hidden_critic[ag_i]

View File

@ -46,7 +46,7 @@ class LoopMAPPO(LoopSNAC):
# monte carlo returns
mc_returns = self.monte_carlo_returns(batch[nms.REWARD], batch[nms.DONE], gamma)
mc_returns = (mc_returns - mc_returns.mean()) / (mc_returns.std() + 1e-8) #todo: norm across agents ok?
mc_returns = (mc_returns - mc_returns.mean()) / (mc_returns.std() + 1e-8) #todo: norm across agent ok?
advantages = mc_returns - out[nms.CRITIC][:, :-1]
# policy loss

View File

@ -120,7 +120,7 @@ class MARLActorCriticMemory(object):
def __getattr__(self, attr):
all_attrs = [getattr(mem, attr) for mem in self.memories]
return torch.cat(all_attrs, 0) # agents x time ...
return torch.cat(all_attrs, 0) # agent x time ...
def chunk_dataloader(self, chunk_len, k):
datasets = [ExperienceChunks(mem, chunk_len, k) for mem in self.memories]

View File

@ -0,0 +1,95 @@
from random import choice
import numpy as np
from networkx.algorithms.approximation import traveling_salesman as tsp
from environment.utils.helpers import points_to_graph
from modules.doors import constants as do
from environment import constants as c
from environment.utils.helpers import MOVEMAP
from abc import abstractmethod, ABC
# NOTE(review): not referenced in the visible code of this module; the route code uses the literal 7 - confirm intent.
future_planning = 7
class TSPBaseAgent(ABC):
    """Base class of the scripted agents that follow a greedy TSP route towards their targets."""

    def __init__(self, state, agent_i, static_problem: bool = True):
        """Bind the agent with index ``agent_i`` and build the graph of floor tiles.

        :param state: Object offering ``state[...]`` as well as item access to the entity groups.
        :param agent_i: Index of the controlled agent inside the agent group.
        :param static_problem: If True a computed route is kept in full, otherwise only its
                               first seven entries are kept before a new route is computed.
        """
        self.static_problem = static_problem
        self.local_optimization = True
        self._env = state
        self.state = self._env.state[c.AGENT][agent_i]
        self._floortile_graph = points_to_graph(self._env[c.FLOOR].positions)
        self._static_route = None

    @abstractmethod
    def predict(self, *_, **__) -> int:
        """Return the index of the action to perform next."""
        return 0

    def _use_door_or_move(self, door, target):
        """Return the door-use action for a closed door, otherwise the next move towards ``target``."""
        if door.is_closed:
            return do.ACTION_DOOR_USE
        return self._predict_move(target)

    def calculate_tsp_route(self, target_identifier):
        """Return a greedy TSP cycle through the agent position and the selected target positions."""
        positions = [pos for pos in self._env.state[target_identifier].positions if pos != c.VALUE_NO_POS]
        if self.local_optimization:
            nearby = [x for x in positions if max(abs(np.subtract(x, self.state.pos))) < 3]
            nodes = [self.state.pos] + nearby
            try:
                # Fill up to seven nodes with further target positions.
                while len(nodes) < 7:
                    nodes.append(next(x for x in positions if x not in nodes))
            except StopIteration:
                # Not enough distinct positions available: use all of them.
                nodes = [self.state.pos] + positions
        else:
            nodes = [self.state.pos] + positions
        return tsp.traveling_salesman_problem(self._floortile_graph,
                                              nodes=nodes, cycle=True, method=tsp.greedy_tsp)

    def _door_is_close(self):
        """Return the first door found on a neighboring floor tile, or None."""
        try:
            return next(guest
                        for tile in self.state.tile.neighboring_floor
                        for guest in tile.guests
                        if do.DOOR in guest.name)
        except StopIteration:
            return None

    def _has_targets(self, target_identifier):
        """Return True if at least one target currently has a valid position."""
        return any(x.pos != c.VALUE_NO_POS for x in self._env.state[target_identifier])

    def _predict_move(self, target_identifier):
        """Return the name of the move towards the next route position.

        A random action name is returned when there is no target or no move matches.
        """
        if not self._has_targets(target_identifier):
            return choice(self.state.actions).name

        if not self._static_route:
            route = self.calculate_tsp_route(target_identifier)
            self._static_route = route if self.static_problem else route[:7]

        # Skip route entries that equal the current position.
        next_pos = self._static_route.pop(0)
        while next_pos == self.state.pos:
            next_pos = self._static_route.pop(0)

        diff = np.subtract(next_pos, self.state.pos)
        # Retrieve the action whose position offset leads to next_pos.
        try:
            action = next(name for name, pos_diff in MOVEMAP.items() if np.all(diff == pos_diff))
        except StopIteration:
            print(f'diff: {diff}')
            print('This Should not happen!')
            action = choice(self.state.actions).name
        return action

View File

@ -0,0 +1,27 @@
from algorithms.static.TSP_base_agent import TSPBaseAgent
from modules.clean_up import constants as di
# NOTE(review): not referenced in the visible code of this module - confirm it is still needed.
future_planning = 7
class TSPDirtAgent(TSPBaseAgent):
    """Scripted agent that cleans the dirt pile it stands on and otherwise walks towards dirt."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def predict(self, *_, **__):
        """Return the index of the next action within the agent's action list.

        :raises EnvironmentError: If the chosen action name is not among the agent's actions.
        """
        standing_on_dirt = self._env.state[di.DIRT].by_pos(self.state.pos) is not None
        if standing_on_dirt:
            action = di.CLEAN_UP
        else:
            door = self._door_is_close()
            action = self._use_door_or_move(door, di.DIRT) if door else self._predict_move(di.DIRT)
        # Translate the action name to an integer to have the same output as any other model.
        try:
            action_obj = next(idx for idx, candidate in enumerate(self.state.actions)
                              if candidate.name == action)
        except (StopIteration, UnboundLocalError):
            print('Will not happen')
            raise EnvironmentError
        return action_obj

View File

@ -0,0 +1,59 @@
import numpy as np
from algorithms.static.TSP_base_agent import TSPBaseAgent
from modules.items import constants as i
# NOTE(review): not referenced in the visible code of this module - confirm it is still needed.
future_planning = 7
# Threshold the agent's inventory length is compared against before switching to MODE_BRING.
inventory_size = 3
# Agent modes: MODE_GET targets items, MODE_BRING targets drop-offs.
MODE_GET = 'Mode_Get'
MODE_BRING = 'Mode_Bring'
class TSPItemAgent(TSPBaseAgent):
    """Scripted agent that walks towards items (MODE_GET) or towards drop-offs (MODE_BRING)."""

    def __init__(self, *args, mode=MODE_GET, **kwargs):
        """Forward all arguments to TSPBaseAgent and store the initial ``mode``."""
        super(TSPItemAgent, self).__init__(*args, **kwargs)
        self.mode = mode

    def _target(self):
        """Return the identifier of the entity group the current mode steers towards."""
        return i.DROP_OFF if self.mode == MODE_BRING else i.ITEM

    def predict(self, *_, **__):
        """Return the index of the next action within the agent's action list.

        After the action is chosen, the mode is updated from the agent's inventory:
        an empty inventory switches MODE_BRING to MODE_GET, more than ``inventory_size``
        entries switch MODE_GET to MODE_BRING.

        :raises EnvironmentError: If the chosen action name is not among the agent's actions.
        """
        if self._env.state[i.ITEM].by_pos(self.state.pos) is not None:
            action = i.ITEM_ACTION
        elif self._env.state[i.DROP_OFF].by_pos(self.state.pos) is not None:
            action = i.ITEM_ACTION
        elif door := self._door_is_close():
            action = self._use_door_or_move(door, self._target())
        else:
            action = self._choose()
        # Translate the action name to an integer to have the same output as any other model.
        try:
            action_obj = next(action_i for action_i, a in enumerate(self.state.actions) if a.name == action)
        except (StopIteration, UnboundLocalError):
            print('Will not happen')
            raise EnvironmentError
        carried = len(self._env[i.INVENTORY].by_entity(self.state))
        if self.mode == MODE_BRING and not carried:
            self.mode = MODE_GET
        elif self.mode == MODE_GET and carried > inventory_size:
            self.mode = MODE_BRING
        return action_obj

    def _choose(self):
        """Return the name of the next action when no item, drop-off or door needs handling."""
        if len(self._env.state[i.ITEM]) >= 1:
            return self._predict_move(self._target())
        if len(self._env[i.INVENTORY].by_entity(self.state)):
            self.mode = MODE_BRING
            # The target is resolved after the mode switch, so the move heads for a drop-off.
            return self._predict_move(self._target())
        # Fall back to a random action *name*: predict() resolves names, not indices.
        actions = self.state.actions
        return actions[int(np.random.randint(len(actions)))].name

View File

@ -0,0 +1,32 @@
from algorithms.static.TSP_base_agent import TSPBaseAgent
from modules.destinations import constants as d
from modules.doors import constants as do
# NOTE(review): not referenced in the visible code of this module - confirm it is still needed.
future_planning = 7
class TSPTargetAgent(TSPBaseAgent):
    """Scripted agent that walks towards the destinations, using doors on its way."""

    def __init__(self, *args, **kwargs):
        super(TSPTargetAgent, self).__init__(*args, **kwargs)

    def _handle_doors(self):
        """Return the first door found on a neighboring floor tile, or None.

        Same lookup as ``TSPBaseAgent._door_is_close``; not called within this class.
        """
        try:
            return next(y for x in self.state.tile.neighboring_floor for y in x.guests if do.DOOR in y.name)
        except StopIteration:
            return None

    def predict(self, *_, **__):
        """Return the index of the next action within the agent's action list.

        :raises EnvironmentError: If the chosen action name is not among the agent's actions.
        """
        if door := self._door_is_close():
            action = self._use_door_or_move(door, d.DESTINATION)
        else:
            action = self._predict_move(d.DESTINATION)
        # Translate the action name to an integer to have the same output as any other model.
        try:
            action_obj = next(action_i for action_i, a in enumerate(self.state.actions) if a.name == action)
        except (StopIteration, UnboundLocalError):
            print('Will not happen')
            # Raise explicitly, like the sibling agents do, instead of falling through
            # to the return below with `action_obj` unbound.
            raise EnvironmentError
        return action_obj

View File

@ -0,0 +1,15 @@
from random import randint
from algorithms.static.TSP_base_agent import TSPBaseAgent
# NOTE(review): not referenced in the visible code of this module - confirm it is still needed.
future_planning = 7
class TSPRandomAgent(TSPBaseAgent):
    """Agent that ignores its inputs and returns a random action index."""

    def __init__(self, n_actions, *args, **kwargs):
        """Store the number of available actions; the remaining arguments go to TSPBaseAgent."""
        super().__init__(*args, **kwargs)
        self.n_action = n_actions

    def predict(self, *_, **__):
        """Return a random integer between 0 and ``n_action - 1`` (both included)."""
        highest_index = self.n_action - 1
        return randint(0, highest_index)

View File

@ -1,4 +1,3 @@
import re
import torch
import numpy as np
import yaml

101
environment/actions.py Normal file
View File

@ -0,0 +1,101 @@
import abc
from typing import Union
from environment import rewards as r
from environment import constants as c
from environment.utils.helpers import MOVEMAP
from environment.utils.results import ActionResult
class Action(abc.ABC):
    """Abstract base class of all actions; every action is identified by a string."""

    @abc.abstractmethod
    def __init__(self, identifier: str):
        self._identifier = identifier

    @property
    def name(self):
        """The identifier this action was created with."""
        return self._identifier

    @abc.abstractmethod
    def do(self, entity, state) -> Union[None, ActionResult]:
        """Perform the action for ``entity``; concrete actions override this."""
        return None

    def __repr__(self):
        return 'Action[{}]'.format(self._identifier)
class Noop(Action):
    """Action that does nothing and always yields a valid result."""

    def __init__(self):
        super().__init__(c.NOOP)

    def do(self, entity, *_) -> Union[None, ActionResult]:
        """Return a valid ActionResult carrying the no-op reward for ``entity``."""
        result = ActionResult(
            identifier=self._identifier,
            validity=c.VALID,
            reward=r.NOOP,
            entity=entity,
        )
        return result
class Move(Action, abc.ABC):
    """Abstract movement action; the offset is looked up in MOVEMAP by the action identifier."""

    @abc.abstractmethod
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def do(self, entity, env):
        """Try to move ``entity`` onto the floor tile at the offset position.

        The move is not valid when there is no floor tile at that position or
        when ``entity.move`` reports a failure.
        """
        target_tile = env[c.FLOOR].by_pos(self._calc_new_pos(entity.pos))
        # noinspection PyUnresolvedReferences
        valid = entity.move(target_tile) if target_tile else c.NOT_VALID
        if valid:
            reward = r.MOVEMENTS_VALID
        else:
            reward = r.MOVEMENTS_FAIL
        return ActionResult(entity=entity, identifier=self._identifier, validity=valid, reward=reward)

    def _calc_new_pos(self, pos):
        """Return ``pos`` shifted by the offset registered for this action."""
        dx, dy = MOVEMAP[self._identifier]
        return pos[0] + dx, pos[1] + dy
class North(Move):
    """Move action identified by ``c.NORTH``."""

    def __init__(self, *args, **kwargs):
        super().__init__(c.NORTH, *args, **kwargs)


class NorthEast(Move):
    """Move action identified by ``c.NORTHEAST``."""

    def __init__(self, *args, **kwargs):
        super().__init__(c.NORTHEAST, *args, **kwargs)


class East(Move):
    """Move action identified by ``c.EAST``."""

    def __init__(self, *args, **kwargs):
        super().__init__(c.EAST, *args, **kwargs)


class SouthEast(Move):
    """Move action identified by ``c.SOUTHEAST``."""

    def __init__(self, *args, **kwargs):
        super().__init__(c.SOUTHEAST, *args, **kwargs)


class South(Move):
    """Move action identified by ``c.SOUTH``."""

    def __init__(self, *args, **kwargs):
        super().__init__(c.SOUTH, *args, **kwargs)


class SouthWest(Move):
    """Move action identified by ``c.SOUTHWEST``."""

    def __init__(self, *args, **kwargs):
        super().__init__(c.SOUTHWEST, *args, **kwargs)


class West(Move):
    """Move action identified by ``c.WEST``."""

    def __init__(self, *args, **kwargs):
        super().__init__(c.WEST, *args, **kwargs)


class NorthWest(Move):
    """Move action identified by ``c.NORTHWEST``."""

    def __init__(self, *args, **kwargs):
        super().__init__(c.NORTHWEST, *args, **kwargs)
# The four movement classes North, East, South and West.
Move4 = [North, East, South, West]
# All eight movement classes: Move4 followed by the four combined directions.
# noinspection PyTypeChecker
Move8 = Move4 + [NorthEast, SouthEast, SouthWest, NorthWest]

View File

Before

Width:  |  Height:  |  Size: 8.3 KiB

After

Width:  |  Height:  |  Size: 8.3 KiB

View File

Before

Width:  |  Height:  |  Size: 3.3 KiB

After

Width:  |  Height:  |  Size: 3.3 KiB

View File

Before

Width:  |  Height:  |  Size: 18 KiB

After

Width:  |  Height:  |  Size: 18 KiB

View File

Before

Width:  |  Height:  |  Size: 1.6 KiB

After

Width:  |  Height:  |  Size: 1.6 KiB

View File

Before

Width:  |  Height:  |  Size: 1.6 KiB

After

Width:  |  Height:  |  Size: 1.6 KiB

View File

Before

Width:  |  Height:  |  Size: 5.8 KiB

After

Width:  |  Height:  |  Size: 5.8 KiB

View File

Before

Width:  |  Height:  |  Size: 5.6 KiB

After

Width:  |  Height:  |  Size: 5.6 KiB

View File

Before

Width:  |  Height:  |  Size: 1.4 KiB

After

Width:  |  Height:  |  Size: 1.4 KiB

60
environment/constants.py Normal file
View File

@ -0,0 +1,60 @@
# Names
DANGER_ZONE = 'x' # Danger-zone tile identifier for resolving the string-based map files.
DEFAULTS = 'Defaults'
SELF = 'Self'
PLACEHOLDER = 'Placeholder'
FLOOR = 'Floor' # Identifier of Floor objects and groups.
FLOORS = 'Floors' # Identifier of Floor objects and groups.
WALL = 'Wall' # Identifier of Wall objects and groups.
WALLS = 'Walls' # Identifier of Wall objects and groups.
LEVEL = 'Level' # Identifier of Level objects and groups.
AGENT = 'Agent' # Identifier of Agent objects and groups.
AGENTS = 'Agents' # Identifier of Agent objects and groups.
OTHERS = 'Other'
COMBINED = 'Combined'
GLOBAL_POSITION = 'GLOBAL_POSITION' # Identifier of the global position slice.
# Attributes
IS_BLOCKING_LIGHT = 'is_blocking_light'
HAS_POSITION = 'has_position'
HAS_NO_POSITION = 'has_no_position'
ALL = 'All'
# Symbols (read from map files)
SYMBOL_WALL = '#'
SYMBOL_FLOOR = '-'
VALID = True # Named alias for the boolean value in the context of actions.
NOT_VALID = False # Named alias for the boolean value in the context of actions.
VALUE_FREE_CELL = 0 # Free-cell value used in observations.
VALUE_OCCUPIED_CELL = 1 # Occupied-cell value used in observations.
VALUE_NO_POS = (-9999, -9999) # Invalid position value used in the environment (something is off-grid).
ACTION = 'action' # Identifier of Action objects and groups.
COLLISION = 'Collision' # Identifier to use in the context of collisions.
LAST_POS = 'LAST_POS' # Identifier for retrieving an entity's last position.
VALIDITY = 'VALIDITY' # Identifier for retrieving the validity of an action, tick, etc.
# Actions
# Movements
NORTH = 'north'
EAST = 'east'
SOUTH = 'south'
WEST = 'west'
NORTHEAST = 'north_east'
SOUTHEAST = 'south_east'
SOUTHWEST = 'south_west'
NORTHWEST = 'north_west'
# Move Groups
MOVE8 = 'Move8'
MOVE4 = 'Move4'
# No-Action / Wait
NOOP = 'Noop'
# Result Identifier
MOVEMENTS_VALID = 'motion_valid'
MOVEMENTS_FAIL = 'motion_not_valid'

View File

@ -0,0 +1,76 @@
from typing import List, Union
from environment import constants as c
from environment.actions import Action
from environment.entity.entity import Entity
from environment.utils.render import RenderEntity
from environment.utils import renderer
from environment.utils.helpers import is_move
from environment.utils.results import ActionResult, Result
class Agent(Entity):
    """Entity that owns a list of actions and observations and stores its latest action result."""

    def __init__(self, actions: List[Action], observations: List[str], *args, **kwargs):
        super(Agent, self).__init__(*args, **kwargs)
        # Note: this instance attribute shadows the `step_result` method defined below.
        self.step_result = dict()
        self._actions = actions
        self._observations = observations
        self._state: Union[Result, None] = None

    @property
    def obs_tag(self):
        """Agents are tagged in observations by their own name."""
        return self.name

    @property
    def actions(self):
        return self._actions

    @property
    def observations(self):
        return self._observations

    @property
    def can_collide(self):
        return True

    @property
    def collection(self):
        return self._collection

    @property
    def state(self):
        """The stored result, or a fresh valid no-op ActionResult when the stored one is falsy."""
        if stored := self._state:
            return stored
        return ActionResult(entity=self, identifier=c.NOOP, validity=c.VALID, reward=0)

    def step_result(self):
        pass

    # noinspection PyAttributeOutsideInit
    def clear_temp_state(self):
        """Drop the stored result and return the agent itself."""
        self._state = None
        return self

    def set_state(self, action_result):
        self._state = action_result

    def summarize_state(self):
        """Extend the summary of the parent class by the validity and identifier of the state."""
        summary = super().summarize_state()
        summary.update(valid=bool(self.state.validity), action=str(self.state.identifier))
        return summary

    def render(self):
        """Return a RenderEntity whose render state reflects the agent's current state."""
        index = next(idx for idx, member in enumerate(self._collection) if member.name == self.name)
        curr_state = self.state
        if curr_state.identifier == c.COLLISION:
            render_state = renderer.STATE_COLLISION
        elif not curr_state.validity:
            render_state = renderer.STATE_INVALID
        elif curr_state.identifier == c.NOOP:
            render_state = renderer.STATE_IDLE
        elif is_move(curr_state.identifier):
            render_state = renderer.STATE_MOVE
        else:
            render_state = renderer.STATE_VALID
        return RenderEntity(c.AGENT, self.pos, 1, 'none', render_state, index + 1, real_name=self.name)

View File

@ -0,0 +1,79 @@
import abc
from environment import constants as c
from environment.entity.object import EnvObject
from environment.utils.render import RenderEntity
class Entity(EnvObject, abc.ABC):
    """Environment object that is placed on a tile and takes its position from that tile."""

    def __init__(self, tile, **kwargs):
        super().__init__(**kwargs)
        self._tile = tile
        tile.enter(self)

    @property
    def has_position(self):
        return self.pos != c.VALUE_NO_POS

    @property
    def pos(self):
        return self._tile.pos

    @property
    def x(self):
        return self.pos[0]

    @property
    def y(self):
        return self.pos[1]

    @property
    def tile(self):
        return self._tile

    @property
    def last_tile(self):
        """The tile occupied before the last successful move, or None if there was none."""
        try:
            previous = self._last_tile
        except AttributeError:
            # The attribute is created lazily on first access.
            # noinspection PyAttributeOutsideInit
            self._last_tile = previous = None
        return previous

    @property
    def last_pos(self):
        """Position of the last tile, or ``c.VALUE_NO_POS`` when it offers no position."""
        try:
            return self.last_tile.pos
        except AttributeError:
            return c.VALUE_NO_POS

    @property
    def direction_of_view(self):
        """Component-wise difference of the last position and the current position."""
        (last_x, last_y), (curr_x, curr_y) = self.last_pos, self.pos
        return last_x - curr_x, last_y - curr_y

    def move(self, next_tile):
        """Move to ``next_tile`` and notify all observers on success.

        Returns the result of the tile comparison when both tiles are the same,
        otherwise the result of ``next_tile.enter``.
        """
        origin = self.tile
        tiles_differ = origin != next_tile
        if not tiles_differ:
            return tiles_differ
        entered = next_tile.enter(self)
        if entered:
            origin.leave(self)
            self._tile = next_tile
            self._last_tile = origin
            for observer in self.observers:
                observer.notify_change_pos(self)
        return entered

    def summarize_state(self) -> dict:
        return dict(
            name=str(self.name),
            x=int(self.x),
            y=int(self.y),
            tile=str(self.tile.name),
            can_collide=bool(self.can_collide),
        )

    @abc.abstractmethod
    def render(self):
        return RenderEntity(self.__class__.__name__.lower(), self.pos)

    def __repr__(self):
        return super(Entity, self).__repr__() + f'(@{self.pos})'

View File

@ -0,0 +1,18 @@
# noinspection PyAttributeOutsideInit
class BoundEntityMixin:
    """Mixin for objects that are attached to another entity via ``bind_to``."""

    def bind_to(self, entity):
        """Remember ``entity`` as the entity this object is bound to."""
        self._bound_entity = entity

    @property
    def bound_entity(self):
        return self._bound_entity

    @property
    def name(self):
        """Class name followed by the bound entity's name in parentheses."""
        own_class = self.__class__.__name__
        owner_name = self._bound_entity.name
        return f'{own_class}({owner_name})'

    def belongs_to_entity(self, entity):
        """Return the result of comparing ``entity`` with the bound entity."""
        return entity == self.bound_entity

View File

@ -0,0 +1,127 @@
from abc import ABC, abstractmethod
from collections import defaultdict
from typing import Union
from environment import constants as c
class Object:
    """General object used for organisation and maintenance, such as actions."""

    # Per-class-name counter handing out the running index used in default names.
    _u_idx = defaultdict(int)

    def __init__(self, str_ident: Union[str, None] = None, **kwargs):
        self._observers = []
        self._str_ident = str_ident
        self.identifier_int = self._identify_and_count_up()
        self._collection = None
        if kwargs:
            print(f'Following kwargs were passed, but ignored: {kwargs}')

    def __bool__(self):
        return True

    def __repr__(self):
        return f'{self.name}'

    def __eq__(self, other) -> bool:
        # Equality is decided by comparing the other operand with this object's identifier.
        return other == self.identifier

    def __hash__(self):
        return hash(self.identifier)

    @property
    def observers(self):
        return self._observers

    @property
    def name(self):
        """``ClassName[str_ident]`` if a string identifier was given, else ``ClassName#index``."""
        class_name = self.__class__.__name__
        if self._str_ident is None:
            return f'{class_name}#{self.identifier_int}'
        return f'{class_name}[{self._str_ident}]'

    @property
    def identifier(self):
        """The string identifier if one was given, otherwise the name."""
        return self.name if self._str_ident is None else self._str_ident

    def _identify_and_count_up(self):
        """Return the current counter value of this class name and increase the counter."""
        key = self.__class__.__name__
        current = Object._u_idx[key]
        Object._u_idx[key] = current + 1
        return current

    def set_collection(self, collection):
        self._collection = collection

    def add_observer(self, observer):
        """Register ``observer`` and immediately notify it about this object."""
        self.observers.append(observer)
        observer.notify_change_pos(self)

    def del_observer(self, observer):
        self.observers.remove(observer)
class EnvObject(Object):
    """Object whose observable properties are looked up on the collection it belongs to."""

    _u_idx = defaultdict(int)

    def __init__(self, **kwargs):
        super(EnvObject, self).__init__(**kwargs)

    def _collection_flag(self, attribute):
        """Return the collection's ``attribute`` if it is truthy, otherwise False."""
        try:
            return getattr(self._collection, attribute) or False
        except AttributeError:
            # No collection set, or the collection does not offer the attribute.
            return False

    @property
    def obs_tag(self):
        """Name of the collection, falling back to the object's own name."""
        try:
            return self._collection.name or self.name
        except AttributeError:
            return self.name

    @property
    def is_blocking_light(self):
        return self._collection_flag('is_blocking_light')

    @property
    def can_move(self):
        return self._collection_flag('can_move')

    @property
    def is_blocking_pos(self):
        return self._collection_flag('is_blocking_pos')

    @property
    def has_position(self):
        return self._collection_flag('has_position')

    @property
    def can_collide(self):
        return self._collection_flag('can_collide')

    @property
    def encoding(self):
        return c.VALUE_OCCUPIED_CELL

    def change_parent_collection(self, other_collection):
        """Add this object to ``other_collection`` and delete it from the current one."""
        other_collection.add_item(self)
        self._collection.delete_env_object(self)
        self._collection = other_collection
        return self._collection == other_collection

View File

@ -0,0 +1,45 @@
import math
import numpy as np
from environment.entity.mixin import BoundEntityMixin
from environment.entity.object import Object, EnvObject
##########################################################################
# ####################### Objects and Entitys ########################## #
##########################################################################
class PlaceHolder(Object):
    """Object with a fixed name whose encoding is a configurable fill value."""

    def __init__(self, *args, fill_value=0, **kwargs):
        super().__init__(*args, **kwargs)
        self._fill_value = fill_value

    @property
    def name(self):
        return "PlaceHolder"

    @property
    def encoding(self):
        """The fill value passed at construction (0 by default)."""
        return self._fill_value

    @property
    def can_collide(self):
        return False
class GlobalPosition(BoundEntityMixin, EnvObject):
    """Object that encodes the position of the entity it is bound to."""

    def __init__(self, *args, normalized: bool = True, **kwargs):
        super(GlobalPosition, self).__init__(*args, **kwargs)
        # NOTE(review): `size` is not defined by any base class visible here - confirm where it comes from.
        self._level_shape = math.sqrt(self.size)
        self._normalized = normalized

    @property
    def encoding(self):
        """The bound entity's position, divided by the level shape if normalization is enabled."""
        if not self._normalized:
            return self.bound_entity.pos
        scaled = np.divide(self._bound_entity.pos, self._level_shape)
        return tuple(scaled)

View File

@ -0,0 +1,131 @@
from typing import List
import numpy as np
from environment import constants as c
from environment.entity.object import EnvObject
from environment.utils.render import RenderEntity
from environment.utils import helpers as h
class Floor(EnvObject):
    """Grid tile at a fixed position that keeps track of its guests by name."""

    def __init__(self, pos, **kwargs):
        super(Floor, self).__init__(**kwargs)
        self._guests = dict()
        self.pos = tuple(pos)
        self._neighboring_floor: List[Floor] = list()
        self._blocked_by = None

    def __len__(self):
        return len(self._guests)

    def __repr__(self):
        return f'{self.name}(@{self.pos})'

    @property
    def has_position(self):
        return True

    @property
    def can_collide(self):
        return False

    @property
    def can_move(self):
        return False

    @property
    def is_blocking_pos(self):
        return False

    @property
    def is_blocking_light(self):
        return False

    @property
    def encoding(self):
        return c.VALUE_OCCUPIED_CELL

    @property
    def x(self):
        return self.pos[0]

    @property
    def y(self):
        return self.pos[1]

    @property
    def guests(self):
        return self._guests.values()

    @property
    def guests_that_can_collide(self):
        return [guest for guest in self.guests if guest.can_collide]

    @property
    def is_blocked(self):
        """True if any guest blocks the position."""
        return any(guest.is_blocking_pos for guest in self.guests)

    @property
    def neighboring_floor(self):
        """Tiles of the own collection at the POS_MASK offsets around this tile.

        The list is computed on first use and reused while it is non-empty.
        """
        if not self._neighboring_floor:
            offsets = [off for off in h.POS_MASK.reshape(-1, 2) if not np.all(off == [0, 0])]
            found = [self._collection.by_pos(np.add(self.pos, off)) for off in offsets]
            self._neighboring_floor = [tile for tile in found if tile]
        return self._neighboring_floor

    @property
    def neighboring_floor_pos(self):
        return [tile.pos for tile in self.neighboring_floor]

    def is_empty(self):
        return not len(self._guests)

    def is_occupied(self):
        return bool(len(self._guests))

    def enter(self, guest):
        """Register ``guest`` on this tile and report whether that was possible.

        Entering fails when the guest is already registered, when the tile is blocked,
        or when a position-blocking guest tries to enter an occupied tile.
        """
        if guest.name in self._guests or self.is_blocked:
            return c.NOT_VALID
        if guest.is_blocking_pos and self.is_occupied():
            return c.NOT_VALID
        self._guests[guest.name] = guest
        return c.VALID

    def leave(self, guest):
        """Remove ``guest`` from this tile; not valid if it was not registered."""
        try:
            del self._guests[guest.name]
        except (ValueError, KeyError):
            return c.NOT_VALID
        return c.VALID

    def summarize_state(self, **_):
        return dict(name=self.name, x=int(self.x), y=int(self.y))

    def render(self):
        return None
class Wall(Floor):
    """Tile that can collide, blocks its position and blocks light."""

    @property
    def can_collide(self):
        return True

    @property
    def is_blocking_pos(self):
        return True

    @property
    def is_blocking_light(self):
        return True

    @property
    def encoding(self):
        return c.VALUE_OCCUPIED_CELL

    def render(self):
        """Return a RenderEntity for a wall at this tile's position."""
        return RenderEntity(c.WALL, self.pos)

201
environment/factory.py Normal file
View File

@ -0,0 +1,201 @@
import shutil
from collections import defaultdict
from itertools import chain
from os import PathLike
from pathlib import Path
from typing import Union
import gymnasium as gym
from environment.utils.level_parser import LevelParser
from environment.utils.observation_builder import OBSBuilder
from environment.utils.config_parser import FactoryConfigParser
from environment.utils import helpers as h
import environment.constants as c
from environment.utils.states import Gamestate
# Prefix of the keys in the summary dict built by BaseFactory._summarize_state.
REC_TAC = 'rec_'
class BaseFactory(gym.Env):
@property
def action_space(self):
return self.state[c.AGENT].action_space
@property
def named_action_space(self):
return self.state[c.AGENT].named_action_space
@property
def observation_space(self):
return self.obs_builder.observation_space(self.state)
@property
def named_observation_space(self):
return self.obs_builder.named_observation_space(self.state)
@property
def params(self) -> dict:
import yaml
config_path = Path(self._config_file)
config_dict = yaml.safe_load(config_path.open())
return config_dict
@property
def summarize_header(self):
summary_dict = self._summarize_state(stateless_entities=True)
return summary_dict
def __exit__(self, exc_type, exc_val, exc_tb):
self.close()
def __init__(self, config_file: Union[str, PathLike]):
self._config_file = config_file
self.conf = FactoryConfigParser(self._config_file)
# Attribute Assignment
self.level_filepath = Path(__file__).parent.parent / h.LEVELS_DIR / f'{self.conf.level_name}.txt'
self._renderer = None # expensive - don't use it when not required !
parsed_entities = self.conf.load_entities()
self.map = LevelParser(self.level_filepath, parsed_entities, self.conf.pomdp_r)
# Init for later usage:
self.state: Gamestate
self.map: LevelParser
self.obs_builder: OBSBuilder
# TODO: Reset ---> document this
self.reset()
def __getitem__(self, item):
return self.state.entities[item]
def reset(self) -> (dict, dict):
self.state = None
# Init entity:
entities = self.map.do_init()
# Grab all rules:
rules = self.conf.load_rules()
# Agents
# noinspection PyAttributeOutsideInit
self.state = Gamestate(entities, rules, self.conf.env_seed)
agents = self.conf.load_agents(self.map.size, self[c.FLOOR].empty_tiles)
self.state.entities.add_item({c.AGENT: agents})
# All is set up, trigger additional init (after agent entity spawn etc)
self.state.rules.do_all_init(self.state)
# Observations
# noinspection PyAttributeOutsideInit
self.obs_builder = OBSBuilder(self.map.level_shape, self.state, self.map.pomdp_r)
return self.obs_builder.refresh_and_build_for_all(self.state)
def step(self, actions):
if not isinstance(actions, list):
actions = [int(actions)]
# Apply rules, do actions, tick the state, etc...
tick_result = self.state.tick(actions)
# Check Done Conditions
done_results = self.state.check_done()
# Finalize
reward, reward_info, done = self.summarize_step_results(tick_result, done_results)
info = reward_info
info.update(step_reward=sum(reward), step=self.state.curr_step)
# TODO:
# if self._record_episodes:
# info.update(self._summarize_state())
obs, reset_info = self.obs_builder.refresh_and_build_for_all(self.state)
info.update(reset_info)
return None, [x for x in obs.values()], reward, done, info
def summarize_step_results(self, tick_results: list, done_check_results: list) -> (int, dict, bool):
# Returns: Reward, Info
rewards = defaultdict(lambda: 0.0)
# Gather per agent env rewards and
# Combine Info dicts into a global one
combined_info_dict = defaultdict(lambda: 0.0)
for result in chain(tick_results, done_check_results):
if result.reward is not None:
try:
rewards[result.entity.name] += result.reward
except AttributeError:
rewards['global'] += result.reward
infos = result.get_infos()
for info in infos:
assert isinstance(info.value, (float, int))
combined_info_dict[info.identifier] += info.value
# Check Done Rule Results
try:
done_reason = next(x for x in done_check_results if x.validity)
done = True
self.state.print(f'Env done, Reason: {done_reason.name}.')
except StopIteration:
done = False
if self.conf.individual_rewards:
global_rewards = rewards['global']
del rewards['global']
reward = [rewards[agent.name] for agent in self.state[c.AGENT]]
reward = [x + global_rewards for x in reward]
self.state.print(f"rewards are {rewards}")
return reward, combined_info_dict, done
else:
reward = sum(rewards.values())
self.state.print(f"reward is {reward}")
return reward, combined_info_dict, done
def start_recording(self):
    """Switch the ``do_record`` flag of the configuration on.

    :return: The flag as stored on the configuration afterwards.
    """
    config = self.conf
    config.do_record = True
    return config.do_record
def stop_recording(self):
    """Switch the ``do_record`` flag of the configuration off.

    :return: The negation of the flag as stored on the configuration afterwards.
    """
    config = self.conf
    config.do_record = False
    return not config.do_record
# noinspection PyGlobalUndefined
def render(self, mode='human'):
    """Render the current state through the lazily created renderer.

    :param mode: Not used by this method.
    :return: Whatever the renderer's ``render`` call returns.
    """
    # The `global` declaration has to precede every binding of the name in this
    # scope; the import below therefore binds `Renderer` at module level.
    global Renderer
    if not self._renderer:  # lazy init
        from environment.utils.renderer import Renderer
        self._renderer = Renderer(self.map.level_shape, view_radius=self.conf.pomdp_r, fps=20)

    render_entities = self.state.entities.render()
    if self.conf.pomdp_r:
        # With a view radius configured, agents carry their current light map.
        for render_entity in render_entities:
            if render_entity.name == c.AGENT:
                render_entity.aux = self.obs_builder.curr_lightmaps[render_entity.real_name]
    return self._renderer.render(render_entities)
def _summarize_state(self, stateless_entities=False):
    """Collect the current step and per-group state summaries in one dict.

    :param stateless_entities: Only groups whose ``is_stateless`` equals this
                               value are included.
    :return: Dict keyed by ``REC_TAC``-prefixed names.
    """
    summary = {f'{REC_TAC}step': self.state.curr_step}
    summary.update({
        f'{REC_TAC}{group.name}': group.summarize_states()
        for group in self.state
        if group.is_stateless == stateless_entities
    })
    return summary
def print(self, string):
    """Print ``string`` only when ``conf.verbose`` is truthy."""
    if not self.conf.verbose:
        return
    print(string)
def save_params(self, filepath: Path):
    """Copy the configuration file to ``filepath``, creating parent folders.

    :param filepath: Destination of the copy; converted to a ``Path``.
    """
    # noinspection PyProtectedMember
    target = Path(filepath)
    target.parent.mkdir(parents=True, exist_ok=True)
    shutil.copyfile(self._config_file, target)

View File

@ -0,0 +1,30 @@
from environment.groups.env_objects import EnvObjects
from environment.groups.mixins import PositionMixin
from environment.entity.agent import Agent
import environment.constants as c
class Agents(PositionMixin, EnvObjects):
    """Collection of ``Agent`` entities with action-space helpers."""

    _entity = Agent
    is_blocking_light = False
    can_move = True

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    @property
    def obs_pairs(self):
        """One ``(name, agent)`` pair per contained agent."""
        pairs = []
        for agent in self:
            pairs.append((agent.name, agent))
        return pairs

    @property
    def action_space(self):
        """Tuple space holding one ``Discrete`` space per agent, sized by its actions."""
        from gymnasium import spaces
        per_agent = [spaces.Discrete(len(agent.actions)) for agent in self]
        return spaces.Tuple(per_agent)

    @property
    def named_action_space(self):
        """Per agent name, a mapping of action name to the action's index."""
        return {
            agent.name: {action.name: idx for idx, action in enumerate(agent.actions)}
            for agent in self
        }

View File

@ -0,0 +1,33 @@
from environment.groups.objects import Objects
from environment.entity.object import EnvObject
class EnvObjects(Objects):
    """Sized collection of ``EnvObject`` items with class-level capability flags."""

    _entity = EnvObject
    is_blocking_light: bool = False
    can_collide: bool = False
    has_position: bool = False
    can_move: bool = False

    def __init__(self, size, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.size = size

    @property
    def encodings(self):
        """The ``encoding`` of every contained object."""
        return [obj.encoding for obj in self]

    def add_item(self, item: EnvObject):
        """Add ``item`` and return the collection.

        Collections without positions are checked against ``size`` before the
        item is added.
        """
        assert self.has_position or (len(self) <= self.size)
        super().add_item(item)
        return self

    def summarize_states(self):
        """The ``summarize_state()`` result of every contained object."""
        states = []
        for obj in self.values():
            states.append(obj.summarize_state())
        return states

    def delete_env_object(self, env_object: EnvObject):
        """Delete the entry stored under the object's name."""
        del self[env_object.name]

    def delete_env_object_by_name(self, name):
        """Delete the entry stored under ``name``."""
        del self[name]

View File

@ -0,0 +1,64 @@
from collections import defaultdict
from operator import itemgetter
from typing import Dict
from environment.groups.objects import Objects
from environment.entity.entity import Entity
from environment.utils.helpers import POS_MASK
class Entities(Objects):
    """Collection of entity groups keyed by name, with a position index.

    Groups are registered through ``add_item`` as ``{name: group}`` mappings.
    This collection observes each group it holds; ``pos_dict`` maps positions
    to the entities located there.
    """

    _entity = Objects

    @staticmethod
    def neighboring_positions(pos):
        """Return ``POS_MASK`` shifted by ``pos`` as an array of coordinate pairs."""
        return (POS_MASK + pos).reshape(-1, 2)

    def get_near_pos(self, pos):
        """Return all indexed entities on the positions given by ``neighboring_positions``."""
        # `.get` keeps the lookup read-only: indexing the defaultdict would
        # insert an empty list for every probed position.
        return [entity
                for neighbor in self.neighboring_positions(pos)
                for entity in self.pos_dict.get(tuple(neighbor), ())]

    def render(self):
        """Return the concatenated render output of all groups."""
        # Skip `None` entries *before* calling `.render()` on them.
        return [y for x in self if x is not None for y in x.render()]

    @property
    def names(self):
        """Names of the registered groups."""
        return list(self._data.keys())

    def __init__(self):
        self.pos_dict = defaultdict(list)
        super().__init__()

    def iter_entities(self):
        """Iterate over the entities of all groups."""
        return iter((x for sublist in self.values() for x in sublist))

    def add_items(self, items: Dict):
        """Alias of :meth:`add_item`."""
        return self.add_item(items)

    def add_item(self, item: dict):
        """Register the groups in ``item`` (``{name: group}``) and observe them.

        :raises AssertionError: If one of the names is already registered.
        """
        assert_str = 'This group of entity has already been added!'
        # Test membership, not truthiness of the key.
        assert not any(key in self.keys() for key in item), assert_str
        self._data.update(item)
        for val in item.values():
            val.add_observer(self)
        return self

    def __delitem__(self, name):
        """Remove the group registered under ``name``.

        :raises AssertionError: If no such group is registered.
        """
        assert_str = 'This group of entity does not exist in this collection!'
        assert name in self.keys(), assert_str
        group = self._data[name]
        if group is not None:
            # Stop observing the group (and, through it, its entities) ...
            if self in group.observers:
                group.del_observer(self)
            # ... and drop the group's entities from the position index.
            for entity in group:
                self.notify_del_entity(entity)
        return super(Entities, self).__delitem__(name)

    @property
    def obs_pairs(self):
        """Concatenated ``obs_pairs`` of all groups."""
        return [y for x in self for y in x.obs_pairs]

    def by_pos(self, pos: (int, int)):
        """Return the list of indexed entities at ``pos``."""
        return self.pos_dict[pos]

    @property
    def positions(self):
        """One position per indexed entity."""
        return [k for k, v in self.pos_dict.items() for _ in v]

View File

@ -0,0 +1,102 @@
from abc import ABC
from typing import Tuple
import numpy as np
from environment import constants as c
from environment.entity.entity import Entity
# noinspection PyUnresolvedReferences,PyTypeChecker,PyArgumentList
class PositionMixin:
    """Mixin for collections whose entities are placed on tiles."""

    _entity = Entity
    is_blocking_light: bool = True
    can_collide: bool = True
    has_position: bool = True

    def render(self):
        """Return the render output of every entity, without ``None`` results."""
        return [y for y in (x.render() for x in self) if y is not None]

    @classmethod
    def from_tiles(cls, tiles, *args, entity_kwargs=None, **kwargs):
        """Create a collection with one entity per tile.

        :param tiles: Tiles handed to the entity constructor, one each.
        :param entity_kwargs: Optional keyword arguments for every entity.
        :return: The populated collection.
        """
        collection = cls(*args, **kwargs)
        entity_kwargs = {} if entity_kwargs is None else entity_kwargs
        entities = [cls._entity(tile, str_ident=i, **entity_kwargs)
                    for i, tile in enumerate(tiles)]
        collection.add_items(entities)
        return collection

    @classmethod
    def from_coordinates(cls, positions: [(int, int)], tiles, *args, entity_kwargs=None, **kwargs):
        """Resolve ``positions`` through ``tiles.by_pos`` and delegate to :meth:`from_tiles`."""
        return cls.from_tiles([tiles.by_pos(position) for position in positions], tiles.size, *args,
                              entity_kwargs=entity_kwargs,
                              **kwargs)

    @property
    def tiles(self):
        """The tile of every contained entity."""
        return [entity.tile for entity in self]

    def __delitem__(self, name):
        """Let the named entity leave its tile, then delete it.

        :raises KeyError: If no entity with that name is contained.
        """
        obj = next((obj for obj in self if obj is not None and obj.name == name), None)
        if obj is None:
            # A bare `next()` would leak StopIteration to the caller here.
            raise KeyError(name)
        obj.tile.leave(obj)
        super().__delitem__(name)

    def by_pos(self, pos: (int, int)):
        """Return the first entity located at ``pos`` or ``None``."""
        pos = tuple(pos)
        try:
            return next((e for e in self if e.pos == pos), None)
        except ValueError:
            # NOTE(review): presumably raised when the comparison does not
            # yield a plain bool (array-like positions) - confirm.
            return None

    @property
    def positions(self):
        """The position of every contained entity."""
        return [e.pos for e in self]

    def notify_del_entity(self, entity: Entity):
        """Remove ``entity`` from the position index; unknown entities are ignored."""
        try:
            self.pos_dict[entity.pos].remove(entity)
        except (ValueError, AttributeError):
            pass
# noinspection PyUnresolvedReferences,PyTypeChecker
class IsBoundMixin:
    """Mixin for objects that are bound to one owning entity."""

    @property
    def name(self):
        """Class name followed by the bound entity's name in parentheses."""
        owner = self._bound_entity
        return f'{type(self).__name__}({owner.name})'

    def __repr__(self):
        cls_name = type(self).__name__
        return f'{cls_name}#{self._bound_entity.name}({self._data})'

    def bind(self, entity):
        """Remember ``entity`` as the owner and return ``c.VALID``."""
        # noinspection PyAttributeOutsideInit
        self._bound_entity = entity
        return c.VALID

    def belongs_to_entity(self, entity):
        """Whether the bound entity equals ``entity``."""
        owner = self._bound_entity
        return owner == entity
# noinspection PyUnresolvedReferences,PyTypeChecker
class HasBoundedMixin:
    """Mixin for collections whose members are each bound to an entity."""

    @property
    def obs_names(self):
        """The ``name`` of every member."""
        names = []
        for member in self:
            names.append(member.name)
        return names

    def by_entity(self, entity):
        """Return the first member bound to ``entity`` or ``None``."""
        for member in self:
            if member.belongs_to_entity(entity):
                return member
        return None

    def idx_by_entity(self, entity):
        """Return the iteration index of the first member bound to ``entity`` or ``None``."""
        for position, member in enumerate(self):
            if member.belongs_to_entity(entity):
                return position
        return None

View File

@ -0,0 +1,141 @@
from collections import defaultdict
from typing import List
import numpy as np
from environment.entity.object import Object
class Objects:
    """Name-keyed collection of objects with observer notification.

    Items are stored in ``_data`` under their ``name``. Observers are told
    about added and deleted items; ``pos_dict`` maps positions to the
    entities this collection was notified about.
    """

    _entity = Object

    @property
    def observers(self):
        """The list of registered observers."""
        return self._observers

    @property
    def obs_tag(self):
        """The class name."""
        return self.__class__.__name__

    @staticmethod
    def render():
        """Plain object collections render nothing."""
        return []

    @property
    def obs_pairs(self):
        """A single ``(name, collection)`` pair."""
        return [(self.name, self)]

    @property
    def names(self):
        """The ``name`` of every contained item."""
        # noinspection PyUnresolvedReferences
        return [x.name for x in self]

    @property
    def name(self):
        """The class name."""
        return f'{self.__class__.__name__}'

    def __init__(self, *args, **kwargs):
        self._data = defaultdict(lambda: None)
        self._observers = list()
        self.pos_dict = defaultdict(list)

    def __len__(self):
        return len(self._data)

    def __iter__(self):
        return iter(self.values())

    def add_item(self, item: _entity):
        """Store ``item`` under its name, link it to this collection, notify observers.

        :raises AssertionError: On a wrong item type or an already used name.
        :return: The collection itself.
        """
        assert_str = f'All item names have to be of type {self._entity}, but were {item.__class__}.,'
        assert isinstance(item, self._entity), assert_str
        # `.get` instead of indexing: indexing the defaultdict would insert the key.
        assert self._data.get(item.name) is None, f'{item.name} allready exists!!!'
        self._data.update({item.name: item})
        item.set_collection(self)
        for observer in self.observers:
            observer.notify_add_entity(item)
        return self

    # noinspection PyUnresolvedReferences
    def del_observer(self, observer):
        """Unregister ``observer`` here and on every item that lists it."""
        self.observers.remove(observer)
        for entity in self:
            if observer in entity.observers:
                entity.del_observer(observer)

    # noinspection PyUnresolvedReferences
    def add_observer(self, observer):
        """Register ``observer`` here and on every item that does not list it yet."""
        self.observers.append(observer)
        for entity in self:
            if observer not in entity.observers:
                entity.add_observer(observer)

    def __delitem__(self, name):
        """Notify observers about the removal, then delete the item stored under ``name``.

        :raises KeyError: If ``name`` is not stored.
        """
        if name not in self._data:
            raise KeyError(name)
        # Observers read `entity.pos`, so hand over the stored object, not its name.
        entity = self._data[name]
        for observer in self.observers:
            observer.notify_del_entity(entity)
        # noinspection PyTypeChecker
        del self._data[name]

    def add_items(self, items: List[_entity]):
        """Add every item through :meth:`add_item` and return the collection."""
        for item in items:
            self.add_item(item)
        return self

    def keys(self):
        return self._data.keys()

    def values(self):
        return self._data.values()

    def items(self):
        return self._data.items()

    def _get_index(self, item):
        """Return the insertion index of the first value equal to ``item`` or ``None``."""
        return next((i for i, v in enumerate(self._data.values()) if v == item), None)

    def __getitem__(self, item):
        """Look an item up by integer index (negative allowed) or by name.

        :return: The stored item, or ``None`` when nothing matches.
        :raises TypeError: If ``item`` cannot be used as a key.
        """
        if isinstance(item, (int, np.integer)):
            if item < 0:
                item = len(self._data) - abs(item)
            return next((v for i, v in enumerate(self._data.values()) if i == item), None)
        try:
            # `.get` keeps lookups read-only; indexing the defaultdict would
            # store a `None` entry for every unknown name.
            return self._data.get(item)
        except TypeError as err:
            raise TypeError(f'{item!r} cannot be used as a key of {self.name}.') from err

    def __repr__(self):
        return f'{self.__class__.__name__}[{dict(self._data)}]'

    def notify_change_pos(self, entity: object):
        """Move ``entity`` from its last to its current position in the index."""
        try:
            self.pos_dict[entity.last_pos].remove(entity)
        except (ValueError, AttributeError):
            pass
        if entity.has_position:
            try:
                self.pos_dict[entity.pos].append(entity)
            except (ValueError, AttributeError):
                pass

    def notify_del_entity(self, entity: Object):
        """Remove ``entity`` from the position index; unknown entities are ignored."""
        try:
            self.pos_dict[entity.pos].remove(entity)
        except (ValueError, AttributeError):
            pass

    def notify_add_entity(self, entity: Object):
        """Observe ``entity`` and add it to the position index."""
        try:
            entity.add_observer(self)
            self.pos_dict[entity.pos].append(entity)
        except (ValueError, AttributeError):
            pass

View File

@ -0,0 +1,78 @@
import numbers
from typing import List, Union, Dict
import numpy as np
from environment.groups.env_objects import EnvObjects
from environment.groups.objects import Objects
from environment.groups.mixins import HasBoundedMixin, PositionMixin
from environment.entity.util import PlaceHolder, GlobalPosition
from environment.utils import helpers as h
from environment import constants as c
class Combined(PositionMixin, EnvObjects):
    """Collection that stands for several named observations at once."""

    def __init__(self, names: List[str], *args, identifier: Union[None, str] = None, **kwargs):
        super().__init__(*args, **kwargs)
        self._ident = identifier
        self._names = names if names else list()

    @property
    def name(self):
        """Parent name followed by the identifier, or the names when no identifier is set."""
        label = self._ident or self._names
        return f'{super().name}({label})'

    @property
    def names(self):
        """The names handed to the constructor."""
        return self._names

    @property
    def obs_tag(self):
        """Same as :attr:`name`."""
        return self.name

    @property
    def obs_pairs(self):
        """One ``(name, None)`` pair per stored name."""
        pairs = []
        for name in self.names:
            pairs.append((name, None))
        return pairs
class GlobalPositions(HasBoundedMixin, EnvObjects):
    """Collection of ``GlobalPosition`` objects, each bound to an entity."""

    _entity = GlobalPosition
    # A trailing comma here would create the truthy tuple `(False,)`.
    is_blocking_light = False
    can_collide = False

    def __init__(self, *args, **kwargs):
        super(GlobalPositions, self).__init__(*args, **kwargs)
class Zones(Objects):
    """Zone slices of a parsed level; construction is currently disabled."""

    @property
    def accounting_zones(self):
        """The entries whose value in ``items()`` differs from ``c.DANGER_ZONE``."""
        zones = []
        for idx, name in self.items():
            if name != c.DANGER_ZONE:
                zones.append(self[idx])
        return zones

    def __init__(self, parsed_level):
        raise NotImplementedError('This needs a Rework')
        # Everything below is unreachable and kept as the draft to be reworked.
        super().__init__()
        slices = []
        self._accounting_zones = []
        self._danger_zones = []
        for symbol in np.unique(parsed_level):
            if symbol == c.VALUE_OCCUPIED_CELL:
                continue
            self + symbol
            slices.append(h.one_hot_level(parsed_level, symbol))
            if symbol == c.DANGER_ZONE:
                self._danger_zones.append(symbol)
            else:
                self._accounting_zones.append(symbol)
        self._zone_slices = np.stack(slices)

    def __getitem__(self, item):
        return self._zone_slices[item]

    def add_items(self, other: Union[str, List[str]]):
        """Zones are fixed; adding always raises ``AttributeError``."""
        raise AttributeError('You are not allowed to add additional Zones in runtime.')

View File

@ -0,0 +1,56 @@
import random
from typing import List
import numpy as np
from environment import constants as c
from environment.groups.env_objects import EnvObjects
from environment.groups.mixins import PositionMixin
from environment.entity.wall_floor import Wall, Floor
class Walls(PositionMixin, EnvObjects):
    """Collection of ``Wall`` entities that is built from coordinates."""

    _entity = Wall
    symbol = c.SYMBOL_WALL

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._value = c.VALUE_OCCUPIED_CELL

    @classmethod
    def from_coordinates(cls, argwhere_coordinates, *args, **kwargs):
        """Create the collection with one entity per coordinate.

        :param argwhere_coordinates: Iterable of positions, each handed to the
                                     entity constructor.
        :return: The populated collection.
        """
        collection = cls(*args, **kwargs)
        members = []
        for pos in argwhere_coordinates:
            members.append(cls._entity(pos))
        # noinspection PyTypeChecker
        collection.add_items(members)
        return collection

    @classmethod
    def from_tiles(cls, tiles, *args, **kwargs):
        """Not supported for this collection; always raises ``RuntimeError``."""
        raise RuntimeError()
class Floors(Walls):
    """Collection of ``Floor`` tiles with shuffled occupancy queries."""

    _entity = Floor
    symbol = c.SYMBOL_FLOOR
    is_blocking_light: bool = False
    can_collide: bool = False

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._value = c.VALUE_FREE_CELL

    @property
    def occupied_tiles(self):
        """All tiles reporting ``is_occupied()``, in shuffled order."""
        occupied = list(filter(lambda tile: tile.is_occupied(), self))
        random.shuffle(occupied)
        return occupied

    @property
    def empty_tiles(self) -> List[Floor]:
        """All tiles reporting ``is_empty()``, in shuffled order."""
        empty = list(filter(lambda tile: tile.is_empty(), self))
        random.shuffle(empty)
        return empty

    @classmethod
    def from_tiles(cls, tiles, *args, **kwargs):
        """Not supported for this collection; always raises ``RuntimeError``."""
        raise RuntimeError()

View File

@ -1,66 +1,50 @@
import pickle
from collections import defaultdict
from os import PathLike
from pathlib import Path
from typing import List, Dict, Union
from typing import Union
from stable_baselines3.common.callbacks import BaseCallback
from gymnasium import Wrapper
from environments.helpers import IGNORED_DF_COLUMNS
from environment.utils.helpers import IGNORED_DF_COLUMNS
from environment.factory import REC_TAC
import pandas as pd
from plotting.compare_runs import plot_single_run
class EnvMonitor(BaseCallback):
class EnvMonitor(Wrapper):
ext = 'png'
def __init__(self, env, filepath: Union[str, PathLike] = None):
super(EnvMonitor, self).__init__()
self.unwrapped = env
super(EnvMonitor, self).__init__(env)
self._filepath = filepath
self._monitor_df = pd.DataFrame()
self._monitor_dicts = defaultdict(dict)
self._monitor_dict = dict()
def __getattr__(self, item):
return getattr(self.unwrapped, item)
def step(self, action):
obs, reward, done, info = self.unwrapped.step(action)
self._read_info(0, info)
self._read_done(0, done)
return obs, reward, done, info
obs_type, obs, reward, done, info = self.env.step(action)
self._read_info(info)
self._read_done(done)
return obs_type, obs, reward, done, info
def reset(self):
return self.unwrapped.reset()
def _on_training_start(self) -> None:
pass
def _on_training_end(self) -> None:
pass
def _on_step(self, alt_infos: List[Dict] = None, alt_dones: List[bool] = None) -> bool:
for env_idx, info in enumerate(self.locals.get('infos', [])):
self._read_info(env_idx, info)
for env_idx, done in list(
enumerate(self.locals.get('dones', []))) + list(enumerate(self.locals.get('done', []))):
self._read_done(env_idx, done)
return True
def _read_info(self, env_idx, info: dict):
self._monitor_dicts[env_idx][len(self._monitor_dicts[env_idx])] = {
def _read_info(self, info: dict):
self._monitor_dict[len(self._monitor_dict)] = {
key: val for key, val in info.items() if
key not in ['terminal_observation', 'episode'] and not key.startswith('rec_')}
key not in ['terminal_observation', 'episode'] and not key.startswith(REC_TAC)}
return
def _read_done(self, env_idx, done):
def _read_done(self, done):
if done:
env_monitor_df = pd.DataFrame.from_dict(self._monitor_dicts[env_idx], orient='index')
self._monitor_dicts[env_idx] = dict()
env_monitor_df = pd.DataFrame.from_dict(self._monitor_dict, orient='index')
self._monitor_dict = dict()
columns = [col for col in env_monitor_df.columns if col not in IGNORED_DF_COLUMNS]
env_monitor_df = env_monitor_df.aggregate(
{col: 'mean' if col.endswith('ount') else 'sum' for col in columns}

View File

@ -4,21 +4,20 @@ from os import PathLike
from pathlib import Path
from typing import Union
from gymnasium import Wrapper
import numpy as np
import pandas as pd
import simplejson
from deepdiff.operator import BaseOperator
from stable_baselines3.common.callbacks import BaseCallback
from environments.factory.base.base_factory import REC_TAC
from environment.factory import REC_TAC
class EnvRecorder(BaseCallback):
class EnvRecorder(Wrapper):
def __init__(self, env, entities: str = 'all', filepath: Union[str, PathLike] = None, freq: int = 0):
super(EnvRecorder, self).__init__()
super(EnvRecorder, self).__init__(env)
self.filepath = filepath
self.unwrapped = env
self.freq = freq
self._recorder_dict = defaultdict(list)
self._recorder_out_list = list()
@ -92,8 +91,8 @@ class EnvRecorder(BaseCallback):
out_dict = {'episodes': self._recorder_out_list}
out_dict.update(
{'n_episodes': self._episode_counter,
'env_params': self.unwrapped.params,
'header': self.unwrapped.summarize_header
'env_params': self.env.params,
'header': self.env.summarize_header
})
try:
simplejson.dump(out_dict, f, indent=4)

4
environment/rewards.py Normal file
View File

@ -0,0 +1,4 @@
# Default reward values of the base environment.
# COLLISION is the reward the `Collision` rule attaches to its results.
# NOTE(review): the other three are not used in the visible code; presumably
# they reward valid moves, failed moves and no-op actions - confirm.
MOVEMENTS_VALID: float = -0.001
MOVEMENTS_FAIL: float = -0.05
NOOP: float = -0.01
COLLISION: float = -0.5

83
environment/rules.py Normal file
View File

@ -0,0 +1,83 @@
import abc
from typing import Union, List
from environment.utils.results import Result, TickResult, DoneResult, ActionResult
from environment import constants as c
from environment import rewards as r
class Rule(abc.ABC):
    """Base class for rules; every hook returns an empty list by default."""

    def __init__(self):
        pass

    @property
    def name(self):
        """The class name of the rule."""
        return type(self).__name__

    def __repr__(self):
        return f'{self.name}'

    def on_init(self, state):
        return list()

    def on_reset(self):
        return list()

    def tick_pre_step(self, state) -> List[TickResult]:
        return list()

    def tick_step(self, state) -> List[TickResult]:
        return list()

    def tick_post_step(self, state) -> List[TickResult]:
        return list()

    def on_check_done(self, state) -> List[DoneResult]:
        return list()
class MaxStepsReached(Rule):
    """Done rule that becomes valid once the step limit is reached."""

    def __init__(self, max_steps: int = 500):
        """
        :param max_steps: Step count at which the rule reports done.
        """
        super().__init__()
        self.max_steps = max_steps

    def on_init(self, state):
        # Same return shape as the base hook (an empty list) instead of `None`.
        return []

    def on_check_done(self, state):
        """Return one ``DoneResult`` that is valid iff ``max_steps <= state.curr_step``."""
        limit_reached = self.max_steps <= state.curr_step
        validity = c.VALID if limit_reached else c.NOT_VALID
        return [DoneResult(validity=validity, identifier=self.name, reward=0)]
class Collision(Rule):
    """Rule that reports collisions per guest and can end the episode on them."""

    def __init__(self, done_at_collisions: bool = False):
        """
        :param done_at_collisions: Whether a collision in the current step
                                   makes the done check valid.
        """
        super().__init__()
        self.done_at_collisions = done_at_collisions
        self.curr_done = False

    def tick_post_step(self, state) -> List[TickResult]:
        """Create one collision result per colliding guest.

        Only tiles with at least two guests that can collide are considered;
        ``curr_done`` records whether any such tile was found in this step.
        """
        self.curr_done = False
        results = []
        for tile in state.get_all_tiles_with_collisions():
            guests = tile.guests_that_can_collide
            if len(guests) < 2:
                continue
            for guest in guests:
                # An AttributeError while setting the guest's state is ignored.
                try:
                    guest.set_state(TickResult(identifier=c.COLLISION, reward=r.COLLISION,
                                               validity=c.NOT_VALID, entity=self))
                except AttributeError:
                    pass
                results.append(TickResult(entity=guest, identifier=c.COLLISION,
                                          reward=r.COLLISION, validity=c.VALID))
            self.curr_done = True
        return results

    def on_check_done(self, state) -> List[DoneResult]:
        """Valid only if a collision occurred this step and ``done_at_collisions`` is set."""
        if not (self.curr_done and self.done_at_collisions):
            return [DoneResult(validity=c.NOT_VALID, identifier=self.name, reward=0)]
        return [DoneResult(validity=c.VALID, identifier=c.COLLISION, reward=r.COLLISION)]

View File

@ -0,0 +1,120 @@
from pathlib import Path
import yaml
from environment.groups.global_entities import Entities
from environment.groups.agents import Agents
from environment.entity.agent import Agent
from environment.utils.helpers import locate_and_import_class
from environment import constants as c
DEFAULT_PATH = 'environment'
MODULE_PATH = 'modules'
class FactoryConfigParser(object):
    """Reads a YAML configuration and resolves entities, agents and rules from it.

    Unknown attributes are looked up in the ``General`` section of the config.
    """

    default_entites = []
    default_rules = ['MaxStepsReached', 'Collision']
    default_actions = [c.MOVE8, c.NOOP]
    default_observations = [c.WALLS, c.AGENTS]

    def __init__(self, config_path):
        """
        :param config_path: Path (``str`` or ``Path``) of the YAML file.
        """
        self.config_path = Path(config_path)
        # Open through the converted path so plain strings work as well, and
        # close the handle once parsing is done.
        with self.config_path.open() as config_file:
            self.config = yaml.safe_load(config_file)
        self.do_record = False

    def __getattr__(self, item):
        """Fall back to the ``General`` section for unknown attributes.

        :raises AttributeError: If the section has no such key.
        """
        # `config` itself missing (e.g. object created without `__init__`)
        # must not re-enter this method through `self.config`.
        if item == 'config':
            raise AttributeError(item)
        try:
            return self['General'][item]
        except KeyError as err:
            # Attribute protocol: `hasattr`/`getattr(..., default)` rely on AttributeError.
            raise AttributeError(f'{item!r} is not part of the "General" section.') from err

    def _get_sub_list(self, primary_key: str, sub_key: str):
        """Per entry of ``primary_key``, map its name to the flattened ``sub_key`` values.

        :return: List of one-key dicts ``{name: [values...]}``.
        """
        section = self.config[primary_key]
        if isinstance(section, dict):
            # A `{name: settings}` mapping (the shape `load_agents` reads) is
            # handled like a list of one-entry mappings.
            section = [{name: settings} for name, settings in section.items()]
        return [{key: [s for k, v in val.items() if k == sub_key for s in v] for key, val in x.items()
                 } for x in section]

    @property
    def agent_actions(self):
        return self._get_sub_list('Agents', "Actions")

    @property
    def agent_observations(self):
        return self._get_sub_list('Agents', "Observations")

    @property
    def rules(self):
        return self.config['Rules']

    @property
    def agents(self):
        return self.config['Agents']

    @property
    def entities(self):
        return self.config['Entities']

    def __repr__(self):
        return str(self.config)

    def __getitem__(self, item):
        return self.config[item]

    def load_entities(self):
        """Import the configured entity classes.

        :return: ``{name: {'class': ..., 'kwargs': ..., 'symbol': ...}}``.
        """
        entity_classes = dict()
        entities = []
        if c.DEFAULTS in self.entities:
            entities.extend(self.default_entites)
        entities.extend(x for x in self.entities if x != c.DEFAULTS)

        for entity in entities:
            # Defaults live in the base package, everything else in the modules folder.
            folder_path = MODULE_PATH if entity not in self.default_entites else DEFAULT_PATH
            entity_class = locate_and_import_class(entity, folder_path)
            entity_kwargs = self.entities.get(entity, {})
            entity_symbol = getattr(entity_class, 'symbol', None)
            entity_classes.update({entity: {'class': entity_class, 'kwargs': entity_kwargs, 'symbol': entity_symbol}})
        return entity_classes

    def load_agents(self, size, free_tiles):
        """Create the configured agents with their actions and observations.

        :param size: Handed to the ``Agents`` collection.
        :param free_tiles: List of tiles; one is popped per agent.
        :return: The populated ``Agents`` collection.
        """
        agents = Agents(size)
        base_env_actions = self.default_actions + [c.MOVE4]
        for name in self.agents:
            # Actions
            actions = list()
            if c.DEFAULTS in self.agents[name]['Actions']:
                actions.extend(self.default_actions)
            actions.extend(x for x in self.agents[name]['Actions'] if x != c.DEFAULTS)
            parsed_actions = list()
            for action in actions:
                folder_path = MODULE_PATH if action not in base_env_actions else DEFAULT_PATH
                class_or_classes = locate_and_import_class(action, folder_path)
                # The import yields either one class or an iterable of classes.
                try:
                    parsed_actions.extend(class_or_classes)
                except TypeError:
                    parsed_actions.append(class_or_classes)

            parsed_actions = [x() for x in parsed_actions]

            # Observation
            observations = list()
            if c.DEFAULTS in self.agents[name]['Observations']:
                observations.extend(self.default_observations)
            observations.extend(x for x in self.agents[name]['Observations'] if x != c.DEFAULTS)
            agent = Agent(parsed_actions, observations, free_tiles.pop(), str_ident=name)
            agents.add_item(agent)
        return agents

    def load_rules(self):
        """Import the configured rule classes.

        :return: ``{name: {'class': ..., 'kwargs': ...}}``.
        """
        rules_classes = dict()
        rules = []
        if c.DEFAULTS in self.rules:
            for rule in self.default_rules:
                if rule not in rules:
                    rules.append(rule)
        rules.extend(x for x in self.rules if x != c.DEFAULTS)

        for rule in rules:
            folder_path = MODULE_PATH if rule not in self.default_rules else DEFAULT_PATH
            rule_class = locate_and_import_class(rule, folder_path)
            rule_kwargs = self.rules.get(rule, {})
            rules_classes.update({rule: {'class': rule_class, 'kwargs': rule_kwargs}})
        return rules_classes

View File

@ -1,12 +1,14 @@
import importlib
import itertools
from collections import defaultdict
from typing import Tuple, Union, Dict, List, NamedTuple
from pathlib import PurePath, Path
from typing import Union, Dict, List
import networkx as nx
import numpy as np
from numpy.typing import ArrayLike
from stable_baselines3 import PPO, DQN, A2C
from environment import constants as c
"""
This file is used for:
@ -21,10 +23,7 @@ This file is used for:
"""
MODEL_MAP = dict(PPO=PPO, DQN=DQN, A2C=A2C) # For use in studies and experiments
LEVELS_DIR = 'levels' # for use in studies and experiments
LEVELS_DIR = 'modules/levels' # for use in studies and experiments
STEPS_START = 1 # Define where to the stepcount; which is the first step
# Not used anymore? Clean!
@ -37,132 +36,13 @@ POS_MASK = np.asarray([[[-1, -1], [0, -1], [1, -1]],
[[-1, 0], [0, 0], [1, 0]],
[[-1, 1], [0, 1], [1, 1]]])
class Constants:
"""
String based mapping. Use these to handle keys or define values, which can be then be used globaly.
Please use class inheritance when defining new environments.
"""
WALL = '#' # Wall tile identifier for resolving the string based map files.
DANGER_ZONE = 'x' # Dange Zone tile identifier for resolving the string based map files.
WALLS = 'Walls' # Identifier of Wall-objects and sets (collections).
FLOOR = 'Floor' # Identifier of Floor-objects and sets (collections).
LEVEL = 'Level' # Identifier of Level-objects and sets (collections).
AGENT = 'Agent' # Identifier of Agent-objects and sets (collections).
AGENT_PLACEHOLDER = 'AGENT_PLACEHOLDER' # Identifier of Placeholder-objects and sets (collections).
GLOBAL_POSITION = 'GLOBAL_POSITION' # Identifier of the global position slice
FREE_CELL = 0 # Free-Cell value used in observation
OCCUPIED_CELL = 1 # Occupied-Cell value used in observation
SHADOWED_CELL = -1 # Shadowed-Cell value used in observation
NO_POS = (-9999, -9999) # Invalid Position value used in the environment (something is off-grid)
ACTION = 'action' # Identifier of Action-objects and sets (collections).
COLLISION = 'collision' # Identifier to use in the context of collitions.
VALID = True # Identifier to rename boolean values in the context of actions.
NOT_VALID = False # Identifier to rename boolean values in the context of actions.
class EnvActions:
"""
String based mapping. Use these to identifiy actions, can be used globaly.
Please use class inheritance when defining new environments with new actions.
"""
# Movements
NORTH = 'north'
EAST = 'east'
SOUTH = 'south'
WEST = 'west'
NORTHEAST = 'north_east'
SOUTHEAST = 'south_east'
SOUTHWEST = 'south_west'
NORTHWEST = 'north_west'
# Other
# MOVE = 'move'
NOOP = 'no_op'
_ACTIONMAP = defaultdict(lambda: (0, 0),
{NORTH: (-1, 0), NORTHEAST: (-1, 1),
EAST: (0, 1), SOUTHEAST: (1, 1),
SOUTH: (1, 0), SOUTHWEST: (1, -1),
WEST: (0, -1), NORTHWEST: (-1, -1)
}
)
@classmethod
def is_move(cls, action):
"""
Classmethod; checks if given action is a movement action or not. Depending on the env. configuration,
Movement actions are either `manhattan` (square) style movements (up,down, left, right) and/or diagonal.
:param action: Action to be checked
:type action: str
:return: Whether the given action is a movement action.
:rtype: bool
"""
return any([action == direction for direction in cls.movement_actions()])
@classmethod
def square_move(cls):
"""
Classmethod; return a list of movement actions that are considered square or `manhattan` style movements.
:return: A list of movement actions.
:rtype: list(str)
"""
return [cls.NORTH, cls.EAST, cls.SOUTH, cls.WEST]
@classmethod
def diagonal_move(cls):
"""
Classmethod; return a list of movement actions that are considered diagonal movements.
:return: A list of movement actions.
:rtype: list(str)
"""
return [cls.NORTHEAST, cls.SOUTHEAST, cls.SOUTHWEST, cls.NORTHWEST]
@classmethod
def movement_actions(cls):
"""
Classmethod; return a list of all available movement actions.
Please note, that this is indipendent from the env. properties
:return: A list of movement actions.
:rtype: list(str)
"""
return list(itertools.chain(cls.square_move(), cls.diagonal_move()))
@classmethod
def resolve_movement_action_to_coords(cls, action):
"""
Classmethod; resolve movement actions. Given a movement action, return the delta in coordinates it stands for.
How does the current entity coordinate change if it performs the given action?
Please note, this is indipendent from the env. properties
:return: Delta coorinates.
:rtype: tuple(int, int)
"""
return cls._ACTIONMAP[action]
class RewardsBase(NamedTuple):
"""
Value based mapping. Use these to define reward values for specific conditions (i.e. the action
in a given context), can be used globaly.
Please use class inheritance when defining new environments with new rewards.
"""
MOVEMENTS_VALID: float = -0.001
MOVEMENTS_FAIL: float = -0.05
NOOP: float = -0.01
COLLISION: float = -0.5
MOVEMAP = defaultdict(lambda: (0, 0),
{c.NORTH: (-1, 0), c.NORTHEAST: (-1, 1),
c.EAST: (0, 1), c.SOUTHEAST: (1, 1),
c.SOUTH: (1, 0), c.SOUTHWEST: (1, -1),
c.WEST: (0, -1), c.NORTHWEST: (-1, -1)
}
)
class ObservationTranslator:
@ -171,10 +51,10 @@ class ObservationTranslator:
*per_agent_named_obs_spaces: Dict[str, dict],
placeholder_fill_value: Union[int, str, None] = None):
"""
This is a helper class, which converts agents observations from joined environments.
For example, agents trained in different environments may expect different observations.
This is a helper class, which converts agent observations from joined environments.
For example, agent trained in different environments may expect different observations.
This class translates from larger observations spaces to smaller.
A string identifier based approach is used.
A string _identifier based approach is used.
Currently, it is not possible to mix different obs shapes.
@ -203,7 +83,7 @@ class ObservationTranslator:
self._this_named_obs_space = this_named_observation_space
self._per_agent_named_obs_space = list(per_agent_named_obs_spaces)
def translate_observation(self, agent_idx: int, obs: np.ndarray):
def translate_observation(self, agent_idx: int, obs):
target_obs_space = self._per_agent_named_obs_space[agent_idx]
translation = dict()
for name, idxs in target_obs_space.items():
@ -232,10 +112,10 @@ class ActionTranslator:
def __init__(self, target_named_action_space: Dict[str, int], *per_agent_named_action_space: Dict[str, int]):
"""
This is a helper class, which converts agents action spaces to a joined environments action space.
For example, agents trained in different environments may have different action spaces.
This is a helper class, which converts agent action spaces to a joined environments action space.
For example, agent trained in different environments may have different action spaces.
This class translates from smaller individual agent action spaces to larger joined spaces.
A string identifier based approach is used.
A string _identifier based approach is used.
:param target_named_action_space: Joined `Named action space` for the current environment.
:type target_named_action_space: Dict[str, dict]
@ -282,14 +162,14 @@ def parse_level(path):
return level
def one_hot_level(level, wall_char: str = Constants.WALL):
def one_hot_level(level, symbol: str):
"""
Given a string based level representation (list of lists, see function `parse_level`), this function creates a
binary numpy array or `grid`. Grid values that equal `wall_char` become of `Constants.OCCUPIED_CELL` value.
Can be changed to filter for any symbol.
:param level: String based level representation (list of lists, see function `parse_level`).
:param wall_char: List[List[str]]
:param symbol: List[List[str]]
:return: Binary numpy array
:rtype: np.typing._array_like.ArrayLike
@ -297,35 +177,12 @@ def one_hot_level(level, wall_char: str = Constants.WALL):
grid = np.array(level)
binary_grid = np.zeros(grid.shape, dtype=np.int8)
binary_grid[grid == wall_char] = Constants.OCCUPIED_CELL
binary_grid[grid == symbol] = c.VALUE_OCCUPIED_CELL
return binary_grid
def check_position(slice_to_check_against: ArrayLike, position_to_check: Tuple[int, int]):
"""
Given a slice (2-D Arraylike object)
:param slice_to_check_against: The slice to check for accessability
:type slice_to_check_against: np.typing._array_like.ArrayLike
:param position_to_check: Position in slice that should be checked. Can be outside of slice boundarys.
:type position_to_check: tuple(int, int)
:return: Whether a position can be moved to.
:rtype: bool
"""
x_pos, y_pos = position_to_check
# Check if agent colides with grid boundrys
valid = not (
x_pos < 0 or y_pos < 0
or x_pos >= slice_to_check_against.shape[0]
or y_pos >= slice_to_check_against.shape[1]
)
# Check for collision with level walls
valid = valid and not slice_to_check_against[x_pos, y_pos]
return Constants.VALID if valid else Constants.NOT_VALID
def is_move(action_name: str):
return action_name in MOVEMAP.keys()
def asset_str(agent):
@ -339,18 +196,18 @@ def asset_str(agent):
action = step_result['action_name']
valid = step_result['action_valid']
col_names = [x.name for x in step_result['collisions']]
if any(Constants.AGENT in name for name in col_names):
if any(c.AGENT in name for name in col_names):
return 'agent_collision', 'blank'
elif not valid or Constants.LEVEL in col_names or Constants.AGENT in col_names:
return Constants.AGENT, 'invalid'
elif valid and not EnvActions.is_move(action):
return Constants.AGENT, 'valid'
elif valid and EnvActions.is_move(action):
return Constants.AGENT, 'move'
elif not valid or c.LEVEL in col_names or c.AGENT in col_names:
return c.AGENT, 'invalid'
elif valid and not is_move(action):
return c.AGENT, 'valid'
elif valid and is_move(action):
return c.AGENT, 'move'
else:
return Constants.AGENT, 'idle'
return c.AGENT, 'idle'
else:
return Constants.AGENT, 'idle'
return c.AGENT, 'idle'
def points_to_graph(coordiniates_or_tiles, allow_euclidean_connections=True, allow_manhattan_connections=True):
@ -386,3 +243,30 @@ def points_to_graph(coordiniates_or_tiles, allow_euclidean_connections=True, all
elif allow_manhattan_connections and not allow_euclidean_connections and diff == 1:
graph.add_edge(a, b)
return graph
def locate_and_import_class(class_name, folder_path: Union[str, PurePath] = ''):
    """
    Locate a class by name in the python modules below ``folder_path`` and return it.

    Every ``*.py`` file below ``folder_path`` (``__init__`` files excluded) is imported
    using its dotted path relative to the current import roots; the first module that
    exposes ``class_name`` wins.

    :param class_name: Name of the attribute (usually a class) to look for.
    :param folder_path: Folder that is searched recursively for modules.
    :return: The object found under ``class_name``.
    :raises AttributeError: If no module below ``folder_path`` defines ``class_name``.
                            The message lists the candidate names that were seen.
    """
    import sys
    # Only register the parent folder once, repeated calls must not grow sys.path.
    if ".." not in sys.path:
        sys.path.append("..")

    folder_path = Path(folder_path)
    module_paths = [x for x in folder_path.rglob('*.py') if x.is_file() and '__init__' not in x.name]

    # Helper names that show up in (nearly) every module and are no useful suggestion.
    ignored_names = {
        'Entity', 'NamedTuple', 'List', 'Rule', 'Union', 'random', 'Floor',
        'TickResult', 'ActionResult', 'Action', 'Agent', 'deque',
        'BoundEntityMixin', 'RenderEntity', 'TemplateRule', 'defaultdict',
        'is_move', 'Objects', 'PositionMixin', 'IsBoundMixin', 'EnvObject',
        'EnvObjects',
    }

    all_found_modules = list()
    for module_path in module_paths:
        # Strip only the file suffix; folder names stay untouched.
        dotted_name = '.'.join(module_path.with_suffix('').parts)
        mod = importlib.import_module(dotted_name)
        all_found_modules.extend(
            x for x in dir(mod)
            if not (x.startswith('__') or len(x) < 2 or x.isupper()) and x not in ignored_names
        )
        try:
            return getattr(mod, class_name)
        except AttributeError:
            continue
    raise AttributeError(f'Class "{class_name}" was not found!!!\n'
                         f'Check the {folder_path.name} name.\n'
                         f'Possible Options are:\n{set(all_found_modules)}')

View File

@ -0,0 +1,55 @@
from os import PathLike
from pathlib import Path
from typing import Dict
import numpy as np
from environment.groups.global_entities import Entities
from environment.groups.wall_n_floors import Walls, Floors
from environment.utils import helpers as h
from environment import constants as c
class LevelParser(object):
    """
    Reads a level file and turns it into the entity collections of an environment.

    Walls and floors are always created; every further collection is described by
    ``entity_parse_dict`` whose values provide a ``'class'`` and its ``'kwargs'``.
    """

    @property
    def pomdp_d(self):
        """Diameter that belongs to the configured ``pomdp_r`` radius."""
        return self.pomdp_r * 2 + 1

    def __init__(self, level_file_path: PathLike, entity_parse_dict: Dict[Entities, dict], pomdp_r=0):
        self.pomdp_r = pomdp_r
        self.e_p_dict = entity_parse_dict
        self._parsed_level = h.parse_level(Path(level_file_path))
        # The wall grid is only needed here to learn the level dimensions.
        self.level_shape = h.one_hot_level(self._parsed_level, c.SYMBOL_WALL).shape
        if self.pomdp_r:
            self.size = self.pomdp_r**2
        else:
            self.size = np.prod(self.level_shape)

    def do_init(self):
        """
        Build and return the ``Entities`` container for the parsed level.

        :raises ValueError: If a collection class declares a ``symbol`` that does not
                            occur in the level file.
        """
        entities = Entities()

        # Walls: every cell that carries the wall symbol.
        wall_grid = h.one_hot_level(self._parsed_level, c.SYMBOL_WALL)
        wall_coordinates = np.argwhere(wall_grid == c.VALUE_OCCUPIED_CELL)
        entities.add_items({c.WALL: Walls.from_coordinates(wall_coordinates, self.size)})

        # Floor: every cell that is not a wall.
        floor_coordinates = np.argwhere(wall_grid == c.VALUE_FREE_CELL)
        entities.add_items({c.FLOOR: Floors.from_coordinates(floor_coordinates, self.size)})

        # All other collections as configured.
        for parse_conf in self.e_p_dict.values():
            e_class, e_kwargs = parse_conf['class'], parse_conf['kwargs']

            if not hasattr(e_class, 'symbol'):
                # Collections without a level symbol are created empty.
                e = e_class(self.size, **e_kwargs)
            else:
                symbol_grid = h.one_hot_level(self._parsed_level, symbol=e_class.symbol)
                if not np.any(symbol_grid):
                    raise ValueError(f'No {e_class} (Symbol: {e_class.symbol}) could be found!\n'
                                     f'Check your level file!')
                symbol_coordinates = np.argwhere(symbol_grid == c.VALUE_OCCUPIED_CELL).tolist()
                e = e_class.from_coordinates(symbol_coordinates, entities[c.FLOOR], self.size,
                                             entity_kwargs=e_kwargs)
            entities.add_items({e.name: e})
        return entities

View File

@ -0,0 +1,315 @@
import math
from collections import defaultdict
from itertools import product
from typing import Dict, List
import numpy as np
from numba import njit
from environment.groups.utils import Combined
from environment.utils.states import Gamestate
from environment import constants as c
class OBSBuilder(object):
    """
    Builds per-agent observation arrays from the current game state.

    For every agent a stack of 2-D layers is produced. Which layers an agent receives
    is derived once from ``agent.observations`` and cached in ``self.obs_layers``.
    Entities that are visible to the agent (as reported by its ``RayCaster``) are drawn
    relative to the agent position; objects without a position write their encoding
    flat into their layer.
    """

    default_obs = [c.WALLS, c.OTHERS]

    @property
    def pomdp_d(self):
        """Edge length of the observation window, ``0`` when ``pomdp_r`` is falsy."""
        if self.pomdp_r:
            return (self.pomdp_r * 2) + 1
        else:
            return 0

    def __init__(self, level_shape: np.size, state: Gamestate, pomdp_r: int):
        self.all_obs = dict()
        self.light_blockers = defaultdict(lambda: False)
        self.positional = defaultdict(lambda: False)
        self.non_positional = defaultdict(lambda: False)
        self.ray_caster = dict()

        self.level_shape = level_shape
        self.pomdp_r = pomdp_r
        self.obs_shape = (self.pomdp_d, self.pomdp_d) if self.pomdp_r else self.level_shape
        self.size = np.prod(self.obs_shape)

        self.obs_layers = dict()

        self.build_structured_obs_block(state)
        self.curr_lightmaps = dict()

    def build_structured_obs_block(self, state):
        """Refresh ``self.all_obs`` with a zero placeholder layer and ``state.entities.obs_pairs``."""
        self.all_obs[c.PLACEHOLDER] = np.full(self.obs_shape, 0, dtype=float)
        self.all_obs.update({key: obj for key, obj in state.entities.obs_pairs})

    def observation_space(self, state):
        """
        Derive the gymnasium observation space from a freshly built observation.

        :return: A single ``Box`` for one agent, a ``Tuple`` of ``Box`` otherwise.
        """
        from gymnasium.spaces import Tuple, Box
        # refresh_and_build_for_all returns (observations, info); only the observations matter here.
        obsn, _ = self.refresh_and_build_for_all(state)
        if len(state[c.AGENT]) == 1:
            space = Box(low=0, high=1, shape=next(x for x in obsn.values()).shape, dtype=np.float32)
        else:
            space = Tuple([Box(low=0, high=1, shape=obs.shape, dtype=np.float32) for obs in obsn.values()])
        return space

    def named_observation_space(self, state):
        """Return the result of ``refresh_and_build_for_all`` unchanged, i.e. ``(observations, info)``."""
        return self.refresh_and_build_for_all(state)

    def refresh_and_build_for_all(self, state) -> (dict, dict):
        """
        Rebuild the observation block and build the observation of every agent.

        :return: ``({agent.name: observation}, info)``; ``info`` is currently always empty.
        """
        self.build_structured_obs_block(state)
        info = {}
        return {agent.name: self.build_for_agent(agent, state)[0] for agent in state[c.AGENT]}, info

    def refresh_and_build_named_for_all(self, state) -> Dict[str, Dict[str, np.ndarray]]:
        """Like ``refresh_and_build_for_all`` but keeps the layer names next to each observation."""
        self.build_structured_obs_block(state)
        named_obs_dict = {}
        for agent in state[c.AGENT]:
            obs, names = self.build_for_agent(agent, state)
            named_obs_dict[agent.name] = {'observation': obs, 'names': names}
        return named_obs_dict

    def _resolve_obs_source(self, l_name, agent):
        """
        Look up the object registered for layer ``l_name``.

        Tried in order: the exact key, the agent specific key ``'<l_name>(<agent.name>)'``
        and finally the first key that contains both ``l_name`` and the agent name.

        :raises KeyError: If none of the lookups succeeds.
        """
        for key in (l_name, f'{l_name}({agent.name})'):
            try:
                return self.all_obs[key]
            except KeyError:
                continue
        try:
            fuzzy_key = next(x for x in self.all_obs if l_name in x and agent.name in x)
        except StopIteration:
            raise KeyError(
                f'Check typing!\n{l_name} could not be found in:\n{dict(self.all_obs).keys()}') from None
        # Iterating the dict yields keys; hand back the registered object, not its name.
        return self.all_obs[fuzzy_key]

    def build_for_agent(self, agent, state) -> (np.ndarray, List[str]):
        """
        Build the observation stack of a single agent.

        :return: ``(obs, layer_names)`` with ``obs`` of shape
                 ``(len(layer_names), pomdp_d, pomdp_d)``.
        :raises KeyError: If a requested layer is unknown.
        :raises AttributeError: If a non-positional object exposes neither
                                ``encodings`` nor ``encoding``.
        :raises ValueError: If an encoding does not fit into a single layer.
        """
        try:
            agent_want_obs = self.obs_layers[agent.name]
        except KeyError:
            # First request of this agent: resolve its observation configuration.
            self._sort_and_name_observation_conf(agent)
            agent_want_obs = self.obs_layers[agent.name]

        # Handle in-grid observations aka visible observations
        visible_entitites = self.ray_caster[agent.name].visible_entities(state.entities)
        pre_sort_obs = defaultdict(lambda: np.zeros((self.pomdp_d, self.pomdp_d)))
        for e in set(visible_entitites):
            # Translate world coordinates into window coordinates centred on the agent.
            x, y = (e.x - agent.x) + self.pomdp_r, (e.y - agent.y) + self.pomdp_r
            try:
                pre_sort_obs[e.obs_tag][x, y] += e.encoding
            except IndexError:
                # Seemed to be visible but is out of range
                pass

        # Freeze into a plain dict so that missing layers raise KeyError below.
        pre_sort_obs = dict(pre_sort_obs)
        obs = np.zeros((len(agent_want_obs), self.pomdp_d, self.pomdp_d))

        for idx, l_name in enumerate(agent_want_obs):
            try:
                obs[idx] = pre_sort_obs[l_name]
                continue
            except KeyError:
                pass

            if c.COMBINED in l_name:
                if combined := [pre_sort_obs[x] for x in self.all_obs[f'{c.COMBINED}({agent.name})'].names
                                if x in pre_sort_obs]:
                    obs[idx] = np.sum(combined, axis=0)
            elif l_name == c.PLACEHOLDER:
                obs[idx] = self.all_obs[c.PLACEHOLDER]
            else:
                e = self._resolve_obs_source(l_name, agent)

                if getattr(e, 'has_position', False):
                    # Positional but not visible right now: the layer stays all zero.
                    continue

                try:
                    v = e.encodings
                except AttributeError:
                    try:
                        v = e.encoding
                    except AttributeError:
                        raise AttributeError('This env. expects Entity-Clases to report their "encoding"')
                try:
                    np.put(obs[idx], range(len(v)), v, mode='raise')
                except TypeError:
                    # Scalar encoding (no len()): write it to the first cell.
                    np.put(obs[idx], 0, v, mode='raise')
                except IndexError:
                    raise ValueError(f'Max(obs.size) for {e.name}: {obs[idx].size}, but was: {len(v)}.')

        try:
            self.curr_lightmaps[agent.name] = pre_sort_obs[c.FLOORS].astype(bool)
        except KeyError:
            # No floor visible this step; keep the previous light map.
            pass
        return obs, self.obs_layers[agent.name]

    def _sort_and_name_observation_conf(self, agent):
        """
        Resolve ``agent.observations`` into concrete layer names.

        Creates the agent's ``RayCaster``, stores the layer names in ``self.obs_layers``
        and initialises the agent's light map with zeros.
        """
        self.ray_caster[agent.name] = RayCaster(agent, self.pomdp_r)
        obs_layers = []

        for obs_str in agent.observations:
            if isinstance(obs_str, dict):
                # Single-entry mapping: {layer_kind: values}
                obs_str, vals = next(iter(obs_str.items()))
            else:
                vals = None
            if obs_str == c.SELF:
                obs_layers.append(agent.name)
            elif obs_str == c.DEFAULTS:
                obs_layers.extend(self.default_obs)
            elif obs_str == c.COMBINED:
                if isinstance(vals, str):
                    vals = [vals]
                names = list()
                for val in vals:
                    if val == c.SELF:
                        names.append(agent.name)
                    elif val == c.OTHERS:
                        names.extend([x.name for x in agent.collection if x.name != agent.name])
                    else:
                        names.append(val)
                combined = Combined(names, self.pomdp_r, identifier=agent.name)
                self.all_obs[combined.name] = combined
                obs_layers.append(combined.name)
            elif obs_str == c.OTHERS:
                obs_layers.extend([x for x in self.all_obs if x != agent.name and x.startswith(f'{c.AGENT}[')])
            elif obs_str == c.AGENTS:
                obs_layers.extend([x for x in self.all_obs if x.startswith(f'{c.AGENT}[')])
            else:
                obs_layers.append(obs_str)
        self.obs_layers[agent.name] = obs_layers
        self.curr_lightmaps[agent.name] = np.zeros((self.pomdp_d or self.level_shape[0],
                                                    self.pomdp_d or self.level_shape[1]
                                                    ))
class RayCaster:
    """
    Casts rays from an agent to the outline of its field of view.

    Rays are rasterised with Bresenham's line algorithm and followed cell by cell
    until something that blocks light is hit; everything passed on the way counts
    as visible.
    """

    def __init__(self, agent, pomdp_r, degs=360):
        self.agent = agent
        self.pomdp_r = pomdp_r
        self.n_rays = 100  # (self.pomdp_r + 1) * 8
        self.degs = degs
        self.ray_targets = self.build_ray_targets()
        self.obs_shape_cube = np.array([self.pomdp_r, self.pomdp_r])

    def build_ray_targets(self):
        """
        Compute the unique integer end points of all rays relative to the agent.

        A vector of length ``pomdp_r`` is rotated over ``degs`` degrees in ``n_rays``
        steps; rounded duplicates are dropped.
        """
        north = np.array([0, -1])*self.pomdp_r
        thetas = [np.deg2rad(deg) for deg in np.linspace(-self.degs // 2, self.degs // 2, self.n_rays)[::-1]]
        rot_M = [
            [[math.cos(theta), -math.sin(theta)],
             [math.sin(theta), math.cos(theta)]] for theta in thetas
        ]
        rot_M = np.stack(rot_M, 0)
        rot_M = np.unique(np.round(rot_M @ north), axis=0)
        return rot_M.astype(int)

    @staticmethod
    def ray_block_cache(cache_dict, key, callback, ents):
        """
        Return the cached value for ``key``; ``callback`` is evaluated on a cache miss.

        ``ents`` is kept in the signature for callers but is not needed for the lookup.
        """
        if key not in cache_dict:
            cache_dict[key] = callback()
        return cache_dict[key]

    def visible_entities(self, entities):
        """
        Collect the entities that are visible from the agent's position.

        :param entities: Container exposing ``pos_dict`` (position -> entities).
        :return: List of visible entities; an entity may occur more than once.
        """
        visible = list()
        # NOTE(review): straight hits and diagonal neighbours share this cache although
        # their callbacks compute different predicates -- confirm that this is intended.
        cache_blocking = {}

        for ray in self.get_rays():
            rx, ry = ray[0]
            for x, y in ray:
                # Step direction relative to the previously visited cell.
                cx, cy = x - rx, y - ry

                entities_hit = entities.pos_dict[(x, y)]
                hits = self.ray_block_cache(cache_blocking,
                                            (x, y),
                                            lambda: any(True for e in entities_hit if e.is_blocking_light),
                                            entities)

                # A diagonal step is blocked when both cells it squeezes through are closed.
                diag_hits = all([
                    self.ray_block_cache(
                        cache_blocking,
                        key,
                        lambda: all(False for e in entities.pos_dict[key] if not e.is_blocking_light),
                        entities)
                    for key in ((x, y-cy), (x-cx, y))
                ]) if (cx != 0 and cy != 0) else False

                visible += entities_hit if not diag_hits else []
                if hits or diag_hits:
                    break
                rx, ry = x, y
        return visible

    def get_rays(self):
        """Rasterise one ray from the agent position to every ray target."""
        a_pos = self.agent.pos
        outline = self.ray_targets + a_pos
        return self.bresenham_loop(a_pos, outline)

    # todo do this once and cache the points!
    def get_fov_outline(self) -> np.ndarray:
        """Ray targets translated to the agent's current position."""
        return self.ray_targets + self.agent.pos

    def get_square_outline(self):
        """Positions on two opposing rows and two opposing columns ``pomdp_r`` away from the agent."""
        agent = self.agent
        x_coords = range(agent.x - self.pomdp_r, agent.x + self.pomdp_r + 1)
        y_coords = range(agent.y - self.pomdp_r, agent.y + self.pomdp_r + 1)
        outline = list(product(x_coords, [agent.y - self.pomdp_r, agent.y + self.pomdp_r])) \
                  + list(product([agent.x - self.pomdp_r, agent.x + self.pomdp_r], y_coords))
        return outline

    @staticmethod
    @njit
    def bresenham_loop(a_pos, points):
        """
        Bresenham line rasterisation from ``a_pos`` to every point in ``points``.

        :return: One list of ``[x, y]`` coordinates per end point, ordered from
                 ``a_pos`` towards the end point.
        """
        results = []
        for end in points:
            x1, y1 = a_pos
            x2, y2 = end
            dx = x2 - x1
            dy = y2 - y1

            # Determine how steep the line is
            is_steep = abs(dy) > abs(dx)

            # Rotate line
            if is_steep:
                x1, y1 = y1, x1
                x2, y2 = y2, x2

            # Swap start and end points if necessary and store swap state
            swapped = False
            if x1 > x2:
                x1, x2 = x2, x1
                y1, y2 = y2, y1
                swapped = True

            # Recalculate differentials
            dx = x2 - x1
            dy = y2 - y1

            # Calculate error
            error = int(dx / 2.0)
            ystep = 1 if y1 < y2 else -1

            # Iterate over bounding box generating points between start and end
            y = y1
            points = []
            for x in range(int(x1), int(x2) + 1):
                coord = [y, x] if is_steep else [x, y]
                points.append(coord)
                error -= abs(dy)
                if error < 0:
                    y += ystep
                    error += dx

            # Reverse the list if the coordinates were swapped
            if swapped:
                points.reverse()
            results.append(points)
        return results

View File

@ -0,0 +1,16 @@
from dataclasses import dataclass
from typing import Any
import numpy as np
# Plain data record describing one thing the renderer should draw.
@dataclass
class RenderEntity:
# Asset identifier of the entity.
name: str
# Grid position of the entity.
pos: np.array
# Numeric argument of `value_operation`; defaults to 1.
value: float = 1
# Name of the operation that consumes `value`; 'none' by default.
value_operation: str = 'none'
# Optional state identifier of the entity.
state: str = None
# Numeric id of the entity; defaults to 0.
id: int = 0
# Free-form auxiliary payload.
aux: Any = None
# NOTE(review): presumably the un-aliased entity name -- confirm against the callers.
real_name: str = 'none'

View File

@ -1,32 +1,26 @@
import sys
import numpy as np
from pathlib import Path
from collections import deque
from itertools import product
import pygame
from typing import NamedTuple, Any
from typing import Tuple, Union
import time
import torch
from environment.utils.render import RenderEntity
class RenderEntity(NamedTuple):
name: str
pos: np.array
value: float = 1
value_operation: str = 'none'
state: str = None
id: int = 0
aux: Any = None
class RenderNames:
AGENT: str = 'agent'
BLANK: str = 'blank'
DOOR: str = 'door'
OPACITY: str = 'opacity'
SCALE: str = 'scale'
rn = RenderNames
AGENT: str = 'agent'
STATE_IDLE: str = 'idle'
STATE_MOVE: str = 'move'
STATE_VALID: str = 'valid'
STATE_INVALID: str = 'invalid'
STATE_COLLISION: str = 'agent_collision'
BLANK: str = 'blank'
DOOR: str = 'door'
OPACITY: str = 'opacity'
SCALE: str = 'scale'
class Renderer:
@ -34,11 +28,12 @@ class Renderer:
WHITE = (223, 230, 233) # (200, 200, 200)
AGENT_VIEW_COLOR = (9, 132, 227)
ASSETS = Path(__file__).parent.parent / 'assets'
MODULE_ASSETS = Path(__file__).parent.parent.parent / 'modules'
def __init__(self, lvl_shape=(16, 16),
lvl_padded_shape=None,
cell_size=40, fps=7,
grid_lines=True, view_radius=2):
def __init__(self, lvl_shape: Tuple[int, int] = (16, 16),
lvl_padded_shape: Union[Tuple[int, int], None] = None,
cell_size: int = 40, fps: int = 7,
grid_lines: bool = True, view_radius: int = 2):
self.grid_h, self.grid_w = lvl_shape
self.lvl_padded_shape = lvl_padded_shape if lvl_padded_shape is not None else lvl_shape
self.cell_size = cell_size
@ -49,7 +44,7 @@ class Renderer:
self.screen_size = (self.grid_w*cell_size, self.grid_h*cell_size)
self.screen = pygame.display.set_mode(self.screen_size)
self.clock = pygame.time.Clock()
assets = list(self.ASSETS.rglob('*.png'))
assets = list(self.ASSETS.rglob('*.png')) + list(self.MODULE_ASSETS.rglob('*.png'))
self.assets = {path.stem: self.load_asset(str(path), 1) for path in assets}
self.fill_bg()
@ -75,9 +70,9 @@ class Renderer:
r, c = r - offset_r, c-offset_c
img = self.assets[entity.name.lower()]
if entity.value_operation == rn.OPACITY:
if entity.value_operation == OPACITY:
img.set_alpha(255*entity.value)
elif entity.value_operation == rn.SCALE:
elif entity.value_operation == SCALE:
re = img.get_rect()
img = pygame.transform.smoothscale(
img, (int(entity.value*re.width), int(entity.value*re.height))
@ -116,19 +111,16 @@ class Renderer:
sys.exit()
self.fill_bg()
blits = deque()
for entity in [x for x in entities if rn.DOOR in x.name]:
for entity in [x for x in entities]:
bp = self.blit_params(entity)
blits.append(bp)
for entity in [x for x in entities if rn.DOOR not in x.name]:
bp = self.blit_params(entity)
blits.append(bp)
if entity.name.lower() == rn.AGENT:
if entity.name.lower() == AGENT:
if self.view_radius > 0:
vis_rects = self.visibility_rects(bp, entity.aux)
blits.extendleft(vis_rects)
if entity.state != rn.BLANK:
if entity.state != BLANK:
agent_state_blits = self.blit_params(
RenderEntity(entity.state, (entity.pos[0] + 0.12, entity.pos[1]), 0.48, rn.SCALE)
RenderEntity(entity.state, (entity.pos[0] + 0.12, entity.pos[1]), 0.48, SCALE)
)
textsurface = self.font.render(str(entity.id), False, (0, 0, 0))
text_blit = dict(source=textsurface, dest=(bp['dest'].center[0]-.07*self.cell_size,
@ -146,7 +138,6 @@ class Renderer:
if __name__ == '__main__':
renderer = Renderer(fps=2, cell_size=40)
for i in range(15):
entity_1 = RenderEntity('agent_collision', [5, i], 1, 'idle', 'idle')
for pos_i in range(15):
entity_1 = RenderEntity('agent_collision', [5, pos_i], 1, 'idle', 'idle')
renderer.render([entity_1])

View File

@ -0,0 +1,48 @@
from typing import Union
from dataclasses import dataclass, asdict
from environment.entity.entity import Entity
TYPE_VALUE = 'value'
TYPE_REWARD = 'reward'
types = [TYPE_VALUE, TYPE_REWARD]
# Single info record: a named value together with the kind of value it carries.
@dataclass
class InfoObject:
# Name under which the value is reported.
identifier: str
# Kind of the value; the module defines TYPE_VALUE and TYPE_REWARD for this.
val_type: str
# The reported number.
value: Union[float, int]
@dataclass
class Result:
    """
    Outcome of something that happened during an environment step.

    :param identifier: Name of what produced the result.
    :param validity: Whether the underlying operation was valid.
    :param reward: Optional reward attached to the result.
    :param value: Optional plain value attached to the result.
    :param entity: Optional entity the result belongs to; ``None`` means global.
    """
    identifier: str
    validity: bool
    reward: Union[float, None] = None
    value: Union[float, None] = None
    # Forward reference: the annotation is not evaluated when the class is created.
    entity: Union['Entity', None] = None

    def get_infos(self):
        """
        Create one ``InfoObject`` per populated field listed in the module level ``types``.

        Identifiers have the form ``'<entity name or Global>_<identifier>_<type>'``.
        """
        n = self.entity.name if self.entity is not None else "Global"
        infos = []
        for t in types:
            field_value = getattr(self, t)
            if field_value is not None:
                infos.append(InfoObject(identifier=f'{n}_{self.identifier}_{t}',
                                        val_type=t, value=field_value))
        return infos

    def __repr__(self):
        valid = "not " if not self.validity else ""
        return f'{self.__class__.__name__}({self.identifier.capitalize()} {valid}valid: {self.reward})'


# repr=False on the subclasses: otherwise @dataclass generates a fresh __repr__ for
# each of them and the compact Result.__repr__ above would never be used.
@dataclass(repr=False)
class TickResult(Result):
    """Result produced by a rule during a tick."""
    pass


@dataclass(repr=False)
class ActionResult(Result):
    """Result produced by an agent action."""
    pass


@dataclass(repr=False)
class DoneResult(Result):
    """Result produced by a done check."""
    pass

112
environment/utils/states.py Normal file
View File

@ -0,0 +1,112 @@
from typing import List, Dict
import numpy as np
from environment.entity.wall_floor import Floor
from environment.rules import Rule
from environment.utils.results import Result
from environment import constants as c
class StepRules:
    """
    Ordered collection of rules with helpers that call one hook on every rule.

    Hooks that return something falsy are skipped; everything else is expected to be
    an iterable of results and is merged into one flat list.
    """

    def __init__(self, *args):
        self.rules = list(args) if args else list()

    def __repr__(self):
        return f'Rules{[x.name for x in self]}'

    def __iter__(self):
        return iter(self.rules)

    def append(self, item):
        """Add a rule; ``item`` has to be a ``Rule`` instance. Always returns ``True``."""
        assert isinstance(item, Rule)
        self.rules.append(item)
        return True

    def do_all_init(self, state):
        """Call ``on_init`` on every rule and print whatever a rule reports. Returns ``c.VALID``."""
        for rule in self.rules:
            if rule_init_printline := rule.on_init(state):
                state.print(rule_init_printline)
        return c.VALID

    def _collect(self, hook_name, state):
        """Call the hook named ``hook_name`` on every rule and flatten the truthy results."""
        results = list()
        for rule in self.rules:
            if hook_result := getattr(rule, hook_name)(state):
                results.extend(hook_result)
        return results

    def tick_step_all(self, state):
        """Run ``tick_step`` of every rule."""
        return self._collect('tick_step', state)

    def tick_pre_step_all(self, state):
        """Run ``tick_pre_step`` of every rule (not the post step hook)."""
        return self._collect('tick_pre_step', state)

    def tick_post_step_all(self, state):
        """Run ``tick_post_step`` of every rule."""
        return self._collect('tick_post_step', state)
class Gamestate(object):
    """
    Holds everything that describes a running game: the entities, the rules, the
    step counter and the random number generator.
    """

    @property
    def moving_entites(self):
        """All members of those entity collections that report ``can_move``."""
        movers = []
        for collection in self.entities:
            for member in collection:
                if collection.can_move:
                    movers.append(member)
        return movers

    def __init__(self, entitites, rules: Dict[str, dict], env_seed=69, verbose=False):
        self.entities = entitites
        self.NO_POS_TILE = Floor(c.VALUE_NO_POS)
        self.curr_step = 0
        self.curr_actions = None
        self.verbose = verbose
        self.rng = np.random.default_rng(env_seed)
        # Each rule configuration provides the rule class and its keyword arguments.
        self.rules = StepRules(*(v['class'](**v['kwargs']) for v in rules.values()))

    def __getitem__(self, item):
        return self.entities[item]

    def __iter__(self):
        return (e for e in self.entities.values())

    def __repr__(self):
        return f'{self.__class__.__name__}({len(self.entities)} Entitites @ Step {self.curr_step})'

    def tick(self, actions) -> List[Result]:
        """
        Advance the game by one step.

        Order: pre-step rules, one action per agent (``actions[i]`` belongs to agent
        ``i``), step rules, post-step rules.

        :return: All results collected on the way.
        """
        self.curr_step += 1
        collected = list()

        # Main Agent Step
        collected.extend(self.rules.tick_pre_step_all(self))
        for agent_idx, action_idx in enumerate(actions):
            agent = self[c.AGENT][agent_idx].clear_temp_state()
            outcome = agent.actions[action_idx].do(agent, self)
            collected.append(outcome)
            agent.set_state(outcome)
        collected.extend(self.rules.tick_step_all(self))
        collected.extend(self.rules.tick_post_step_all(self))
        return collected

    def print(self, string):
        """Print ``string`` only when the state was created with ``verbose``."""
        if not self.verbose:
            return
        print(string)

    def check_done(self):
        """Ask every rule via ``on_check_done`` and merge the truthy answers."""
        collected = list()
        for rule in self.rules:
            answer = rule.on_check_done(self)
            if answer:
                collected.extend(answer)
        return collected

    def get_all_tiles_with_collisions(self) -> List[Floor]:
        """Floor tiles of all positions that hold more than one colliding entity."""
        tiles = []
        for pos, occupants in self.entities.pos_dict.items():
            if sum([x.can_collide for x in occupants]) > 1:
                tiles.append(self[c.FLOOR].by_pos(pos))
        # tiles = [x for x in self[c.FLOOR] if len(x.guests_that_can_collide) > 1]
        return tiles

View File

@ -0,0 +1,27 @@
import gymnasium as gym
class EnvCombiner(object):
    """
    Combines several environment classes into one new class via multiple inheritance.

    The name of the combined class is built from the given class names with every
    ``factory`` part removed, e.g. ``DirtFactory`` + ``ItemFactory`` -> ``DirtItemFactory``.
    """

    def __init__(self, *envs_cls):
        self._env_dict = {env_cls.__name__: env_cls for env_cls in envs_cls}

    @staticmethod
    def combine_cls(name, *envs_cls):
        """Create a class called ``name`` that inherits from all ``envs_cls`` (in the given order)."""
        return type(name, envs_cls, {})

    def build(self):
        """Build and return the combined class."""
        prefix = "".join(x.lower().replace("factory", "").capitalize() for x in self._env_dict)
        # The classes are unpacked: combine_cls expects them as individual base classes.
        return self.combine_cls(f'{prefix}Factory', *self._env_dict.values())
class MarlFrameStack(gym.ObservationWrapper):
    """
    Observation wrapper for stacked multi agent observations (todo @romue404).

    When the wrapped env is a ``gym.wrappers.FrameStack`` with more than one agent,
    the first two axes of the observation are swapped; otherwise the observation
    is passed through untouched.
    """

    def __init__(self, env):
        super().__init__(env)

    def observation(self, observation):
        wraps_frame_stack = isinstance(self.env, gym.wrappers.FrameStack)
        if not (wraps_frame_stack and self.env.unwrapped.n_agents > 1):
            return observation
        return observation[0:].swapaxes(0, 1)

View File

@ -1,28 +0,0 @@
def make(env_name, pomdp_r=2, max_steps=400, stack_n_frames=3, n_agents=1, individual_rewards=False):
    """
    Create an entered ``DirtFactory`` from the parameter file ``levels/parameters/<env_name>.yaml``.

    :param env_name: Stem of the yaml parameter file located next to this module.
    :param pomdp_r: Forwarded as ``pomdp_r`` of the observation properties.
    :param max_steps: Forwarded to the factory as ``max_steps``.
    :param stack_n_frames: Forwarded as ``frames_to_stack`` of the observation properties.
    :param n_agents: Forwarded to the factory as ``n_agents``.
    :param individual_rewards: Forwarded to the factory as ``individual_rewards``.
    :return: Whatever ``DirtFactory(...).__enter__()`` returns.
    """
    import yaml
    from pathlib import Path
    from environments.factory.combined_factories import DirtItemFactory
    from environments.factory.factory_item import ItemFactory
    from environments.factory.additional.item.item_util import ItemProperties
    from environments.factory.factory_dirt import DirtFactory
    from environments.factory.dirt_util import DirtProperties
    from environments.factory.dirt_util import RewardsDirt
    from environments.utility_classes import AgentRenderOptions

    parameter_file = Path(__file__).parent / 'levels' / 'parameters' / f'{env_name}.yaml'
    with parameter_file.open('r') as stream:
        dictionary = yaml.load(stream, Loader=yaml.FullLoader)

    obs_props = {
        'render_agents': AgentRenderOptions.COMBINED,
        'pomdp_r': pomdp_r,
        'indicate_door_area': True,
        'show_global_position_info': False,
        'frames_to_stack': stack_n_frames,
    }

    # dict(**...) on purpose: a key clash between the file and the arguments raises.
    factory_kwargs = dict(**dictionary,
                          n_agents=n_agents,
                          individual_rewards=individual_rewards,
                          max_steps=max_steps,
                          obs_prop=obs_props,
                          verbose=False,
                          )

    factory = DirtFactory(**factory_kwargs)
    return factory.__enter__()

View File

@ -1,38 +0,0 @@
from typing import Union
from environments.factory.additional.doors.doors_entities import Door
from environments.factory.base.registers import EntityCollection
from environments.factory.additional.doors.doors_util import Constants as c
# Collection of Door entities. Doors are registered as light blocking and colliding.
class Doors(EntityCollection):
# indicate_area: whether as_array() should additionally mark the floor around each door.
def __init__(self, *args, indicate_area=False, **kwargs):
self.indicate_area = indicate_area
# Remembers that the door areas were already queued for marking.
self._area_marked = False
super(Doors, self).__init__(*args, is_blocking_light=True, can_collide=True, **kwargs)
# Entity type held by this collection.
_accepted_objects = Door
# Returns the first door whose tile lists `position` among its neighboring floor
# positions, or None if there is no such door.
def get_near_position(self, position: (int, int)) -> Union[None, Door]:
try:
return next(door for door in self if position in door.tile.neighboring_floor_pos)
except StopIteration:
return None
# Forwards the tick to every door of the collection.
def tick_doors(self):
for door in self:
door.tick()
# Array representation of the collection. As long as the areas are not marked yet and
# indicate_area is set, every floor tile next to a door is queued in
# _lazy_eval_transforms with the ACCESS_DOOR_CELL value (slice 0); nothing is queued
# when _individual_slices is set. The actual array comes from the parent class.
def as_array(self):
if not self._area_marked and self.indicate_area:
for door in self:
for tile in door.tile.neighboring_floor:
if self._individual_slices:
pass
else:
pos = (0, *tile.pos)
self._lazy_eval_transforms.append((pos, c.ACCESS_DOOR_CELL))
self._area_marked = True
return super(Doors, self).as_array()

View File

@ -1,71 +0,0 @@
from environments.factory.base.objects import Entity
from environments.factory.additional.doors.doors_util import Constants as c
class Template(Entity):
    """
    Template for a new Entity, modelled as a door that can be opened and closed and
    that closes itself again after ``auto_close_interval`` ticks.
    """

    # How to define / override properties
    @property
    def is_blocking(self):
        return False

    @property
    def can_collide(self):
        return not self.template_attr

    @property
    def encoding(self):
        # This is important as it shadow is checked by occupation value
        if self.is_closed:
            return c.CLOSED_DOOR_CELL
        return c.OPEN_DOOR_CELL

    @property
    def str_state(self):
        if self.is_open:
            return 'open'
        return 'closed'

    def __init__(self, *args, closed_on_init=True, auto_close_interval=10, indicate_area=False, **kwargs):
        super(Template, self).__init__(*args, **kwargs)
        self._state = c.CLOSED_DOOR
        self.indicate_area = indicate_area
        self.auto_close_interval = auto_close_interval
        self.time_to_close = -1
        # Entities start closed; open explicitly when requested.
        if not closed_on_init:
            self._open()

    def summarize_state(self):
        """Parent summary extended by the open/closed state and the remaining time to close."""
        summary = super().summarize_state()
        summary.update(state=str(self.str_state), time_to_close=int(self.time_to_close))
        return summary

    @property
    def is_closed(self):
        return self._state == c.CLOSED_DOOR

    @property
    def is_open(self):
        return self._state == c.OPEN_DOOR

    @property
    def status(self):
        return self._state

    def use(self):
        """Toggle between open and closed."""
        toggle = self._close if self._state == c.OPEN_DOOR else self._open
        toggle()

    def tick(self):
        """Count down while open with a tile of length one; close once the countdown hits zero."""
        if not (self.is_open and len(self.tile) == 1):
            return
        if self.time_to_close:
            self.time_to_close -= 1
        else:
            self.use()

    def _open(self):
        self._state = c.OPEN_DOOR
        self._collection.notify_change_to_value(self)
        # Restart the countdown whenever the entity is opened.
        self.time_to_close = self.auto_close_interval

    def _close(self):
        self._state = c.CLOSED_DOOR
        self._collection.notify_change_to_value(self)

View File

@ -1,31 +0,0 @@
from typing import NamedTuple
from environments.helpers import Constants as BaseConstants, EnvActions as BaseActions
# Door specific constants: identifiers, the level-file symbol, the values written
# into observations and the two state names.
class Constants(BaseConstants):
DOOR = 'Door' # Identifier of Single-Door Entities.
DOORS = 'Doors' # Identifier of Door-objects and sets (collections).
DOOR_SYMBOL = 'D' # Door identifier for resolving the string based map files.
ACCESS_DOOR_CELL = 1 / 3 # Access-door-Cell value used in observation
OPEN_DOOR_CELL = 2 / 3 # Open-door-Cell value used in observation
CLOSED_DOOR_CELL = 3 / 3 # Closed-door-Cell value used in observation
CLOSED_DOOR = 'closed' # Identifier to compare door-is-closed state
OPEN_DOOR = 'open' # Identifier to compare door-is-open state
# ACCESS_DOOR = 'access' # Identifier to compare access positions
# Adds the door action identifier to the base actions.
class Actions(BaseActions):
USE_DOOR = 'use_door' # Identifier of the use-door action.
# Reward values for the use-door action.
class RewardsDoor(NamedTuple):
USE_DOOR_VALID: float = -0.00 # Reward when a door was used successfully.
USE_DOOR_FAIL: float = -0.01 # Reward when no door could be used.
# Configuration of the door module.
class DoorProperties(NamedTuple):
indicate_door_area: bool = True # Whether the door area should be indicated in the agents' observation.

View File

@ -1,196 +0,0 @@
import time
from typing import List, Union, Dict
import random
import numpy as np
from environments.factory.additional.doors.doors_collections import Doors
from environments.factory.additional.doors.doors_util import DoorProperties, RewardsDoor, Constants, Actions
from environments.factory.base.base_factory import BaseFactory
from environments.factory.base.objects import Agent, Action
from environments.factory.base.registers import Entities
from environments import helpers as h
from environments.factory.base.renderer import RenderEntity
from environments.utility_classes import ObservationProperties
def softmax(x):
    """Return the softmax of the scores in ``x`` (normalised over all elements)."""
    # Shift by the maximum for numerical stability; the result is unaffected.
    shifted = x - np.max(x)
    exponentials = np.exp(shifted)
    return exponentials / exponentials.sum()
def entropy(x):
    """Return ``-sum(x * log(x + 1e-8))``; the offset keeps ``log`` finite for zeros."""
    weighted_logs = x * np.log(x + 1e-8)
    return -weighted_logs.sum()
c = Constants
a = Actions
# noinspection PyAttributeOutsideInit, PyAbstractClass
class DoorFactory(BaseFactory):
    """
    Factory environment extended by doors.

    Doors are parsed from the level (``c.DOOR_SYMBOL``), add the ``use_door`` action,
    are ticked once per step and contribute their own observation layer.
    """

    @property
    def actions_hook(self) -> Union[Action, List[Action]]:
        """Parent actions plus the use-door action."""
        super_actions = super().actions_hook
        super_actions.append(Action(str_ident=a.USE_DOOR))
        return super_actions

    @property
    def entities_hook(self) -> Dict[(str, Entities)]:
        """Parent entities plus a ``Doors`` collection if the level contains door symbols."""
        super_entities = super().entities_hook

        parsed_doors = h.one_hot_level(self._parsed_level, c.DOOR_SYMBOL)
        # Pad like the level so that door positions line up with the floor tiles.
        parsed_doors = np.pad(parsed_doors, self.obs_prop.pomdp_r, 'constant', constant_values=0)
        if np.any(parsed_doors):
            door_tiles = [self[c.FLOOR].by_pos(tuple(pos)) for pos in np.argwhere(parsed_doors == c.OCCUPIED_CELL)]
            doors = Doors.from_tiles(door_tiles, self._level_shape, indicate_area=self.obs_prop.indicate_door_area,
                                     entity_kwargs=dict()
                                     )
            super_entities.update(({c.DOORS: doors}))
        return super_entities

    def __init__(self, *args,
                 door_properties: DoorProperties = DoorProperties(), rewards_door: RewardsDoor = RewardsDoor(),
                 env_seed=None, **kwargs):
        """
        :param door_properties: ``DoorProperties`` or a dict with its fields.
        :param rewards_door: ``RewardsDoor`` or a dict with its fields.
        :param env_seed: Seed for the random generators; a time based seed is drawn
                         per instance when omitted.
        """
        # Draw the fallback seed at call time; a default of time.time_ns() in the
        # signature would be evaluated only once and shared by all instances.
        if env_seed is None:
            env_seed = time.time_ns()
        if isinstance(door_properties, dict):
            door_properties = DoorProperties(**door_properties)
        if isinstance(rewards_door, dict):
            rewards_door = RewardsDoor(**rewards_door)
        self.door_properties = door_properties
        self.rewards_door = rewards_door
        self._door_rng = np.random.default_rng(env_seed)
        self._doors: Doors
        kwargs.update(env_seed=env_seed)
        # TODO: Reset ---> document this
        super().__init__(*args, **kwargs)

    def render_assets_hook(self, mode='human'):
        """Parent render assets plus one ``RenderEntity`` per door (open or closed sprite)."""
        additional_assets = super().render_assets_hook()
        doors = []
        for i, door in enumerate(self[c.DOORS]):
            name, state = 'door_open' if door.is_open else 'door_closed', 'blank'
            doors.append(RenderEntity(name, door.pos, 1, 'none', state, i + 1))
        additional_assets.extend(doors)
        return additional_assets

    def step_hook(self) -> (List[dict], dict):
        """Run the parent step hook, then advance the auto-close timers of all doors."""
        super_reward_info = super().step_hook()
        # Step the door close interval
        # TODO: Maybe move this to self.post_step_hook? May collide with reward calculation.
        if doors := self[c.DOORS]:
            doors.tick_doors()
        return super_reward_info

    def do_additional_actions(self, agent: Agent, action: Action) -> (dict, dict):
        """Handle ``use_door`` if no parent class handled the action; ``None`` if nobody did."""
        action_result = super().do_additional_actions(agent, action)
        if action_result is not None:
            return action_result
        if action == a.USE_DOOR:
            return self.use_door_action(agent)
        return None

    def use_door_action(self, agent: Agent):
        """
        Let ``agent`` use a door next to its position.

        :return: ``(valid, reward)`` where ``reward`` is a dict with ``value``,
                 ``reason`` and ``info``.
        """
        # Check if agent really is standing on a door:
        door = self[c.DOORS].get_near_position(agent.pos)
        if door is not None:
            door.use()
            valid = c.VALID
            self.print(f'{agent.name} just used a {door.name} at {door.pos}')
            info_dict = {f'{agent.name}_door_use': 1, 'door_use': 1}
        # When he doesn't...
        else:
            valid = c.NOT_VALID
            info_dict = {f'{agent.name}_failed_door_use': 1, 'failed_door_use': 1}
            self.print(f'{agent.name} just tried to use a door at {agent.pos}, but there is none.')

        reward = dict(value=self.rewards_door.USE_DOOR_VALID if valid else self.rewards_door.USE_DOOR_FAIL,
                      reason=a.USE_DOOR, info=info_dict)

        return valid, reward

    def reset_hook(self) -> None:
        super().reset_hook()
        # There is nothing to reset.

    def check_additional_done(self) -> (bool, dict):
        """Doors never end an episode; the parent result is passed through."""
        super_done, super_dict = super().check_additional_done()
        return super_done, super_dict

    def observations_hook(self) -> Dict[str, np.typing.ArrayLike]:
        """Parent observations plus the door layer under ``c.DOORS``."""
        additional_observations = super().observations_hook()
        additional_observations.update({c.DOORS: self[c.DOORS].as_array()})
        return additional_observations

    def post_step_hook(self) -> List[Dict[str, int]]:
        super_post_step = super(DoorFactory, self).post_step_hook()
        return super_post_step
if __name__ == '__main__':
    # Small benchmark: run random agents through the door environment and report timings.
    from environments.utility_classes import AgentRenderOptions as aro
    render = True

    door_props = DoorProperties(
        indicate_door_area=True
    )

    obs_props = ObservationProperties(render_agents=aro.COMBINED, omit_agent_self=True,
                                      pomdp_r=2, additional_agent_placeholder=None, cast_shadows=True
                                      )

    move_props = {'allow_square_movement': True,
                  'allow_diagonal_movement': False,
                  'allow_no_op': False}
    import time
    global_timings = []
    for i in range(10):

        # The door settings belong to DoorFactory's `door_properties` argument.
        factory = DoorFactory(n_agents=10, done_at_collision=False,
                              level_name='rooms', max_steps=1000,
                              obs_prop=obs_props, parse_doors=True,
                              verbose=True,
                              mv_prop=move_props, door_properties=door_props,
                              # inject_agents=[TSPDirtAgent],
                              )

        # noinspection DuplicatedCode
        n_actions = factory.action_space.n - 1
        _ = factory.observation_space
        obs_space = factory.observation_space
        obs_space_named = factory.named_observation_space
        action_space_named = factory.named_action_space
        times = []
        for epoch in range(10):
            start_time = time.time()
            random_actions = [[random.randint(0, n_actions) for _
                               in range(factory.n_agents)] for _
                              in range(factory.max_steps+1)]
            env_state = factory.reset()
            if render:
                factory.render()
            # tsp_agent = factory.get_injected_agents()[0]

            rwrd = 0
            for agent_i_action in random_actions:
                # agent_i_action = tsp_agent.predict()
                env_state, step_rwrd, done_bool, info_obj = factory.step(agent_i_action)
                rwrd += step_rwrd
                if render:
                    factory.render()
                if done_bool:
                    break
            times.append(time.time() - start_time)
            # print(f'Factory run {epoch} done, reward is:\n    {r}')
        print('Mean Time Taken: ', sum(times) / len(times))
        global_timings.extend(times)
    print('Mean Time Taken: ', sum(global_timings) / len(global_timings))
    # The middle element is only the median of a sorted list.
    print('Median Time Taken: ', sorted(global_timings)[len(global_timings)//2])

View File

@ -1,41 +0,0 @@
from environments.factory.additional.btry.btry_objects import Battery, ChargePod
from environments.factory.base.registers import EnvObjectCollection, EntityCollection
class Batteries(EnvObjectCollection):
    """Collection of ``Battery`` objects, one spawned per agent."""

    _accepted_objects = Battery

    def __init__(self, *args, **kwargs):
        super().__init__(*args, individual_slices=True,
                         is_blocking_light=False, can_be_shadowed=False, **kwargs)
        self.is_observable = True

    def spawn_batteries(self, agents, initial_charge_level):
        """Create one battery per agent and add all of them to this collection."""
        new_batteries = []
        for agent in agents:
            new_batteries.append(self._accepted_objects(initial_charge_level, agent, self))
        self.add_additional_items(new_batteries)

    # Todo Move this to Mixin!
    def by_entity(self, entity):
        """Return the first battery that belongs to ``entity``, or None if there is none."""
        for battery in self:
            if battery.belongs_to_entity(entity):
                return battery
        return None

    def idx_by_entity(self, entity):
        """Return the iteration index of the first battery of ``entity``, or None."""
        for idx, battery in enumerate(self):
            if battery.belongs_to_entity(entity):
                return idx
        return None

    def as_array_by_entity(self, entity):
        """Return the entry of the internal array at the index of the battery of ``entity``."""
        # NOTE(review): when no battery matches, the index is None and is used as-is.
        battery_idx = self.idx_by_entity(entity)
        return self._array[battery_idx]
class ChargePods(EntityCollection):
    """Collection of ``ChargePod`` entities."""

    _accepted_objects = ChargePod
    _stateless_entities = True

    def __repr__(self):
        # The parent representation has to be returned: without the ``return`` this
        # method yields None and ``repr()`` raises a TypeError.
        return super(ChargePods, self).__repr__()

View File

@ -1,30 +0,0 @@
from typing import NamedTuple, Union
from environments.helpers import Constants as BaseConstants, EnvActions as BaseActions
class Constants(BaseConstants):
    """Constants of the battery environment, added on top of the base constants."""
    # Battery Env
    CHARGE_PODS = 'Charge_Pod'  # Key under which the charge pod collection is registered.
    BATTERIES = 'BATTERIES'  # Key under which the battery collection is registered.
    BATTERY_DISCHARGED = 'DISCHARGED'  # Reward reason / info key suffix for an empty battery.
    CHARGE_POD = 1  # presumably the observation encoding of a charge pod -- TODO confirm
class Actions(BaseActions):
    """Action identifiers of the battery environment, added on top of the base actions."""
    CHARGE = 'do_charge_action'  # Identifier of the action that charges a battery.
class RewardsBtry(NamedTuple):
    """Reward values handed out by the battery environment."""
    CHARGE_VALID: float = 0.1  # Reward for a successful charge action.
    CHARGE_FAIL: float = -0.1  # Reward for a failed charge action.
    BATTERY_DISCHARGED: float = -1.0  # Reward for an agent whose battery is discharged.
class BatteryProperties(NamedTuple):
    """Settings of the battery environment."""
    initial_charge: float = 0.8  # Charge level every battery is spawned with.
    charge_rate: float = 0.4  # Handed to every charge pod as ``charge_rate``.
    charge_locations: int = 20  # Upper bound for the number of charge pods placed on empty tiles.
    per_action_costs: Union[dict, float] = 0.02  # Energy drained per step: a float, or a dict keyed by action.
    done_when_discharged: bool = False  # If True, the episode ends once any battery is discharged.
    multi_charge: bool = False  # Handed to every charge pod as ``multi_charge``.

View File

@ -1,139 +0,0 @@
from typing import Dict, List
import numpy as np
from environments.factory.additional.btry.btry_collections import Batteries, ChargePods
from environments.factory.additional.btry.btry_util import Constants, Actions, RewardsBtry, BatteryProperties
from environments.factory.base.base_factory import BaseFactory
from environments.factory.base.objects import Agent, Action
from environments.factory.base.renderer import RenderEntity
# Short aliases for the battery constants and action identifiers used below.
c = Constants
a = Actions
class BatteryFactory(BaseFactory):
    """Factory extension that gives every agent a battery and places charge pods.

    Batteries are drained in every step and can be refilled with the ``CHARGE`` action
    while the agent stands on a charge pod.
    """

    def __init__(self, *args, btry_prop=BatteryProperties(), rewards_btry: RewardsBtry = RewardsBtry(),
                 **kwargs):
        """
        :param btry_prop: Battery settings; a dict is converted to ``BatteryProperties``.
        :param rewards_btry: Reward settings; a dict is converted to ``RewardsBtry``.
        """
        if isinstance(btry_prop, dict):
            btry_prop = BatteryProperties(**btry_prop)
        if isinstance(rewards_btry, dict):
            rewards_btry = RewardsBtry(**rewards_btry)
        self.btry_prop = btry_prop
        # Kept under a battery-specific name. The former name ``rewards_dest`` is also assigned
        # by DestFactory, which replaced the battery rewards whenever both classes were combined.
        self.rewards_btry = rewards_btry
        super().__init__(*args, **kwargs)

    def per_agent_raw_observations_hook(self, agent) -> Dict[str, np.typing.ArrayLike]:
        """Add the battery observation of ``agent`` to the parent's raw observations."""
        additional_raw_observations = super().per_agent_raw_observations_hook(agent)
        additional_raw_observations.update({c.BATTERIES: self[c.BATTERIES].as_array_by_entity(agent)})
        return additional_raw_observations

    def observations_hook(self) -> Dict[str, np.typing.ArrayLike]:
        """Add the charge pod layer to the parent's observations."""
        additional_observations = super().observations_hook()
        additional_observations.update({c.CHARGE_PODS: self[c.CHARGE_PODS].as_array()})
        return additional_observations

    @property
    def entities_hook(self):
        """Create charge pods and batteries and register them next to the parent's entities."""
        super_entities = super().entities_hook

        # At most ``charge_locations`` empty tiles receive a charge pod.
        empty_tiles = self[c.FLOOR].empty_tiles[:self.btry_prop.charge_locations]
        charge_pods = ChargePods.from_tiles(
            empty_tiles, self._level_shape,
            entity_kwargs=dict(charge_rate=self.btry_prop.charge_rate,
                               multi_charge=self.btry_prop.multi_charge)
        )

        # Without a partial view the level shape is used, otherwise the square agent view.
        batteries = Batteries(self._level_shape if not self._pomdp_r else ((self.pomdp_diameter,) * 2),
                              )
        batteries.spawn_batteries(self[c.AGENT], self.btry_prop.initial_charge)
        super_entities.update({c.BATTERIES: batteries, c.CHARGE_PODS: charge_pods})
        return super_entities

    def step_hook(self) -> (List[dict], dict):
        """Drain the battery of every agent and return the parent's step result."""
        super_reward_info = super(BatteryFactory, self).step_hook()

        # Decharge
        batteries = self[c.BATTERIES]
        costs = self.btry_prop.per_action_costs
        for agent in self[c.AGENT]:
            # Costs are either defined per action or as one value for all actions.
            energy_consumption = costs[agent.temp_action] if isinstance(costs, dict) else costs
            batteries.by_entity(agent).decharge(energy_consumption)

        return super_reward_info

    def do_charge_action(self, agent) -> (dict, dict):
        """Charge the battery of ``agent`` if it stands on a charge pod.

        :return: Tuple of the validity flag and a reward dict (``value``, ``reason``, ``info``).
        """
        if charge_pod := self[c.CHARGE_PODS].by_pos(agent.pos):
            valid = charge_pod.charge_battery(self[c.BATTERIES].by_entity(agent))
            if valid:
                info_dict = {f'{agent.name}_{a.CHARGE}_VALID': 1}
                self.print(f'{agent.name} just charged batteries at {charge_pod.name}.')
            else:
                info_dict = {f'{agent.name}_{a.CHARGE}_FAIL': 1}
                self.print(f'{agent.name} failed to charged batteries at {charge_pod.name}.')
        else:
            valid = c.NOT_VALID
            info_dict = {f'{agent.name}_{a.CHARGE}_FAIL': 1}
            # info_dict = {f'{agent.name}_no_charger': 1}
            self.print(f'{agent.name} failed to charged batteries at {agent.pos}.')
        reward = dict(value=self.rewards_btry.CHARGE_VALID if valid else self.rewards_btry.CHARGE_FAIL,
                      reason=a.CHARGE, info=info_dict)
        return valid, reward

    def do_additional_actions(self, agent: Agent, action: Action) -> (bool, dict):
        """Resolve the charge action if no parent class handled ``action``.

        :return: The parent's result when it is not None; otherwise the result of
                 ``do_charge_action`` for ``CHARGE`` and None for any other action.
        """
        action_result = super().do_additional_actions(agent, action)
        if action_result is not None:
            return action_result
        if action == a.CHARGE:
            return self.do_charge_action(agent)
        return None

    def reset_hook(self) -> (List[dict], dict):
        """Delegate to the parent's reset hook and pass its result on."""
        super_reward_info = super(BatteryFactory, self).reset_hook()
        # There is Nothing to reset.
        return super_reward_info

    def check_additional_done(self) -> (bool, dict):
        """Signal done when the parent does, or (if configured) when any battery is discharged.

        :return: Tuple of the done flag and the info dict; ``DISCHARGE_DONE`` is added to the
                 info dict when a discharged battery ends the episode.
        """
        super_done, super_dict = super(BatteryFactory, self).check_additional_done()
        if super_done or not self.btry_prop.done_when_discharged:
            return super_done, super_dict
        if btry_done := any(battery.is_discharged for battery in self[c.BATTERIES]):
            super_dict.update(DISCHARGE_DONE=1)
            return btry_done, super_dict
        return super_done, super_dict

    def per_agent_reward_hook(self, agent: Agent) -> List[dict]:
        """Append the discharge reward event if the battery of ``agent`` is discharged."""
        reward_event_list = super(BatteryFactory, self).per_agent_reward_hook(agent)
        if self[c.BATTERIES].by_entity(agent).is_discharged:
            self.print(f'{agent.name} Battery is discharged!')
            info_dict = {f'{agent.name}_{c.BATTERY_DISCHARGED}': 1}
            reward_event_list.append({'value': self.rewards_btry.BATTERY_DISCHARGED,
                                      'reason': c.BATTERY_DISCHARGED,
                                      'info': info_dict}
                                     )
        return reward_event_list

    def render_assets_hook(self):
        """Add one render entity per charge pod to the parent's render assets."""
        # noinspection PyUnresolvedReferences
        additional_assets = super().render_assets_hook()
        charge_pods = [RenderEntity(c.CHARGE_PODS, charge_pod.tile.pos) for charge_pod in self[c.CHARGE_PODS]]
        additional_assets.extend(charge_pods)
        return additional_assets

View File

@ -1,82 +0,0 @@
import random
# noinspection PyAbstractClass
from environments.factory.additional.btry.btry_util import BatteryProperties
from environments.factory.additional.btry.factory_battery import BatteryFactory
from environments.factory.additional.dest.factory_dest import DestFactory
from environments.factory.additional.dirt.dirt_util import DirtProperties
from environments.factory.additional.dirt.factory_dirt import DirtFactory
from environments.factory.additional.doors.factory_doors import DoorFactory
from environments.factory.additional.item.factory_item import ItemFactory
# noinspection PyAbstractClass
class DoorDirtFactory(DoorFactory, DirtFactory):
    """Environment combining doors and dirt; DoorFactory precedes DirtFactory in the MRO."""

    def __init__(self, *args, **kwargs):
        # No own state: all arguments are forwarded along the MRO.
        super().__init__(*args, **kwargs)
# noinspection PyAbstractClass
class DirtItemFactory(ItemFactory, DirtFactory):
    """Environment combining items and dirt; ItemFactory precedes DirtFactory in the MRO."""

    def __init__(self, *args, **kwargs):
        # No own state: all arguments are forwarded along the MRO.
        super().__init__(*args, **kwargs)
# noinspection PyAbstractClass
class DirtBatteryFactory(DirtFactory, BatteryFactory):
    """Environment combining dirt and batteries; DirtFactory precedes BatteryFactory in the MRO."""

    def __init__(self, *args, **kwargs):
        # No own state: all arguments are forwarded along the MRO.
        super().__init__(*args, **kwargs)
# noinspection PyAbstractClass
class DirtDestItemFactory(ItemFactory, DirtFactory, DestFactory):
    """Environment combining items, dirt and destinations (MRO in exactly that order)."""

    def __init__(self, *args, **kwargs):
        # No own state: all arguments are forwarded along the MRO.
        super().__init__(*args, **kwargs)
# noinspection PyAbstractClass
class DestBatteryFactory(BatteryFactory, DestFactory):
    """Environment combining batteries and destinations; BatteryFactory precedes DestFactory in the MRO."""

    def __init__(self, *args, **kwargs):
        # No own state: all arguments are forwarded along the MRO.
        super().__init__(*args, **kwargs)
if __name__ == '__main__':
    # Manual smoke test: random agents acting in the combined door/dirt environment.
    from environments.utility_classes import AgentRenderOptions as ARO, ObservationProperties

    render = True

    obs_props = ObservationProperties(render_agents=ARO.COMBINED, omit_agent_self=True,
                                      pomdp_r=2, additional_agent_placeholder=None)

    move_props = dict(allow_square_movement=True,
                      allow_diagonal_movement=False,
                      allow_no_op=False)

    factory = DoorDirtFactory(n_agents=10, done_at_collision=False,
                              level_name='rooms', max_steps=400,
                              obs_prop=obs_props, parse_doors=True,
                              record_episodes=True, verbose=True,
                              dirt_prop=DirtProperties(),
                              mv_prop=move_props)

    # noinspection DuplicatedCode
    n_actions = factory.action_space.n - 1
    _ = factory.observation_space

    for epoch in range(4):
        # Draw all actions up front: one row per step, one random action per agent.
        random_actions = []
        for _step in range(factory.max_steps + 1):
            random_actions.append([random.randint(0, n_actions) for _agent in range(factory.n_agents)])
        env_state = factory.reset()
        episode_reward = 0
        for agent_i_action in random_actions:
            env_state, step_r, done_bool, info_obj = factory.step(agent_i_action)
            episode_reward += step_r
            if render:
                factory.render()
            if done_bool:
                break
        print(f'Factory run {epoch} done, reward is:\n    {episode_reward}')

View File

@ -1,38 +0,0 @@
from environments.factory.base.registers import EntityCollection
from environments.factory.additional.dest.dest_util import Constants as c
from environments.factory.additional.dest.dest_enitites import Destination
class Destinations(EntityCollection):
    """Collection of ``Destination`` entities that neither block light nor get shadowed."""

    _accepted_objects = Destination

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.is_blocking_light = False
        self.can_be_shadowed = False

    def as_array(self):
        """Rebuild and return the internal array from the positions of all destinations."""
        # Start from an empty layer, then write the encoding of every placed destination.
        self._array[:] = c.FREE_CELL
        # ToDo: Switch to new Style Array Put
        # indices = list(zip(range(len(cls)), *zip(*[x.pos for x in cls])))
        # np.put(cls._array, [np.ravel_multi_index(x, cls._array.shape) for x in indices], cls.encodings)
        placed = (dest for dest in self if dest.pos != c.NO_POS)
        for dest in placed:
            self._array[0, dest.x, dest.y] = dest.encoding
        return self._array

    def __repr__(self):
        return super().__repr__()
class ReachedDestinations(Destinations):
    """Collection for destinations that have been moved out of the active ``Destinations``."""

    _accepted_objects = Destination

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Same flag values as in the parent class, assigned again explicitly.
        self.can_be_shadowed = False
        self.is_blocking_light = False

    def __repr__(self):
        return super().__repr__()

View File

@ -1,41 +0,0 @@
from typing import NamedTuple
from environments.helpers import Constants as BaseConstants, EnvActions as BaseActions
class Constants(BaseConstants):
    """Constants of the destination environment, added on top of the base constants."""
    # Destination Env
    DEST = 'Destination'  # Key under which the active destinations are registered.
    DESTINATION = 1  # presumably the observation encoding of a destination -- TODO confirm
    DESTINATION_DONE = 0.5  # presumably the encoding of a reached destination -- TODO confirm
    DEST_REACHED = 'ReachedDestination'  # Key of the reached destinations; also used as reward reason.
class Actions(BaseActions):
    """Action identifiers of the destination environment, added on top of the base actions."""
    WAIT_ON_DEST = 'WAIT'  # Identifier of the action an agent uses to wait on a destination.
class RewardsDest(NamedTuple):
    """Reward values handed out by the destination environment."""
    WAIT_VALID: float = 0.1  # Reward for a valid wait action.
    WAIT_FAIL: float = -0.1  # Reward for a failed wait action.
    DEST_REACHED: float = 5.0  # Reward for an agent standing on a reached destination.
class DestModeOptions(object):
    """Names of the available destination spawn modes."""
    DONE = 'DONE'
    GROUPED = 'GROUPED'
    PER_DEST = 'PER_DEST'


class _DestPropertiesFields(NamedTuple):
    """Field definitions of ``DestProperties``; kept separate so the public class can validate."""
    n_dests: int = 1  # How many destinations are there
    dwell_time: int = 0  # How long does the agent need to "wait" on a destination
    spawn_frequency: int = 0
    spawn_in_other_zone: bool = True  #
    spawn_mode: str = DestModeOptions.DONE


class DestProperties(_DestPropertiesFields):
    """Validated settings of the destination environment.

    The checks used to be ``assert`` statements in the NamedTuple class body. There they only
    saw the default values once, at class creation, and never the values of an instance.
    ``typing.NamedTuple`` classes may not define ``__new__`` themselves, therefore the
    validation lives in this thin subclass.

    :raises ValueError: If a count or time is negative, or if ``spawn_mode`` and
                        ``spawn_frequency`` contradict each other.
    """
    __slots__ = ()

    def __new__(cls, *args, **kwargs):
        props = super().__new__(cls, *args, **kwargs)
        if props.dwell_time < 0:
            raise ValueError('dwell_time cannot be < 0!')
        if props.spawn_frequency < 0:
            raise ValueError('spawn_frequency cannot be < 0!')
        if props.n_dests < 0:
            raise ValueError('n_destinations cannot be < 0!')
        # Mode DONE excludes a spawn frequency, every other mode requires one.
        if (props.spawn_mode == DestModeOptions.DONE) == bool(props.spawn_frequency):
            raise ValueError('spawn_frequency must be 0 for spawn_mode DONE and > 0 for any other mode!')
        return props

View File

@ -1,203 +0,0 @@
import time
from enum import Enum
from typing import List, Union, Dict
import numpy as np
import random
from environments.factory.additional.dest.dest_collections import Destinations, ReachedDestinations
from environments.factory.additional.dest.dest_enitites import Destination
from environments.factory.additional.dest.dest_util import Constants, Actions, RewardsDest, DestModeOptions, \
DestProperties
from environments.factory.base.base_factory import BaseFactory
from environments.factory.base.objects import Agent, Action
from environments.factory.base.registers import Entities
from environments.factory.base.renderer import RenderEntity
# Short aliases for the destination constants and action identifiers used below.
c = Constants
a = Actions
# noinspection PyAttributeOutsideInit, PyAbstractClass
class DestFactory(BaseFactory):
    """Factory extension that places destinations agents have to reach (and optionally wait on)."""

    # noinspection PyMissingConstructor
    def __init__(self, *args, dest_prop: DestProperties = DestProperties(), rewards_dest: RewardsDest = RewardsDest(),
                 env_seed=None, **kwargs):
        """
        :param dest_prop: Destination settings; a dict is converted to ``DestProperties``.
        :param rewards_dest: Reward settings; a dict is converted to ``RewardsDest``.
        :param env_seed: Seed handed to the parent classes and used for the destination rng.
                         If None, a seed is derived from the current time.
        """
        if isinstance(dest_prop, dict):
            dest_prop = DestProperties(**dest_prop)
        if isinstance(rewards_dest, dict):
            rewards_dest = RewardsDest(**rewards_dest)
        if env_seed is None:
            # Resolve the fallback per instance. A ``time.time_ns()`` default in the signature is
            # evaluated only once, when the function is defined, and would be shared by all
            # instances created without an explicit seed.
            env_seed = time.time_ns()
        self.dest_prop = dest_prop
        self.rewards_dest = rewards_dest
        kwargs.update(env_seed=env_seed)
        self._dest_rng = np.random.default_rng(env_seed)
        super().__init__(*args, **kwargs)

    @property
    def actions_hook(self) -> Union[Action, List[Action]]:
        """Add the wait action to the parent's actions if destinations require dwelling."""
        # noinspection PyUnresolvedReferences
        super_actions = super().actions_hook
        # If targets are considers reached after some time, agents need an action for that.
        if self.dest_prop.dwell_time:
            super_actions.append(Action(enum_ident=a.WAIT_ON_DEST))
        return super_actions

    @property
    def entities_hook(self) -> Dict[(Enum, Entities)]:
        """Create the destination collections and register them next to the parent's entities."""
        # noinspection PyUnresolvedReferences
        super_entities = super().entities_hook

        empty_tiles = self[c.FLOOR].empty_tiles[:self.dest_prop.n_dests]
        destinations = Destinations.from_tiles(
            empty_tiles, self._level_shape,
            entity_kwargs=dict(
                dwell_time=self.dest_prop.dwell_time)
        )
        reached_destinations = ReachedDestinations(level_shape=self._level_shape)

        super_entities.update({c.DEST: destinations, c.DEST_REACHED: reached_destinations})
        return super_entities

    def do_wait_action(self, agent: Agent) -> (dict, dict):
        """Let ``agent`` wait on the destination at its position, if there is one.

        :return: Tuple of the validity flag and a reward dict (``value``, ``reason``, ``info``).
        """
        if destination := self[c.DEST].by_pos(agent.pos):
            valid = destination.do_wait_action(agent)
            self.print(f'{agent.name} just waited at {agent.pos}')
            info_dict = {f'{agent.name}_{a.WAIT_ON_DEST}_VALID': 1}
        else:
            valid = c.NOT_VALID
            self.print(f'{agent.name} just tried to do_wait_action do_wait_action at {agent.pos} but failed')
            info_dict = {f'{agent.name}_{a.WAIT_ON_DEST}_FAIL': 1}
        reward = dict(value=self.rewards_dest.WAIT_VALID if valid else self.rewards_dest.WAIT_FAIL,
                      reason=a.WAIT_ON_DEST, info=info_dict)
        return valid, reward

    def do_additional_actions(self, agent: Agent, action: Action) -> (dict, dict):
        """Resolve the wait action if no parent class handled ``action``.

        :return: The parent's result when it is not None; otherwise the result of
                 ``do_wait_action`` for ``WAIT_ON_DEST`` and None for any other action.
        """
        # noinspection PyUnresolvedReferences
        super_action_result = super().do_additional_actions(agent, action)
        if super_action_result is not None:
            return super_action_result
        if action == a.WAIT_ON_DEST:
            return self.do_wait_action(agent)
        return None

    def reset_hook(self) -> None:
        """Reset the parent and start with an empty destination spawn timer."""
        # noinspection PyUnresolvedReferences
        super().reset_hook()
        # Maps the name of a reached destination to the steps passed since it was reached.
        self._dest_spawn_timer = dict()

    def trigger_destination_spawn(self):
        """Spawn new destinations for all timers that reached ``spawn_frequency``."""
        destinations_to_spawn = [key for key, val in self._dest_spawn_timer.items()
                                 if val == self.dest_prop.spawn_frequency]
        if not destinations_to_spawn:
            self.print('No Items are spawning, limit is reached.')
            return

        n_dest_to_spawn = len(destinations_to_spawn)
        grouped = self.dest_prop.spawn_mode == DestModeOptions.GROUPED
        # In GROUPED mode nothing spawns before all destinations are due at once.
        if grouped and n_dest_to_spawn != self.dest_prop.n_dests:
            self.print(f'{n_dest_to_spawn} new destinations could be spawned, but waiting for all.')
            return

        destinations = [Destination(tile, self[c.DEST]) for tile in self[c.FLOOR].empty_tiles[:n_dest_to_spawn]]
        self[c.DEST].add_additional_items(destinations)
        for dest in destinations_to_spawn:
            del self._dest_spawn_timer[dest]
        self.print(f'{n_dest_to_spawn} new destinations have been spawned')

    def step_hook(self) -> (List[dict], dict):
        """Advance spawn timers, retire reached destinations and trigger new spawns."""
        # noinspection PyUnresolvedReferences
        super_reward_info = super().step_hook()
        # Timers count upwards and are capped at the spawn frequency.
        for key, val in self._dest_spawn_timer.items():
            self._dest_spawn_timer[key] = min(self.dest_prop.spawn_frequency, val + 1)

        # Iterate over a copy, since reached destinations leave the collection in the loop.
        for dest in list(self[c.DEST].values()):
            if dest.is_considered_reached:
                dest.change_parent_collection(self[c.DEST_REACHED])
                self._dest_spawn_timer[dest.name] = 0
                self.print(f'{dest.name} is reached now, removing...')
            else:
                for agent_name in dest.currently_dwelling_names:
                    agent = self[c.AGENT].by_name(agent_name)
                    if agent.pos == dest.pos:
                        self.print(f'{agent.name} is still waiting.')
                    else:
                        dest.leave(agent)
                        self.print(f'{agent.name} left the destination early.')
        self.trigger_destination_spawn()
        return super_reward_info

    def observations_hook(self) -> Dict[str, np.typing.ArrayLike]:
        """Add the destination layer to the parent's observations."""
        additional_observations = super().observations_hook()
        additional_observations.update({c.DEST: self[c.DEST].as_array()})
        return additional_observations

    def per_agent_reward_hook(self, agent: Agent) -> List[dict]:
        """Reward ``agent`` for every reached destination at its position and delete it."""
        # noinspection PyUnresolvedReferences
        reward_event_list = super().per_agent_reward_hook(agent)
        # Iterate over a copy, since rewarded destinations are deleted in the loop.
        for reached_dest in list(self[c.DEST_REACHED]):
            if agent.pos == reached_dest.pos:
                self.print(f'{agent.name} just reached destination at {agent.pos}')
                self[c.DEST_REACHED].delete_env_object(reached_dest)
                info_dict = {f'{agent.name}_{c.DEST_REACHED}': 1}
                reward_event_list.append({'value': self.rewards_dest.DEST_REACHED,
                                          'reason': c.DEST_REACHED,
                                          'info': info_dict})
        return reward_event_list

    def render_assets_hook(self, mode='human'):
        """Add one render entity per active destination to the parent's render assets."""
        # noinspection PyUnresolvedReferences
        additional_assets = super().render_assets_hook()
        destinations = [RenderEntity(c.DEST, dest.pos) for dest in self[c.DEST]]
        additional_assets.extend(destinations)
        return additional_assets
if __name__ == '__main__':
    # Manual smoke test: random agents acting in a destination environment.
    from environments.utility_classes import AgentRenderOptions as aro, ObservationProperties

    render = True

    dest_probs = DestProperties(n_dests=2, spawn_frequency=5, spawn_mode=DestModeOptions.GROUPED)

    obs_props = ObservationProperties(render_agents=aro.LEVEL, omit_agent_self=True, pomdp_r=2)

    move_props = dict(allow_square_movement=True,
                      allow_diagonal_movement=False,
                      allow_no_op=False)

    factory = DestFactory(n_agents=10, done_at_collision=False,
                          level_name='rooms', max_steps=400,
                          obs_prop=obs_props, parse_doors=True,
                          verbose=True,
                          mv_prop=move_props, dest_prop=dest_probs
                          )

    # noinspection DuplicatedCode
    n_actions = factory.action_space.n - 1
    _ = factory.observation_space

    for epoch in range(4):
        # Draw all actions up front: one row per step, one random action per agent.
        random_actions = []
        for _step in range(factory.max_steps + 1):
            random_actions.append([random.randint(0, n_actions) for _agent in range(factory.n_agents)])
        env_state = factory.reset()
        episode_reward = 0
        for agent_i_action in random_actions:
            env_state, step_r, done_bool, info_obj = factory.step(agent_i_action)
            episode_reward += step_r
            if render:
                factory.render()
            if done_bool:
                break
        print(f'Factory run {epoch} done, reward is:\n    {episode_reward}')

View File

@ -1,44 +0,0 @@
from environments.factory.additional.dirt.dirt_entity import DirtPile
from environments.factory.additional.dirt.dirt_util import DirtProperties
from environments.factory.base.objects import Floor
from environments.factory.base.registers import EntityCollection
from environments.factory.additional.dirt.dirt_util import Constants as c
class DirtPiles(EntityCollection):
    """Collection of ``DirtPile`` entities that also knows the dirt settings."""

    _accepted_objects = DirtPile

    def __init__(self, dirt_properties, *args):
        super().__init__(*args)
        self._dirt_properties: DirtProperties = dirt_properties

    @property
    def amount(self):
        """Summed amount of all dirt piles in this collection."""
        return sum(pile.amount for pile in self)

    @property
    def dirt_properties(self):
        return self._dirt_properties

    def spawn_dirt(self, then_dirty_tiles) -> bool:
        """Create or grow dirt on the given tile(s).

        :param then_dirty_tiles: A single ``Floor`` tile or an iterable of tiles.
        :return: ``c.NOT_VALID`` as soon as the global amount exceeds its limit or a tile
                 without dirt has more than one guest; ``c.VALID`` otherwise.
        """
        tiles = [then_dirty_tiles] if isinstance(then_dirty_tiles, Floor) else then_dirty_tiles
        props = self.dirt_properties
        for tile in tiles:
            if self.amount > props.max_global_amount:
                return c.NOT_VALID
            existing_pile = self.by_pos(tile.pos)
            if existing_pile is not None:
                # Grow the existing pile, capped at the local maximum.
                grown_amount = existing_pile.amount + props.max_spawn_amount
                existing_pile.set_new_amount(min(grown_amount, props.max_local_amount))
                continue
            if len(tile.guests) > 1:
                return c.NOT_VALID
            self.add_item(DirtPile(tile, self, amount=props.max_spawn_amount))
        return c.VALID

    def __repr__(self):
        base_repr = super().__repr__()
        # Replace the last character of the parent representation by the total amount.
        return f'{base_repr[:-1]}, {self.amount})'

View File

@ -1,26 +0,0 @@
from environments.factory.base.objects import Entity
class DirtPile(Entity):
    """Entity representing an amount of dirt."""

    def __init__(self, *args, amount=None, **kwargs):
        super().__init__(*args, **kwargs)
        self._amount = amount

    @property
    def amount(self):
        """Current amount of dirt of this pile."""
        return self._amount

    @property
    def encoding(self):
        # Edit this if you want items to be drawn in the observations differently.
        return self._amount

    def set_new_amount(self, amount):
        """Store ``amount`` and notify the owning collection about the changed value."""
        self._amount = amount
        self._collection.notify_change_to_value(self)

    def summarize_state(self):
        """Return the parent's state summary extended by the amount as float."""
        summary = super().summarize_state()
        summary.update(amount=float(self.amount))
        return summary

View File

@ -1,30 +0,0 @@
from typing import NamedTuple
from environments.helpers import Constants as BaseConstants, EnvActions as BaseActions
class Constants(BaseConstants):
    """Constants of the dirt environment, added on top of the base constants."""
    DIRT = 'DirtPile'  # Key under which the dirt pile collection is registered.
class Actions(BaseActions):
    """Action identifiers of the dirt environment, added on top of the base actions."""
    CLEAN_UP = 'do_cleanup_action'  # Identifier of the action that cleans up dirt.
class RewardsDirt(NamedTuple):
    """Reward values handed out by the dirt environment."""
    CLEAN_UP_VALID: float = 0.5  # Reward for a clean-up action on a dirty tile.
    CLEAN_UP_FAIL: float = -0.1  # Reward for a clean-up action on a tile without dirt.
    CLEAN_UP_LAST_PIECE: float = 4.5  # Bonus on top when the last dirt was removed and done_when_clean is set.
class DirtProperties(NamedTuple):
    """Settings of the dirt environment."""
    initial_dirt_ratio: float = 0.3  # On init: maximum ratio of tiles that receive dirt.
    initial_dirt_spawn_r_var: float = 0.05  # How much the initial dirt ratio varies.
    clean_amount: float = 1  # How much dirt the robot cleans with one action.
    max_spawn_ratio: float = 0.20  # Maximum ratio of tiles that receive dirt per spawn.
    max_spawn_amount: float = 0.3  # How much dirt spawns per tile at most.
    spawn_frequency: int = 0  # Spawn frequency in steps.
    max_local_amount: int = 2  # Maximum dirt amount per tile.
    max_global_amount: int = 20  # Maximum dirt amount in the whole environment.
    dirt_smear_amount: float = 0.2  # Agents smear dirt when moving off a dirty tile.
    done_when_clean: bool = True  # If True, the episode ends once no dirt is left.

View File

@ -1,252 +0,0 @@
import time
from pathlib import Path
from typing import List, Union, Dict
import random
import numpy as np
from environments.factory.additional.dirt.dirt_collections import DirtPiles
from environments.factory.additional.dirt.dirt_entity import DirtPile
from environments.factory.additional.dirt.dirt_util import Constants, Actions, RewardsDirt, DirtProperties
from environments.factory.base.base_factory import BaseFactory
from environments.factory.base.objects import Agent, Action
from environments.factory.base.registers import Entities
from environments.factory.base.renderer import RenderEntity
from environments.utility_classes import ObservationProperties
def softmax(x):
    """Return the softmax of ``x``, normalised over all of its elements.

    The maximum is subtracted before exponentiation to avoid overflow.
    """
    shifted = x - np.max(x)
    exponentials = np.exp(shifted)
    normaliser = exponentials.sum()
    return exponentials / normaliser
def entropy(x):
    """Return ``-sum(x * log(x + 1e-8))`` over all elements of ``x``.

    The small offset keeps the logarithm finite for entries equal to zero.
    """
    log_terms = np.log(x + 1e-8)
    weighted = x * log_terms
    return -weighted.sum()
# Short aliases for the dirt constants and action identifiers used below.
c = Constants
a = Actions
# noinspection PyAttributeOutsideInit, PyAbstractClass
class DirtFactory(BaseFactory):
    """Factory extension that spawns dirt piles which agents can clean up."""

    @property
    def actions_hook(self) -> Union[Action, List[Action]]:
        """Add the clean-up action to the parent's actions."""
        super_actions = super().actions_hook
        super_actions.append(Action(str_ident=a.CLEAN_UP))
        return super_actions

    @property
    def entities_hook(self) -> Dict[(str, Entities)]:
        """Create the dirt pile collection and register it next to the parent's entities."""
        super_entities = super().entities_hook
        dirt_register = DirtPiles(self.dirt_prop, self._level_shape)
        super_entities.update({c.DIRT: dirt_register})
        return super_entities

    def __init__(self, *args,
                 dirt_prop: DirtProperties = DirtProperties(), rewards_dirt: RewardsDirt = RewardsDirt(),
                 env_seed=None, **kwargs):
        """
        :param dirt_prop: Dirt settings; a dict is converted to ``DirtProperties``.
        :param rewards_dirt: Reward settings; a dict is converted to ``RewardsDirt``.
        :param env_seed: Seed handed to the parent classes and used for the dirt rng.
                         If None, a seed is derived from the current time.
        """
        if isinstance(dirt_prop, dict):
            dirt_prop = DirtProperties(**dirt_prop)
        if isinstance(rewards_dirt, dict):
            rewards_dirt = RewardsDirt(**rewards_dirt)
        if env_seed is None:
            # Resolve the fallback per instance. A ``time.time_ns()`` default in the signature is
            # evaluated only once, when the function is defined, and would be shared by all
            # instances created without an explicit seed.
            env_seed = time.time_ns()
        self.dirt_prop = dirt_prop
        self.rewards_dirt = rewards_dirt
        self._dirt_rng = np.random.default_rng(env_seed)
        self._dirt: DirtPiles
        kwargs.update(env_seed=env_seed)
        # TODO: Reset ---> document this
        super().__init__(*args, **kwargs)

    def render_assets_hook(self, mode='human'):
        """Add one scaled render entity per dirt pile to the parent's render assets."""
        additional_assets = super().render_assets_hook()
        # The render scale grows with the dirt amount and is capped at 1.5.
        dirt = [RenderEntity('dirt', dirt.tile.pos, min(0.15 + dirt.amount, 1.5), 'scale')
                for dirt in self[c.DIRT]]
        additional_assets.extend(dirt)
        return additional_assets

    def do_cleanup_action(self, agent: Agent) -> (dict, dict):
        """Remove ``clean_amount`` of dirt at the position of ``agent``.

        :return: Tuple of the validity flag and a reward dict (``value``, ``reason``, ``info``).
        """
        if dirt := self[c.DIRT].by_pos(agent.pos):
            new_dirt_amount = dirt.amount - self.dirt_prop.clean_amount

            if new_dirt_amount <= 0:
                self[c.DIRT].delete_env_object(dirt)
            else:
                dirt.set_new_amount(max(new_dirt_amount, c.FREE_CELL.value))
            valid = c.VALID
            self.print(f'{agent.name} did just clean up some dirt at {agent.pos}.')
            info_dict = {f'{agent.name}_{a.CLEAN_UP}_VALID': 1, 'cleanup_valid': 1}
            reward = self.rewards_dirt.CLEAN_UP_VALID
        else:
            valid = c.NOT_VALID
            self.print(f'{agent.name} just tried to clean up some dirt at {agent.pos}, but failed.')
            info_dict = {f'{agent.name}_{a.CLEAN_UP}_FAIL': 1, 'cleanup_fail': 1}
            reward = self.rewards_dirt.CLEAN_UP_FAIL

        if valid and self.dirt_prop.done_when_clean and (len(self[c.DIRT]) == 0):
            reward += self.rewards_dirt.CLEAN_UP_LAST_PIECE
            self.print(f'{agent.name} picked up the last piece of dirt!')
            # NOTE(review): this replaces the info dict built above, so the ``cleanup_valid``
            # entry is not reported for the final clean-up; confirm that this is intended.
            info_dict = {f'{agent.name}_{a.CLEAN_UP}_LAST_PIECE': 1}
        return valid, dict(value=reward, reason=a.CLEAN_UP, info=info_dict)

    def trigger_dirt_spawn(self, initial_spawn=False):
        """Spawn dirt on a random share of the tiles that are empty or only hold dirt.

        :param initial_spawn: If True, the initial ratio (plus variance) is used instead of
                              a ratio drawn between 0 and ``max_spawn_ratio``.
        """
        dirt_rng = self._dirt_rng
        free_for_dirt = [x for x in self[c.FLOOR]
                         if len(x.guests) == 0 or (len(x.guests) == 1 and isinstance(next(y for y in x.guests), DirtPile))
                         ]
        self._dirt_rng.shuffle(free_for_dirt)
        if initial_spawn:
            var = self.dirt_prop.initial_dirt_spawn_r_var
            new_spawn = self.dirt_prop.initial_dirt_ratio + dirt_rng.uniform(-var, var)
        else:
            new_spawn = dirt_rng.uniform(0, self.dirt_prop.max_spawn_ratio)
        n_dirt_tiles = max(0, int(new_spawn * len(free_for_dirt)))
        self[c.DIRT].spawn_dirt(free_for_dirt[:n_dirt_tiles])

    def step_hook(self) -> (List[dict], dict):
        """Smear dirt along agent movements and handle the periodic dirt spawn."""
        super_reward_info = super().step_hook()
        if smear_amount := self.dirt_prop.dirt_smear_amount:
            for agent in self[c.AGENT]:
                if agent.step_result['action_valid'] and agent.last_pos != c.NO_POS:
                    if self._actions.is_moving_action(agent.step_result['action_name']):
                        if old_pos_dirt := self[c.DIRT].by_pos(agent.last_pos):
                            if smeared_dirt := round(old_pos_dirt.amount * smear_amount, 2):
                                # Take the smeared share from the old tile and put it on the new one.
                                old_pos_dirt.set_new_amount(max(0, old_pos_dirt.amount-smeared_dirt))
                                if new_pos_dirt := self[c.DIRT].by_pos(agent.pos):
                                    new_pos_dirt.set_new_amount(max(0, new_pos_dirt.amount + smeared_dirt))
                                else:
                                    if self[c.DIRT].spawn_dirt(agent.tile):
                                        new_pos_dirt = self[c.DIRT].by_pos(agent.pos)
                                        new_pos_dirt.set_new_amount(max(0, new_pos_dirt.amount + smeared_dirt))
        if self._next_dirt_spawn < 0:
            pass  # No DirtPile Spawn
        elif not self._next_dirt_spawn:
            self.trigger_dirt_spawn()
            self._next_dirt_spawn = self.dirt_prop.spawn_frequency
        else:
            self._next_dirt_spawn -= 1
        return super_reward_info

    def do_additional_actions(self, agent: Agent, action: Action) -> (dict, dict):
        """Resolve the clean-up action if no parent class handled ``action``.

        :return: The parent's result when it is not None; otherwise the result of
                 ``do_cleanup_action`` for ``CLEAN_UP`` and None for any other action.
        """
        action_result = super().do_additional_actions(agent, action)
        if action_result is not None:
            return action_result
        if action == a.CLEAN_UP:
            return self.do_cleanup_action(agent)
        return None

    def reset_hook(self) -> None:
        """Reset the parent, spawn the initial dirt and restart the spawn countdown."""
        super().reset_hook()
        self.trigger_dirt_spawn(initial_spawn=True)
        # A negative countdown disables the periodic spawn (see step_hook).
        self._next_dirt_spawn = self.dirt_prop.spawn_frequency if self.dirt_prop.spawn_frequency else -1

    def check_additional_done(self) -> (bool, dict):
        """Signal done when no dirt is left (if configured), else pass on the parent's state."""
        super_done, super_dict = super().check_additional_done()
        if self.dirt_prop.done_when_clean:
            if all_cleaned := len(self[c.DIRT]) == 0:
                super_dict.update(ALL_CLEAN_DONE=all_cleaned)
                return all_cleaned, super_dict
        return super_done, super_dict

    def observations_hook(self) -> Dict[str, np.typing.ArrayLike]:
        """Add the dirt layer to the parent's observations."""
        additional_observations = super().observations_hook()
        additional_observations.update({c.DIRT: self[c.DIRT].as_array()})
        return additional_observations

    def post_step_hook(self) -> List[Dict[str, int]]:
        """Append the total dirt amount and the dirty tile count to the parent's post-step info."""
        super_post_step = super(DirtFactory, self).post_step_hook()
        info_dict = dict()

        dirt = [dirt.amount for dirt in self[c.DIRT]]
        current_dirt_amount = sum(dirt)
        dirty_tile_count = len(dirt)

        # if dirty_tile_count:
        #    dirt_distribution_score = entropy(softmax(np.asarray(dirt)) / dirty_tile_count)
        # else:
        #    dirt_distribution_score = 0

        info_dict.update(dirt_amount=current_dirt_amount)
        info_dict.update(dirty_tile_count=dirty_tile_count)

        super_post_step.append(info_dict)
        return super_post_step
if __name__ == '__main__':
    # Manual benchmark: let random agents act in a DirtFactory and report how long episodes take.
    import statistics
    import time

    from environments.utility_classes import AgentRenderOptions as aro
    render = True

    dirt_props = DirtProperties(
        initial_dirt_ratio=0.35,
        initial_dirt_spawn_r_var=0.1,
        clean_amount=0.34,
        max_spawn_amount=0.1,
        max_global_amount=20,
        max_local_amount=1,
        spawn_frequency=0,
        max_spawn_ratio=0.05,
        dirt_smear_amount=0.0
    )

    obs_props = ObservationProperties(render_agents=aro.COMBINED, omit_agent_self=True,
                                      pomdp_r=2, additional_agent_placeholder=None, cast_shadows=True,
                                      indicate_door_area=False)

    move_props = {'allow_square_movement': True,
                  'allow_diagonal_movement': False,
                  'allow_no_op': False}

    global_timings = []
    for i in range(10):

        factory = DirtFactory(n_agents=10, done_at_collision=False,
                              level_name='rooms', max_steps=1000,
                              doors_have_area=False,
                              obs_prop=obs_props, parse_doors=True,
                              verbose=True,
                              mv_prop=move_props, dirt_prop=dirt_props,
                              # inject_agents=[TSPDirtAgent],
                              )

        # noinspection DuplicatedCode
        n_actions = factory.action_space.n - 1
        _ = factory.observation_space
        obs_space = factory.observation_space
        obs_space_named = factory.named_observation_space
        action_space_named = factory.named_action_space
        times = []
        for epoch in range(10):
            start_time = time.time()
            # One random action per agent for every possible step of the episode.
            random_actions = [[random.randint(0, n_actions) for _
                               in range(factory.n_agents)] for _
                              in range(factory.max_steps+1)]
            env_state = factory.reset()
            if render:
                factory.render()
            # tsp_agent = factory.get_injected_agents()[0]

            rwrd = 0
            for agent_i_action in random_actions:
                # agent_i_action = tsp_agent.predict()
                env_state, step_rwrd, done_bool, info_obj = factory.step(agent_i_action)
                rwrd += step_rwrd
                if render:
                    factory.render()
                if done_bool:
                    break
            times.append(time.time() - start_time)
            # print(f'Factory run {epoch} done, reward is:\n    {r}')
        # Divide by the number of measured epochs instead of a hard-coded constant.
        print('Mean Time Taken: ', sum(times) / len(times))
        global_timings.extend(times)
    print('Mean Time Taken: ', sum(global_timings) / len(global_timings))
    # A median needs ordered data; indexing the raw list only yields an arbitrary sample.
    print('Median Time Taken: ', statistics.median(global_timings))

View File

@ -1,38 +0,0 @@
from typing import Union
from environments.factory.additional.doors.doors_entities import Door
from environments.factory.base.registers import EntityCollection
from environments.factory.additional.doors.doors_util import Constants as c
class Doors(EntityCollection):
    """Collection of ``Door`` entities that can mark the floor tiles around each door."""

    def __init__(self, *args, indicate_area=False, **kwargs):
        # Both attributes are assigned before the parent constructor runs.
        self.indicate_area = indicate_area
        self._area_marked = False
        super(Doors, self).__init__(*args, is_blocking_light=True, can_collide=True, **kwargs)

    _accepted_objects = Door

    def get_near_position(self, position: (int, int)) -> Union[None, Door]:
        """Return the first door whose tile lists ``position`` as neighbouring floor position.

        :return: The matching door, or None if no door matches.
        """
        try:
            return next(door for door in self if position in door.tile.neighboring_floor_pos)
        except StopIteration:
            return None

    def tick_doors(self):
        """Call ``tick`` on every door of the collection."""
        for door in self:
            door.tick()

    def as_array(self):
        """Return the parent's array; on the first call the door areas are queued for marking."""
        # The area transforms are registered once, and only if ``indicate_area`` is set.
        if not self._area_marked and self.indicate_area:
            for door in self:
                for tile in door.tile.neighboring_floor:
                    if self._individual_slices:
                        # Nothing is marked when every entity has its own slice.
                        pass
                    else:
                        # Index into slice 0 at the position of the neighbouring tile.
                        pos = (0, *tile.pos)
                        self._lazy_eval_transforms.append((pos, c.ACCESS_DOOR_CELL))
            self._area_marked = True
        return super(Doors, self).as_array()

View File

@ -1,69 +0,0 @@
from environments.factory.base.objects import Entity
from environments.factory.additional.doors.doors_util import Constants as c
class Door(Entity):
    """A door entity that is either open or closed and closes itself after a while."""

    def __init__(self, *args, closed_on_init=True, auto_close_interval=10, indicate_area=False, **kwargs):
        """Set up the door in closed state and open it right away if requested.

        :param closed_on_init: Start closed (default) or open.
        :param auto_close_interval: Value `time_to_close` is reset to whenever the door opens.
        :param indicate_area: Stored on the instance; not used by this class itself.
        """
        super().__init__(*args, **kwargs)
        self._state = c.CLOSED_DOOR
        self.indicate_area = indicate_area
        self.auto_close_interval = auto_close_interval
        self.time_to_close = -1
        if not closed_on_init:
            self._open()

    # ---- state queries -------------------------------------------------
    @property
    def is_closed(self):
        """True while the internal state equals the closed identifier."""
        return self._state == c.CLOSED_DOOR

    @property
    def is_open(self):
        """True while the internal state equals the open identifier."""
        return self._state == c.OPEN_DOOR

    @property
    def status(self):
        """Raw internal state identifier."""
        return self._state

    @property
    def str_state(self):
        """Human readable state, either 'open' or 'closed'."""
        if self.is_open:
            return 'open'
        return 'closed'

    @property
    def is_blocking(self):
        """A door blocks whenever it is not open."""
        return not self.is_open

    @property
    def can_collide(self):
        """A door can be collided with whenever it is not open."""
        return not self.is_open

    @property
    def encoding(self):
        """Observation value of this door; differs between closed and open."""
        # The value matters: shadows are checked against the occupation value.
        if self.is_closed:
            return c.CLOSED_DOOR_CELL
        return c.OPEN_DOOR_CELL

    def summarize_state(self):
        """Extend the inherited state summary by door state and remaining time to close."""
        summary = super().summarize_state()
        summary.update(state=str(self.str_state), time_to_close=int(self.time_to_close))
        return summary

    # ---- state changes -------------------------------------------------
    def use(self):
        """Toggle the door: close it when open, open it otherwise."""
        if self.is_open:
            self._close()
        else:
            self._open()

    def tick(self):
        """Count down towards auto-closing; only acts while open and ``len(self.tile) == 1``."""
        if not self.is_open or len(self.tile) != 1:
            return
        if self.time_to_close:
            self.time_to_close -= 1
        else:
            # Countdown reached zero -> toggle, which closes the open door.
            self.use()

    def _open(self):
        """Switch to open, notify the owning collection and restart the close countdown."""
        self._state = c.OPEN_DOOR
        self._collection.notify_change_to_value(self)
        self.time_to_close = self.auto_close_interval

    def _close(self):
        """Switch to closed and notify the owning collection."""
        self._state = c.CLOSED_DOOR
        self._collection.notify_change_to_value(self)

View File

@ -1,31 +0,0 @@
from typing import NamedTuple
from environments.helpers import Constants as BaseConstants, EnvActions as BaseActions
class Constants(BaseConstants):
    """Door related identifiers and observation values, added on top of the base constants."""
    DOOR = 'Door'  # Identifier of Single-Door Entities.
    DOORS = 'Doors'  # Identifier of Door-objects and sets (collections).
    DOOR_SYMBOL = 'D'  # Door identifier for resolving the string based map files.
    # The three cell values below are the door encodings written into observations.
    ACCESS_DOOR_CELL = 1 / 3  # Access-door-Cell value used in observation
    OPEN_DOOR_CELL = 2 / 3  # Open-door-Cell value used in observation
    CLOSED_DOOR_CELL = 3 / 3  # Closed-door-Cell value used in observation
    CLOSED_DOOR = 'closed'  # Identifier to compare door-is-closed state
    OPEN_DOOR = 'open'  # Identifier to compare door-is-open state
    # ACCESS_DOOR = 'access' # Identifier to compare access positions
class Actions(BaseActions):
    """Base environment actions extended by the door action identifier."""
    USE_DOOR = 'use_door'  # String identifier of the action that uses a door.
class RewardsDoor(NamedTuple):
    """Reward values for the 'use_door' action."""
    USE_DOOR_VALID: float = -0.00  # Reward value for a valid door use.
    USE_DOOR_FAIL: float = -0.01  # Reward value for a failed door use.
class DoorProperties(NamedTuple):
    """Configuration values of the door extension."""
    indicate_door_area: bool = True  # Whether the door area should be indicated in the agents' observation.

View File

@ -1,196 +0,0 @@
import time
from typing import List, Union, Dict
import random
import numpy as np
from environments.factory.additional.doors.doors_collections import Doors
from environments.factory.additional.doors.doors_util import DoorProperties, RewardsDoor, Constants, Actions
from environments.factory.base.base_factory import BaseFactory
from environments.factory.base.objects import Agent, Action
from environments.factory.base.registers import Entities
from environments import helpers as h
from environments.factory.base.renderer import RenderEntity
from environments.utility_classes import ObservationProperties
def softmax(x, axis=None):
    """Compute the softmax of the scores in ``x``.

    The maximum is subtracted before exponentiation for numerical stability,
    so large scores do not overflow.

    :param x: Array-like of scores.
    :param axis: Axis along which each set of scores is normalised. With the
                 default ``None`` the whole array is treated as a single set.
    :return: Array of the same shape as ``x`` whose entries sum to 1 along ``axis``.
    """
    x = np.asarray(x)
    # keepdims keeps the reduced axis so the result broadcasts against x.
    e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return e_x / e_x.sum(axis=axis, keepdims=True)
def entropy(x, eps=1e-8):
    """Return the Shannon entropy (natural log) of the probabilities in ``x``.

    :param x: Array-like of probabilities; plain Python sequences are accepted.
    :param eps: Small constant added inside the logarithm so that zero
                probabilities do not produce ``-inf``.
    :return: ``-sum(x * log(x + eps))`` over all entries.
    """
    x = np.asarray(x)
    return -(x * np.log(x + eps)).sum()
# Short aliases for the door constants and door actions imported above.
c = Constants
a = Actions
# noinspection PyAttributeOutsideInit, PyAbstractClass
class DoorFactory(BaseFactory):
    """Factory environment that adds doors and a 'use_door' action to `BaseFactory`.

    Doors are parsed from the level (cells marked with ``c.DOOR_SYMBOL``),
    ticked once per step and exposed as an additional observation.
    Levels without any door symbol get no `Doors` collection; every hook
    below therefore treats a missing/empty collection as "no doors".
    """

    @property
    def actions_hook(self) -> Union[Action, List[Action]]:
        """Return the inherited actions extended by the door-usage action."""
        super_actions = super().actions_hook
        super_actions.append(Action(str_ident=a.USE_DOOR))
        return super_actions

    @property
    def entities_hook(self) -> Dict[str, Entities]:
        """Return the inherited entities plus a `Doors` collection if the level contains doors."""
        super_entities = super().entities_hook

        parsed_doors = h.one_hot_level(self._parsed_level, c.DOOR_SYMBOL)
        # Pad by the POMDP radius so door coordinates refer to the padded level.
        parsed_doors = np.pad(parsed_doors, self.obs_prop.pomdp_r, 'constant', constant_values=0)
        if np.any(parsed_doors):
            door_tiles = [self[c.FLOOR].by_pos(tuple(pos)) for pos in np.argwhere(parsed_doors == c.OCCUPIED_CELL)]
            doors = Doors.from_tiles(door_tiles, self._level_shape, indicate_area=self.obs_prop.indicate_door_area,
                                     entity_kwargs=dict()
                                     )
            super_entities.update(({c.DOORS: doors}))
        return super_entities

    def __init__(self, *args,
                 door_properties: DoorProperties = DoorProperties(), rewards_door: RewardsDoor = RewardsDoor(),
                 env_seed=None, **kwargs):
        """Set up door properties/rewards and forward everything else to `BaseFactory`.

        :param door_properties: `DoorProperties` instance or a dict of its fields.
        :param rewards_door: `RewardsDoor` instance or a dict of its fields.
        :param env_seed: Seed for the random generators. ``None`` (default)
                         draws a fresh time based seed per instance.
        """
        # A default of `time.time_ns()` in the signature would be evaluated only
        # once at import time and hand the same seed to every instance.
        if env_seed is None:
            env_seed = time.time_ns()
        if isinstance(door_properties, dict):
            door_properties = DoorProperties(**door_properties)
        if isinstance(rewards_door, dict):
            rewards_door = RewardsDoor(**rewards_door)
        self.door_properties = door_properties
        self.rewards_door = rewards_door
        self._door_rng = np.random.default_rng(env_seed)
        self._doors: Doors
        kwargs.update(env_seed=env_seed)
        # TODO: Reset ---> document this
        super().__init__(*args, **kwargs)

    def render_assets_hook(self, mode='human'):
        """Add one render entity per door (open/closed asset) to the inherited assets."""
        additional_assets = super().render_assets_hook()
        doors = self[c.DOORS]
        if doors:
            for i, door in enumerate(doors):
                name, state = 'door_open' if door.is_open else 'door_closed', 'blank'
                additional_assets.append(RenderEntity(name, door.pos, 1, 'none', state, i + 1))
        return additional_assets

    def step_hook(self) -> (List[dict], dict):
        """Run the inherited step hook, then advance the auto-close timers of all doors."""
        super_reward_info = super().step_hook()
        # Step the door close interval
        # TODO: Maybe move this to self.post_step_hook? May collide with reward calculation.
        if doors := self[c.DOORS]:
            doors.tick_doors()
        return super_reward_info

    def do_additional_actions(self, agent: Agent, action: Action) -> (dict, dict):
        """Resolve ``action`` via the parent first; handle 'use_door' if the parent did not.

        :return: The parent's result, the door-use result, or None if nobody handled the action.
        """
        action_result = super().do_additional_actions(agent, action)
        if action_result is None and action == a.USE_DOOR:
            return self.use_door_action(agent)
        return action_result

    def use_door_action(self, agent: Agent):
        """Toggle the door next to ``agent``, if there is one.

        :return: Tuple of validity flag and reward dict (value, reason, info).
        """
        # Look for a door whose neighbouring floor tiles contain the agent position.
        doors = self[c.DOORS]
        door = doors.get_near_position(agent.pos) if doors else None
        if door is not None:
            door.use()
            valid = c.VALID
            self.print(f'{agent.name} just used a {door.name} at {door.pos}')
            info_dict = {f'{agent.name}_door_use': 1, 'door_use': 1}
        # When there is no door nearby...
        else:
            valid = c.NOT_VALID
            info_dict = {f'{agent.name}_failed_door_use': 1, 'failed_door_use': 1}
            self.print(f'{agent.name} just tried to use a door at {agent.pos}, but there is none.')

        reward = dict(value=self.rewards_door.USE_DOOR_VALID if valid else self.rewards_door.USE_DOOR_FAIL,
                      reason=a.USE_DOOR, info=info_dict)

        return valid, reward

    def reset_hook(self) -> None:
        """Run the inherited reset hook; the door extension keeps no extra state to reset."""
        super().reset_hook()
        # There is nothing to reset.

    def check_additional_done(self) -> (bool, dict):
        """Return the inherited done flag and info unchanged; doors never end an episode."""
        super_done, super_dict = super().check_additional_done()
        return super_done, super_dict

    def observations_hook(self) -> Dict[str, np.typing.ArrayLike]:
        """Add the door array to the inherited observations when doors exist."""
        additional_observations = super().observations_hook()
        if doors := self[c.DOORS]:
            additional_observations.update({c.DOORS: doors.as_array()})
        return additional_observations

    def post_step_hook(self) -> List[Dict[str, int]]:
        """Return the inherited post-step info unchanged."""
        super_post_step = super(DoorFactory, self).post_step_hook()
        return super_post_step
if __name__ == '__main__':
    # Small timing benchmark: 10 factories x 10 episodes of random actions.
    from statistics import median

    from environments.utility_classes import AgentRenderOptions as aro
    render = True

    door_props = DoorProperties(
        indicate_door_area=True
    )

    obs_props = ObservationProperties(render_agents=aro.COMBINED, omit_agent_self=True,
                                      pomdp_r=2, additional_agent_placeholder=None, cast_shadows=True
                                      )

    move_props = {'allow_square_movement': True,
                  'allow_diagonal_movement': False,
                  'allow_no_op': False}

    global_timings = []
    for i in range(10):
        # `door_properties` is the keyword DoorFactory accepts for the door settings.
        factory = DoorFactory(n_agents=10, done_at_collision=False,
                              level_name='rooms', max_steps=1000,
                              obs_prop=obs_props,
                              verbose=True,
                              mv_prop=move_props, door_properties=door_props,
                              )

        # noinspection DuplicatedCode
        n_actions = factory.action_space.n - 1
        obs_space = factory.observation_space
        obs_space_named = factory.named_observation_space
        action_space_named = factory.named_action_space
        times = []
        for epoch in range(10):
            start_time = time.time()
            # One random action per agent for every possible step of the episode.
            random_actions = [[random.randint(0, n_actions) for _
                               in range(factory.n_agents)] for _
                              in range(factory.max_steps+1)]
            env_state = factory.reset()
            if render:
                factory.render()
            rwrd = 0
            for agent_i_action in random_actions:
                env_state, step_rwrd, done_bool, info_obj = factory.step(agent_i_action)
                rwrd += step_rwrd
                if render:
                    factory.render()
                if done_bool:
                    break
            times.append(time.time() - start_time)
            # print(f'Factory run {epoch} done, reward is:\n    {r}')
        print('Mean Time Taken: ', sum(times) / len(times))
        global_timings.extend(times)
    print('Mean Time Taken: ', sum(global_timings) / len(global_timings))
    # The median needs ordered data; indexing the raw list returns an arbitrary sample.
    print('Median Time Taken: ', median(global_timings))

View File

@ -1,193 +0,0 @@
import time
from typing import List, Union, Dict
import numpy as np
import random
from environments.factory.additional.item.item_collections import Items, Inventories, DropOffLocations
from environments.factory.additional.item.item_util import Constants, Actions, RewardsItem, ItemProperties
from environments.factory.base.base_factory import BaseFactory
from environments.factory.base.objects import Agent, Action
from environments.factory.base.registers import Entities
from environments.factory.base.renderer import RenderEntity
# Short aliases for the item constants and item actions imported above.
c = Constants
a = Actions
# noinspection PyAttributeOutsideInit, PyAbstractClass
class ItemFactory(BaseFactory):
    """Factory environment with items that agents pick up and deliver to drop-off locations.

    Adds a single item action: standing on a drop-off it delivers an item from
    the agent's inventory, standing on an item it picks that item up.
    """

    # noinspection PyMissingConstructor
    def __init__(self, *args, item_prop: ItemProperties = ItemProperties(), env_seed=None,
                 rewards_item: RewardsItem = RewardsItem(), **kwargs):
        """Store item properties/rewards and forward everything else to `BaseFactory`.

        :param item_prop: `ItemProperties` instance or a dict of its fields.
        :param env_seed: Seed for the random generators. ``None`` (default)
                         draws a fresh time based seed per instance.
        :param rewards_item: `RewardsItem` instance or a dict of its fields.
        """
        # A default of `time.time_ns()` in the signature would be evaluated only
        # once at import time and hand the same seed to every instance.
        if env_seed is None:
            env_seed = time.time_ns()
        if isinstance(item_prop, dict):
            item_prop = ItemProperties(**item_prop)
        if isinstance(rewards_item, dict):
            rewards_item = RewardsItem(**rewards_item)
        self.item_prop = item_prop
        self.rewards_item = rewards_item
        kwargs.update(env_seed=env_seed)
        self._item_rng = np.random.default_rng(env_seed)
        # NOTE(review): this only triggers if a '_pomdp_r' keyword is passed; the
        # POMDP radius normally arrives inside `obs_prop`, so the check looks
        # ineffective - confirm the intended limit before tightening it.
        assert (item_prop.n_items <= ((1 + kwargs.get('_pomdp_r', 0) * 2) ** 2)) or not kwargs.get('_pomdp_r', 0)
        super().__init__(*args, **kwargs)

    @property
    def actions_hook(self) -> Union[Action, List[Action]]:
        """Return the inherited actions extended by the item action."""
        # noinspection PyUnresolvedReferences
        super_actions = super().actions_hook
        super_actions.append(Action(str_ident=a.ITEM_ACTION))
        return super_actions

    @property
    def entities_hook(self) -> Dict[str, Entities]:
        """Return the inherited entities plus drop-off locations, items and agent inventories."""
        # noinspection PyUnresolvedReferences
        super_entities = super().entities_hook

        empty_tiles = self[c.FLOOR].empty_tiles[:self.item_prop.n_drop_off_locations]
        drop_offs = DropOffLocations.from_tiles(
            empty_tiles, self._level_shape,
            entity_kwargs=dict(
                storage_size_until_full=self.item_prop.max_dropoff_storage_size)
        )
        item_register = Items(self._level_shape)
        empty_tiles = self[c.FLOOR].empty_tiles[:self.item_prop.n_items]
        item_register.spawn_items(empty_tiles)

        inventories = Inventories(self._obs_shape, self._level_shape)
        inventories.spawn_inventories(self[c.AGENT], self.item_prop.max_agent_inventory_capacity)

        super_entities.update({c.DROP_OFF: drop_offs, c.ITEM: item_register, c.INVENTORY: inventories})
        return super_entities

    def per_agent_raw_observations_hook(self, agent) -> Dict[str, np.typing.ArrayLike]:
        """Add the inventory array of ``agent`` to the inherited raw observations."""
        additional_raw_observations = super().per_agent_raw_observations_hook(agent)
        additional_raw_observations.update({c.INVENTORY: self[c.INVENTORY].by_entity(agent).as_array()})
        return additional_raw_observations

    def observations_hook(self) -> Dict[str, np.typing.ArrayLike]:
        """Add the item and drop-off arrays to the inherited observations."""
        additional_observations = super().observations_hook()
        additional_observations.update({c.ITEM: self[c.ITEM].as_array()})
        additional_observations.update({c.DROP_OFF: self[c.DROP_OFF].as_array()})
        return additional_observations

    def do_item_action(self, agent: Agent) -> (dict, dict):
        """Drop off or pick up an item, depending on what is at the agent's position.

        A drop-off at the position takes precedence over an item lying there.

        :return: Tuple of validity flag and reward dict (value, reason, info).
        """
        inventory = self[c.INVENTORY].by_entity(agent)
        if drop_off := self[c.DROP_OFF].by_pos(agent.pos):
            if inventory:
                # NOTE(review): the item leaves the inventory before the drop-off
                # reports success - confirm that a rejected item may be discarded.
                valid = drop_off.place_item(inventory.pop())
            else:
                valid = c.NOT_VALID
            if valid:
                self.print(f'{agent.name} just dropped of an item at {drop_off.pos}.')
                info_dict = {f'{agent.name}_DROPOFF_VALID': 1, 'DROPOFF_VALID': 1}
            else:
                self.print(f'{agent.name} just tried to drop off at {agent.pos}, but failed.')
                info_dict = {f'{agent.name}_DROPOFF_FAIL': 1, 'DROPOFF_FAIL': 1}
            reward = dict(value=self.rewards_item.DROP_OFF_VALID if valid else self.rewards_item.DROP_OFF_FAIL,
                          reason=a.ITEM_ACTION, info=info_dict)
            return valid, reward
        elif item := self[c.ITEM].by_pos(agent.pos):
            # Move the item into the inventory and take it off the map.
            item.change_parent_collection(inventory)
            item.set_tile_to(self._NO_POS_TILE)
            self.print(f'{agent.name} just picked up an item at {agent.pos}')
            info_dict = {f'{agent.name}_{a.ITEM_ACTION}_VALID': 1, f'{a.ITEM_ACTION}_VALID': 1}
            return c.VALID, dict(value=self.rewards_item.PICK_UP_VALID, reason=a.ITEM_ACTION, info=info_dict)
        else:
            self.print(f'{agent.name} just tried to pick up an item at {agent.pos}, but failed.')
            info_dict = {f'{agent.name}_{a.ITEM_ACTION}_FAIL': 1, f'{a.ITEM_ACTION}_FAIL': 1}
            return c.NOT_VALID, dict(value=self.rewards_item.PICK_UP_FAIL, reason=a.ITEM_ACTION, info=info_dict)

    def do_additional_actions(self, agent: Agent, action: Action) -> (dict, dict):
        """Resolve ``action`` via the parent first; handle the item action if the parent did not.

        :return: The parent's result, the item-action result, or None if nobody handled the action.
        """
        # noinspection PyUnresolvedReferences
        action_result = super().do_additional_actions(agent, action)
        if action_result is None and action == a.ITEM_ACTION:
            return self.do_item_action(agent)
        return action_result

    def reset_hook(self) -> None:
        """Run the inherited reset hook, restart the spawn countdown and top up the items."""
        # noinspection PyUnresolvedReferences
        super().reset_hook()
        self._next_item_spawn = self.item_prop.spawn_frequency
        self.trigger_item_spawn()

    def trigger_item_spawn(self):
        """Spawn items on empty tiles until ``n_items`` exist and restart the countdown."""
        if item_to_spawns := max(0, (self.item_prop.n_items - len(self[c.ITEM]))):
            empty_tiles = self[c.FLOOR].empty_tiles[:item_to_spawns]
            self[c.ITEM].spawn_items(empty_tiles)
            self._next_item_spawn = self.item_prop.spawn_frequency
            self.print(f'{item_to_spawns} new items have been spawned; next spawn in {self._next_item_spawn}')
        else:
            self.print('No Items are spawning, limit is reached.')

    def step_hook(self) -> (List[dict], dict):
        """Age items, remove those whose despawn counter hit zero and handle the spawn countdown."""
        # noinspection PyUnresolvedReferences
        super_reward_info = super().step_hook()
        # Iterate over a copy, items may be deleted from the collection below.
        for item in list(self[c.ITEM].values()):
            if item.auto_despawn >= 1:
                item.set_auto_despawn(item.auto_despawn-1)
            elif not item.auto_despawn:
                self[c.ITEM].delete_env_object(item)
            # Any other (negative) value leaves the item untouched.

        if not self._next_item_spawn:
            self.trigger_item_spawn()
        else:
            self._next_item_spawn = max(0, self._next_item_spawn-1)
        return super_reward_info

    def render_assets_hook(self, mode='human'):
        """Add render entities for all items placed on the map and for all drop-off locations."""
        # noinspection PyUnresolvedReferences
        additional_assets = super().render_assets_hook()
        # Items on the no-position tile (e.g. carried ones) are not drawn.
        items = [RenderEntity(c.ITEM, item.tile.pos) for item in self[c.ITEM] if item.tile != self._NO_POS_TILE]
        additional_assets.extend(items)
        drop_offs = [RenderEntity(c.DROP_OFF, drop_off.tile.pos) for drop_off in self[c.DROP_OFF]]
        additional_assets.extend(drop_offs)
        return additional_assets
if __name__ == '__main__':
    # Demo run: six agents acting randomly in the 'rooms' level.
    from environments.utility_classes import AgentRenderOptions as aro, ObservationProperties

    render = True

    item_probs = ItemProperties(n_items=30, n_drop_off_locations=6)
    obs_props = ObservationProperties(render_agents=aro.SEPERATE, omit_agent_self=True, pomdp_r=2)
    move_props = dict(allow_square_movement=True,
                      allow_diagonal_movement=True,
                      allow_no_op=False)

    factory = ItemFactory(
        n_agents=6,
        done_at_collision=False,
        level_name='rooms',
        max_steps=400,
        obs_prop=obs_props,
        parse_doors=True,
        record_episodes=True,
        verbose=True,
        mv_prop=move_props,
        item_prop=item_probs,
    )

    # noinspection DuplicatedCode
    n_actions = factory.action_space.n - 1
    obs_space = factory.observation_space
    obs_space_named = factory.named_observation_space

    for epoch in range(400):
        # Pre-draw one random action per agent for every possible step.
        random_actions = [
            [random.randint(0, n_actions) for _ in range(factory.n_agents)]
            for _ in range(factory.max_steps + 1)
        ]
        env_state = factory.reset()
        rwrd = 0
        for agent_i_action in random_actions:
            env_state, step_r, done_bool, info_obj = factory.step(agent_i_action)
            rwrd = rwrd + step_r
            if render:
                factory.render()
            if done_bool:
                break
        print(f'Factory run {epoch} done, reward is:\n    {rwrd}')

View File

@ -1,89 +0,0 @@
from typing import List
import numpy as np
from environments.factory.base.objects import Floor, Agent
from environments.factory.base.registers import EntityCollection, BoundEnvObjCollection, ObjectCollection
from environments.factory.additional.item.item_entities import Item, DropOffLocation
class Items(EntityCollection):
    """Entity collection restricted to `Item` objects."""

    _accepted_objects = Item

    def spawn_items(self, tiles: List[Floor]):
        """Create one item on each of the given tiles and add them to this collection."""
        self.add_additional_items([Item(floor_tile, self) for floor_tile in tiles])

    def despawn_items(self, items: List[Item]):
        """Remove the given items; a single `Item` is accepted as well as a list."""
        if isinstance(items, Item):
            items = [items]
        for obsolete in items:
            del self[obsolete]
class Inventory(BoundEnvObjCollection):
    """Item container bound to a single agent."""

    def __init__(self, agent: Agent, capacity: int, *args, **kwargs):
        """Bind the inventory to ``agent`` and remember its ``capacity``."""
        super().__init__(agent, *args, is_blocking_light=False, can_be_shadowed=False, **kwargs)
        self.capacity = capacity

    @property
    def name(self):
        """Class name followed by the name of the bound entity in parentheses."""
        owner_name = self._bound_entity.name
        return f'{self.__class__.__name__}({owner_name})'

    def as_array(self):
        """Return the array representation, creating the single-slice zero array on first use."""
        if self._array is None:
            self._array = np.zeros((1, *self._shape))
        return super().as_array()

    def summarize_states(self, **kwargs):
        """Collect public attributes, the contained items' summaries and ownership information."""
        summary = {}
        for attr_name, attr_value in self.__dict__.items():
            if attr_name.startswith('_') or attr_name == 'data':
                continue
            summary[attr_name] = attr_value
        summary.update(dict(items=[entry.summarize_state(**kwargs) for _, entry in self.items()]))
        summary.update(dict(name=self.name, belongs_to=self._bound_entity.name))
        return summary

    def pop(self):
        """Remove and return the item at index 0."""
        first_item = self[0]
        self.delete_env_object(first_item)
        return first_item
class Inventories(ObjectCollection):
    """Collection of all agent inventories, handled per agent with individual slices."""

    _accepted_objects = Inventory
    is_blocking_light = False
    can_be_shadowed = False

    def __init__(self, obs_shape, *args, **kwargs):
        """Store the observation shape that is handed to every spawned inventory."""
        super().__init__(*args, is_per_agent=True, individual_slices=True, **kwargs)
        self._obs_shape = obs_shape

    def as_array(self):
        """Stack the arrays of all inventories along a new first axis."""
        per_inventory = [single_inventory.as_array() for single_inventory in self]
        return np.stack(per_inventory)

    def spawn_inventories(self, agents, capacity):
        """Create one inventory with the given capacity per agent and add them."""
        new_inventories = [self._accepted_objects(owner, capacity, self._obs_shape) for owner in agents]
        self.add_additional_items(new_inventories)

    def idx_by_entity(self, entity):
        """Return the index of the inventory belonging to ``entity``, or None if there is none."""
        matching_indices = (idx for idx, inv in enumerate(self) if inv.belongs_to_entity(entity))
        return next(matching_indices, None)

    def by_entity(self, entity):
        """Return the inventory belonging to ``entity``, or None if there is none."""
        matching_inventories = (inv for inv in self if inv.belongs_to_entity(entity))
        return next(matching_inventories, None)

    def summarize_states(self, **kwargs):
        """Return the state summaries of all contained inventories as a list."""
        return [single_inventory.summarize_states(**kwargs) for _, single_inventory in self.items()]
class DropOffLocations(EntityCollection):
    """Entity collection restricted to `DropOffLocation` objects."""
    _accepted_objects = DropOffLocation
    # NOTE(review): presumably marks drop-offs as carrying no per-step state for summaries - confirm.
    _stateless_entities = True

View File

@ -1,31 +0,0 @@
from typing import NamedTuple
from environments.helpers import Constants as BaseConstants, EnvActions as BaseActions
class Constants(BaseConstants):
    """Item related constants, added on top of the base constants."""
    # NOTE(review): the two numeric values look like observation encodings - confirm against their users.
    NO_ITEM = 0
    ITEM_DROP_OFF = 1
    # Item Env: string identifiers of the item, inventory and drop-off collections.
    ITEM = 'Item'
    INVENTORY = 'Inventory'
    DROP_OFF = 'Drop_Off'
class Actions(BaseActions):
    """Base environment actions extended by the item action identifier."""
    ITEM_ACTION = 'ITEMACTION'  # String identifier of the combined pick-up / drop-off action.
class RewardsItem(NamedTuple):
    """Reward values for the outcomes of the item action."""
    DROP_OFF_VALID: float = 0.1  # Reward value for a successful drop-off.
    DROP_OFF_FAIL: float = -0.1  # Reward value for a failed drop-off.
    PICK_UP_FAIL: float = -0.1  # Reward value for a failed pick-up.
    PICK_UP_VALID: float = 0.1  # Reward value for a successful pick-up.
class ItemProperties(NamedTuple):
    """Configuration values of the item extension."""
    n_items: int = 5  # How many items are there at the same time
    spawn_frequency: int = 10  # Spawn Frequency in Steps
    n_drop_off_locations: int = 5  # How many DropOff locations are there at the same time
    max_dropoff_storage_size: int = 0  # How many items are needed until the dropoff is full
    max_agent_inventory_capacity: int = 5  # How many items are needed until the agent inventory is full

View File

@ -1,651 +0,0 @@
import abc
import time
from collections import defaultdict
from itertools import chain
from pathlib import Path
from typing import List, Union, Iterable, Dict
import numpy as np
import gym
from gym import spaces
from gym.wrappers import FrameStack
from environments.factory.base.shadow_casting import Map
from environments import helpers as h
from environments.helpers import Constants as c
from environments.helpers import EnvActions as a
from environments.helpers import RewardsBase
from environments.factory.base.objects import Agent, Floor, Action
from environments.factory.base.registers import Actions, Entities, Agents, Floors, Walls, PlaceHolders, \
GlobalPositions
from environments.utility_classes import MovementProperties, ObservationProperties, MarlFrameStack
from environments.utility_classes import AgentRenderOptions as a_obs
import simplejson
# NOTE(review): string prefix, presumably used to tag recorded-episode data - confirm at its usage sites.
REC_TAC = 'rec_'
# noinspection PyAttributeOutsideInit
class BaseFactory(gym.Env):
@property
def action_space(self):
    """Discrete gym space with one entry per registered action."""
    n_registered_actions = len(self._actions)
    return spaces.Discrete(n_registered_actions)
@property
def named_action_space(self):
return {x.identifier: idx for idx, x in enumerate(self._actions.values())}
@property
def observation_space(self):
    """Box space in [0, 1] shaped like a single agent's observation."""
    obs, _ = self._build_observations()
    # With several agents the observations are stacked; one entry defines the shape.
    single_agent_obs = obs[0] if self.n_agents > 1 else obs
    return spaces.Box(low=0, high=1, shape=single_agent_obs.shape, dtype=np.float32)
@property
def named_observation_space(self):
# Build it
_, named_obs = self._build_observations()
if self.n_agents > 1:
# Only return the first named obs space, as their structure at the moment is same.
return named_obs[list(named_obs.keys())[0]]
else:
return named_obs
@property
def pomdp_diameter(self):
return self._pomdp_r * 2 + 1
@property
def movement_actions(self):
return self._actions.movement_actions
@property
def params(self) -> dict:
d = {key: val for key, val in self.__dict__.items() if not key.startswith('_') and not key.startswith('__')}
d['class_name'] = self.__class__.__name__
return d
@property
def summarize_header(self):
summary_dict = self._summarize_state(stateless_entities=True)
summary_dict.update(actions=self._actions.summarize())
return summary_dict
def __enter__(self):
return self if self.obs_prop.frames_to_stack == 0 else \
MarlFrameStack(FrameStack(self, self.obs_prop.frames_to_stack))
def __exit__(self, exc_type, exc_val, exc_tb):
    """Leave the context by closing the environment; exception details are not inspected."""
    self.close()
def __init__(self, level_name='simple', n_agents=1, max_steps=int(5e2),
             mv_prop: MovementProperties = MovementProperties(),
             obs_prop: ObservationProperties = ObservationProperties(),
             rewards_base: RewardsBase = RewardsBase(),
             done_at_collision=False, inject_agents: Union[None, List] = None,
             verbose=False, env_seed=None, individual_rewards=False,
             class_name='', **kwargs):
    """Configure the environment, parse the level file and perform an initial reset.

    :param level_name: Name of the level file (without '.txt') inside the levels directory.
    :param n_agents: Total number of agents, injected agents included.
    :param max_steps: Number of steps after which an episode is done.
    :param mv_prop: `MovementProperties` instance or a dict of its fields.
    :param obs_prop: `ObservationProperties` instance or a dict of its fields.
    :param rewards_base: `RewardsBase` instance or a dict of its fields.
    :param done_at_collision: End the episode as soon as a collision occurs.
    :param inject_agents: Optional list of agent classes to instantiate inside the env.
    :param verbose: Stored as `self.verbose`.
    :param env_seed: Seed for the random generators. ``None`` (default)
                     draws a fresh time based seed per instance.
    :param individual_rewards: Stored as `self.individual_rewards`.
    :param class_name: Name of the class the parameters were stored for; only reported.
    :param kwargs: Unknown keyword arguments; reported and ignored.
    """

    if class_name:
        print(f'You loaded parameters for {class_name}', f'this is: {self.__class__.__name__}')

    # A default of `time.time_ns()` in the signature would be evaluated only
    # once at import time and hand the same seed to every instance.
    if env_seed is None:
        env_seed = time.time_ns()

    # Properties may arrive as plain dicts, e.g. when loaded from a file.
    if isinstance(mv_prop, dict):
        mv_prop = MovementProperties(**mv_prop)
    if isinstance(obs_prop, dict):
        obs_prop = ObservationProperties(**obs_prop)
    if isinstance(rewards_base, dict):
        rewards_base = RewardsBase(**rewards_base)

    assert obs_prop.frames_to_stack != 1 and \
        obs_prop.frames_to_stack >= 0, \
        "'frames_to_stack' cannot be negative or 1."
    if kwargs:
        print(f'Following kwargs were passed, but ignored: {kwargs}')

    # Attribute Assignment
    self.env_seed = env_seed
    self.seed(env_seed)
    self._base_rng = np.random.default_rng(self.env_seed)
    self.mv_prop = mv_prop
    self.obs_prop = obs_prop
    self.rewards_base = rewards_base
    self.level_name = level_name
    self._level_shape = None
    self._obs_shape = None
    self.verbose = verbose
    self._renderer = None  # expensive - don't use it when not required !
    self._entities = Entities()

    self.n_agents = n_agents
    level_filepath = Path(__file__).parent.parent / h.LEVELS_DIR / f'{self.level_name}.txt'
    self._parsed_level = h.parse_level(level_filepath)

    self.max_steps = max_steps
    self._pomdp_r = self.obs_prop.pomdp_r

    self.done_at_collision = done_at_collision
    self._record_episodes = False
    self._injected_agents = inject_agents or []
    self.individual_rewards = individual_rewards

    # TODO: Reset ---> document this
    self.reset()
def __getitem__(self, item):
    """Look up ``item`` in the entity register, so `env[key]` reads from `self._entities`."""
    return self._entities[item]
def _base_init_env(self):
    """Rebuild all entities of the environment from the parsed level.

    Creates, in this order: walls, floor, the no-position tile, actions,
    agents (spawned and injected), the optional agent placeholder, the
    entities of sub-environments (`entities_hook`) and the optional global
    position objects. The order matters, later steps read earlier entities.

    :return: The freshly filled entity register (`self._entities`).
    """
    # All entities
    # Objects
    self._entities = Entities()
    # Level
    level_array = h.one_hot_level(self._parsed_level)
    self._level_init_shape = level_array.shape
    # Surround the level with occupied cells, one POMDP radius wide.
    level_array = np.pad(level_array, self.obs_prop.pomdp_r, 'constant', constant_values=c.OCCUPIED_CELL)

    self._level_shape = level_array.shape
    # Observations cover the whole level, or a square window when a POMDP radius is set.
    self._obs_shape = self._level_shape if not self.obs_prop.pomdp_r else (self.pomdp_diameter, ) * 2

    # Walls
    walls = Walls.from_argwhere_coordinates(
        np.argwhere(level_array == c.OCCUPIED_CELL),
        self._level_shape
    )
    self._entities.add_additional_items({c.WALLS: walls})

    # Floor
    floor = Floors.from_argwhere_coordinates(
        np.argwhere(level_array == c.FREE_CELL),
        self._level_shape
    )
    self._entities.add_additional_items({c.FLOOR: floor})

    # NOPOS: tile used for entities that currently have no position on the map.
    self._NO_POS_TILE = Floor(c.NO_POS, None)

    # Actions
    # TODO: Move this to Agent init, so that agents can have individual action sets.
    self._actions = Actions(self.mv_prop)
    if additional_actions := self.actions_hook:
        self._actions.add_additional_items(additional_actions)

    # Agents: injected agents count towards n_agents, only the rest is spawned here.
    agents_to_spawn = self.n_agents-len(self._injected_agents)
    agents_kwargs = dict(individual_slices=self.obs_prop.render_agents == a_obs.SEPERATE,
                         hide_from_obs_builder=self.obs_prop.render_agents in [a_obs.NOT, a_obs.LEVEL],
                         )
    if agents_to_spawn:
        agents = Agents.from_tiles(floor.empty_tiles[:agents_to_spawn], self._level_shape, **agents_kwargs)
    else:
        agents = Agents(self._level_shape, **agents_kwargs)
    if self._injected_agents:
        initialized_injections = list()
        for i, injection in enumerate(self._injected_agents):
            # Each injected class is instantiated on the first tile reported as empty.
            agents.add_item(injection(self, floor.empty_tiles[0], agents, static_problem=False))
            initialized_injections.append(agents[-1])
        self._initialized_injections = initialized_injections
    self._entities.add_additional_items({c.AGENT: agents})

    if self.obs_prop.additional_agent_placeholder is not None:
        # TODO: Make this accept Lists for multiple placeholders

        # Empty Observations with either [0, 1, N(0, 1)]
        placeholder = PlaceHolders.from_values(self.obs_prop.additional_agent_placeholder, self._level_shape,
                                               entity_kwargs=dict(
                                                   fill_value=self.obs_prop.additional_agent_placeholder)
                                               )

        self._entities.add_additional_items({c.AGENT_PLACEHOLDER: placeholder})

    # Additional entities from sub-environments
    if additional_entities := self.entities_hook:
        self._entities.add_additional_items(additional_entities)

    if self.obs_prop.show_global_position_info:
        global_positions = GlobalPositions(self._level_shape)
        # This moved into the GlobalPosition object
        # obs_shape_2d = self._level_shape if not self._pomdp_r else ((self.pomdp_diameter,) * 2)
        global_positions.spawn_global_position_objects(self[c.AGENT])
        self._entities.add_additional_items({c.GLOBAL_POSITION: global_positions})

    # Return
    return self._entities
def reset(self) -> (np.typing.ArrayLike, int, bool, dict):
_ = self._base_init_env()
self.reset_hook()
self._steps = 0
obs, _ = self._build_observations()
return obs
def step(self, actions):
    """Advance the environment by one step.

    :param actions: One action index per agent; with a single agent a bare
                    (non-list) value is accepted as well.
    :return: Tuple of observation, reward, done flag and info dict.
    """

    # Normalise the single-agent case to a one-element list.
    if self.n_agents == 1 and not isinstance(actions, list):
        actions = [int(actions)]

    assert isinstance(actions, Iterable), f'"actions" has to be in [{int, list}]'
    self._steps += 1

    # Pre step Hook for later use
    self.pre_step_hook()

    # Resolve and execute the action of every agent.
    for action, agent in zip(actions, self[c.AGENT]):
        agent.clear_temp_state()
        action_obj = self._actions[int(action)]
        step_result = dict(collisions=[], rewards=[], info={}, action_name='', action_valid=False)
        # cls.print(f'Action #{action} has been resolved to: {action_obj}')
        if a.is_move(action_obj):
            action_valid, reward = self._do_move_action(agent, action_obj)
        elif a.NOOP == action_obj:
            action_valid = c.VALID
            reward = dict(value=self.rewards_base.NOOP, reason=a.NOOP, info={f'{agent.name}_NOOP': 1, 'NOOP': 1})
        else:
            # noinspection PyTupleAssignmentBalance
            action_valid, reward = self.do_additional_actions(agent, action_obj)
            # Not needed any more since the tuple assignment above will fail if the action cannot be resolved.
            # assert step_result is not None, 'This should not happen, every Action must be detected correctly!'
        step_result['action_name'] = action_obj.identifier
        step_result['action_valid'] = action_valid
        step_result['rewards'].append(reward)
        agent.step_result = step_result

    # Additional step and Reward, Info Init
    rewards, info = self.step_hook()

    # Todo: Make this faster, so that only tiles of entities that can collide are searched.
    tiles_with_collisions = self.get_all_tiles_with_collisions()
    for tile in tiles_with_collisions:
        guests = tile.guests_that_can_collide
        # Register every other guest on the tile as a collision of each guest that has a step result.
        for i, guest in enumerate(guests):
            for j, collision in enumerate(guests):
                if j != i and hasattr(guest, 'step_result'):
                    guest.step_result['collisions'].append(collision)

    done = False
    if self.done_at_collision:
        if done_at_col := bool(tiles_with_collisions):
            done = done_at_col
            info.update(COLLISION_DONE=done_at_col)

    additional_done, additional_done_info = self.check_additional_done()
    done = done or additional_done
    info.update(additional_done_info)

    # Finalize
    reward, reward_info = self.build_reward_result(rewards)

    info.update(reward_info)
    # The step limit ends the episode regardless of the other done conditions.
    if self._steps >= self.max_steps:
        done = True
    info.update(step_reward=reward, step=self._steps)

    if self._record_episodes:
        info.update(self._summarize_state())

    # Post step Hook for later use
    for post_step_info in self.post_step_hook():
        info.update(post_step_info)

    obs, _ = self._build_observations()

    return obs, reward, done, info
def _build_observations(self) -> (np.typing.ArrayLike, dict):
    """Assemble the observation of every agent.

    For each agent the level, agent, placeholder and hook observations are
    stacked along the first axis, optionally cut to the POMDP window,
    extended by the agent's raw observations and optionally shadowed.

    :return: Tuple ``(obs, explained_idx)``. With one agent, ``obs`` is that
             agent's array and ``explained_idx`` maps observation names to
             their slice indices. With several agents, ``obs`` stacks all
             agents' arrays and ``explained_idx`` maps agent names to such
             name -> indices dicts.
    :raises ValueError: If ``n_agents`` is smaller than 1.
    """
    # Observation dict:
    per_agent_expl_idx = dict()
    per_agent_obsn = dict()
    # General Observations
    lvl_obs = self[c.WALLS].as_array()
    if self.obs_prop.render_agents == a_obs.NOT:
        global_agent_obs = None
    elif self.obs_prop.omit_agent_self and self.n_agents == 1:
        global_agent_obs = None
    else:
        global_agent_obs = self[c.AGENT].as_array().copy()
    placeholder_obs = self[c.AGENT_PLACEHOLDER].as_array() if self[c.AGENT_PLACEHOLDER] else None
    add_obs_dict = self.observations_hook()

    for agent_idx, agent in enumerate(self[c.AGENT]):
        obs_dict = dict()
        # Build Agent Observations
        if self.obs_prop.render_agents != a_obs.NOT:
            if self.obs_prop.omit_agent_self and self.n_agents >= 2:
                if self.obs_prop.render_agents == a_obs.SEPERATE:
                    other_agent_obs_idx = [x for x in range(self.n_agents) if x != agent_idx]
                    agent_obs = np.take(global_agent_obs, other_agent_obs_idx, axis=0)
                else:
                    agent_obs = global_agent_obs.copy()
                    agent_obs[(0, *agent.pos)] -= agent.encoding
            elif global_agent_obs is not None:
                agent_obs = global_agent_obs.copy()
            else:
                # A single agent that omits itself has no agent observation at all.
                agent_obs = None
        else:
            # agent_obs == None!!!!!
            agent_obs = global_agent_obs

        # Build Level Observations
        if self.obs_prop.render_agents == a_obs.LEVEL:
            assert agent_obs is not None
            # Work on a per-agent copy; the shared wall array must not collect
            # the agent layers of previously processed agents.
            agent_lvl_obs = lvl_obs.copy()
            agent_lvl_obs += agent_obs
        else:
            agent_lvl_obs = lvl_obs

        obs_dict[c.WALLS] = agent_lvl_obs
        if self.obs_prop.render_agents in [a_obs.SEPERATE, a_obs.COMBINED] and agent_obs is not None:
            obs_dict[c.AGENT] = agent_obs[:]
        if self[c.AGENT_PLACEHOLDER] and placeholder_obs is not None:
            obs_dict[c.AGENT_PLACEHOLDER] = placeholder_obs
        obs_dict.update(add_obs_dict)
        obsn = np.vstack(list(obs_dict.values()))
        if self.obs_prop.pomdp_r:
            obsn = self._do_pomdp_cutout(agent, obsn)

        raw_obs = self.per_agent_raw_observations_hook(agent)
        # Raw observations are stacked as well, so 2D arrays get a leading slice axis.
        raw_obs = {key: np.expand_dims(val, 0) if val.ndim != 3 else val for key, val in raw_obs.items()}
        obsn = np.vstack((obsn, *raw_obs.values()))

        # Map every observation name to the slice indices it occupies in obsn:
        # each entry starts where the previous one ended and spans its own slice count.
        keys = list(chain(obs_dict.keys(), raw_obs.keys()))
        slice_counts = [x.shape[0] for x in chain(obs_dict.values(), raw_obs.values())]
        slice_ends = np.cumsum(slice_counts)
        per_agent_expl_idx[agent.name] = {key: list(range(int(end) - n_slices, int(end)))
                                          for key, n_slices, end in zip(keys, slice_counts, slice_ends)}

        # Shadow Casting
        if agent.step_result is not None:
            pass
        else:
            # Only right after a reset there is no step result yet.
            assert self._steps == 0
            agent.step_result = {'action_name': a.NOOP, 'action_valid': True,
                                 'collisions': [], 'lightmap': None}
        if self.obs_prop.cast_shadows:
            try:
                light_block_obs = [obs_idx for key, obs_idx in per_agent_expl_idx[agent.name].items()
                                   if self[key].is_blocking_light]
                # Flatten
                light_block_obs = [x for y in light_block_obs for x in y]
                shadowed_obs = [obs_idx for key, obs_idx in per_agent_expl_idx[agent.name].items()
                                if self[key].can_be_shadowed]
                # Flatten
                shadowed_obs = [x for y in shadowed_obs for x in y]
            except AttributeError as e:
                print('Check your Keys! Only use Constants as Keys!')
                print(e)
                raise e

            # A cell blocks light as soon as one light-blocking slice is occupied there.
            obs_block_light = obsn[light_block_obs] != c.OCCUPIED_CELL
            light_block_map = Map((np.prod(obs_block_light, axis=0) != True).astype(int).squeeze())
            if self._pomdp_r:
                # Inside the POMDP window the agent sits in the centre.
                light_block_map = light_block_map.do_fov(self._pomdp_r, self._pomdp_r, max(self._level_shape))
            else:
                light_block_map = light_block_map.do_fov(*agent.pos, max(self._level_shape))

            agent.step_result['lightmap'] = light_block_map

            # Visible cells keep their value, shadowed cells become -1.
            obsn[shadowed_obs] = ((obsn[shadowed_obs] * light_block_map) + 0.) - (1 - light_block_map)
        else:
            if self._pomdp_r:
                agent.step_result['lightmap'] = np.ones(self._obs_shape)
            else:
                agent.step_result['lightmap'] = None

        per_agent_obsn[agent.name] = obsn

    if self.n_agents == 1:
        agent_name = self[c.AGENT][0].name
        obs, explained_idx = per_agent_obsn[agent_name], per_agent_expl_idx[agent_name]
    elif self.n_agents >= 2:
        obs, explained_idx = np.stack(list(per_agent_obsn.values())), per_agent_expl_idx
    else:
        raise ValueError

    return obs, explained_idx
def _do_pomdp_cutout(self, agent, obs_to_be_padded):
    """
    Cut the agent-centred partial view out of a full-level observation.

    The window reaches ``self._pomdp_r`` cells around the agent on both grid axes. Cells of the
    window that lie outside of the level are zero-padded, so the two trailing axes of the result
    always have the shape ``(self.pomdp_diameter, self.pomdp_diameter)``.

    :param agent: Object providing the window centre through ``agent.x`` and ``agent.y``.
    :param obs_to_be_padded: Three-dimensional array; the first axis is kept untouched, the other
                             two are indexed by level coordinates.
    :return: The cut-out (and, where necessary, zero-padded) observation.
    """
    assert obs_to_be_padded.ndim == 3
    ra, d = self._pomdp_r, self.pomdp_diameter
    x0, x1 = max(0, agent.x - ra), min(agent.x + ra + 1, self._level_shape[0])
    y0, y1 = max(0, agent.y - ra), min(agent.y + ra + 1, self._level_shape[1])
    oobs = obs_to_be_padded[:, x0:x1, y0:y1]
    if oobs.shape[1:] != (d, d):
        # Pad in front by exactly what was clipped at the lower border; whatever is still missing
        # to reach the diameter is appended behind. Computing both sides independently also covers
        # levels smaller than the view, where the window is clipped at both borders at once.
        # NOTE(review): presumably pomdp_diameter == 2 * _pomdp_r + 1 -- confirm at its definition.
        x0_pad = x0 - (agent.x - ra)
        x1_pad = d - oobs.shape[1] - x0_pad
        y0_pad = y0 - (agent.y - ra)
        y1_pad = d - oobs.shape[2] - y0_pad
        oobs = np.pad(oobs, ((0, 0), (x0_pad, x1_pad), (y0_pad, y1_pad)), 'constant')
    return oobs
def get_all_tiles_with_collisions(self) -> List[Floor]:
    """
    Collect all floor tiles on which a collision takes place.

    :return: Every tile of ``self[c.FLOOR]`` that hosts more than one guest that can collide.
    :rtype: List[Floor]
    """
    return [tile for tile in self[c.FLOOR] if len(tile.guests_that_can_collide) > 1]
def _do_move_action(self, agent: Agent, action: Action) -> (dict, dict):
    """
    Try to carry out a movement action for the given agent and build the matching reward entry.

    :param agent: The agent that wants to move.
    :param action: The movement action; its identifier is used for logging and as reward reason.
    :return: ``(valid, reward)`` where ``reward`` is a dict with the keys 'value', 'reason' and 'info'.
    """
    target_tile, valid = self._check_agent_move(agent, action)
    if valid:
        # Does not collide with the level boundaries
        valid = agent.move(target_tile)
        if valid:
            # This will spam your logs, beware!
            self.print(f'{agent.name} just moved {action.identifier} from {agent.last_pos} to {agent.pos}.')
            move_info = {f'{agent.name}_move': 1, 'move': 1}
        else:
            valid = c.NOT_VALID
            self.print(f'{agent.name} just hit the wall at {agent.pos}. ({action.identifier})')
            move_info = {f'{agent.name}_wall_collide': 1, 'wall_collide': 1}
    else:
        # Agent seems to be trying to leave the level
        self.print(f'{agent.name} tried to leave the level {agent.pos}. ({action.identifier})')
        move_info = {f'{agent.name}_wall_collide': 1, 'wall_collide': 1}

    if valid:
        reward_value = self.rewards_base.MOVEMENTS_VALID
    else:
        reward_value = self.rewards_base.MOVEMENTS_FAIL
    return valid, {'value': reward_value, 'reason': action.identifier, 'info': move_info}
def _check_agent_move(self, agent, action: Action) -> (Floor, bool):
    """
    Resolve the tile an agent would end up on when performing the given movement action.

    :param agent: The moving agent (provides ``x``, ``y`` and ``tile``).
    :param action: Movement action whose identifier is translated into a coordinate offset.
    :return: ``(tile, valid)``. If the target position holds no floor tile or a blocking guest,
             the agent's current tile is returned instead of the target tile.
    """
    x_diff, y_diff = a.resolve_movement_action_to_coords(action.identifier)
    target_tile = self[c.FLOOR].by_pos((agent.x + x_diff, agent.y + y_diff))
    if not target_tile or np.any([guest.is_blocking for guest in target_tile.guests]):
        # NOTE(review): this path also reports c.VALID; the failed move then only shows up when the
        # caller tries to move the agent onto its own tile. Confirm that this is intended.
        return agent.tile, c.VALID
    return target_tile, c.VALID
def build_reward_result(self, global_env_rewards: list) -> (int, dict):
    """
    Combine per-agent rewards, collision penalties and global rewards into the step reward.

    Rewards returned by ``per_agent_reward_hook`` and one collision reward per colliding agent are
    appended to ``agent.step_result['rewards']`` before everything is summed up.

    :param global_env_rewards: Numeric rewards that are not bound to a single agent.
    :return: ``(reward, info)``. ``reward`` is a list with one value per agent if
             ``self.individual_rewards`` is set, a single summed value otherwise.
    """
    collision_info = defaultdict(lambda: 0.0)

    # Gather additional sub-env rewards and calculate collisions
    for agent in self[c.AGENT]:
        for hook_reward in self.per_agent_reward_hook(agent):
            agent.step_result['rewards'].append(hook_reward)
        collisions = agent.step_result['collisions']
        if collisions:
            self.print(f't = {self._steps}\t{agent.name} has collisions with {collisions}')
            collision_info[c.COLLISION] += 1
            agent.step_result['rewards'].append({'value': self.rewards_base.COLLISION,
                                                 'reason': c.COLLISION,
                                                 'info': {f'{agent.name}_{c.COLLISION}': 1}})

    comb_rewards = dict()
    for agent in self[c.AGENT]:
        comb_rewards[agent.name] = sum(entry['value'] for entry in agent.step_result['rewards'])

    # Combine the per-agent info dicts into a global one.
    # NOTE: entries sharing a key overwrite each other, they are not added up.
    combined_info_dict = dict()
    for agent in self[c.AGENT]:
        for entry in agent.step_result['rewards']:
            combined_info_dict.update(entry['info'])
    combined_info_dict.update(collision_info)

    global_reward_sum = sum(global_env_rewards)
    if not self.individual_rewards:
        reward = sum(comb_rewards.values()) + global_reward_sum
        self.print(f"reward is {reward}")
        return reward, combined_info_dict

    self.print(f"rewards are {comb_rewards}")
    reward = [agent_reward + global_reward_sum for agent_reward in comb_rewards.values()]
    return reward, combined_info_dict
def start_recording(self):
    """Switch episode recording on and return the new state of the flag."""
    setattr(self, '_record_episodes', True)
    return self._record_episodes
def stop_recording(self):
    """Switch episode recording off; returns True once the flag is cleared."""
    setattr(self, '_record_episodes', False)
    return not self._record_episodes
# noinspection PyGlobalUndefined
def render(self, mode='human'):
    """
    Draw the current state of the environment.

    The renderer is created on first use; its classes are imported lazily and bound as module
    globals so that later calls can use them without importing again.

    :param mode: Not evaluated by this implementation.
    :return: Whatever ``self._renderer.render`` returns.
    """
    global Renderer, RenderEntity
    if not self._renderer:  # lazy init
        from environments.factory.base.renderer import Renderer, RenderEntity
        self._renderer = Renderer(self._level_shape, view_radius=self._pomdp_r, fps=7)

    # noinspection PyUnboundLocalVariable
    wall_sprites = [RenderEntity('wall', wall.pos) for wall in self[c.WALLS]]

    agent_sprites = []
    for number, agent in enumerate(self[c.AGENT], start=1):
        asset_name, asset_state = h.asset_str(agent)
        agent_sprites.append(RenderEntity(asset_name, agent.pos, 1, 'none', asset_state, number,
                                          agent.step_result['lightmap']))

    return self._renderer.render(wall_sprites + self.render_assets_hook() + agent_sprites)
def save_params(self, filepath: Path):
    """
    Write ``self.params`` as indented JSON to ``filepath``.

    Missing parent directories are created first.

    :param filepath: Target file; an existing file is overwritten.
    """
    # noinspection PyProtectedMember
    params = self.params
    target_dir = filepath.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    with filepath.open('w') as param_file:
        simplejson.dump(params, param_file, indent=4, namedtuple_as_object=True)
def get_injected_agents(self) -> list:
    """Return ``self._initialized_injections`` if that attribute exists, an empty list otherwise."""
    return getattr(self, '_initialized_injections', [])
def _summarize_state(self, stateless_entities=False):
    """
    Build a summary dict of the current step.

    :param stateless_entities: Selects which entity groups are included; only groups whose
                               ``is_stateless`` flag equals this value are summarized.
    :return: Dict holding the step counter and one ``summarize_states()`` result per selected
             group; all keys are prefixed with ``REC_TAC``.
    """
    summary = {f'{REC_TAC}step': self._steps}
    selected_groups = (group for group in self._entities if group.is_stateless == stateless_entities)
    for group in selected_groups:
        summary[f'{REC_TAC}{group.name}'] = group.summarize_states()
    return summary
def print(self, string):
    """Print ``string``, but only if ``self.verbose`` is truthy."""
    if not self.verbose:
        return
    print(string)
# Properties which are called by the base class to extend beyond attributes of the base class
@property
@abc.abstractmethod
def actions_hook(self) -> Union[Action, List[Action]]:
    """
    Additional actions contributed by a subclass.

    When inheriting from this base class, you must implement this method!

    :return: A list of Action objects holding all additional actions; the base implementation
             contributes none.
    :rtype: List[Action]
    """
    no_additional_actions = []
    return no_additional_actions
@property
@abc.abstractmethod
def entities_hook(self) -> Dict[(str, Entities)]:
    """
    Additional entity collections contributed by a subclass.

    When inheriting from this base class, you must implement this method!

    :return: A dict of additional entity collections; the base implementation contributes none.
    :rtype: Dict[str, Entities]
    """
    no_additional_entities = {}
    return no_additional_entities
# Functions which provide additions to functions of the base class
# Always call super!!!!!!
@abc.abstractmethod
def reset_hook(self) -> None:
    """Extension point for subclasses regarding the reset; the base implementation does nothing."""
    return None
@abc.abstractmethod
def pre_step_hook(self) -> None:
    """Extension point for subclasses ahead of a step; the base implementation does nothing."""
    return None
@abc.abstractmethod
def do_additional_actions(self, agent: Agent, action: Action) -> (bool, dict):
    """
    Extension point for actions that are not handled by the base class.

    :param agent: The acting agent.
    :param action: The action to perform.
    :return: The base implementation handles nothing and returns None; the annotation suggests
             that subclasses answer with ``(valid, reward)``.
    """
    unhandled = None
    return unhandled
@abc.abstractmethod
def step_hook(self) -> (List[dict], dict):
    """
    Extension point for subclasses during a step.

    :return: A pair of an empty list and an empty dict in the base implementation.
    """
    return list(), dict()
@abc.abstractmethod
def check_additional_done(self) -> (bool, dict):
    """
    Extension point for additional termination criteria.

    :return: ``(done, info)``; the base implementation never signals done and adds no info.
    """
    done, info = False, {}
    return done, info
@abc.abstractmethod
def observations_hook(self) -> Dict[str, np.typing.ArrayLike]:
    """
    Extension point for additional observations.

    :return: Dict of additional observation arrays; empty in the base implementation.
    """
    return dict()
@abc.abstractmethod
def per_agent_reward_hook(self, agent: Agent) -> List[dict]:
    """
    Extension point for additional rewards of a single agent.

    :param agent: The agent the rewards are collected for.
    :return: List of reward dicts; empty in the base implementation.
    """
    return list()
@abc.abstractmethod
def post_step_hook(self) -> List[dict]:
    """
    Extension point for subclasses after a step.

    :return: List of dicts; empty in the base implementation.
    """
    return list()
@abc.abstractmethod
def per_agent_raw_observations_hook(self, agent) -> Dict[str, np.typing.ArrayLike]:
    """
    Provide additional raw observations for a single agent.

    If ``self.obs_prop.show_global_position_info`` is set, an array of shape ``self._obs_shape``
    is added that is zero everywhere except for ``[:2, 0]``, which holds the encoding of the
    agent's global position object.

    :param agent: The agent the observation is built for.
    :return: Dict mapping observation keys to arrays; empty if no global position is requested.
    """
    if not self.obs_prop.show_global_position_info:
        return {}
    global_pos_obs = np.zeros(self._obs_shape)
    global_pos_obs[:2, 0] = self[c.GLOBAL_POSITION].by_entity(agent).encoding
    return {c.GLOBAL_POSITION: global_pos_obs}
@abc.abstractmethod
def render_assets_hook(self):
    """
    Extension point for additional render entities.

    :return: List of additional assets to draw; empty in the base implementation.
    """
    return list()

View File

@ -1,338 +0,0 @@
from collections import defaultdict
from typing import Union, List
import numpy as np
from environments import helpers as h
from environments.helpers import Constants as c
##########################################################################
# ##################### Base Object Building Blocks ######################### #
##########################################################################
# TODO: Missing Documentation
class Object:
    """
    General objects for organisation and maintenance such as Actions etc.

    Every instance carries a name: ``ClassName[str_ident]`` if an identifier was passed, otherwise
    ``ClassName#<n>`` with a running number per class name. Comparison is done against the
    identifier, so ``obj == 'north'`` holds for an object created with ``str_ident='north'``.
    As ``__eq__`` is defined without ``__hash__``, instances are not hashable.
    """

    # Running counter per class name; numbers the instances that were created without identifier.
    _u_idx = defaultdict(lambda: 0)

    def __bool__(self):
        # Always truthy, even for subclasses that define __len__.
        return True

    @property
    def name(self):
        """Name that was generated on construction."""
        return self._name

    @property
    def identifier(self):
        """The ``str_ident`` passed on construction; falls back to the name if none was given."""
        if self._str_ident is not None:
            return self._str_ident
        else:
            return self._name

    def __init__(self, str_ident: Union[str, None] = None, **kwargs):
        """
        :param str_ident: Optional identifier that becomes part of the name.
        :param kwargs: Not evaluated; a notice is printed if any are passed.
        """
        self._str_ident = str_ident
        cls_name = self.__class__.__name__
        if self._str_ident is not None:
            self._name = f'{cls_name}[{self._str_ident}]'
        else:
            self._name = f'{cls_name}#{Object._u_idx[cls_name]}'
            Object._u_idx[cls_name] += 1
        if kwargs:
            print(f'Following kwargs were passed, but ignored: {kwargs}')

    def __repr__(self):
        return f'{self.name}'

    def __eq__(self, other) -> bool:
        return other == self.identifier
# Base
# TODO: Missing Documentation
class EnvObject(Object):
    """Objects that hold observable information, but have no position on the env grid (inventories etc.)."""

    _u_idx = defaultdict(lambda: 0)

    @property
    def can_collide(self):
        return False

    @property
    def encoding(self):
        """Value that represents this object in observations."""
        return c.OCCUPIED_CELL

    def __init__(self, collection, **kwargs):
        """
        :param collection: The collection this object is part of.
        :param kwargs: Passed on to ``Object``.
        """
        super().__init__(**kwargs)
        self._collection = collection

    def change_parent_collection(self, other_collection):
        """
        Move this object from its current collection into ``other_collection``.

        :return: Whether the stored collection compares equal to ``other_collection`` afterwards.
        """
        previous_collection = self._collection
        other_collection.add_item(self)
        previous_collection.delete_env_object(self)
        self._collection = other_collection
        return self._collection == other_collection
# With Rendering
# TODO: Missing Documentation
class Entity(EnvObject):
    """Full env entity that lives on the env grid (doors, items, dirt piles etc.)."""

    @property
    def is_blocking(self):
        return False

    @property
    def can_collide(self):
        return False

    @property
    def x(self):
        x_coordinate, _ = self.pos
        return x_coordinate

    @property
    def y(self):
        _, y_coordinate = self.pos
        return y_coordinate

    @property
    def pos(self):
        """Position of the tile this entity is placed on."""
        return self._tile.pos

    @property
    def tile(self):
        return self._tile

    def __init__(self, tile, *args, **kwargs):
        """
        :param tile: The tile to place the entity on; the entity is registered there as a guest.
        :param args: Passed on to ``EnvObject``.
        :param kwargs: Passed on to ``EnvObject``.
        """
        super().__init__(*args, **kwargs)
        self._tile = tile
        tile.enter(self)

    def summarize_state(self) -> dict:
        """Return name, position, tile name and collision flag as plain python types."""
        return {'name': str(self.name),
                'x': int(self.x),
                'y': int(self.y),
                'tile': str(self.tile.name),
                'can_collide': bool(self.can_collide)}

    def __repr__(self):
        return f'{super().__repr__()}(@{self.pos})'
# TODO: Missing Documentation
class MoveableEntity(Entity):
    """Entity that can change its tile and remembers the tile it came from."""

    @property
    def last_tile(self):
        return self._last_tile

    @property
    def last_pos(self):
        """Position of the previous tile, ``c.NO_POS`` if the entity has not moved yet."""
        return self._last_tile.pos if self._last_tile else c.NO_POS

    @property
    def direction_of_view(self):
        """Component-wise difference of the last position and the current position."""
        (last_x, last_y), (curr_x, curr_y) = self.last_pos, self.pos
        return last_x - curr_x, last_y - curr_y

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._last_tile = None

    def move(self, next_tile):
        """
        Move the entity onto ``next_tile`` and notify its collection about the change.

        :param next_tile: The tile to move to.
        :return: ``c.VALID`` if the entity moved, ``c.NOT_VALID`` if it already is on that tile.
        """
        curr_tile = self.tile
        if curr_tile == next_tile:
            return c.NOT_VALID
        next_tile.enter(self)
        curr_tile.leave(self)
        self._tile = next_tile
        self._last_tile = curr_tile
        # Has to happen after the tiles were updated, as the collection reads pos and last_pos.
        self._collection.notify_change_to_value(self)
        return c.VALID
# Can Move
# TODO: Missing Documentation
class BoundingMixin(Object):
    """Mixin for objects that belong to exactly one other entity."""

    @property
    def bound_entity(self):
        return self._bound_entity

    def __init__(self, entity_to_be_bound, *args, **kwargs):
        """
        :param entity_to_be_bound: The owning entity; must not be None.
        :param args: Passed on to the next class in the MRO.
        :param kwargs: Passed on to the next class in the MRO.
        """
        super().__init__(*args, **kwargs)
        assert entity_to_be_bound is not None
        self._bound_entity = entity_to_be_bound

    @property
    def name(self):
        """Own name, extended by the name of the bound entity in parentheses."""
        own_name = super().name
        return f'{own_name}({self._bound_entity.name})'

    def belongs_to_entity(self, entity):
        """Whether ``entity`` compares equal to the bound entity."""
        return entity == self.bound_entity
##########################################################################
# ####################### Objects and Entities ######################### #
##########################################################################
class Action(Object):
    """An action of the environment; adds no behaviour of its own to ``Object``."""

    def __init__(self, *args, **kwargs):
        super(Action, self).__init__(*args, **kwargs)
class PlaceHolder(Object):
    """
    Dummy object that stands for a constant (or named) fill value in an observation.

    NOTE(review): ``PlaceHolders.from_values`` passes the collection as first positional argument,
    which ``Object.__init__`` would take as ``str_ident`` -- confirm the intended base class.
    """

    def __init__(self, *args, fill_value=0, **kwargs):
        """
        :param fill_value: The value this placeholder stands for.
        :param args: Passed on to ``Object``.
        :param kwargs: Passed on to ``Object``.
        """
        super().__init__(*args, **kwargs)
        self._fill_value = fill_value

    @property
    def can_collide(self):
        return False

    @property
    def fill_value(self):
        """The fill value passed on construction; ``PlaceHolders.as_array`` reads this attribute."""
        return self._fill_value

    @property
    def encoding(self):
        """Same as ``fill_value``."""
        return self._fill_value

    @property
    def name(self):
        # All placeholders share the same name.
        return "PlaceHolder"
class GlobalPosition(BoundingMixin, EnvObject):
    """Observable position of the bound entity, optionally divided by the level shape."""

    @property
    def encoding(self):
        """Position of the bound entity; element-wise divided by the level shape if normalized."""
        if not self._normalized:
            return self.bound_entity.pos
        return tuple(np.divide(self._bound_entity.pos, self._level_shape))

    def __init__(self, level_shape: (int, int), *args, normalized: bool = True, **kwargs):
        """
        :param level_shape: Shape the position is divided by when ``normalized`` is set.
        :param args: Passed on to ``BoundingMixin`` / ``EnvObject``.
        :param normalized: Whether ``encoding`` returns the position divided by ``level_shape``.
        :param kwargs: Passed on to ``BoundingMixin`` / ``EnvObject``.
        """
        super().__init__(*args, **kwargs)
        self._level_shape = level_shape
        self._normalized = normalized
class Floor(EnvObject):
    """A single tile of the level that keeps track of the entities (guests) placed on it."""

    @property
    def neighboring_floor_pos(self):
        return [tile.pos for tile in self.neighboring_floor]

    @property
    def neighboring_floor(self):
        """Neighboring tiles of the own collection; looked up on first access and then reused."""
        if not self._neighboring_floor:
            candidates = (self._collection.by_pos(np.add(self.pos, offset))
                          for offset in h.POS_MASK.reshape(-1, 2)
                          if not np.all(offset == [0, 0]))
            self._neighboring_floor = [tile for tile in candidates if tile]
        return self._neighboring_floor

    @property
    def encoding(self):
        return c.FREE_CELL

    @property
    def guests_that_can_collide(self):
        return [guest for guest in self.guests if guest.can_collide]

    @property
    def guests(self):
        """View on all entities currently registered on this tile."""
        return self._guests.values()

    @property
    def x(self):
        return self.pos[0]

    @property
    def y(self):
        return self.pos[1]

    @property
    def pos(self):
        return self._pos

    def __init__(self, pos, *args, **kwargs):
        """
        :param pos: Position of the tile; stored as tuple.
        :param args: Passed on to ``EnvObject``.
        :param kwargs: Passed on to ``EnvObject``.
        """
        super().__init__(*args, **kwargs)
        self._guests = dict()
        self._pos = tuple(pos)
        self._neighboring_floor: List[Floor] = list()

    def __len__(self):
        return len(self._guests)

    def is_empty(self):
        return len(self._guests) == 0

    def is_occupied(self):
        return len(self._guests) > 0

    def enter(self, guest):
        """Register ``guest`` by its name; returns False if that name is registered already."""
        if guest.name in self._guests:
            return False
        self._guests[guest.name] = guest
        return True

    def leave(self, guest):
        """Unregister ``guest``; returns False if it was not registered on this tile."""
        try:
            del self._guests[guest.name]
        except (ValueError, KeyError):
            return False
        else:
            return True

    def __repr__(self):
        return f'{self.name}(@{self.pos})'

    def summarize_state(self, **_):
        return {'name': self.name, 'x': int(self.x), 'y': int(self.y)}
class Wall(Floor):
    """Tile that can collide and is encoded as an occupied cell."""

    @property
    def can_collide(self):
        return True

    @property
    def encoding(self):
        return c.OCCUPIED_CELL
class Agent(MoveableEntity):
    """Movable entity that can collide and carries the result of its latest step."""

    @property
    def can_collide(self):
        return True

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.clear_temp_state()

    # noinspection PyAttributeOutsideInit
    def clear_temp_state(self):
        """Drop the stored step result."""
        self.step_result = None

    def summarize_state(self):
        """Entity summary, extended by validity and name of the last action taken."""
        summary = super().summarize_state()
        summary['valid'] = bool(self.step_result['action_valid'])
        summary['action'] = str(self.step_result['action_name'])
        return summary

View File

@ -1,517 +0,0 @@
import numbers
import random
from abc import ABC
from typing import List, Union, Dict, Tuple
import numpy as np
import six
from environments.factory.base.objects import Entity, Floor, Agent, Action, Wall, PlaceHolder, GlobalPosition, \
Object, EnvObject
from environments.utility_classes import MovementProperties
from environments import helpers as h
from environments.helpers import Constants as c
##########################################################################
# ################## Base Collections Definition ####################### #
##########################################################################
class ObjectCollection:
    """
    Ordered, name-keyed container for objects of one accepted type.

    Items can be looked up by name or by integer position; unknown keys and positions yield None
    instead of raising.
    """

    _accepted_objects = Object
    _stateless_entities = False

    @property
    def is_stateless(self):
        return self._stateless_entities

    @property
    def name(self):
        return f'{self.__class__.__name__}'

    def __init__(self, *args, **kwargs):
        # Positional and keyword arguments are accepted, but not evaluated here.
        self._collection = dict()

    def __len__(self):
        return len(self._collection)

    def __iter__(self):
        return iter(self.values())

    def add_item(self, other: _accepted_objects):
        """Store ``other`` under its name; an item stored under that name before is replaced."""
        assert isinstance(other, self._accepted_objects), \
            f'All item names have to be of type {self._accepted_objects}, but were {other.__class__}.,'
        self._collection[other.name] = other
        return self

    def add_additional_items(self, others: List[_accepted_objects]):
        for item in others:
            self.add_item(item)
        return self

    def keys(self):
        return self._collection.keys()

    def values(self):
        return self._collection.values()

    def items(self):
        return self._collection.items()

    def _get_index(self, item):
        """Position of the first stored value comparing equal to ``item``, None if there is none."""
        matches = (position for position, value in enumerate(self._collection.values()) if value == item)
        return next(matches, None)

    def __getitem__(self, item):
        if isinstance(item, (int, np.int64, np.int32)):
            # Integers address items by position; negative values count from the end.
            if item < 0:
                item = len(self._collection) - abs(item)
            at_position = (value for position, value in enumerate(self._collection.values())
                           if position == item)
            return next(at_position, None)
        return self._collection.get(item)

    def __repr__(self):
        return f'{self.__class__.__name__}[{self._collection}]'
class EnvObjectCollection(ObjectCollection):
    """
    Collection of env objects that additionally maintains an observation array.

    Changes to the array are not written immediately; they are queued as (index, value) pairs in
    ``_lazy_eval_transforms`` and applied when ``as_array`` is called.
    """

    _accepted_objects = EnvObject

    @property
    def encodings(self):
        """Encodings of all items, in collection order."""
        return [x.encoding for x in self]

    def __init__(self, obs_shape: (int, int), *args,
                 individual_slices: bool = False,
                 is_blocking_light: bool = False,
                 can_collide: bool = False,
                 can_be_shadowed: bool = True, **kwargs):
        """
        :param obs_shape: Shape of a single slice of the observation array.
        :param individual_slices: If set, every added item gets an own slice in the array.
        :param is_blocking_light: Stored as attribute; not evaluated in this class.
        :param can_collide: Stored as attribute; not evaluated in this class.
        :param can_be_shadowed: Stored as attribute; not evaluated in this class.
        """
        super(EnvObjectCollection, self).__init__(*args, **kwargs)
        self._shape = obs_shape
        # Created by the first call of add_item.
        self._array = None
        self._individual_slices = individual_slices
        # Queue of pending (index, value) array updates.
        self._lazy_eval_transforms = []
        self.is_blocking_light = is_blocking_light
        self.can_be_shadowed = can_be_shadowed
        self.can_collide = can_collide

    def add_item(self, other: EnvObject):
        """Add ``other``, grow the array if needed and queue the write of its encoding."""
        super(EnvObjectCollection, self).add_item(other)
        if self._array is None:
            self._array = np.zeros((1, *self._shape))
        else:
            if self._individual_slices:
                # One additional slice per item.
                self._array = np.vstack((self._array, np.zeros((1, *self._shape))))
        self.notify_change_to_value(other)

    def as_array(self):
        """Apply all queued updates and return the observation array."""
        if self._lazy_eval_transforms:
            idxs, values = zip(*self._lazy_eval_transforms)
            # np.put is relied upon to apply the queued values in the given order.
            np.put(self._array, idxs, values)
            self._lazy_eval_transforms = []
        return self._array

    def summarize_states(self):
        """Return the state summaries of all items."""
        return [entity.summarize_state() for entity in self.values()]

    def notify_change_to_free(self, env_object: EnvObject):
        """Queue an update that sets the entry of ``env_object`` to ``c.FREE_CELL``."""
        self._array_change_notifyer(env_object, value=c.FREE_CELL)

    def notify_change_to_value(self, env_object: EnvObject):
        """Queue an update that sets the entry of ``env_object`` to its encoding."""
        self._array_change_notifyer(env_object)

    def _array_change_notifyer(self, env_object: EnvObject, value=None):
        """
        Queue array updates for ``env_object``.

        :param value: The value to write; the encoding of the object if None.
        """
        pos = self._get_index(env_object)
        value = value if value is not None else env_object.encoding
        # NOTE(review): (pos, value) is queued here and once more in the else-branch below, while the
        # individual-slices branch queues a tuple that itself contains ``value`` as index -- this
        # looks unfinished; confirm the intended index layout before relying on it.
        self._lazy_eval_transforms.append((pos, value))
        if self._individual_slices:
            idx = (self._get_index(env_object) * np.prod(self._shape[1:]), value)
            self._lazy_eval_transforms.append((idx, value))
        else:
            self._lazy_eval_transforms.append((pos, value))

    def _refresh_arrays(self):
        """Queue an update for every item, using its position in the collection as index."""
        poss, values = zip(*[(idx, x.encoding) for idx,x in enumerate(self.values())])
        for pos, value in zip(poss, values):
            self._lazy_eval_transforms.append((pos, value))

    def __delitem__(self, name):
        """Remove the item stored under ``name`` from array and collection."""
        idx, obj = next((i, obj) for i, obj in enumerate(self) if obj.name == name)
        if self._individual_slices:
            self._array = np.delete(self._array, idx, axis=0)
        else:
            self.notify_change_to_free(self._collection[name])
            # Dirty hack to check whether this class is used without being subclassed as EntityCollection.
            # In that case the array has to be refreshed, since positions in the observation array
            # are the result of an enumeration; they can override each other.
            # Todo: Find a better solution
            if not issubclass(self.__class__, EntityCollection) and issubclass(self.__class__, EnvObjectCollection):
                self._refresh_arrays()
        del self._collection[name]

    def delete_env_object(self, env_object: EnvObject):
        """Remove ``env_object`` by its name."""
        del self[env_object.name]

    def delete_env_object_by_name(self, name):
        """Remove the item stored under ``name``."""
        del self[name]
class EntityCollection(EnvObjectCollection, ABC):
    """Collection of entities that live on tiles; array updates are addressed by (slice, x, y)."""

    _accepted_objects = Entity

    @classmethod
    def from_tiles(cls, tiles, *args, entity_kwargs=None, **kwargs):
        """
        Create a collection holding one entity per tile.

        :param tiles: Tiles to place the entities on; their position in the sequence is used as identifier.
        :param entity_kwargs: Optional keyword arguments for every created entity.
        :return: The filled collection.
        """
        # objects_name = cls._accepted_objects.__name__
        collection = cls(*args, **kwargs)
        extra_kwargs = entity_kwargs if entity_kwargs is not None else {}
        collection.add_additional_items(
            [cls._accepted_objects(tile, collection, str_ident=number, **extra_kwargs)
             for number, tile in enumerate(tiles)]
        )
        return collection

    @classmethod
    def from_argwhere_coordinates(cls, positions: [(int, int)], tiles, *args, entity_kwargs=None, **kwargs, ):
        """Like ``from_tiles``, but the tiles are looked up by position in ``tiles`` first."""
        selected_tiles = [tiles.by_pos(position) for position in positions]
        return cls.from_tiles(selected_tiles, *args, entity_kwargs=entity_kwargs, **kwargs)

    @property
    def positions(self):
        return [entity.pos for entity in self]

    @property
    def tiles(self):
        return [entity.tile for entity in self]

    def __init__(self, level_shape, *args, **kwargs):
        super().__init__(level_shape, *args, **kwargs)
        self._lazy_eval_transforms = []

    def __delitem__(self, name):
        """Take the entity stored under ``name`` off its tile, then remove it from the collection."""
        entity = next(candidate for candidate in self if candidate.name == name)
        entity.tile.leave(entity)
        super().__delitem__(name)

    def as_array(self):
        """Apply all queued (slice, x, y) updates and return the observation array."""
        if self._lazy_eval_transforms:
            idxs, values = zip(*self._lazy_eval_transforms)
            # Todo: Export the index building in a separate function
            flat_idxs = [np.ravel_multi_index(idx, self._array.shape) for idx in idxs]
            np.put(self._array, flat_idxs, values)
            self._lazy_eval_transforms = []
        return self._array

    def _array_change_notifyer(self, entity, pos=None, value=None):
        """
        Queue an array update for ``entity``.

        :param pos: Position to write to; the position of the entity if None.
        :param value: Value to write; the encoding of the entity if None.
        """
        # Todo: Export the construction in a separate function
        if pos is None:
            pos = entity.pos
        if value is None:
            value = entity.encoding
        x, y = pos
        slice_idx = self._get_index(entity) if self._individual_slices else 0
        self._lazy_eval_transforms.append(((slice_idx, x, y), value))

    def by_pos(self, pos: Tuple[int, int]):
        """Return the first entity at ``pos``, None if there is none."""
        return next((item for item in self if item.pos == tuple(pos)), None)
class BoundEnvObjCollection(EnvObjectCollection, ABC):
    """Collection that belongs to one entity and can look up its items by their owning entity."""

    def __init__(self, entity_to_be_bound, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._bound_entity = entity_to_be_bound

    def belongs_to_entity(self, entity):
        return self._bound_entity == entity

    def by_entity(self, entity):
        """First item that belongs to ``entity``, None if there is none."""
        return next((item for item in self if item.belongs_to_entity(entity)), None)

    def idx_by_entity(self, entity):
        """Position of the first item that belongs to ``entity``, None if there is none."""
        owned = (position for position, item in enumerate(self) if item.belongs_to_entity(entity))
        return next(owned, None)

    def as_array_by_entity(self, entity):
        """Slice of the raw array at the position of the item that belongs to ``entity``."""
        slice_idx = self.idx_by_entity(entity)
        return self._array[slice_idx]
class MovingEntityObjectCollection(EntityCollection, ABC):
    """Entity collection whose items move; the cell an entity left is reset to free."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def notify_change_to_value(self, entity):
        """Queue the write of the entity's encoding and the reset of its previous position."""
        super().notify_change_to_value(entity)
        if entity.last_pos == c.NO_POS:
            # Entity did not move yet, there is nothing to reset.
            return
        try:
            self._array_change_notifyer(entity, entity.last_pos, value=c.FREE_CELL)
        except AttributeError:
            pass
##########################################################################
# ################# Objects and Entity Collection ###################### #
##########################################################################
class GlobalPositions(EnvObjectCollection):
    """Collection of global position objects, one per agent."""

    _accepted_objects = GlobalPosition

    def __init__(self, *args, **kwargs):
        super().__init__(*args, is_per_agent=True, individual_slices=True, is_blocking_light=False,
                         can_be_shadowed=False, can_collide=False, **kwargs)

    def as_array(self):
        # FIXME DEBUG!!! make this lazy?
        # NOTE(review): relies on every item providing ``as_array()`` -- confirm GlobalPosition does.
        return np.stack([global_position.as_array() for global_position in self])

    def as_array_by_entity(self, entity):
        # FIXME DEBUG!!! make this lazy?
        # NOTE(review): ``entity`` is not evaluated; the stack of all items is returned.
        return np.stack([global_position.as_array() for global_position in self])

    def spawn_global_position_objects(self, agents):
        """Create one global position object per agent and add it to this collection."""
        # Todo, change to 'from xy'-form
        spawned = [self._accepted_objects(self._shape, agent, self) for agent in agents]
        # noinspection PyTypeChecker
        self.add_additional_items(spawned)

    def idx_by_entity(self, entity):
        """Position of the first item that belongs to ``entity``, None if there is none."""
        owned = (position for position, item in enumerate(self) if item.belongs_to_entity(entity))
        return next(owned, None)

    def by_entity(self, entity):
        """First item that belongs to ``entity``, None if there is none."""
        return next((item for item in self if item.belongs_to_entity(entity)), None)
class PlaceHolders(EnvObjectCollection):
    """Collection of placeholder objects whose array is filled with constant or random values."""

    _accepted_objects = PlaceHolder

    def __init__(self, *args, **kwargs):
        """
        :param args: Passed on to ``EnvObjectCollection``.
        :param kwargs: Passed on to ``EnvObjectCollection``; must not contain 'individual_slices',
                       which is fixed to False here.
        """
        assert 'individual_slices' not in kwargs, 'Keyword - "individual_slices": "True" and must not be altered'
        kwargs.update(individual_slices=False)
        super().__init__(*args, **kwargs)

    @classmethod
    def from_values(cls, values: Union[str, numbers.Number, List[Union[str, numbers.Number]]],
                    *args, object_kwargs=None, **kwargs):
        """
        Create a collection holding one placeholder per value.

        :param values: A single fill value or a list of fill values.
        :param object_kwargs: Optional keyword arguments for every created placeholder.
        :return: The filled collection.
        """
        # objects_name = cls._accepted_objects.__name__
        if isinstance(values, (str, numbers.Number)):
            values = [values]
        collection = cls(*args, **kwargs)
        objects = [cls._accepted_objects(collection, str_ident=i, fill_value=value,
                                         **object_kwargs if object_kwargs is not None else {})
                   for i, value in enumerate(values)]
        collection.add_additional_items(objects)
        return collection

    # noinspection DuplicatedCode
    def as_array(self):
        """
        Fill the array according to the placeholders and return it.

        Numeric fill values are written to the slice at the placeholder's position; the strings
        'normal' / 'n' (case-insensitive) overwrite the whole array with standard normal samples.

        :raises ValueError: For any other string.
        :raises TypeError: If a fill value is neither a number nor a string.
        """
        for idx, placeholder in enumerate(self):
            # The fill value is exposed as ``encoding``; PlaceHolder defines no other public accessor.
            fill_value = placeholder.encoding
            if isinstance(fill_value, numbers.Number):
                # NOTE(review): individual_slices is fixed to False, so the array presumably has a
                # single slice and idx > 0 would be out of range -- confirm.
                self._array[idx][:] = fill_value
            elif isinstance(fill_value, str):
                if fill_value.lower() in ['normal', 'n']:
                    self._array[:] = np.random.normal(size=self._array.shape)
                else:
                    raise ValueError('Choose one of: ["normal", "N"]')
            else:
                raise TypeError('Objects of type "str" or "number" is required here.')
        return self._array
class Entities(ObjectCollection):
    """Container that holds whole entity collections, keyed by their name."""

    _accepted_objects = EntityCollection

    @property
    def arrays(self):
        """Observation arrays of all collections, keyed like the collections."""
        return {key: collection.as_array() for key, collection in self.items()}

    @property
    def names(self):
        return list(self._collection.keys())

    def __init__(self):
        super().__init__()

    def iter_individual_entitites(self):
        """Iterate over the items of all collections, one collection after the other."""
        return iter(entity for collection in self.values() for entity in collection)

    def add_item(self, other: dict):
        """
        Add all collections of ``other``.

        :param other: Dict mapping keys to collections; keys must not be in use already.
        """
        duplicates = [key for key in other.keys() if key in self.keys()]
        assert not any(duplicates), "This group of entities has already been added!"
        self._collection.update(other)
        return self

    def add_additional_items(self, others: Dict):
        return self.add_item(others)

    def by_pos(self, pos: (int, int)):
        """Return the entities found at ``pos``, at most one per collection that offers ``by_pos``."""
        hits = (collection.by_pos(pos) for collection in self.values() if hasattr(collection, 'by_pos'))
        return [entity for entity in hits if entity is not None]
class Walls(EntityCollection):
    """Stateless collection of wall tiles that are written to a single array slice."""

    _accepted_objects = Wall
    _stateless_entities = True

    def as_array(self):
        """Return the array; it is (re)filled from the item positions as long as it holds only zeros."""
        if not np.any(self._array):
            # Which is Faster?
            # indices = [x.pos for x in cls]
            # np.put(cls._array, [np.ravel_multi_index((0, *x), cls._array.shape) for x in indices], cls.encodings)
            xs, ys = zip(*[tile.pos for tile in self])
            self._array[0, xs, ys] = self._value
        return self._array

    def __init__(self, *args, is_blocking_light=True, **kwargs):
        super().__init__(*args, individual_slices=False,
                         can_collide=True,
                         is_blocking_light=is_blocking_light, **kwargs)
        # Value that is written to the array for every item.
        self._value = c.OCCUPIED_CELL

    @classmethod
    def from_argwhere_coordinates(cls, argwhere_coordinates, *args, **kwargs):
        """
        Create a collection holding one tile per coordinate.

        :param argwhere_coordinates: Iterable of positions.
        :return: The filled collection.
        """
        tiles = cls(*args, **kwargs)
        new_tiles = [cls._accepted_objects(pos, tiles) for pos in argwhere_coordinates]
        # noinspection PyTypeChecker
        tiles.add_additional_items(new_tiles)
        return tiles

    @classmethod
    def from_tiles(cls, tiles, *args, **kwargs):
        # Not available for tile collections.
        raise RuntimeError()
class Floors(Walls):
    """Stateless collection of floor tiles; offers shuffled views on occupied and empty tiles."""

    _accepted_objects = Floor
    _stateless_entities = True

    def __init__(self, *args, is_blocking_light=False, **kwargs):
        super().__init__(*args, is_blocking_light=is_blocking_light, **kwargs)
        self._value = c.FREE_CELL

    @property
    def occupied_tiles(self):
        """All tiles with at least one guest, in random order."""
        occupied = [tile for tile in self if tile.is_occupied()]
        random.shuffle(occupied)
        return occupied

    @property
    def empty_tiles(self) -> List[Floor]:
        """All tiles without any guest, in random order."""
        empty = [tile for tile in self if tile.is_empty()]
        random.shuffle(empty)
        return empty

    @classmethod
    def from_tiles(cls, tiles, *args, **kwargs):
        # Not available for tile collections.
        raise RuntimeError()
class Agents(MovingEntityObjectCollection):
    """Collection of all agents; created with ``can_collide=True``."""

    _accepted_objects = Agent

    def __init__(self, *args, **kwargs):
        super().__init__(*args, can_collide=True, **kwargs)

    @property
    def positions(self):
        return [agent.pos for agent in self]

    def replace_agent(self, key, agent):
        """
        Put ``agent`` in place of the agent stored under ``key``.

        The old agent is taken off its tile; the new agent takes over the old agent's name and is
        stored under it.
        """
        old_agent = self[key]
        self[key].tile.leave(self[key])
        agent._name = old_agent.name
        self._collection[agent.name] = agent
class Actions(ObjectCollection):
    """Collection of the base actions, built from the movement properties."""

    _accepted_objects = Action

    @property
    def movement_actions(self):
        """Name-keyed copy of all actions that were registered as movements."""
        return self._movement_actions

    # noinspection PyTypeChecker
    def __init__(self, movement_properties: MovementProperties):
        """
        :param movement_properties: Provides the flags ``allow_no_op``, ``allow_diagonal_movement``
                                    and ``allow_square_movement``.
        """
        self.allow_no_op = movement_properties.allow_no_op
        self.allow_diagonal_movement = movement_properties.allow_diagonal_movement
        self.allow_square_movement = movement_properties.allow_square_movement
        super().__init__()

        # Move this to Baseclass, Env init?
        make_action = self._accepted_objects
        if self.allow_square_movement:
            self.add_additional_items([make_action(str_ident=direction)
                                       for direction in h.EnvActions.square_move()])
        if self.allow_diagonal_movement:
            self.add_additional_items([make_action(str_ident=direction)
                                       for direction in h.EnvActions.diagonal_move()])
        # Snapshot before the no-op is added, so it contains movements only.
        self._movement_actions = self._collection.copy()
        if self.allow_no_op:
            self.add_additional_items([make_action(str_ident=h.EnvActions.NOOP)])

    def is_moving_action(self, action: Union[int]):
        """Whether ``action`` compares equal to one of the movement actions."""
        return action in self.movement_actions.values()

    def summarize(self):
        return [{'name': action.identifier} for action in self]
class Zones(ObjectCollection):
    """
    Collection of level zones.

    Currently disabled: the constructor raises NotImplementedError right away. The code below the
    raise is unreachable and only kept as a starting point for the rework.
    """

    @property
    def accounting_zones(self):
        """Zone slices of all entries that do not equal ``c.DANGER_ZONE``."""
        return [self[idx] for idx, name in self.items() if name != c.DANGER_ZONE]

    def __init__(self, parsed_level):
        """
        :param parsed_level: Array of level symbols (unused as long as the class is disabled).
        :raises NotImplementedError: Always.
        """
        raise NotImplementedError('This needs a Rework')
        # --- unreachable from here on ---
        super(Zones, self).__init__()
        slices = list()
        self._accounting_zones = list()
        self._danger_zones = list()
        for symbol in np.unique(parsed_level):
            if symbol == c.WALL:
                continue
            elif symbol == c.DANGER_ZONE:
                # NOTE(review): no __add__ is defined in the visible class hierarchy -- part of the rework.
                self + symbol
                slices.append(h.one_hot_level(parsed_level, symbol))
                self._danger_zones.append(symbol)
            else:
                self + symbol
                slices.append(h.one_hot_level(parsed_level, symbol))
                self._accounting_zones.append(symbol)

        self._zone_slices = np.stack(slices)

    def __getitem__(self, item):
        """Index into the stacked zone slices."""
        return self._zone_slices[item]

    def add_additional_items(self, other: Union[str, List[str]]):
        """Zones are fixed; always raises AttributeError."""
        raise AttributeError('You are not allowed to add additional Zones in runtime.')

View File

@ -1,85 +0,0 @@
import numpy as np
from environments.helpers import Constants as c
# Multipliers for transforming coordinates to other octants:
# One column per octant; the rows are passed to Map._cast_light as xx, xy, yx and yy (in that order).
mult_array = np.asarray([
    [1, 0, 0, -1, -1, 0, 0, 1],
    [0, 1, -1, 0, 0, -1, 1, 0],
    [0, 1, 1, 0, 0, -1, -1, 0],
    [1, 0, 0, 1, -1, 0, 0, -1]
])
class Map(object):
    """
    Field-of-view calculation on a 2D grid by recursive light casting.

    Cells of ``data`` equal to ``c.OCCUPIED_CELL`` block the light. Lit cells are marked in
    ``light`` with the current value of ``flag``, which is increased by every call of ``do_fov``.
    """

    def __init__(self, map_array: np.typing.ArrayLike, diamond_slope: float = 0.9):
        """
        :param map_array: Two-dimensional array of the level (its shape is unpacked into width and height).
        :param diamond_slope: Offset used when computing the slopes of a cell's left and right extremities.
        """
        self.data = map_array
        self.width, self.height = map_array.shape
        # Same shape as the map, initially filled with c.FREE_CELL.
        self.light = np.full_like(self.data, c.FREE_CELL)
        # Marker value of the current field of view.
        self.flag = c.FREE_CELL
        self.d_slope = diamond_slope

    def blocked(self, x, y):
        """Whether (x, y) lies outside of the map or holds an occupied cell."""
        return (x < 0 or y < 0
                or x >= self.width or y >= self.height
                or self.data[x, y] == c.OCCUPIED_CELL)

    def lit(self, x, y):
        """Whether (x, y) was lit by the most recent ``do_fov`` call."""
        return self.light[x, y] == self.flag

    def set_lit(self, x, y):
        """Mark (x, y) as lit; positions outside of the map are ignored."""
        if 0 <= x < self.width and 0 <= y < self.height:
            self.light[x, y] = self.flag

    def _cast_light(self, cx, cy, row, start, end, radius, xx, xy, yx, yy, id):
        """
        Recursive light casting function; scans one octant row by row.

        :param cx: x coordinate of the light source.
        :param cy: y coordinate of the light source.
        :param row: First row (distance to the source) to scan.
        :param start: Start slope of the scanned sector.
        :param end: End slope of the scanned sector; nothing is done if ``start < end``.
        :param radius: Maximum distance to scan.
        :param xx: Octant multiplier, see ``mult_array``.
        :param xy: Octant multiplier, see ``mult_array``.
        :param yx: Octant multiplier, see ``mult_array``.
        :param yy: Octant multiplier, see ``mult_array``.
        :param id: Recursion depth; only passed on, never evaluated.
        """
        if start < end:
            return
        radius_squared = radius*radius
        new_start = None
        for j in range(row, radius+1):
            dx, dy = -j-1, -j
            blocked = False
            while dx <= 0:
                dx += 1
                # Translate the dx, dy coordinates into map coordinates:
                X, Y = cx + dx * xx + dy * xy, cy + dx * yx + dy * yy
                # l_slope and r_slope store the slopes of the left and right
                # extremities of the square we're considering:
                l_slope, r_slope = (dx-self.d_slope)/(dy+self.d_slope), (dx+self.d_slope)/(dy-self.d_slope)
                if start < r_slope:
                    continue
                elif end > l_slope:
                    break
                else:
                    # Our light beam is touching this square; light it:
                    if dx*dx + dy*dy < radius_squared:
                        self.set_lit(X, Y)
                    if blocked:
                        # we're scanning a row of blocked squares:
                        if self.blocked(X, Y):
                            new_start = r_slope
                            continue
                        else:
                            blocked = False
                            start = new_start
                    else:
                        if self.blocked(X, Y) and j < radius:
                            # This is a blocking square, start a child scan:
                            blocked = True
                            self._cast_light(cx, cy, j+1, start, l_slope,
                                             radius, xx, xy, yx, yy, id+1)
                            new_start = r_slope
            # Row is scanned; do next row unless last square was blocked:
            if blocked:
                break

    def do_fov(self, x, y, radius):
        """
        Calculate lit squares from the given location and radius.

        :param x: x coordinate of the light source.
        :param y: y coordinate of the light source.
        :param radius: Maximum distance the light reaches.
        :return: ``self.light``; cells lit by this call hold the (just increased) value of ``self.flag``.
        """
        self.flag += 1
        for oct in range(8):
            self._cast_light(x, y, 1, 1.0, 0.0, radius,
                             mult_array[0, oct], mult_array[1, oct],
                             mult_array[2, oct], mult_array[3, oct], 0)
        # The source itself is always lit.
        self.light[x, y] = self.flag
        return self.light

View File

@ -1,59 +0,0 @@
from typing import Dict, List, Union
import numpy as np
from environments.factory.base.objects import Agent, Entity, Action
from environments.factory.factory_dirt import DirtFactory
from environments.factory.additional.dirt.dirt_collections import DirtPiles
from environments.factory.additional.dirt.dirt_entity import DirtPile
from environments.factory.base.objects import Floor
from environments.factory.base.registers import Floors, Entities, EntityCollection
class Machines(EntityCollection):
    """Placeholder ``EntityCollection`` subclass; it adds no behaviour of its own."""

    def __init__(self, *args, **kwargs):
        # Pure pass-through to the parent initialiser.
        super(Machines, self).__init__(*args, **kwargs)
class Machine(Entity):
    """Placeholder ``Entity`` subclass; it adds no behaviour of its own."""

    def __init__(self, *args, **kwargs):
        # Pure pass-through to the parent initialiser.
        super(Machine, self).__init__(*args, **kwargs)
class StationaryMachinesDirtFactory(DirtFactory):
    """``DirtFactory`` variant that stores two fixed machine coordinates.

    Apart from remembering ``_machine_coords`` every hook is either an empty
    stub (returns ``None``) or forwards unchanged to the parent class.
    """

    def __init__(self, *args, **kwargs):
        # The coordinates are assigned before the parent initialiser runs.
        self._machine_coords = [(6, 6), (12, 13)]
        super(StationaryMachinesDirtFactory, self).__init__(*args, **kwargs)

    def entities_hook(self) -> Dict[(str, Entities)]:
        """Return the parent's entities unchanged."""
        return super(StationaryMachinesDirtFactory, self).entities_hook()

    def reset_hook(self) -> None:
        """Stub: nothing happens on reset."""
        return None

    def observations_hook(self) -> Dict[str, np.typing.ArrayLike]:
        """Stub: contributes no observations (returns ``None``)."""
        return None

    def actions_hook(self) -> Union[Action, List[Action]]:
        """Stub: contributes no actions (returns ``None``)."""
        return None

    def step_hook(self) -> (List[dict], dict):
        """Stub: contributes nothing to the step (returns ``None``)."""
        return None

    def per_agent_raw_observations_hook(self, agent) -> Dict[str, np.typing.ArrayLike]:
        """Return the parent's raw per-agent observations unchanged."""
        return super(StationaryMachinesDirtFactory, self).per_agent_raw_observations_hook(agent)

    def per_agent_reward_hook(self, agent: Agent) -> List[dict]:
        """Return the parent's per-agent rewards unchanged."""
        return super().per_agent_reward_hook(agent)

    def pre_step_hook(self) -> None:
        """Stub: nothing happens before a step."""
        return None

    def post_step_hook(self) -> dict:
        """Stub: nothing happens after a step (returns ``None``)."""
        return None

View File

@ -1,30 +0,0 @@
parse_doors: True
doors_have_area: True
done_at_collision: False
level_name: "rooms"
mv_prop:
allow_diagonal_movement: True
allow_square_movement: True
allow_no_op: False
dirt_prop:
initial_dirt_ratio: 0.35
initial_dirt_spawn_r_var : 0.1
clean_amount: 0.34
max_spawn_amount: 0.1
max_global_amount: 20
max_local_amount: 1
spawn_frequency: 0
max_spawn_ratio: 0.05
dirt_smear_amount: 0.0
done_when_clean: True
rewards_base:
MOVEMENTS_VALID: 0
MOVEMENTS_FAIL: 0
NOOP: 0
USE_DOOR_VALID: 0
USE_DOOR_FAIL: 0
COLLISION: 0
rewards_dirt:
CLEAN_UP_VALID: 1
CLEAN_UP_FAIL: 0
CLEAN_UP_LAST_PIECE: 5

View File

@ -1,106 +0,0 @@
from typing import NamedTuple, Union
import gym
from gym.wrappers.frame_stack import FrameStack
class EnvCombiner(object):
    """Combine several environment classes into one class via multiple inheritance."""

    def __init__(self, *envs_cls):
        """Remember the given classes, keyed by their ``__name__``.

        Classes sharing a name collapse into one entry (the last one wins);
        insertion order is kept and later becomes the order of the bases.
        """
        self._env_dict = {env_cls.__name__: env_cls for env_cls in envs_cls}

    @staticmethod
    def combine_cls(name, *envs_cls):
        """Create a new class called ``name`` whose bases are ``envs_cls``."""
        return type(name, envs_cls, {})

    def build(self):
        """Return the combined class.

        The name is built from every stored class name with ``factory``
        stripped (case-insensitively) and the remainder capitalised, followed
        by a single ``Factory`` suffix, e.g. ``DirtFactory`` + ``ItemFactory``
        -> ``DirtItemFactory``.
        """
        # str.replace needs the replacement argument; the original call
        # omitted it and raised TypeError.
        prefix = "".join(key.lower().replace("factory", "").capitalize() for key in self._env_dict)
        # The bases have to be unpacked: combine_cls collects *envs_cls, so
        # handing it a ready-made tuple would nest it and make type() fail.
        return self.combine_cls(f'{prefix}Factory', *self._env_dict.values())
class AgentRenderOptions:
    """
    Available options for how agents are represented in the env observation.

    SEPERATE:
        Each agent is represented in a separate slice as Constant.OCCUPIED_CELL value (one hot).
    COMBINED:
        For all agents, the value of Constant.OCCUPIED_CELL is added to a zero-value slice at the
        agents' positions (sum(SEPERATE)).
    LEVEL:
        The combined slice is added to the LEVEL slice (agents appear as obstacle / wall).
    NOT:
        The position of individual agents cannot be read from the observation.
    """
    SEPERATE, COMBINED, LEVEL, NOT = 'seperate', 'combined', 'lvl', 'not'
class MovementProperties(NamedTuple):
    """Property holder that bundles the movement-related settings; every field has a default."""

    # Allow manhattan-style movement on the grid (cells connected by square edges).
    allow_square_movement: bool = True

    # Allow diagonal movement on the grid (cells connected by square corners).
    allow_diagonal_movement: bool = False

    # Allow the agent to do nothing at all, i.e. not move (NO-OP).
    allow_no_op: bool = False
class ObservationProperties(NamedTuple):
    """Property holder that bundles the observation-related settings; every field has a default."""

    # How to represent agents in the observation space. This may also alter the obs-shape.
    render_agents: AgentRenderOptions = AgentRenderOptions.SEPERATE

    # Observations are built per agent; whether the current agent should be represented in its
    # own observation.
    omit_agent_self: bool = True

    # There might be cases where you want to modify the agent's obs-space so that it can be used
    # with additional observations. The additional slice can be filled with any number.
    additional_agent_placeholder: Union[None, str, int] = None

    # Whether to cast shadows (make floor tiles and items hidden).
    cast_shadows: bool = True

    # Frame stacking is a method to give some temporal information to the agents.
    # This parameter controls how many "old frames" are used.
    frames_to_stack: int = 0

    # Radius (_r) of the agent's field of view. The agent's own grid cell is not counted, so the
    # resulting field-of-view diameter is `pomdp_r * 2 + 1`.
    # A 'pomdp_r' of 0 always returns the full env == no partial observability.
    pomdp_r: int = 2

    # Whether to place a visual encoding on walkable tiles around the doors. This is helpful when
    # the doors can be operated from their surrounding area, so the agent can more easily get a
    # notion of where to choose the door option. However, this is not necessary at all.
    indicate_door_area: bool = False

    # Whether to add the agent's normalized global position as float values (2,1) to a separate
    # information slice. More optional information is to come.
    show_global_position_info: bool = False
class MarlFrameStack(gym.ObservationWrapper):
    """Observation wrapper that swaps the first two axes of frame-stacked multi-agent observations.

    TODO(@romue404): original author left this class undocumented.
    """

    def __init__(self, env):
        super(MarlFrameStack, self).__init__(env)

    def observation(self, observation):
        """Return ``observation`` with axes 0 and 1 swapped when wrapping a multi-agent ``FrameStack``.

        Any other observation is passed through untouched.
        """
        stacked_multi_agent = isinstance(self.env, FrameStack) and self.env.unwrapped.n_agents > 1
        if not stacked_multi_agent:
            return observation
        # NOTE(review): the ``[0:]`` slice is kept on purpose -- presumably it
        # materialises gym's lazy frame container into an array before
        # ``swapaxes`` is called; confirm before removing.
        return observation[0:].swapaxes(0, 1)

View File

@ -0,0 +1,11 @@
# Placeholder identifier for the template module; define your own when copying it.
TEMPLATE = '#' # TEMPLATE _identifier. Define your own!
# Movement direction identifiers: the four cardinal directions ...
NORTH = 'north'
EAST = 'east'
SOUTH = 'south'
WEST = 'west'
# ... and the four diagonal directions.
NORTHEAST = 'north_east'
SOUTHEAST = 'south_east'
SOUTHWEST = 'south_west'
NORTHWEST = 'north_west'

View File

@ -0,0 +1,24 @@
from typing import List
from environment.rules import Rule
from environment.utils.results import TickResult, DoneResult
class TemplateRule(Rule):
    """Skeleton ``Rule``: every hook is an empty stub that returns ``None``."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def on_init(self, state):
        """Stub hook; does nothing."""
        return None

    def tick_pre_step(self, state) -> List[TickResult]:
        """Stub hook; does nothing."""
        return None

    def tick_step(self, state) -> List[TickResult]:
        """Stub hook; does nothing."""
        return None

    def tick_post_step(self, state) -> List[TickResult]:
        """Stub hook; does nothing."""
        return None

    def on_check_done(self, state) -> List[DoneResult]:
        """Stub hook; does nothing."""
        return None

View File

@ -0,0 +1,26 @@
from typing import Union
from environment.actions import Action
from environment.utils.results import ActionResult
from modules.batteries import constants as b, rewards as r
from environment import constants as c
class BtryCharge(Action):
    """Action that charges the acting entity's battery at a charge pod on its position."""

    def __init__(self):
        super(BtryCharge, self).__init__(b.CHARGE)

    def do(self, entity, state) -> Union[None, ActionResult]:
        """Try to charge ``entity``'s battery and report the outcome.

        The charge pod is looked up at ``entity.pos``. Without a pod the action
        is ``c.NOT_VALID``; otherwise the validity is whatever the pod's
        ``charge_battery`` returns. The reward is ``r.CHARGE_VALID`` for a
        truthy validity and ``r.CHARGE_FAIL`` otherwise.
        """
        charge_pod = state[b.CHARGE_PODS].by_pos(entity.pos)
        if not charge_pod:
            # No pod under the entity: nothing to charge at.
            valid = c.NOT_VALID
            state.print(f'{entity.name} failed to charged batteries at {entity.pos}.')
        else:
            battery = state[b.BATTERIES].by_entity(entity)
            valid = charge_pod.charge_battery(battery)
            if not valid:
                state.print(f'{entity.name} failed to charged batteries at {charge_pod.name}.')
            else:
                state.print(f'{entity.name} just charged batteries at {charge_pod.name}.')

        if valid:
            reward = r.CHARGE_VALID
        else:
            reward = r.CHARGE_FAIL
        return ActionResult(entity=entity, identifier=self._identifier, validity=valid,
                            reward=reward)

View File

@ -0,0 +1,19 @@
from typing import NamedTuple, Union
# Battery Env
# Keys under which the charge pod and battery collections are looked up in the state.
CHARGE_PODS = 'ChargePods'
BATTERIES = 'Batteries'
BATTERY_DISCHARGED = 'DISCHARGED'
# Value returned by ChargePod.encoding.
CHARGE_POD_SYMBOL = 1
# Identifier of the charge action (passed to the Action base class by BtryCharge).
CHARGE = 'do_charge_action'
class BatteryProperties(NamedTuple):
    # Property holder for the battery settings; every field has a default.
    initial_charge: float = 0.8  # NOTE(review): presumably the charge level batteries start with -- confirm
    charge_rate: float = 0.4  # NOTE(review): presumably the amount added per charge action -- confirm
    charge_locations: int = 20  # NOTE(review): presumably the number of charge pods to place -- confirm
    # Either one cost for all actions or a dict of costs (Btry.tick_step indexes a dict by the agent's action).
    per_action_costs: Union[dict, float] = 0.02
    done_when_discharged: bool = False
    multi_charge: bool = False

View File

@ -1,21 +1,31 @@
from environments import helpers as h
from environments.factory.base.objects import BoundingMixin, EnvObject, Entity
from environments.factory.additional.btry.btry_util import Constants as c
from environment.entity.mixin import BoundEntityMixin
from environment.entity.object import EnvObject
from environment.entity.entity import Entity
from environment import constants as c
from environment.utils.render import RenderEntity
from modules.batteries import constants as b
class Battery(BoundingMixin, EnvObject):
class Battery(BoundEntityMixin, EnvObject):
@property
def is_discharged(self):
return self.charge_level == 0
def __init__(self, initial_charge_level: float, *args, **kwargs):
super(Battery, self).__init__(*args, **kwargs)
self.charge_level = initial_charge_level
@property
def obs_tag(self):
return self.name
@property
def encoding(self):
return self.charge_level
def __init__(self, initial_charge_level: float, owner: Entity, *args, **kwargs):
super(Battery, self).__init__(*args, **kwargs)
self.charge_level = initial_charge_level
self.bind_to(owner)
def do_charge_action(self, amount):
if self.charge_level < 1:
# noinspection PyTypeChecker
@ -24,11 +34,10 @@ class Battery(BoundingMixin, EnvObject):
else:
return c.NOT_VALID
def decharge(self, amount) -> c:
def decharge(self, amount) -> float:
if self.charge_level != 0:
# noinspection PyTypeChecker
self.charge_level = max(0, amount + self.charge_level)
self._collection.notify_change_to_value(self)
return c.VALID
else:
return c.NOT_VALID
@ -38,12 +47,15 @@ class Battery(BoundingMixin, EnvObject):
attr_dict.update(dict(name=self.name, belongs_to=self._bound_entity.name))
return attr_dict
def render(self):
return None
class ChargePod(Entity):
@property
def encoding(self):
return c.CHARGE_POD
return b.CHARGE_POD_SYMBOL
def __init__(self, *args, charge_rate: float = 0.4,
multi_charge: bool = False, **kwargs):
@ -58,3 +70,6 @@ class ChargePod(Entity):
return c.NOT_VALID
valid = battery.do_charge_action(self.charge_rate)
return valid
def render(self):
return RenderEntity(b.CHARGE_PODS, self.pos)

View File

@ -0,0 +1,36 @@
from environment.groups.env_objects import EnvObjects
from environment.groups.mixins import PositionMixin, HasBoundedMixin
from modules.batteries.entitites import ChargePod, Battery
class Batteries(HasBoundedMixin, EnvObjects):
    """Collection of ``Battery`` objects, one per agent passed to ``spawn_batteries``."""

    _entity = Battery
    is_blocking_light: bool = False
    can_collide: bool = False

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    @property
    def obs_tag(self):
        """Name of this collection class."""
        return type(self).__name__

    @property
    def obs_pairs(self):
        """List of ``(name, battery)`` tuples for every contained battery."""
        pairs = []
        for battery in self:
            pairs.append((battery.name, battery))
        return pairs

    def spawn_batteries(self, agents, initial_charge_level):
        """Create one battery per agent with the given charge level and add them all."""
        new_batteries = [self._entity(initial_charge_level, agent) for agent in agents]
        self.add_items(new_batteries)
class ChargePods(PositionMixin, EnvObjects):
    """Positioned collection of ``ChargePod`` entities."""

    _entity = ChargePod

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def __repr__(self):
        # Explicitly reuse the parent representation.
        return super().__repr__()

View File

@ -0,0 +1,3 @@
# Rewards of the battery module.
# Reward BtryCharge hands out for a valid / an invalid charge action.
CHARGE_VALID: float = 0.1
CHARGE_FAIL: float = -0.1
# Reward used by the battery rules when a battery is discharged.
BATTERY_DISCHARGED: float = -1.0

View File

@ -0,0 +1,61 @@
from typing import List, Union
from environment.rules import Rule
from environment.utils.results import TickResult, DoneResult
from environment import constants as c
from modules.batteries import constants as b, rewards as r
class Btry(Rule):
    """Rule that equips agents with batteries and applies the per-action cost every step."""

    def __init__(self, initial_charge: float = 0.8, per_action_costs: Union[dict, float] = 0.02):
        """Store the initial charge level and the per-action cost (one float or a dict by action)."""
        super().__init__()
        self.initial_charge = initial_charge
        self.per_action_costs = per_action_costs

    def on_init(self, state):
        """Spawn one battery per agent with the configured initial charge."""
        agents = state[c.AGENT]
        state[b.BATTERIES].spawn_batteries(agents, self.initial_charge)

    def tick_pre_step(self, state) -> List[TickResult]:
        """Stub hook; does nothing and returns ``None``."""
        return None

    def tick_step(self, state) -> List[TickResult]:
        """Call ``decharge`` on every agent's battery and return one valid, zero-reward result each.

        NOTE(review): ``Battery.decharge`` appears to *add* the amount to the
        charge level -- confirm the sign convention of ``per_action_costs``.
        """
        batteries = state[b.BATTERIES]
        results = []
        for agent in state[c.AGENT]:
            costs = self.per_action_costs
            if not isinstance(costs, dict):
                energy_consumption = costs
            else:
                # Per-action table: look up the cost of the action the agent just took.
                energy_consumption = costs[agent.step_result()['action']]
            batteries.by_entity(agent).decharge(energy_consumption)
            results.append(TickResult(self.name, reward=0, entity=agent, validity=c.VALID))
        return results

    def tick_post_step(self, state) -> List[TickResult]:
        """Report a ``r.BATTERY_DISCHARGED`` result for every battery that is discharged."""
        results = []
        for btry in state[b.BATTERIES]:
            if not btry.is_discharged:
                continue
            state.print(f'Battery of {btry.bound_entity.name} is discharged!')
            discharged = TickResult(self.name, entity=btry.bound_entity,
                                    reward=r.BATTERY_DISCHARGED, validity=c.VALID)
            results.append(discharged)
        return results
class BtryDoneAtDischarge(Rule):
    """Done-rule that reports a valid done result as soon as any battery is discharged."""

    def __init__(self):
        super(BtryDoneAtDischarge, self).__init__()

    def on_check_done(self, state) -> List[DoneResult]:
        """Return a single-element list: valid with ``r.BATTERY_DISCHARGED`` if any battery is empty,
        otherwise not valid with reward 0."""
        for battery in state[b.BATTERIES]:
            if battery.is_discharged:
                return [DoneResult(self.name, validity=c.VALID, reward=r.BATTERY_DISCHARGED)]
        return [DoneResult(self.name, validity=c.NOT_VALID, reward=0)]

View File

@ -0,0 +1,36 @@
from typing import Union
from environment.actions import Action
from environment.utils.results import ActionResult
from modules.clean_up import constants as d, rewards as r
from environment import constants as c
class CleanUp(Action):
    """Action that removes dirt from the cell the acting entity stands on."""

    def __init__(self):
        super(CleanUp, self).__init__(d.CLEAN_UP)

    def do(self, entity, state) -> Union[None, ActionResult]:
        """Clean the dirt at ``entity.pos`` by the collection's ``clean_amount``.

        Without dirt at the position the action fails (``d.CLEAN_UP_FAIL``,
        ``r.CLEAN_UP_FAIL``). Otherwise the pile is reduced, or deleted once
        its amount would drop to zero or below, and the action succeeds
        (``d.CLEAN_UP``, ``r.CLEAN_UP_VALID``).
        """
        dirt = state[d.DIRT].by_pos(entity.pos)
        if not dirt:
            # Guard clause: nothing to clean here.
            print_str = f'{entity.name} just tried to clean up some dirt at {entity.pos}, but failed.'
            state.print(print_str)
            return ActionResult(identifier=d.CLEAN_UP_FAIL, validity=c.NOT_VALID,
                                reward=r.CLEAN_UP_FAIL, entity=entity)

        new_dirt_amount = dirt.amount - state[d.DIRT].clean_amount
        if new_dirt_amount <= 0:
            state[d.DIRT].delete_env_object(dirt)
        else:
            dirt.set_new_amount(max(new_dirt_amount, c.VALUE_FREE_CELL))
        print_str = f'{entity.name} did just clean up some dirt at {entity.pos}.'
        state.print(print_str)
        return ActionResult(identifier=d.CLEAN_UP, validity=c.VALID,
                            reward=r.CLEAN_UP_VALID, entity=entity)

View File

@ -0,0 +1,7 @@
# Key under which the dirt pile collection is looked up in the state.
DIRT = 'DirtPiles'
# Identifier of the clean-up action; CleanUp also reports it on success.
CLEAN_UP = 'do_cleanup_action'
CLEAN_UP_VALID = 'clean_up_valid'
# Identifier CleanUp reports when there was no dirt to clean.
CLEAN_UP_FAIL = 'clean_up_fail'
CLEAN_UP_ALL = 'all_cleaned_up'

View File

Before

Width:  |  Height:  |  Size: 38 KiB

After

Width:  |  Height:  |  Size: 38 KiB

Some files were not shown because too many files have changed in this diff Show More