major redesign of observations and entities
@ -1,100 +0,0 @@
|
||||
import numpy as np
|
||||
|
||||
from networkx.algorithms.approximation import traveling_salesman as tsp
|
||||
|
||||
from environments.factory.base.objects import Agent
|
||||
from environments.helpers import points_to_graph
|
||||
from environments import helpers as h
|
||||
|
||||
from environments.helpers import Constants as BaseConstants
|
||||
from environments.helpers import EnvActions as BaseActions
|
||||
|
||||
|
||||
class Constants(BaseConstants):
|
||||
DIRT = 'DirtPile'
|
||||
|
||||
|
||||
class Actions(BaseActions):
|
||||
CLEAN_UP = 'do_cleanup_action'
|
||||
|
||||
|
||||
a = Actions
|
||||
c = Constants
|
||||
|
||||
future_planning = 7
|
||||
|
||||
|
||||
class TSPDirtAgent(Agent):
|
||||
|
||||
def __init__(self, env, *args,
|
||||
static_problem: bool = True, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self.static_problem = static_problem
|
||||
self.local_optimization = True
|
||||
self._env = env
|
||||
self._floortile_graph = points_to_graph(self._env[c.FLOOR].positions,
|
||||
allow_euclidean_connections=self._env._actions.allow_diagonal_movement,
|
||||
allow_manhattan_connections=self._env._actions.allow_square_movement)
|
||||
self._static_route = None
|
||||
|
||||
def predict(self, *_, **__):
|
||||
if self._env[c.DIRT].by_pos(self.pos) is not None:
|
||||
# Translate the action_object to an integer to have the same output as any other model
|
||||
action = a.CLEAN_UP
|
||||
elif any('door' in x.name.lower() for x in self.tile.guests):
|
||||
door = next(x for x in self.tile.guests if 'door' in x.name.lower())
|
||||
if door.is_closed:
|
||||
# Translate the action_object to an integer to have the same output as any other model
|
||||
action = h.EnvActions.USE_DOOR
|
||||
else:
|
||||
action = self._predict_move()
|
||||
else:
|
||||
action = self._predict_move()
|
||||
# Translate the action_object to an integer to have the same output as any other model
|
||||
action_obj = next(action_i for action_name, action_i in self._env.named_action_space.items() if action_name == action)
|
||||
return action_obj
|
||||
|
||||
def _predict_move(self):
|
||||
if len(self._env[c.DIRT]) >= 1:
|
||||
if self.static_problem:
|
||||
if not self._static_route:
|
||||
self._static_route = self.calculate_tsp_route()
|
||||
else:
|
||||
pass
|
||||
next_pos = self._static_route.pop(0)
|
||||
while next_pos == self.pos:
|
||||
next_pos = self._static_route.pop(0)
|
||||
else:
|
||||
if not self._static_route:
|
||||
self._static_route = self.calculate_tsp_route()[:7]
|
||||
next_pos = self._static_route.pop(0)
|
||||
while next_pos == self.pos:
|
||||
next_pos = self._static_route.pop(0)
|
||||
|
||||
diff = np.subtract(next_pos, self.pos)
|
||||
# Retrieve action based on the pos dif (like in: What do i have to do to get there?)
|
||||
try:
|
||||
action = next(action for action, pos_diff in h.ACTIONMAP.items()
|
||||
if (diff == pos_diff).all())
|
||||
except StopIteration:
|
||||
print('This Should not happen!')
|
||||
else:
|
||||
action = int(np.random.randint(self._env.action_space.n))
|
||||
return action
|
||||
|
||||
def calculate_tsp_route(self):
|
||||
if self.local_optimization:
|
||||
nodes = \
|
||||
[self.pos] + \
|
||||
[x for x in self._env[c.DIRT].positions if max(abs(np.subtract(x, self.pos))) < 3]
|
||||
try:
|
||||
while len(nodes) < 7:
|
||||
nodes += [next(x for x in self._env[c.DIRT].positions if x not in nodes)]
|
||||
except StopIteration:
|
||||
nodes = [self.pos] + self._env[c.DIRT].positions
|
||||
|
||||
else:
|
||||
nodes = [self.pos] + self._env[c.DIRT].positions
|
||||
route = tsp.traveling_salesman_problem(self._floortile_graph,
|
||||
nodes=nodes, cycle=True, method=tsp.greedy_tsp)
|
||||
return route
|
@ -1,5 +1,5 @@
|
||||
import torch
|
||||
from typing import Union, List
|
||||
from typing import Union, List, Dict
|
||||
import numpy as np
|
||||
from torch.distributions import Categorical
|
||||
from algorithms.marl.memory import MARLActorCriticMemory
|
||||
@ -74,7 +74,7 @@ class BaseActorCritic:
|
||||
actions = [Categorical(logits=logits).sample().item() for logits in out[nms.LOGITS]]
|
||||
return actions
|
||||
|
||||
def init_hidden(self) -> dict[ListOrTensor]:
|
||||
def init_hidden(self) -> Dict[str, ListOrTensor]:
|
||||
pass
|
||||
|
||||
def forward(self,
|
||||
@ -82,7 +82,7 @@ class BaseActorCritic:
|
||||
actions: ListOrTensor,
|
||||
hidden_actor: ListOrTensor,
|
||||
hidden_critic: ListOrTensor
|
||||
) -> dict[ListOrTensor]:
|
||||
) -> Dict[str, ListOrTensor]:
|
||||
pass
|
||||
|
||||
@torch.no_grad()
|
||||
|
@ -39,7 +39,7 @@ class LoopIAC(BaseActorCritic):
|
||||
def forward(self, observations, actions, hidden_actor, hidden_critic):
|
||||
outputs = [
|
||||
net(
|
||||
self._as_torch(observations[ag_i]).unsqueeze(0).unsqueeze(0), # agents x time
|
||||
self._as_torch(observations[ag_i]).unsqueeze(0).unsqueeze(0), # agent x time
|
||||
self._as_torch(actions[ag_i]).unsqueeze(0),
|
||||
hidden_actor[ag_i],
|
||||
hidden_critic[ag_i]
|
||||
|
@ -46,7 +46,7 @@ class LoopMAPPO(LoopSNAC):
|
||||
|
||||
# monte carlo returns
|
||||
mc_returns = self.monte_carlo_returns(batch[nms.REWARD], batch[nms.DONE], gamma)
|
||||
mc_returns = (mc_returns - mc_returns.mean()) / (mc_returns.std() + 1e-8) #todo: norm across agents ok?
|
||||
mc_returns = (mc_returns - mc_returns.mean()) / (mc_returns.std() + 1e-8) #todo: norm across agent ok?
|
||||
advantages = mc_returns - out[nms.CRITIC][:, :-1]
|
||||
|
||||
# policy loss
|
||||
|
@ -120,7 +120,7 @@ class MARLActorCriticMemory(object):
|
||||
|
||||
def __getattr__(self, attr):
|
||||
all_attrs = [getattr(mem, attr) for mem in self.memories]
|
||||
return torch.cat(all_attrs, 0) # agents x time ...
|
||||
return torch.cat(all_attrs, 0) # agent x time ...
|
||||
|
||||
def chunk_dataloader(self, chunk_len, k):
|
||||
datasets = [ExperienceChunks(mem, chunk_len, k) for mem in self.memories]
|
||||
|
95
algorithms/static/TSP_base_agent.py
Normal file
@ -0,0 +1,95 @@
|
||||
from random import choice
|
||||
|
||||
import numpy as np
|
||||
|
||||
from networkx.algorithms.approximation import traveling_salesman as tsp
|
||||
|
||||
from environment.utils.helpers import points_to_graph
|
||||
|
||||
from modules.doors import constants as do
|
||||
from environment import constants as c
|
||||
from environment.utils.helpers import MOVEMAP
|
||||
|
||||
from abc import abstractmethod, ABC
|
||||
|
||||
future_planning = 7
|
||||
|
||||
|
||||
class TSPBaseAgent(ABC):
    """Scripted agent that follows an approximate TSP tour over target positions.

    Subclasses implement :meth:`predict` and use the helpers below to decide
    between using a door and moving one step along the planned route.
    """

    def __init__(self, state, agent_i, static_problem: bool = True):
        """Bind the agent to an environment and prepare the floor graph.

        :param state: Environment object; it is indexed directly (``state[c.FLOOR]``)
                      as well as through its ``state`` attribute.
        :param agent_i: Index of the controlled agent inside ``state.state[c.AGENT]``.
        :param static_problem: If True the full route is kept; otherwise only the
                               first ``future_planning`` waypoints are followed
                               before the route is planned again.
        """
        self.static_problem = static_problem
        self.local_optimization = True
        self._env = state
        self.state = self._env.state[c.AGENT][agent_i]
        self._floortile_graph = points_to_graph(self._env[c.FLOOR].positions)
        self._static_route = None

    @abstractmethod
    def predict(self, *_, **__) -> int:
        """Return the index of the action to perform next."""
        return 0

    def _use_door_or_move(self, door, target):
        """Return the door-use action name for a closed door, else the next move towards ``target``."""
        if door.is_closed:
            return do.ACTION_DOOR_USE
        return self._predict_move(target)

    def calculate_tsp_route(self, target_identifier):
        """Plan a greedy TSP cycle from the agent's position over the targets.

        :param target_identifier: Key of the target collection in ``self._env.state``.
        :return: The route produced by ``tsp.traveling_salesman_problem``.
        """
        # Targets without a position (c.VALUE_NO_POS) cannot be visited.
        positions = [x for x in self._env.state[target_identifier].positions if x != c.VALUE_NO_POS]
        start = self.state.pos
        if self.local_optimization:
            # Begin with targets whose per-axis distance to the agent is below 3 ...
            nodes = [start] + [x for x in positions if max(abs(np.subtract(x, start))) < 3]
            try:
                # ... and top up with any remaining targets until the planning window is full.
                while len(nodes) < future_planning:
                    nodes += [next(x for x in positions if x not in nodes)]
            except StopIteration:
                # Fewer targets than the window size: simply plan over all of them.
                nodes = [start] + positions
        else:
            nodes = [start] + positions
        route = tsp.traveling_salesman_problem(self._floortile_graph,
                                               nodes=nodes, cycle=True, method=tsp.greedy_tsp)
        return route

    def _door_is_close(self):
        """Return the first guest on a neighboring floor tile whose name contains ``do.DOOR``, or None."""
        return next((guest
                     for tile in self.state.tile.neighboring_floor
                     for guest in tile.guests
                     if do.DOOR in guest.name), None)

    def _has_targets(self, target_identifier):
        """Return True if at least one target of the given kind has a position."""
        return any(x.pos != c.VALUE_NO_POS for x in self._env.state[target_identifier])

    def _next_waypoint(self):
        """Pop route entries until one differs from the agent's position; None if the route runs out."""
        while self._static_route:
            candidate = self._static_route.pop(0)
            if candidate != self.state.pos:
                return candidate
        return None

    def _random_action_name(self):
        """Return the name of a randomly chosen action of the agent."""
        return choice(self.state.actions).name

    def _predict_move(self, target_identifier):
        """Return the name of the move action that follows the planned route.

        Falls back to a random action name when there are no targets, when the
        route holds no position other than the agent's own, or when no entry of
        ``MOVEMAP`` matches the required step.
        """
        if not self._has_targets(target_identifier):
            return self._random_action_name()

        if not self._static_route:
            route = self.calculate_tsp_route(target_identifier)
            # Dynamic problems only commit to a short prefix before planning again.
            self._static_route = route if self.static_problem else route[:future_planning]

        next_pos = self._next_waypoint()
        if next_pos is None:
            # Route exhausted (previously an IndexError); it is planned again on the next call.
            return self._random_action_name()

        diff = np.subtract(next_pos, self.state.pos)
        # Retrieve action based on the pos diff (like in: What do I have to do to get there?)
        try:
            return next(action for action, pos_diff in MOVEMAP.items() if np.all(diff == pos_diff))
        except StopIteration:
            print(f'diff: {diff}')
            print('This Should not happen!')
            return self._random_action_name()
|
27
algorithms/static/TSP_dirt_agent.py
Normal file
@ -0,0 +1,27 @@
|
||||
from algorithms.static.TSP_base_agent import TSPBaseAgent
|
||||
|
||||
from modules.clean_up import constants as di
|
||||
|
||||
future_planning = 7
|
||||
|
||||
|
||||
class TSPDirtAgent(TSPBaseAgent):
    """TSP agent for dirt: cleans when standing on dirt, otherwise heads for the next pile."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def predict(self, *_, **__):
        """Return the index (within the agent's action list) of the action to perform."""
        dirt_here = self._env.state[di.DIRT].by_pos(self.state.pos)
        if dirt_here is not None:
            action = di.CLEAN_UP
        elif door := self._door_is_close():
            action = self._use_door_or_move(door, di.DIRT)
        else:
            action = self._predict_move(di.DIRT)

        # Translate the action name to an integer to have the same output as any other model
        for action_i, candidate in enumerate(self.state.actions):
            if candidate.name == action:
                return action_i
        print('Will not happen')
        raise EnvironmentError
|
59
algorithms/static/TSP_item_agent.py
Normal file
@ -0,0 +1,59 @@
|
||||
import numpy as np
|
||||
|
||||
from algorithms.static.TSP_base_agent import TSPBaseAgent
|
||||
|
||||
from modules.items import constants as i
|
||||
|
||||
future_planning = 7
|
||||
inventory_size = 3
|
||||
|
||||
MODE_GET = 'Mode_Get'
|
||||
MODE_BRING = 'Mode_Bring'
|
||||
|
||||
|
||||
class TSPItemAgent(TSPBaseAgent):
    """TSP agent that alternates between collecting items and bringing them to a drop-off."""

    def __init__(self, *args, mode=MODE_GET, **kwargs):
        """Create the agent; ``mode`` is either ``MODE_GET`` or ``MODE_BRING``."""
        super(TSPItemAgent, self).__init__(*args, **kwargs)
        self.mode = mode

    def predict(self, *_, **__):
        """Return the index (within the agent's action list) of the action to perform.

        :raises EnvironmentError: If the chosen action name is not among the agent's actions.
        """
        on_item = self._env.state[i.ITEM].by_pos(self.state.pos) is not None
        if on_item or self._env.state[i.DROP_OFF].by_pos(self.state.pos) is not None:
            # The same action is used on an item tile and on a drop-off tile.
            action = i.ITEM_ACTION
        elif door := self._door_is_close():
            action = self._use_door_or_move(door, i.DROP_OFF if self.mode == MODE_BRING else i.ITEM)
        else:
            action = self._choose()

        # Translate the action name to an integer to have the same output as any other model
        try:
            action_obj = next(action_i for action_i, a in enumerate(self.state.actions) if a.name == action)
        except (StopIteration, UnboundLocalError):
            print('Will not happen')
            raise EnvironmentError

        # Mode bookkeeping: an empty inventory means "get", an over-full one means "bring".
        carried = len(self._env[i.INVENTORY].by_entity(self.state))
        if self.mode == MODE_BRING and not carried:
            self.mode = MODE_GET
        # NOTE(review): strict '>' only triggers above inventory_size; confirm against the
        # real inventory capacity whether '>=' was intended.
        elif self.mode == MODE_GET and carried > inventory_size:
            self.mode = MODE_BRING
        return action_obj

    def _choose(self):
        """Return the name of the next action when neither an item, a drop-off nor a door applies."""
        # len() is used deliberately: truthiness of the collection types is not visible here.
        if len(self._env.state[i.ITEM]) >= 1:
            pass
        elif len(self._env[i.INVENTORY].by_entity(self.state)):
            # Nothing left to collect but still carrying something: deliver it.
            self.mode = MODE_BRING
        else:
            # Nothing to do: return a random action *name*, since predict() matches by name.
            actions = self.state.actions
            return actions[int(np.random.randint(len(actions)))].name
        # Resolve the target only after a possible mode switch above.
        target = i.DROP_OFF if self.mode == MODE_BRING else i.ITEM
        return self._predict_move(target)
|
32
algorithms/static/TSP_target_agent.py
Normal file
@ -0,0 +1,32 @@
|
||||
from algorithms.static.TSP_base_agent import TSPBaseAgent
|
||||
|
||||
from modules.destinations import constants as d
|
||||
from modules.doors import constants as do
|
||||
|
||||
future_planning = 7
|
||||
|
||||
|
||||
class TSPTargetAgent(TSPBaseAgent):
    """TSP agent that travels to destinations and uses closed doors nearby."""

    def __init__(self, *args, **kwargs):
        super(TSPTargetAgent, self).__init__(*args, **kwargs)

    def _handle_doors(self):
        """Return a door on a neighboring floor tile, or None (same lookup as ``_door_is_close``)."""
        return self._door_is_close()

    def predict(self, *_, **__):
        """Return the index (within the agent's action list) of the action to perform.

        :raises EnvironmentError: If the chosen action name is not among the agent's
                                  actions (previously this surfaced as UnboundLocalError).
        """
        if door := self._door_is_close():
            action = self._use_door_or_move(door, d.DESTINATION)
        else:
            action = self._predict_move(d.DESTINATION)
        # Translate the action name to an integer to have the same output as any other model
        try:
            action_obj = next(action_i for action_i, a in enumerate(self.state.actions) if a.name == action)
        except (StopIteration, UnboundLocalError):
            print('Will not happen')
            # Fail the same way as the sibling TSP agents instead of falling through.
            raise EnvironmentError
        return action_obj
|
||||
|
15
algorithms/static/random_agent.py
Normal file
@ -0,0 +1,15 @@
|
||||
from random import randint
|
||||
|
||||
from algorithms.static.TSP_base_agent import TSPBaseAgent
|
||||
|
||||
future_planning = 7
|
||||
|
||||
|
||||
class TSPRandomAgent(TSPBaseAgent):
    """Agent that ignores the environment and picks a uniformly random action index."""

    def __init__(self, n_actions, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.n_action = n_actions

    def predict(self, *_, **__):
        """Return a random integer in ``[0, n_action - 1]``."""
        highest_index = self.n_action - 1
        return randint(0, highest_index)
|
@ -1,4 +1,3 @@
|
||||
import re
|
||||
import torch
|
||||
import numpy as np
|
||||
import yaml
|
||||
|
101
environment/actions.py
Normal file
@ -0,0 +1,101 @@
|
||||
import abc
|
||||
from typing import Union
|
||||
|
||||
from environment import rewards as r
|
||||
from environment import constants as c
|
||||
from environment.utils.helpers import MOVEMAP
|
||||
from environment.utils.results import ActionResult
|
||||
|
||||
|
||||
class Action(abc.ABC):
    """Abstract base of everything an entity can do; identified by a string."""

    @abc.abstractmethod
    def __init__(self, identifier: str):
        self._identifier = identifier

    def __repr__(self):
        return f'Action[{self._identifier}]'

    @property
    def name(self):
        """The identifier this action was created with."""
        return self._identifier

    @abc.abstractmethod
    def do(self, entity, state) -> Union[None, ActionResult]:
        """Perform the action for ``entity``; concrete actions override this."""
        return None
|
||||
|
||||
|
||||
class Noop(Action):
    """The do-nothing action; always valid."""

    def __init__(self):
        super(Noop, self).__init__(c.NOOP)

    def do(self, entity, *_) -> Union[None, ActionResult]:
        """Return a valid result carrying the ``r.NOOP`` reward for ``entity``."""
        result = ActionResult(entity=entity, identifier=self._identifier,
                              validity=c.VALID, reward=r.NOOP)
        return result
|
||||
|
||||
|
||||
class Move(Action, abc.ABC):
    """Abstract movement action; the offset is looked up in ``MOVEMAP`` by identifier."""

    @abc.abstractmethod
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def _calc_new_pos(self, pos):
        """Return ``pos`` shifted by this action's ``MOVEMAP`` offset."""
        dx, dy = MOVEMAP[self._identifier]
        return pos[0] + dx, pos[1] + dy

    def do(self, entity, env):
        """Try to move ``entity`` one step and report validity and reward."""
        target_pos = self._calc_new_pos(entity.pos)
        target_tile = env[c.FLOOR].by_pos(target_pos)
        if target_tile:
            # noinspection PyUnresolvedReferences
            valid = entity.move(target_tile)
        else:
            # No floor tile at the target position.
            valid = c.NOT_VALID
        if valid:
            reward = r.MOVEMENTS_VALID
        else:
            reward = r.MOVEMENTS_FAIL
        return ActionResult(entity=entity, identifier=self._identifier, validity=valid, reward=reward)
|
||||
|
||||
|
||||
class North(Move):
    """Move action identified by ``c.NORTH``."""

    def __init__(self, *args, **kwargs):
        super(North, self).__init__(c.NORTH, *args, **kwargs)


class NorthEast(Move):
    """Move action identified by ``c.NORTHEAST``."""

    def __init__(self, *args, **kwargs):
        super(NorthEast, self).__init__(c.NORTHEAST, *args, **kwargs)


class East(Move):
    """Move action identified by ``c.EAST``."""

    def __init__(self, *args, **kwargs):
        super(East, self).__init__(c.EAST, *args, **kwargs)


class SouthEast(Move):
    """Move action identified by ``c.SOUTHEAST``."""

    def __init__(self, *args, **kwargs):
        super(SouthEast, self).__init__(c.SOUTHEAST, *args, **kwargs)


class South(Move):
    """Move action identified by ``c.SOUTH``."""

    def __init__(self, *args, **kwargs):
        super(South, self).__init__(c.SOUTH, *args, **kwargs)


class SouthWest(Move):
    """Move action identified by ``c.SOUTHWEST``."""

    def __init__(self, *args, **kwargs):
        super(SouthWest, self).__init__(c.SOUTHWEST, *args, **kwargs)


class West(Move):
    """Move action identified by ``c.WEST``."""

    def __init__(self, *args, **kwargs):
        super(West, self).__init__(c.WEST, *args, **kwargs)


class NorthWest(Move):
    """Move action identified by ``c.NORTHWEST``."""

    def __init__(self, *args, **kwargs):
        super(NorthWest, self).__init__(c.NORTHWEST, *args, **kwargs)
|
||||
|
||||
|
||||
Move4 = [North, East, South, West]
|
||||
# noinspection PyTypeChecker
|
||||
Move8 = Move4 + [NorthEast, SouthEast, SouthWest, NorthWest]
|
Before Width: | Height: | Size: 8.3 KiB After Width: | Height: | Size: 8.3 KiB |
Before Width: | Height: | Size: 3.3 KiB After Width: | Height: | Size: 3.3 KiB |
Before Width: | Height: | Size: 18 KiB After Width: | Height: | Size: 18 KiB |
Before Width: | Height: | Size: 1.6 KiB After Width: | Height: | Size: 1.6 KiB |
Before Width: | Height: | Size: 1.6 KiB After Width: | Height: | Size: 1.6 KiB |
Before Width: | Height: | Size: 5.8 KiB After Width: | Height: | Size: 5.8 KiB |
Before Width: | Height: | Size: 5.6 KiB After Width: | Height: | Size: 5.6 KiB |
Before Width: | Height: | Size: 1.4 KiB After Width: | Height: | Size: 1.4 KiB |
60
environment/constants.py
Normal file
@ -0,0 +1,60 @@
|
||||
# Names
|
||||
DANGER_ZONE = 'x' # Danger Zone tile identifier for resolving the string-based map files.
|
||||
DEFAULTS = 'Defaults'
|
||||
SELF = 'Self'
|
||||
PLACEHOLDER = 'Placeholder'
|
||||
FLOOR = 'Floor' # Identifier of Floor-objects and groups (groups).
|
||||
FLOORS = 'Floors' # Identifier of Floor-objects and groups (groups).
|
||||
WALL = 'Wall' # Identifier of Wall-objects and groups (groups).
|
||||
WALLS = 'Walls' # Identifier of Wall-objects and groups (groups).
|
||||
LEVEL = 'Level' # Identifier of Level-objects and groups (groups).
|
||||
AGENT = 'Agent' # Identifier of Agent-objects and groups (groups).
|
||||
AGENTS = 'Agents' # Identifier of Agent-objects and groups (groups).
|
||||
OTHERS = 'Other'
|
||||
COMBINED = 'Combined'
|
||||
GLOBAL_POSITION = 'GLOBAL_POSITION' # Identifier of the global position slice
|
||||
|
||||
|
||||
# Attributes
|
||||
IS_BLOCKING_LIGHT = 'is_blocking_light'
|
||||
HAS_POSITION = 'has_position'
|
||||
HAS_NO_POSITION = 'has_no_position'
|
||||
ALL = 'All'
|
||||
|
||||
# Symbols (Read from map-files)
|
||||
SYMBOL_WALL = '#'
|
||||
SYMBOL_FLOOR = '-'
|
||||
|
||||
VALID = True # Identifier to rename boolean values in the context of actions.
|
||||
NOT_VALID = False # Identifier to rename boolean values in the context of actions.
|
||||
VALUE_FREE_CELL = 0 # Free-Cell value used in observation
|
||||
VALUE_OCCUPIED_CELL = 1 # Occupied-Cell value used in observation
|
||||
VALUE_NO_POS = (-9999, -9999) # Invalid Position value used in the environment (smth. is off-grid)
|
||||
|
||||
|
||||
ACTION = 'action' # Identifier of Action-objects and groups (groups).
|
||||
COLLISION = 'Collision' # Identifier to use in the context of collisions.
|
||||
LAST_POS = 'LAST_POS' # Identifier for retrieving an entity's last pos.
|
||||
VALIDITY = 'VALIDITY' # Identifier for retrieving the Validity of Action, Tick, etc. ...
|
||||
|
||||
# Actions
|
||||
# Movements
|
||||
NORTH = 'north'
|
||||
EAST = 'east'
|
||||
SOUTH = 'south'
|
||||
WEST = 'west'
|
||||
NORTHEAST = 'north_east'
|
||||
SOUTHEAST = 'south_east'
|
||||
SOUTHWEST = 'south_west'
|
||||
NORTHWEST = 'north_west'
|
||||
|
||||
# Move Groups
|
||||
MOVE8 = 'Move8'
|
||||
MOVE4 = 'Move4'
|
||||
|
||||
# No-Action / Wait
|
||||
NOOP = 'Noop'
|
||||
|
||||
# Result Identifier
|
||||
MOVEMENTS_VALID = 'motion_valid'
|
||||
MOVEMENTS_FAIL = 'motion_not_valid'
|
76
environment/entity/agent.py
Normal file
@ -0,0 +1,76 @@
|
||||
from typing import List, Union
|
||||
|
||||
from environment import constants as c
|
||||
from environment.actions import Action
|
||||
from environment.entity.entity import Entity
|
||||
from environment.utils.render import RenderEntity
|
||||
from environment.utils import renderer
|
||||
from environment.utils.helpers import is_move
|
||||
from environment.utils.results import ActionResult, Result
|
||||
|
||||
|
||||
class Agent(Entity):
    """Entity that owns a list of actions and observations and remembers its last action result."""

    def __init__(self, actions: List[Action], observations: List[str], *args, **kwargs):
        super(Agent, self).__init__(*args, **kwargs)
        # Instance attribute; it shadows the step_result method defined on the class.
        self.step_result = dict()
        self._actions = actions
        self._observations = observations
        self._state: Union[Result, None] = None

    @property
    def obs_tag(self):
        return self.name

    @property
    def actions(self):
        return self._actions

    @property
    def observations(self):
        return self._observations

    @property
    def can_collide(self):
        return True

    def step_result(self):
        pass

    @property
    def collection(self):
        return self._collection

    @property
    def state(self):
        """The stored action result, or a valid zero-reward Noop result if none is stored."""
        if self._state:
            return self._state
        return ActionResult(entity=self, identifier=c.NOOP, validity=c.VALID, reward=0)

    # noinspection PyAttributeOutsideInit
    def clear_temp_state(self):
        """Forget the stored action result and return self."""
        self._state = None
        return self

    def set_state(self, action_result):
        self._state = action_result

    def summarize_state(self):
        """Extend the entity summary with validity and identifier of the current state."""
        summary = super().summarize_state()
        summary.update(valid=bool(self.state.validity), action=str(self.state.identifier))
        return summary

    def render(self):
        """Build the RenderEntity for this agent, choosing the render state from the current state."""
        i = next(idx for idx, x in enumerate(self._collection) if x.name == self.name)
        status = self.state
        if status.identifier == c.COLLISION:
            render_state = renderer.STATE_COLLISION
        elif not status.validity:
            render_state = renderer.STATE_INVALID
        elif status.identifier == c.NOOP:
            render_state = renderer.STATE_IDLE
        elif is_move(status.identifier):
            render_state = renderer.STATE_MOVE
        else:
            render_state = renderer.STATE_VALID

        return RenderEntity(c.AGENT, self.pos, 1, 'none', render_state, i + 1, real_name=self.name)
|
79
environment/entity/entity.py
Normal file
@ -0,0 +1,79 @@
|
||||
import abc
|
||||
|
||||
from environment import constants as c
|
||||
from environment.entity.object import EnvObject
|
||||
from environment.utils.render import RenderEntity
|
||||
|
||||
|
||||
class Entity(EnvObject, abc.ABC):
    """Full Env Entity that lives on the env Grid. Doors, Items, DirtPile etc..."""

    def __init__(self, tile, **kwargs):
        super().__init__(**kwargs)
        self._tile = tile
        tile.enter(self)

    def __repr__(self):
        return super(Entity, self).__repr__() + f'(@{self.pos})'

    @property
    def has_position(self):
        return self.pos != c.VALUE_NO_POS

    @property
    def pos(self):
        return self._tile.pos

    @property
    def x(self):
        return self.pos[0]

    @property
    def y(self):
        return self.pos[1]

    @property
    def tile(self):
        return self._tile

    @property
    def last_tile(self):
        """The tile occupied before the last successful move; None until one happened."""
        if not hasattr(self, '_last_tile'):
            # noinspection PyAttributeOutsideInit
            self._last_tile = None
        return self._last_tile

    @property
    def last_pos(self):
        """Position of ``last_tile``, or ``c.VALUE_NO_POS`` when there is none."""
        try:
            previous_pos = self.last_tile.pos
        except AttributeError:
            previous_pos = c.VALUE_NO_POS
        return previous_pos

    @property
    def direction_of_view(self):
        """Component-wise difference ``last_pos - pos``."""
        (last_x, last_y), (curr_x, curr_y) = self.last_pos, self.pos
        return last_x - curr_x, last_y - curr_y

    def move(self, next_tile):
        """Move to ``next_tile`` if it differs from the current tile and accepts this entity.

        Returns the result of ``next_tile.enter``, or the (falsy) tile comparison
        when the entity is already on ``next_tile``.
        """
        origin = self.tile
        is_new_tile = origin != next_tile
        if not is_new_tile:
            return is_new_tile
        accepted = next_tile.enter(self)
        if accepted:
            origin.leave(self)
            self._tile = next_tile
            self._last_tile = origin
            for observer in self.observers:
                observer.notify_change_pos(self)
        return accepted

    def summarize_state(self) -> dict:
        return dict(
            name=str(self.name),
            x=int(self.x),
            y=int(self.y),
            tile=str(self.tile.name),
            can_collide=bool(self.can_collide),
        )

    @abc.abstractmethod
    def render(self):
        return RenderEntity(self.__class__.__name__.lower(), self.pos)
|
18
environment/entity/mixin.py
Normal file
@ -0,0 +1,18 @@
|
||||
|
||||
|
||||
# noinspection PyAttributeOutsideInit
|
||||
class BoundEntityMixin:
    """Mixin for objects that are attached to exactly one entity via ``bind_to``."""

    def bind_to(self, entity):
        """Attach this object to ``entity``."""
        self._bound_entity = entity

    @property
    def bound_entity(self):
        return self._bound_entity

    def belongs_to_entity(self, entity):
        """Return whether ``entity`` compares equal to the bound entity."""
        return entity == self.bound_entity

    @property
    def name(self):
        owner_name = self._bound_entity.name
        return f'{self.__class__.__name__}({owner_name})'
|
127
environment/entity/object.py
Normal file
@ -0,0 +1,127 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from collections import defaultdict
|
||||
from typing import Union
|
||||
|
||||
from environment import constants as c
|
||||
|
||||
|
||||
class Object:
    """General objects for organisation and maintenance, such as Actions etc..."""

    # Per-class-name counter used to hand out running integer identifiers.
    _u_idx = defaultdict(int)

    def __init__(self, str_ident: Union[str, None] = None, **kwargs):
        self._observers = []
        self._str_ident = str_ident
        self.identifier_int = self._identify_and_count_up()
        self._collection = None

        if kwargs:
            print(f'Following kwargs were passed, but ignored: {kwargs}')

    def __bool__(self):
        # Always truthy, regardless of any __len__ a subclass may define.
        return True

    def __repr__(self):
        return str(self.name)

    def __eq__(self, other) -> bool:
        # Equality is decided by identifier; 'other' may be a plain string or another Object.
        return other == self.identifier

    def __hash__(self):
        return hash(self.identifier)

    @property
    def observers(self):
        return self._observers

    @property
    def name(self):
        """``ClassName[str_ident]`` if a string identifier was given, else ``ClassName#<running int>``."""
        cls_name = self.__class__.__name__
        if self._str_ident is None:
            return f'{cls_name}#{self.identifier_int}'
        return f'{cls_name}[{self._str_ident}]'

    @property
    def identifier(self):
        """The string identifier if one was given, otherwise ``name``."""
        return self.name if self._str_ident is None else self._str_ident

    def _identify_and_count_up(self):
        """Return the current counter for this class name and advance it by one."""
        key = self.__class__.__name__
        current = Object._u_idx[key]
        Object._u_idx[key] = current + 1
        return current

    def set_collection(self, collection):
        self._collection = collection

    def add_observer(self, observer):
        """Register ``observer`` and immediately notify it about this object."""
        self.observers.append(observer)
        observer.notify_change_pos(self)

    def del_observer(self, observer):
        self.observers.remove(observer)
|
||||
|
||||
|
||||
class EnvObject(Object):
    """Objects that hold Information that are observable, but have no position on the env grid. Inventories etc..."""

    _u_idx = defaultdict(lambda: 0)

    def __init__(self, **kwargs):
        super(EnvObject, self).__init__(**kwargs)

    def _collection_attr_or_false(self, attribute):
        """Return the named attribute of the parent collection, or False if it is falsy or unavailable."""
        try:
            return getattr(self._collection, attribute) or False
        except AttributeError:
            return False

    @property
    def obs_tag(self):
        """Name of the parent collection, falling back to this object's own name."""
        try:
            tag = self._collection.name or self.name
        except AttributeError:
            tag = self.name
        return tag

    @property
    def is_blocking_light(self):
        return self._collection_attr_or_false('is_blocking_light')

    @property
    def can_move(self):
        return self._collection_attr_or_false('can_move')

    @property
    def is_blocking_pos(self):
        return self._collection_attr_or_false('is_blocking_pos')

    @property
    def has_position(self):
        return self._collection_attr_or_false('has_position')

    @property
    def can_collide(self):
        return self._collection_attr_or_false('can_collide')

    @property
    def encoding(self):
        return c.VALUE_OCCUPIED_CELL

    def change_parent_collection(self, other_collection):
        """Add this object to ``other_collection``, delete it from the old one and rebind it."""
        other_collection.add_item(self)
        self._collection.delete_env_object(self)
        self._collection = other_collection
        return self._collection == other_collection
|
45
environment/entity/util.py
Normal file
@ -0,0 +1,45 @@
|
||||
import math
|
||||
|
||||
import numpy as np
|
||||
|
||||
from environment.entity.mixin import BoundEntityMixin
|
||||
from environment.entity.object import Object, EnvObject
|
||||
|
||||
|
||||
##########################################################################
|
||||
# ####################### Objects and Entitys ########################## #
|
||||
##########################################################################
|
||||
|
||||
|
||||
class PlaceHolder(Object):
    """Non-colliding object named "PlaceHolder" whose encoding is a configurable fill value."""

    def __init__(self, *args, fill_value=0, **kwargs):
        super(PlaceHolder, self).__init__(*args, **kwargs)
        self._fill_value = fill_value

    @property
    def name(self):
        return "PlaceHolder"

    @property
    def encoding(self):
        return self._fill_value

    @property
    def can_collide(self):
        return False
|
||||
|
||||
|
||||
class GlobalPosition(BoundEntityMixin, EnvObject):
    """Observable object whose encoding is the position of the entity it is bound to."""

    @property
    def encoding(self):
        """Return the bound entity's position, divided by ``_level_shape`` when normalized."""
        if self._normalized:
            return tuple(np.divide(self._bound_entity.pos, self._level_shape))
        else:
            return self.bound_entity.pos

    def __init__(self, *args, normalized: bool = True, **kwargs):
        # NOTE(review): EnvObject.__init__ accepts keyword arguments only, so any positional
        # argument passed through *args would raise a TypeError - confirm callers pass none.
        super(GlobalPosition, self).__init__(*args, **kwargs)
        # NOTE(review): no 'size' attribute is defined on Object, EnvObject or BoundEntityMixin
        # as visible here; unless it is provided elsewhere this line raises AttributeError.
        # Presumably 'size' is meant to be the cell count of a square level - TODO confirm.
        self._level_shape = math.sqrt(self.size)
        self._normalized = normalized
|
131
environment/entity/wall_floor.py
Normal file
@ -0,0 +1,131 @@
|
||||
from typing import List
|
||||
|
||||
import numpy as np
|
||||
|
||||
from environment import constants as c
|
||||
from environment.entity.object import EnvObject
|
||||
from environment.utils.render import RenderEntity
|
||||
from environment.utils import helpers as h
|
||||
|
||||
|
||||
class Floor(EnvObject):
    """A grid cell that keeps track of the entities ("guests") standing on it.

    Guests are stored by name. ``len(tile)`` is the number of guests, which
    also means a tile without guests is falsy unless a base class defines
    ``__bool__`` - never test a tile with a bare ``if tile``.
    """

    def __init__(self, pos, **kwargs):
        """Create the tile at *pos* (any 2-item iterable, stored as a tuple)."""
        super(Floor, self).__init__(**kwargs)
        self._guests = dict()
        self.pos = tuple(pos)
        # ``None`` means "not resolved yet"; an empty list is a valid, cached result.
        self._neighboring_floor = None
        self._blocked_by = None

    @property
    def has_position(self):
        return True

    @property
    def can_collide(self):
        return False

    @property
    def can_move(self):
        return False

    @property
    def is_blocking_pos(self):
        return False

    @property
    def is_blocking_light(self):
        return False

    @property
    def neighboring_floor_pos(self):
        """Positions of all neighbouring tiles, see :attr:`neighboring_floor`."""
        return [x.pos for x in self.neighboring_floor]

    @property
    def neighboring_floor(self):
        """Tiles of the own collection found at the ``h.POS_MASK`` offsets.

        The result is computed on first access and cached afterwards.
        Candidates are filtered with ``is not None`` rather than by truthiness:
        ``__len__`` makes a guest-less tile falsy, so a truthiness filter would
        drop every neighbour that happens to be empty at the time of the call.
        """
        if self._neighboring_floor is None:
            offsets = (off for off in h.POS_MASK.reshape(-1, 2)
                       if not np.all(off == [0, 0]))  # skip the tile itself
            candidates = (self._collection.by_pos(np.add(self.pos, off)) for off in offsets)
            self._neighboring_floor = [tile for tile in candidates if tile is not None]
        return self._neighboring_floor

    @property
    def encoding(self):
        return c.VALUE_OCCUPIED_CELL

    @property
    def guests_that_can_collide(self):
        """All guests whose ``can_collide`` flag is set."""
        return [x for x in self.guests if x.can_collide]

    @property
    def guests(self):
        """View on the guests currently registered on this tile."""
        return self._guests.values()

    @property
    def x(self):
        return self.pos[0]

    @property
    def y(self):
        return self.pos[1]

    @property
    def is_blocked(self):
        """True if at least one guest blocks this position."""
        return any(x.is_blocking_pos for x in self.guests)

    def __len__(self):
        return len(self._guests)

    def is_empty(self):
        """True if no guest is registered."""
        return not len(self._guests)

    def is_occupied(self):
        """True if at least one guest is registered."""
        return bool(len(self._guests))

    def enter(self, guest):
        """Register *guest* on this tile.

        Entering is refused when the guest is already registered, when the tile
        is blocked, or when a position-blocking guest wants to enter an
        occupied tile.

        :return: ``c.VALID`` on success, ``c.NOT_VALID`` otherwise.
        """
        if (guest.name not in self._guests and not self.is_blocked) and not (guest.is_blocking_pos and self.is_occupied()):
            self._guests.update({guest.name: guest})
            return c.VALID
        else:
            return c.NOT_VALID

    def leave(self, guest):
        """Unregister *guest*; returns ``c.NOT_VALID`` if it was not registered."""
        try:
            del self._guests[guest.name]
        except (ValueError, KeyError):
            return c.NOT_VALID
        return c.VALID

    def __repr__(self):
        return f'{self.name}(@{self.pos})'

    def summarize_state(self, **_):
        """Name and integer coordinates of this tile as a plain dict."""
        return dict(name=self.name, x=int(self.x), y=int(self.y))

    def render(self):
        """Floor tiles produce no render entity."""
        return None
|
||||
|
||||
|
||||
class Wall(Floor):
    """Tile variant that collides, blocks its position and blocks light."""

    @property
    def encoding(self):
        """Walls are encoded as occupied cells."""
        return c.VALUE_OCCUPIED_CELL

    @property
    def can_collide(self):
        return True

    @property
    def is_blocking_pos(self):
        return True

    @property
    def is_blocking_light(self):
        return True

    def render(self):
        """Render entity tagged ``c.WALL`` at this tile's position."""
        wall_entity = RenderEntity(c.WALL, self.pos)
        return wall_entity
|
201
environment/factory.py
Normal file
@ -0,0 +1,201 @@
|
||||
import shutil
|
||||
|
||||
from collections import defaultdict
|
||||
from itertools import chain
|
||||
from os import PathLike
|
||||
from pathlib import Path
|
||||
from typing import Union
|
||||
|
||||
import gymnasium as gym
|
||||
|
||||
from environment.utils.level_parser import LevelParser
|
||||
from environment.utils.observation_builder import OBSBuilder
|
||||
from environment.utils.config_parser import FactoryConfigParser
|
||||
from environment.utils import helpers as h
|
||||
import environment.constants as c
|
||||
|
||||
from environment.utils.states import Gamestate
|
||||
|
||||
REC_TAC = 'rec_'
|
||||
|
||||
|
||||
class BaseFactory(gym.Env):
    """Grid environment assembled from a YAML configuration file.

    The configuration is parsed once in ``__init__``; :meth:`reset` rebuilds
    the game state (entities, rules, agents) and the observation builder.
    """

    @property
    def action_space(self):
        return self.state[c.AGENT].action_space

    @property
    def named_action_space(self):
        return self.state[c.AGENT].named_action_space

    @property
    def observation_space(self):
        return self.obs_builder.observation_space(self.state)

    @property
    def named_observation_space(self):
        return self.obs_builder.named_observation_space(self.state)

    @property
    def params(self) -> dict:
        """Content of the configuration file, freshly parsed on every access."""
        import yaml
        # Context manager so the file handle is closed right after parsing.
        with Path(self._config_file).open() as config_stream:
            return yaml.safe_load(config_stream)

    @property
    def summarize_header(self):
        """Summary of all stateless entity groups, see :meth:`_summarize_state`."""
        return self._summarize_state(stateless_entities=True)

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def __init__(self, config_file: Union[str, PathLike]):
        """Parse *config_file*, load the level it names and run a first reset.

        :param config_file: Path to the YAML configuration of the environment.
        """
        self._config_file = config_file
        self.conf = FactoryConfigParser(self._config_file)
        # Attribute Assignment
        self.level_filepath = Path(__file__).parent.parent / h.LEVELS_DIR / f'{self.conf.level_name}.txt'
        self._renderer = None  # expensive - don't use it when not required !

        parsed_entities = self.conf.load_entities()
        self.map = LevelParser(self.level_filepath, parsed_entities, self.conf.pomdp_r)

        # Init for later usage:
        self.state: Gamestate
        self.map: LevelParser
        self.obs_builder: OBSBuilder

        # TODO: Reset ---> document this
        self.reset()

    def __getitem__(self, item):
        """Shortcut to the entity group registered under *item*."""
        return self.state.entities[item]

    def reset(self) -> (dict, dict):
        """Rebuild state, agents and observation builder.

        :return: Whatever ``OBSBuilder.refresh_and_build_for_all`` returns for
                 the fresh state (unpacked as ``obs, info`` in :meth:`step`).
        """
        self.state = None

        # Init entity:
        entities = self.map.do_init()

        # Grab all rules:
        rules = self.conf.load_rules()

        # Agents
        # noinspection PyAttributeOutsideInit
        self.state = Gamestate(entities, rules, self.conf.env_seed)

        agents = self.conf.load_agents(self.map.size, self[c.FLOOR].empty_tiles)
        self.state.entities.add_item({c.AGENT: agents})

        # All is set up, trigger additional init (after agent entity spawn etc)
        self.state.rules.do_all_init(self.state)

        # Observations
        # noinspection PyAttributeOutsideInit
        self.obs_builder = OBSBuilder(self.map.level_shape, self.state, self.map.pomdp_r)
        return self.obs_builder.refresh_and_build_for_all(self.state)

    def step(self, actions):
        """Advance the environment by one tick.

        :param actions: List with one action per agent; any other value is
                        converted to ``[int(actions)]``.
        :return: ``(None, observations, reward, done, info)`` where
                 *observations* is a list and *reward* is a list or a scalar,
                 depending on ``conf.individual_rewards``.
        """
        if not isinstance(actions, list):
            actions = [int(actions)]

        # Apply rules, do actions, tick the state, etc...
        tick_result = self.state.tick(actions)

        # Check Done Conditions
        done_results = self.state.check_done()

        # Finalize
        reward, reward_info, done = self.summarize_step_results(tick_result, done_results)

        info = reward_info

        # ``reward`` is a per-agent list only with individual rewards enabled;
        # the combined reward is a plain number and must not be passed to sum().
        step_reward = sum(reward) if isinstance(reward, (list, tuple)) else reward
        info.update(step_reward=step_reward, step=self.state.curr_step)
        # TODO:
        # if self._record_episodes:
        #     info.update(self._summarize_state())

        obs, reset_info = self.obs_builder.refresh_and_build_for_all(self.state)
        info.update(reset_info)
        return None, list(obs.values()), reward, done, info

    def summarize_step_results(self, tick_results: list, done_check_results: list) -> (int, dict, bool):
        """Fold tick and done results into reward, info dict and done flag.

        Rewards are accumulated per ``result.entity.name``; results without an
        entity count towards a ``'global'`` bucket. Info values are summed per
        identifier. The episode is done as soon as one done result is valid.

        :return: ``(reward, combined_info_dict, done)``; *reward* is a list with
                 one entry per agent (global share added to each) when
                 ``conf.individual_rewards`` is set, else the sum of all buckets.
        """
        rewards = defaultdict(lambda: 0.0)

        # Gather per agent env rewards and
        # Combine Info dicts into a global one
        combined_info_dict = defaultdict(lambda: 0.0)
        for result in chain(tick_results, done_check_results):
            if result.reward is not None:
                try:
                    rewards[result.entity.name] += result.reward
                except AttributeError:
                    # No entity attached to this result -> global reward.
                    rewards['global'] += result.reward
            infos = result.get_infos()
            for info in infos:
                assert isinstance(info.value, (float, int))
                combined_info_dict[info.identifier] += info.value

        # Check Done Rule Results
        done_reason = next((x for x in done_check_results if x.validity), None)
        done = done_reason is not None
        if done:
            self.state.print(f'Env done, Reason: {done_reason.name}.')

        if self.conf.individual_rewards:
            global_rewards = rewards.pop('global', 0.0)
            reward = [rewards[agent.name] + global_rewards for agent in self.state[c.AGENT]]
            self.state.print(f"rewards are {rewards}")
            return reward, combined_info_dict, done
        else:
            reward = sum(rewards.values())
            self.state.print(f"reward is {reward}")
            return reward, combined_info_dict, done

    def start_recording(self):
        """Switch ``conf.do_record`` on; returns the new flag value."""
        self.conf.do_record = True
        return self.conf.do_record

    def stop_recording(self):
        """Switch ``conf.do_record`` off; returns True once it is off."""
        self.conf.do_record = False
        return not self.conf.do_record

    # noinspection PyGlobalUndefined
    def render(self, mode='human'):
        """Render the current state; the renderer is created on first use."""
        if not self._renderer:  # lazy init
            # The import binds ``Renderer`` at module level, as before; the
            # declaration is simply placed ahead of the binding.
            global Renderer
            from environment.utils.renderer import Renderer
            self._renderer = Renderer(self.map.level_shape, view_radius=self.conf.pomdp_r, fps=20)

        render_entities = self.state.entities.render()
        if self.conf.pomdp_r:
            for render_entity in render_entities:
                if render_entity.name == c.AGENT:
                    render_entity.aux = self.obs_builder.curr_lightmaps[render_entity.real_name]
        return self._renderer.render(render_entities)

    def _summarize_state(self, stateless_entities=False):
        """Collect ``summarize_states()`` of all groups matching *stateless_entities*.

        Keys of the returned dict are prefixed with ``REC_TAC``.
        """
        summary = {f'{REC_TAC}step': self.state.curr_step}

        for entity_group in self.state:
            if entity_group.is_stateless == stateless_entities:
                summary.update({f'{REC_TAC}{entity_group.name}': entity_group.summarize_states()})
        return summary

    def print(self, string):
        """Print *string* only if ``conf.verbose`` is set."""
        if self.conf.verbose:
            print(string)

    def save_params(self, filepath: Path):
        """Copy the configuration file to *filepath*, creating parent folders."""
        # noinspection PyProtectedMember
        filepath = Path(filepath)
        filepath.parent.mkdir(parents=True, exist_ok=True)
        shutil.copyfile(self._config_file, filepath)
|
30
environment/groups/agents.py
Normal file
@ -0,0 +1,30 @@
|
||||
from environment.groups.env_objects import EnvObjects
|
||||
from environment.groups.mixins import PositionMixin
|
||||
from environment.entity.agent import Agent
|
||||
import environment.constants as c
|
||||
|
||||
|
||||
class Agents(PositionMixin, EnvObjects):
    """Collection of :class:`Agent` entities."""

    _entity = Agent
    is_blocking_light = False
    can_move = True

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    @property
    def obs_pairs(self):
        """One ``(name, agent)`` pair per agent."""
        return [(agent.name, agent) for agent in self]

    @property
    def action_space(self):
        """Tuple space holding one ``Discrete`` space per agent, sized by its actions."""
        from gymnasium import spaces
        per_agent = [spaces.Discrete(len(agent.actions)) for agent in self]
        return spaces.Tuple(per_agent)

    @property
    def named_action_space(self):
        """Mapping ``agent name -> {action name: action index}``."""
        return {
            agent.name: {action.name: idx for idx, action in enumerate(agent.actions)}
            for agent in self
        }
|
33
environment/groups/env_objects.py
Normal file
@ -0,0 +1,33 @@
|
||||
from environment.groups.objects import Objects
|
||||
from environment.entity.object import EnvObject
|
||||
|
||||
|
||||
class EnvObjects(Objects):
    """Size-limited collection of :class:`EnvObject` items."""

    _entity = EnvObject
    is_blocking_light: bool = False
    can_collide: bool = False
    has_position: bool = False
    can_move: bool = False

    def __init__(self, size, *args, **kwargs):
        """Set up the collection and remember *size* (checked in :meth:`add_item`)."""
        super().__init__(*args, **kwargs)
        self.size = size

    @property
    def encodings(self):
        """Encodings of all contained objects, in iteration order."""
        return [obj.encoding for obj in self]

    def add_item(self, item: EnvObject):
        """Add *item*; collections without position must not exceed ``size``."""
        within_limit = self.has_position or (len(self) <= self.size)
        assert within_limit
        super().add_item(item)
        return self

    def summarize_states(self):
        """``summarize_state()`` of every contained object as a list."""
        summaries = list()
        for entity in self.values():
            summaries.append(entity.summarize_state())
        return summaries

    def delete_env_object(self, env_object: EnvObject):
        """Remove *env_object*, looked up by its name."""
        del self[env_object.name]

    def delete_env_object_by_name(self, name):
        """Remove the object stored under *name*."""
        del self[name]
|
64
environment/groups/global_entities.py
Normal file
@ -0,0 +1,64 @@
|
||||
from collections import defaultdict
|
||||
from operator import itemgetter
|
||||
from typing import Dict
|
||||
|
||||
from environment.groups.objects import Objects
|
||||
from environment.entity.entity import Entity
|
||||
from environment.utils.helpers import POS_MASK
|
||||
|
||||
|
||||
class Entities(Objects):
    """Top-level registry mapping group names to entity groups.

    The registry observes every group added to it and maintains ``pos_dict``,
    a mapping from position to the entities located there.
    """

    _entity = Objects

    @staticmethod
    def neighboring_positions(pos):
        """All positions ``POS_MASK + pos`` as an array of coordinate pairs."""
        return (POS_MASK + pos).reshape(-1, 2)

    def get_near_pos(self, pos):
        """Flat list of all entities registered at the neighbouring positions of *pos*."""
        return [y for x in itemgetter(*(tuple(x) for x in self.neighboring_positions(pos)))(self.pos_dict) for y in x]

    def render(self):
        """Render entities of all groups, skipping ``None`` entries."""
        return [y for x in self for y in x.render() if y is not None]

    @property
    def names(self):
        """Names of all registered groups."""
        return list(self._data.keys())

    def __init__(self):
        self.pos_dict = defaultdict(list)
        super().__init__()

    def iter_entities(self):
        """Iterate over every entity of every group."""
        return iter((x for sublist in self.values() for x in sublist))

    def add_items(self, items: Dict):
        """Alias of :meth:`add_item`."""
        return self.add_item(items)

    def add_item(self, item: dict):
        """Register the groups in *item* (``name -> group``) and observe them."""
        assert_str = 'This group of entity has already been added!'
        assert not any([key for key in item.keys() if key in self.keys()]), assert_str
        self._data.update(item)
        for val in item.values():
            val.add_observer(self)
        return self

    def __delitem__(self, name):
        """Remove the group registered under *name*.

        The previous implementation called ``.keys()`` on the key and
        ``list.delete`` on the observer list, so it could never succeed.

        :raises KeyError: If no group is registered under *name*.
        """
        if name not in self.keys():
            raise KeyError('This group of entity does not exist in this collection!')
        group = self[name]
        # Detaches this registry from the group and from each of its entities.
        group.del_observer(self)
        # Drop the group's entities from the position index as well.
        for entity in group:
            self.notify_del_entity(entity)
        return super(Entities, self).__delitem__(name)

    @property
    def obs_pairs(self):
        """Concatenated ``obs_pairs`` of all groups."""
        return [y for x in self for y in x.obs_pairs]

    def by_pos(self, pos: (int, int)):
        """List of entities registered at *pos* (empty if there are none)."""
        return self.pos_dict[pos]

    @property
    def positions(self):
        """One position per registered entity (positions repeat when shared)."""
        return [k for k, v in self.pos_dict.items() for _ in v]
|
102
environment/groups/mixins.py
Normal file
@ -0,0 +1,102 @@
|
||||
from abc import ABC
|
||||
from typing import Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
from environment import constants as c
|
||||
|
||||
from environment.entity.entity import Entity
|
||||
|
||||
|
||||
# noinspection PyUnresolvedReferences,PyTypeChecker,PyArgumentList
|
||||
class PositionMixin:
    """Mixin for collections whose entities are placed on tiles."""

    _entity = Entity
    is_blocking_light: bool = True
    can_collide: bool = True
    has_position: bool = True

    def render(self):
        """Render entities of all members, skipping members that return ``None``."""
        return [y for y in [x.render() for x in self] if y is not None]

    @classmethod
    def from_tiles(cls, tiles, *args, entity_kwargs=None, **kwargs):
        """Build a collection with one entity per tile.

        :param tiles: Tiles handed to the entity constructor one by one; the
                      running index is passed as ``str_ident``.
        :param entity_kwargs: Optional extra keyword arguments for every entity.
        :param args, kwargs: Forwarded to the collection constructor.
        """
        collection = cls(*args, **kwargs)
        extra = entity_kwargs if entity_kwargs is not None else {}
        entities = [cls._entity(tile, str_ident=i, **extra) for i, tile in enumerate(tiles)]
        collection.add_items(entities)
        return collection

    @classmethod
    def from_coordinates(cls, positions: [(int, int)], tiles, *args, entity_kwargs=None, **kwargs, ):
        """Like :meth:`from_tiles`, resolving *positions* through ``tiles.by_pos``.

        ``tiles.size`` is passed as first positional constructor argument.
        """
        return cls.from_tiles([tiles.by_pos(position) for position in positions], tiles.size, *args,
                              entity_kwargs=entity_kwargs,
                              **kwargs)

    @property
    def tiles(self):
        """Tiles of all contained entities."""
        return [entity.tile for entity in self]

    def __delitem__(self, name):
        """Make the entity called *name* leave its tile, then delete it.

        Raises ``StopIteration`` if no entity carries that name (unchanged).
        """
        obj = next(obj for obj in self if obj.name == name)
        obj.tile.leave(obj)
        super().__delitem__(name)

    def by_pos(self, pos: (int, int)):
        """First entity located at *pos*, or ``None`` if there is none."""
        pos = tuple(pos)
        try:
            return next((e for e in self if e.pos == pos), None)
        except ValueError:
            # Presumably raised when ``e.pos == pos`` is not a plain bool (e.g.
            # array-valued positions). Treated as "not found", as before, but
            # without the stray blank line the debug ``print()`` wrote.
            return None

    @property
    def positions(self):
        """Positions of all contained entities."""
        return [e.pos for e in self]

    def notify_del_entity(self, entity: Entity):
        """Remove *entity* from ``pos_dict``; missing entries are ignored."""
        try:
            self.pos_dict[entity.pos].remove(entity)
        except (ValueError, AttributeError):
            pass
|
||||
|
||||
|
||||
# noinspection PyUnresolvedReferences,PyTypeChecker
|
||||
class IsBoundMixin:
    """Mixin for collections that belong to exactly one entity."""

    def bind(self, entity):
        """Attach this collection to *entity*; always reports ``c.VALID``."""
        # noinspection PyAttributeOutsideInit
        self._bound_entity = entity
        return c.VALID

    def belongs_to_entity(self, entity):
        """True if *entity* equals the entity this collection is bound to."""
        owner = self._bound_entity
        return owner == entity

    @property
    def name(self):
        """Class name followed by the bound entity's name in parentheses."""
        cls_name = self.__class__.__name__
        return f'{cls_name}({self._bound_entity.name})'

    def __repr__(self):
        cls_name = self.__class__.__name__
        return f'{cls_name}#{self._bound_entity.name}({self._data})'
|
||||
|
||||
|
||||
# noinspection PyUnresolvedReferences,PyTypeChecker
|
||||
class HasBoundedMixin:
    """Mixin for collections whose members are each bound to an entity."""

    @property
    def obs_names(self):
        """Names of all members, in iteration order."""
        return [member.name for member in self]

    def by_entity(self, entity):
        """First member bound to *entity*, or ``None`` if there is none."""
        matches = (member for member in self if member.belongs_to_entity(entity))
        return next(matches, None)

    def idx_by_entity(self, entity):
        """Iteration index of the first member bound to *entity*, or ``None``."""
        for position, member in enumerate(self):
            if member.belongs_to_entity(entity):
                return position
        return None
|
141
environment/groups/objects.py
Normal file
@ -0,0 +1,141 @@
|
||||
from collections import defaultdict
|
||||
from typing import List
|
||||
|
||||
import numpy as np
|
||||
|
||||
from environment.entity.object import Object
|
||||
|
||||
|
||||
class Objects:
    """Name-indexed, insertion-ordered collection of objects with observers.

    Items are stored in ``_data`` under their ``name``. Observers are notified
    when items are added or deleted. ``pos_dict`` maps positions to the
    entities this collection was notified about as an observer.
    """

    _entity = Object

    @property
    def observers(self):
        return self._observers

    @property
    def obs_tag(self):
        return self.__class__.__name__

    @staticmethod
    def render():
        """Plain object collections have nothing to render."""
        return []

    @property
    def obs_pairs(self):
        return [(self.name, self)]

    @property
    def names(self):
        # noinspection PyUnresolvedReferences
        return [x.name for x in self]

    @property
    def name(self):
        return f'{self.__class__.__name__}'

    def __init__(self, *args, **kwargs):
        # Kept as defaultdict for subclasses; the methods below never trigger
        # the default, so lookups do not insert ``None`` entries.
        self._data = defaultdict(lambda: None)
        self._observers = list()
        self.pos_dict = defaultdict(list)

    def __len__(self):
        return len(self._data)

    def __iter__(self):
        return iter(self.values())

    def add_item(self, item: _entity):
        """Store *item* under its name, set its collection and notify observers.

        :return: ``self`` to allow chaining.
        """
        assert_str = f'All item names have to be of type {self._entity}, but were {item.__class__}.,'
        assert isinstance(item, self._entity), assert_str
        assert self._data.get(item.name) is None, f'{item.name} allready exists!!!'
        self._data.update({item.name: item})
        item.set_collection(self)
        for observer in self.observers:
            observer.notify_add_entity(item)
        return self

    # noinspection PyUnresolvedReferences
    def del_observer(self, observer):
        """Detach *observer* from this collection and from all of its items."""
        self.observers.remove(observer)
        for entity in self:
            if observer in entity.observers:
                entity.del_observer(observer)

    # noinspection PyUnresolvedReferences
    def add_observer(self, observer):
        """Attach *observer* to this collection and to all of its items."""
        self.observers.append(observer)
        for entity in self:
            if observer not in entity.observers:
                entity.add_observer(observer)

    def __delitem__(self, name):
        """Delete the item stored under *name* after notifying the observers.

        Observers receive the item itself. Previously the bare name was passed,
        which ``notify_del_entity`` silently ignored, so position indices kept
        the deleted item.

        :raises KeyError: If nothing is stored under *name*.
        """
        entity = self._data.get(name)
        if entity is not None:
            for observer in self.observers:
                observer.notify_del_entity(entity)
        # noinspection PyTypeChecker
        del self._data[name]

    def add_items(self, items: List[_entity]):
        """Add every item of *items* via :meth:`add_item`; returns ``self``."""
        for item in items:
            self.add_item(item)
        return self

    def keys(self):
        return self._data.keys()

    def values(self):
        return self._data.values()

    def items(self):
        return self._data.items()

    def _get_index(self, item):
        """Insertion index of the first stored value equal to *item*, else ``None``."""
        return next((i for i, v in enumerate(self._data.values()) if v == item), None)

    def __getitem__(self, item):
        """Look up by integer index (negative allowed) or by name.

        Returns ``None`` for unknown names and out-of-range indices. Unknown
        names are no longer inserted into ``_data`` as ``None`` entries, which
        used to inflate ``len()`` and make iteration yield ``None``.

        :raises TypeError: If *item* is unhashable.
        """
        if isinstance(item, (int, np.integer)):
            if item < 0:
                item = len(self._data) - abs(item)
            return next((v for i, v in enumerate(self._data.values()) if i == item), None)
        return self._data.get(item)

    def __repr__(self):
        return f'{self.__class__.__name__}[{dict(self._data)}]'

    def notify_change_pos(self, entity: object):
        """Move *entity* in ``pos_dict`` from its ``last_pos`` to its ``pos``."""
        try:
            self.pos_dict[entity.last_pos].remove(entity)
        except (ValueError, AttributeError):
            # Not indexed yet, or the entity has no last position.
            pass
        if entity.has_position:
            try:
                self.pos_dict[entity.pos].append(entity)
            except (ValueError, AttributeError):
                pass

    def notify_del_entity(self, entity: Object):
        """Remove *entity* from ``pos_dict``; unknown entities are ignored."""
        try:
            self.pos_dict[entity.pos].remove(entity)
        except (ValueError, AttributeError):
            pass

    def notify_add_entity(self, entity: Object):
        """Start observing *entity* and index it under its position."""
        try:
            entity.add_observer(self)
            self.pos_dict[entity.pos].append(entity)
        except (ValueError, AttributeError):
            pass
|
78
environment/groups/utils.py
Normal file
@ -0,0 +1,78 @@
|
||||
import numbers
|
||||
from typing import List, Union, Dict
|
||||
|
||||
import numpy as np
|
||||
|
||||
from environment.groups.env_objects import EnvObjects
|
||||
from environment.groups.objects import Objects
|
||||
from environment.groups.mixins import HasBoundedMixin, PositionMixin
|
||||
from environment.entity.util import PlaceHolder, GlobalPosition
|
||||
from environment.utils import helpers as h
|
||||
from environment import constants as c
|
||||
|
||||
|
||||
class Combined(PositionMixin, EnvObjects):
    """Collection that stands for several named observations at once."""

    def __init__(self, names: List[str], *args, identifier: Union[None, str] = None, **kwargs):
        """Store the combined *names* and an optional *identifier*.

        A falsy *names* value is replaced by an empty list.
        """
        super().__init__(*args, **kwargs)
        self._ident = identifier
        self._names = names or list()

    @property
    def names(self):
        """The names passed to the constructor."""
        return self._names

    @property
    def name(self):
        """Base name followed by the identifier, or by the names if none is set."""
        label = self._ident or self._names
        return f'{super().name}({label})'

    @property
    def obs_tag(self):
        """Same as :attr:`name`."""
        return self.name

    @property
    def obs_pairs(self):
        """One ``(name, None)`` pair per combined name."""
        return [(combined_name, None) for combined_name in self.names]
|
||||
|
||||
|
||||
class GlobalPositions(HasBoundedMixin, EnvObjects):
    """Collection of :class:`GlobalPosition` objects, each bound to an entity."""

    _entity = GlobalPosition
    # Was ``False,`` - the trailing comma made this the truthy tuple ``(False,)``.
    is_blocking_light = False
    can_collide = False

    def __init__(self, *args, **kwargs):
        super(GlobalPositions, self).__init__(*args, **kwargs)
|
||||
|
||||
|
||||
class Zones(Objects):
    """Per-symbol one-hot slices of a parsed level.

    NOTE: currently disabled - the constructor raises ``NotImplementedError``
    before doing anything, so everything below the ``raise`` is unreachable.
    """

    @property
    def accounting_zones(self):
        # Returns self[idx] for every item whose value differs from c.DANGER_ZONE.
        return [self[idx] for idx, name in self.items() if name != c.DANGER_ZONE]

    def __init__(self, parsed_level):
        raise NotImplementedError('This needs a Rework')
        # --- unreachable until the rework is done ---
        super(Zones, self).__init__()
        slices = list()
        self._accounting_zones = list()
        self._danger_zones = list()
        for symbol in np.unique(parsed_level):
            if symbol == c.VALUE_OCCUPIED_CELL:
                continue
            elif symbol == c.DANGER_ZONE:
                # NOTE(review): no ``__add__`` is visible on ``Objects``; this
                # expression presumably needs replacing during the rework.
                self + symbol
                slices.append(h.one_hot_level(parsed_level, symbol))
                self._danger_zones.append(symbol)
            else:
                self + symbol
                slices.append(h.one_hot_level(parsed_level, symbol))
                self._accounting_zones.append(symbol)

        self._zone_slices = np.stack(slices)

    def __getitem__(self, item):
        # Indexes the stacked slices directly, bypassing Objects.__getitem__.
        return self._zone_slices[item]

    def add_items(self, other: Union[str, List[str]]):
        # Zones are fixed after construction.
        raise AttributeError('You are not allowed to add additional Zones in runtime.')
|
56
environment/groups/wall_n_floors.py
Normal file
@ -0,0 +1,56 @@
|
||||
import random
|
||||
from typing import List
|
||||
|
||||
import numpy as np
|
||||
|
||||
from environment import constants as c
|
||||
from environment.groups.env_objects import EnvObjects
|
||||
from environment.groups.mixins import PositionMixin
|
||||
from environment.entity.wall_floor import Wall, Floor
|
||||
|
||||
|
||||
class Walls(PositionMixin, EnvObjects):
    """Collection of :class:`Wall` tiles."""

    _entity = Wall
    symbol = c.SYMBOL_WALL

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._value = c.VALUE_OCCUPIED_CELL

    @classmethod
    def from_coordinates(cls, argwhere_coordinates, *args, **kwargs):
        """Create the collection with one tile per coordinate.

        :param argwhere_coordinates: Iterable of positions, one tile each.
        :param args, kwargs: Forwarded to the collection constructor.
        """
        collection = cls(*args, **kwargs)
        new_tiles = [cls._entity(coordinate) for coordinate in argwhere_coordinates]
        # noinspection PyTypeChecker
        collection.add_items(new_tiles)
        return collection

    @classmethod
    def from_tiles(cls, tiles, *args, **kwargs):
        """Not supported for tile collections themselves."""
        raise RuntimeError()
|
||||
|
||||
|
||||
class Floors(Walls):
    """Collection of :class:`Floor` tiles."""

    _entity = Floor
    symbol = c.SYMBOL_FLOOR
    is_blocking_light: bool = False
    can_collide: bool = False

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Overrides the value set by ``Walls.__init__``.
        self._value = c.VALUE_FREE_CELL

    @property
    def occupied_tiles(self):
        """All tiles with at least one guest, in random order."""
        occupied = list()
        for tile in self:
            if tile.is_occupied():
                occupied.append(tile)
        random.shuffle(occupied)
        return occupied

    @property
    def empty_tiles(self) -> List[Floor]:
        """All tiles without guests, in random order."""
        vacant = list()
        for tile in self:
            if tile.is_empty():
                vacant.append(tile)
        random.shuffle(vacant)
        return vacant

    @classmethod
    def from_tiles(cls, tiles, *args, **kwargs):
        """Not supported for tile collections themselves."""
        raise RuntimeError()
|
@ -1,66 +1,50 @@
|
||||
import pickle
|
||||
from collections import defaultdict
|
||||
from os import PathLike
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Union
|
||||
from typing import Union
|
||||
|
||||
from stable_baselines3.common.callbacks import BaseCallback
|
||||
from gymnasium import Wrapper
|
||||
|
||||
from environments.helpers import IGNORED_DF_COLUMNS
|
||||
from environment.utils.helpers import IGNORED_DF_COLUMNS
|
||||
from environment.factory import REC_TAC
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from plotting.compare_runs import plot_single_run
|
||||
|
||||
|
||||
class EnvMonitor(BaseCallback):
|
||||
class EnvMonitor(Wrapper):
|
||||
|
||||
ext = 'png'
|
||||
|
||||
def __init__(self, env, filepath: Union[str, PathLike] = None):
|
||||
super(EnvMonitor, self).__init__()
|
||||
self.unwrapped = env
|
||||
super(EnvMonitor, self).__init__(env)
|
||||
self._filepath = filepath
|
||||
self._monitor_df = pd.DataFrame()
|
||||
self._monitor_dicts = defaultdict(dict)
|
||||
self._monitor_dict = dict()
|
||||
|
||||
def __getattr__(self, item):
|
||||
return getattr(self.unwrapped, item)
|
||||
|
||||
def step(self, action):
|
||||
obs, reward, done, info = self.unwrapped.step(action)
|
||||
self._read_info(0, info)
|
||||
self._read_done(0, done)
|
||||
return obs, reward, done, info
|
||||
obs_type, obs, reward, done, info = self.env.step(action)
|
||||
self._read_info(info)
|
||||
self._read_done(done)
|
||||
return obs_type, obs, reward, done, info
|
||||
|
||||
def reset(self):
|
||||
return self.unwrapped.reset()
|
||||
|
||||
def _on_training_start(self) -> None:
|
||||
pass
|
||||
|
||||
def _on_training_end(self) -> None:
|
||||
pass
|
||||
|
||||
def _on_step(self, alt_infos: List[Dict] = None, alt_dones: List[bool] = None) -> bool:
|
||||
for env_idx, info in enumerate(self.locals.get('infos', [])):
|
||||
self._read_info(env_idx, info)
|
||||
|
||||
for env_idx, done in list(
|
||||
enumerate(self.locals.get('dones', []))) + list(enumerate(self.locals.get('done', []))):
|
||||
self._read_done(env_idx, done)
|
||||
return True
|
||||
|
||||
def _read_info(self, env_idx, info: dict):
|
||||
self._monitor_dicts[env_idx][len(self._monitor_dicts[env_idx])] = {
|
||||
def _read_info(self, info: dict):
|
||||
self._monitor_dict[len(self._monitor_dict)] = {
|
||||
key: val for key, val in info.items() if
|
||||
key not in ['terminal_observation', 'episode'] and not key.startswith('rec_')}
|
||||
key not in ['terminal_observation', 'episode'] and not key.startswith(REC_TAC)}
|
||||
return
|
||||
|
||||
def _read_done(self, env_idx, done):
|
||||
def _read_done(self, done):
|
||||
if done:
|
||||
env_monitor_df = pd.DataFrame.from_dict(self._monitor_dicts[env_idx], orient='index')
|
||||
self._monitor_dicts[env_idx] = dict()
|
||||
env_monitor_df = pd.DataFrame.from_dict(self._monitor_dict, orient='index')
|
||||
self._monitor_dict = dict()
|
||||
columns = [col for col in env_monitor_df.columns if col not in IGNORED_DF_COLUMNS]
|
||||
env_monitor_df = env_monitor_df.aggregate(
|
||||
{col: 'mean' if col.endswith('ount') else 'sum' for col in columns}
|
@ -4,21 +4,20 @@ from os import PathLike
|
||||
from pathlib import Path
|
||||
from typing import Union
|
||||
|
||||
from gymnasium import Wrapper
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import simplejson
|
||||
from deepdiff.operator import BaseOperator
|
||||
from stable_baselines3.common.callbacks import BaseCallback
|
||||
|
||||
from environments.factory.base.base_factory import REC_TAC
|
||||
from environment.factory import REC_TAC
|
||||
|
||||
|
||||
class EnvRecorder(BaseCallback):
|
||||
class EnvRecorder(Wrapper):
|
||||
|
||||
def __init__(self, env, entities: str = 'all', filepath: Union[str, PathLike] = None, freq: int = 0):
|
||||
super(EnvRecorder, self).__init__()
|
||||
super(EnvRecorder, self).__init__(env)
|
||||
self.filepath = filepath
|
||||
self.unwrapped = env
|
||||
self.freq = freq
|
||||
self._recorder_dict = defaultdict(list)
|
||||
self._recorder_out_list = list()
|
||||
@ -92,8 +91,8 @@ class EnvRecorder(BaseCallback):
|
||||
out_dict = {'episodes': self._recorder_out_list}
|
||||
out_dict.update(
|
||||
{'n_episodes': self._episode_counter,
|
||||
'env_params': self.unwrapped.params,
|
||||
'header': self.unwrapped.summarize_header
|
||||
'env_params': self.env.params,
|
||||
'header': self.env.summarize_header
|
||||
})
|
||||
try:
|
||||
simplejson.dump(out_dict, f, indent=4)
|
4
environment/rewards.py
Normal file
@ -0,0 +1,4 @@
|
||||
# Default reward values. All of them are negative, i.e. penalties.
# Presumably keyed to the outcome of the matching action/rule - confirm at the call sites.
MOVEMENTS_VALID: float = -0.001
MOVEMENTS_FAIL: float = -0.05
NOOP: float = -0.01
COLLISION: float = -0.5
|
83
environment/rules.py
Normal file
@ -0,0 +1,83 @@
|
||||
import abc
|
||||
from typing import Union, List
|
||||
|
||||
from environment.utils.results import Result, TickResult, DoneResult, ActionResult
|
||||
from environment import constants as c
|
||||
from environment import rewards as r
|
||||
|
||||
|
||||
class Rule(abc.ABC):
    """Base class for environment rules.

    Every hook returns an empty list here; subclasses override the hooks they
    need and return their results instead.
    """

    def __init__(self):
        pass

    @property
    def name(self):
        """Class name of the concrete rule."""
        return type(self).__name__

    def __repr__(self):
        return format(self.name)

    def on_init(self, state):
        """Hook for rule initialisation; no results by default."""
        return list()

    def on_reset(self):
        """Hook for resets; no results by default."""
        return list()

    def tick_pre_step(self, state) -> List[TickResult]:
        """Pre-step hook; no results by default."""
        return list()

    def tick_step(self, state) -> List[TickResult]:
        """Step hook; no results by default."""
        return list()

    def tick_post_step(self, state) -> List[TickResult]:
        """Post-step hook; no results by default."""
        return list()

    def on_check_done(self, state) -> List[DoneResult]:
        """Done-check hook; no results by default."""
        return list()
|
||||
|
||||
|
||||
class MaxStepsReached(Rule):
    """Done rule that fires once the step counter reaches ``max_steps``."""

    def __init__(self, max_steps: int = 500):
        """
        :param max_steps: Step count at which the episode is reported as done.
        """
        super().__init__()
        self.max_steps = max_steps

    def on_init(self, state):
        """Nothing to initialise.

        Returns an empty list like ``Rule.on_init`` (it used to return ``None``,
        which breaks callers that collect hook results as lists).
        """
        return []

    def on_check_done(self, state):
        """Report a valid done result iff ``state.curr_step >= max_steps``."""
        limit_reached = self.max_steps <= state.curr_step
        validity = c.VALID if limit_reached else c.NOT_VALID
        return [DoneResult(validity=validity, identifier=self.name, reward=0)]
|
||||
|
||||
|
||||
class Collision(Rule):
    """Penalises every colliding guest that shares a tile with another one."""

    def __init__(self, done_at_collisions: bool = False):
        """
        :param done_at_collisions: When true, a collision in the last tick makes
                                   :meth:`on_check_done` report the episode as done.
        """
        super().__init__()
        self.done_at_collisions = done_at_collisions
        self.curr_done = False

    def tick_post_step(self, state) -> List[TickResult]:
        """Emit one collision result per guest on tiles with two or more colliders.

        ``curr_done`` is cleared first and set again as soon as one such tile
        is found.
        """
        self.curr_done = False
        collision_tiles = state.get_all_tiles_with_collisions()
        collected = list()
        for tile in collision_tiles:
            colliders = tile.guests_that_can_collide
            if len(colliders) < 2:
                continue
            for collider in colliders:
                try:
                    # NOTE(review): ``entity=self`` attaches the rule, not the
                    # guest, to the state result - confirm this is intended.
                    collider.set_state(TickResult(identifier=c.COLLISION, reward=r.COLLISION,
                                                  validity=c.NOT_VALID, entity=self))
                except AttributeError:
                    # Raising guests (e.g. without ``set_state``) are skipped.
                    pass
                collected.append(TickResult(entity=collider, identifier=c.COLLISION,
                                            reward=r.COLLISION, validity=c.VALID))
            self.curr_done = True
        return collected

    def on_check_done(self, state) -> List[DoneResult]:
        """Valid done result only if a collision happened and the rule is configured to end on it."""
        if not (self.curr_done and self.done_at_collisions):
            return [DoneResult(validity=c.NOT_VALID, identifier=self.name, reward=0)]
        return [DoneResult(validity=c.VALID, identifier=c.COLLISION, reward=r.COLLISION)]
|
120
environment/utils/config_parser.py
Normal file
@ -0,0 +1,120 @@
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
|
||||
from environment.groups.global_entities import Entities
|
||||
from environment.groups.agents import Agents
|
||||
from environment.entity.agent import Agent
|
||||
from environment.utils.helpers import locate_and_import_class
|
||||
from environment import constants as c
|
||||
|
||||
|
||||
# Folder searched for names listed in the parser's `default_*` class attributes.
DEFAULT_PATH = 'environment'
# Folder searched for every other configured entity, rule or action.
MODULE_PATH = 'modules'
|
||||
|
||||
|
||||
class FactoryConfigParser(object):
    """Read a YAML environment configuration and resolve it into classes and agents.

    Names listed in the `default_*` class attributes are imported from below
    `DEFAULT_PATH`; every other configured name is imported from below `MODULE_PATH`.
    """

    default_entites = []
    default_rules = ['MaxStepsReached', 'Collision']
    default_actions = [c.MOVE8, c.NOOP]
    default_observations = [c.WALLS, c.AGENTS]

    def __init__(self, config_path):
        """
        :param config_path: Path (str or path-like) of the YAML configuration file.
        """
        self.config_path = Path(config_path)
        # Open through the Path object so plain strings work too, and close the file afterwards.
        with self.config_path.open() as config_file:
            self.config = yaml.safe_load(config_file)
        self.do_record = False

    def __getattr__(self, item):
        """Fall back to the 'General' section of the config for unknown attributes."""
        # `__getattr__` only runs when normal lookup failed. If `config` itself is missing
        # (e.g. before `__init__` finished), going through `self[...]` would recurse forever;
        # dunder probes (copy, pickle, ...) must be answered with AttributeError as well.
        if item == 'config' or (item.startswith('__') and item.endswith('__')):
            raise AttributeError(item)
        return self['General'][item]

    def _get_sub_list(self, primary_key: str, sub_key: str):
        """For each mapping in `config[primary_key]`, flatten the values stored under `sub_key`.

        NOTE(review): this iterates `config[primary_key]` as a sequence of mappings, while
        `load_agents` indexes `config['Agents']` by name -- confirm the expected schema.
        """
        return [{key: [s for k, v in val.items() if k == sub_key for s in v] for key, val in x.items()
                 } for x in self.config[primary_key]]

    @property
    def agent_actions(self):
        return self._get_sub_list('Agents', "Actions")

    @property
    def agent_observations(self):
        return self._get_sub_list('Agents', "Observations")

    @property
    def rules(self):
        return self.config['Rules']

    @property
    def agents(self):
        return self.config['Agents']

    @property
    def entities(self):
        return self.config['Entities']

    def __repr__(self):
        return str(self.config)

    def __getitem__(self, item):
        return self.config[item]

    def load_entities(self):
        """Import every configured entity class.

        :return: Mapping of entity name to a dict with the keys 'class', 'kwargs' and 'symbol'
                 ('symbol' is None when the class has no `symbol` attribute).
        """
        entity_classes = dict()
        entities = []
        if c.DEFAULTS in self.entities:
            entities.extend(self.default_entites)
        entities.extend(x for x in self.entities if x != c.DEFAULTS)

        for entity in entities:
            folder_path = MODULE_PATH if entity not in self.default_entites else DEFAULT_PATH
            entity_class = locate_and_import_class(entity, folder_path)
            entity_kwargs = self.entities.get(entity, {})
            entity_symbol = entity_class.symbol if hasattr(entity_class, 'symbol') else None
            entity_classes.update({entity: {'class': entity_class, 'kwargs': entity_kwargs, 'symbol': entity_symbol}})
        return entity_classes

    def load_agents(self, size, free_tiles):
        """Build the agent collection described by the 'Agents' section.

        :param size: Passed on to the `Agents` collection.
        :param free_tiles: Collection supporting `pop()`; one element is consumed per agent.
        :return: The populated `Agents` collection.
        """
        agents = Agents(size)
        base_env_actions = self.default_actions.copy() + [c.MOVE4]
        for name in self.agents:
            # Actions
            actions = list()
            if c.DEFAULTS in self.agents[name]['Actions']:
                actions.extend(self.default_actions)
            actions.extend(x for x in self.agents[name]['Actions'] if x != c.DEFAULTS)
            parsed_actions = list()
            for action in actions:
                folder_path = MODULE_PATH if action not in base_env_actions else DEFAULT_PATH
                class_or_classes = locate_and_import_class(action, folder_path)
                # A located name may resolve to one class or to an iterable of classes.
                try:
                    parsed_actions.extend(class_or_classes)
                except TypeError:
                    parsed_actions.append(class_or_classes)
            parsed_actions = [x() for x in parsed_actions]

            # Observation
            observations = list()
            if c.DEFAULTS in self.agents[name]['Observations']:
                observations.extend(self.default_observations)
            observations.extend(x for x in self.agents[name]['Observations'] if x != c.DEFAULTS)
            agent = Agent(parsed_actions, observations, free_tiles.pop(), str_ident=name)
            agents.add_item(agent)
        return agents

    def load_rules(self):
        """Import every configured rule class.

        :return: Mapping of rule name to a dict with the keys 'class' and 'kwargs'.
        """
        rules_classes = dict()
        rules = []
        if c.DEFAULTS in self.rules:
            for rule in self.default_rules:
                if rule not in rules:
                    rules.append(rule)
        rules.extend(x for x in self.rules if x != c.DEFAULTS)

        for rule in rules:
            folder_path = MODULE_PATH if rule not in self.default_rules else DEFAULT_PATH
            rule_class = locate_and_import_class(rule, folder_path)
            rule_kwargs = self.rules.get(rule, {})
            rules_classes.update({rule: {'class': rule_class, 'kwargs': rule_kwargs}})
        return rules_classes
|
@ -1,12 +1,14 @@
|
||||
import importlib
|
||||
import itertools
|
||||
from collections import defaultdict
|
||||
from typing import Tuple, Union, Dict, List, NamedTuple
|
||||
from pathlib import PurePath, Path
|
||||
from typing import Union, Dict, List
|
||||
|
||||
import networkx as nx
|
||||
import numpy as np
|
||||
from numpy.typing import ArrayLike
|
||||
from stable_baselines3 import PPO, DQN, A2C
|
||||
|
||||
from environment import constants as c
|
||||
|
||||
"""
|
||||
This file is used for:
|
||||
@ -21,10 +23,7 @@ This file is used for:
|
||||
"""
|
||||
|
||||
|
||||
MODEL_MAP = dict(PPO=PPO, DQN=DQN, A2C=A2C) # For use in studies and experiments
|
||||
|
||||
|
||||
LEVELS_DIR = 'levels' # for use in studies and experiments
|
||||
LEVELS_DIR = 'modules/levels' # for use in studies and experiments
|
||||
STEPS_START = 1 # Define where to the stepcount; which is the first step
|
||||
|
||||
# Not used anymore? Clean!
|
||||
@ -37,132 +36,13 @@ POS_MASK = np.asarray([[[-1, -1], [0, -1], [1, -1]],
|
||||
[[-1, 0], [0, 0], [1, 0]],
|
||||
[[-1, 1], [0, 1], [1, 1]]])
|
||||
|
||||
|
||||
class Constants:
|
||||
|
||||
"""
|
||||
String based mapping. Use these to handle keys or define values, which can be then be used globaly.
|
||||
Please use class inheritance when defining new environments.
|
||||
"""
|
||||
|
||||
WALL = '#' # Wall tile identifier for resolving the string based map files.
|
||||
DANGER_ZONE = 'x' # Dange Zone tile identifier for resolving the string based map files.
|
||||
|
||||
WALLS = 'Walls' # Identifier of Wall-objects and sets (collections).
|
||||
FLOOR = 'Floor' # Identifier of Floor-objects and sets (collections).
|
||||
LEVEL = 'Level' # Identifier of Level-objects and sets (collections).
|
||||
AGENT = 'Agent' # Identifier of Agent-objects and sets (collections).
|
||||
AGENT_PLACEHOLDER = 'AGENT_PLACEHOLDER' # Identifier of Placeholder-objects and sets (collections).
|
||||
GLOBAL_POSITION = 'GLOBAL_POSITION' # Identifier of the global position slice
|
||||
|
||||
FREE_CELL = 0 # Free-Cell value used in observation
|
||||
OCCUPIED_CELL = 1 # Occupied-Cell value used in observation
|
||||
SHADOWED_CELL = -1 # Shadowed-Cell value used in observation
|
||||
|
||||
NO_POS = (-9999, -9999) # Invalid Position value used in the environment (something is off-grid)
|
||||
|
||||
ACTION = 'action' # Identifier of Action-objects and sets (collections).
|
||||
COLLISION = 'collision' # Identifier to use in the context of collitions.
|
||||
VALID = True # Identifier to rename boolean values in the context of actions.
|
||||
NOT_VALID = False # Identifier to rename boolean values in the context of actions.
|
||||
|
||||
|
||||
class EnvActions:
|
||||
"""
|
||||
String based mapping. Use these to identifiy actions, can be used globaly.
|
||||
Please use class inheritance when defining new environments with new actions.
|
||||
"""
|
||||
# Movements
|
||||
NORTH = 'north'
|
||||
EAST = 'east'
|
||||
SOUTH = 'south'
|
||||
WEST = 'west'
|
||||
NORTHEAST = 'north_east'
|
||||
SOUTHEAST = 'south_east'
|
||||
SOUTHWEST = 'south_west'
|
||||
NORTHWEST = 'north_west'
|
||||
|
||||
# Other
|
||||
# MOVE = 'move'
|
||||
NOOP = 'no_op'
|
||||
|
||||
_ACTIONMAP = defaultdict(lambda: (0, 0),
|
||||
{NORTH: (-1, 0), NORTHEAST: (-1, 1),
|
||||
EAST: (0, 1), SOUTHEAST: (1, 1),
|
||||
SOUTH: (1, 0), SOUTHWEST: (1, -1),
|
||||
WEST: (0, -1), NORTHWEST: (-1, -1)
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
|
||||
@classmethod
|
||||
def is_move(cls, action):
|
||||
"""
|
||||
Classmethod; checks if given action is a movement action or not. Depending on the env. configuration,
|
||||
Movement actions are either `manhattan` (square) style movements (up,down, left, right) and/or diagonal.
|
||||
|
||||
:param action: Action to be checked
|
||||
:type action: str
|
||||
:return: Whether the given action is a movement action.
|
||||
:rtype: bool
|
||||
"""
|
||||
return any([action == direction for direction in cls.movement_actions()])
|
||||
|
||||
@classmethod
|
||||
def square_move(cls):
|
||||
"""
|
||||
Classmethod; return a list of movement actions that are considered square or `manhattan` style movements.
|
||||
|
||||
:return: A list of movement actions.
|
||||
:rtype: list(str)
|
||||
"""
|
||||
return [cls.NORTH, cls.EAST, cls.SOUTH, cls.WEST]
|
||||
|
||||
@classmethod
|
||||
def diagonal_move(cls):
|
||||
"""
|
||||
Classmethod; return a list of movement actions that are considered diagonal movements.
|
||||
|
||||
:return: A list of movement actions.
|
||||
:rtype: list(str)
|
||||
"""
|
||||
return [cls.NORTHEAST, cls.SOUTHEAST, cls.SOUTHWEST, cls.NORTHWEST]
|
||||
|
||||
@classmethod
|
||||
def movement_actions(cls):
|
||||
"""
|
||||
Classmethod; return a list of all available movement actions.
|
||||
Please note, that this is indipendent from the env. properties
|
||||
|
||||
:return: A list of movement actions.
|
||||
:rtype: list(str)
|
||||
"""
|
||||
return list(itertools.chain(cls.square_move(), cls.diagonal_move()))
|
||||
|
||||
@classmethod
|
||||
def resolve_movement_action_to_coords(cls, action):
|
||||
"""
|
||||
Classmethod; resolve movement actions. Given a movement action, return the delta in coordinates it stands for.
|
||||
How does the current entity coordinate change if it performs the given action?
|
||||
Please note, this is indipendent from the env. properties
|
||||
|
||||
:return: Delta coorinates.
|
||||
:rtype: tuple(int, int)
|
||||
"""
|
||||
return cls._ACTIONMAP[action]
|
||||
|
||||
|
||||
class RewardsBase(NamedTuple):
|
||||
"""
|
||||
Value based mapping. Use these to define reward values for specific conditions (i.e. the action
|
||||
in a given context), can be used globaly.
|
||||
Please use class inheritance when defining new environments with new rewards.
|
||||
"""
|
||||
MOVEMENTS_VALID: float = -0.001
|
||||
MOVEMENTS_FAIL: float = -0.05
|
||||
NOOP: float = -0.01
|
||||
COLLISION: float = -0.5
|
||||
MOVEMAP = defaultdict(lambda: (0, 0),
|
||||
{c.NORTH: (-1, 0), c.NORTHEAST: (-1, 1),
|
||||
c.EAST: (0, 1), c.SOUTHEAST: (1, 1),
|
||||
c.SOUTH: (1, 0), c.SOUTHWEST: (1, -1),
|
||||
c.WEST: (0, -1), c.NORTHWEST: (-1, -1)
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
class ObservationTranslator:
|
||||
@ -171,10 +51,10 @@ class ObservationTranslator:
|
||||
*per_agent_named_obs_spaces: Dict[str, dict],
|
||||
placeholder_fill_value: Union[int, str, None] = None):
|
||||
"""
|
||||
This is a helper class, which converts agents observations from joined environments.
|
||||
For example, agents trained in different environments may expect different observations.
|
||||
This is a helper class, which converts agent observations from joined environments.
|
||||
For example, agent trained in different environments may expect different observations.
|
||||
This class translates from larger observations spaces to smaller.
|
||||
A string identifier based approach is used.
|
||||
A string _identifier based approach is used.
|
||||
Currently, it is not possible to mix different obs shapes.
|
||||
|
||||
|
||||
@ -203,7 +83,7 @@ class ObservationTranslator:
|
||||
self._this_named_obs_space = this_named_observation_space
|
||||
self._per_agent_named_obs_space = list(per_agent_named_obs_spaces)
|
||||
|
||||
def translate_observation(self, agent_idx: int, obs: np.ndarray):
|
||||
def translate_observation(self, agent_idx: int, obs):
|
||||
target_obs_space = self._per_agent_named_obs_space[agent_idx]
|
||||
translation = dict()
|
||||
for name, idxs in target_obs_space.items():
|
||||
@ -232,10 +112,10 @@ class ActionTranslator:
|
||||
|
||||
def __init__(self, target_named_action_space: Dict[str, int], *per_agent_named_action_space: Dict[str, int]):
|
||||
"""
|
||||
This is a helper class, which converts agents action spaces to a joined environments action space.
|
||||
For example, agents trained in different environments may have different action spaces.
|
||||
This is a helper class, which converts agent action spaces to a joined environments action space.
|
||||
For example, agent trained in different environments may have different action spaces.
|
||||
This class translates from smaller individual agent action spaces to larger joined spaces.
|
||||
A string identifier based approach is used.
|
||||
A string _identifier based approach is used.
|
||||
|
||||
:param target_named_action_space: Joined `Named action space` for the current environment.
|
||||
:type target_named_action_space: Dict[str, dict]
|
||||
@ -282,14 +162,14 @@ def parse_level(path):
|
||||
return level
|
||||
|
||||
|
||||
def one_hot_level(level, wall_char: str = Constants.WALL):
|
||||
def one_hot_level(level, symbol: str):
|
||||
"""
|
||||
Given a string based level representation (list of lists, see function `parse_level`), this function creates a
|
||||
binary numpy array or `grid`. Grid values that equal `wall_char` become of `Constants.OCCUPIED_CELL` value.
|
||||
Can be changed to filter for any symbol.
|
||||
|
||||
:param level: String based level representation (list of lists, see function `parse_level`).
|
||||
:param wall_char: List[List[str]]
|
||||
:param symbol: List[List[str]]
|
||||
|
||||
:return: Binary numpy array
|
||||
:rtype: np.typing._array_like.ArrayLike
|
||||
@ -297,35 +177,12 @@ def one_hot_level(level, wall_char: str = Constants.WALL):
|
||||
|
||||
grid = np.array(level)
|
||||
binary_grid = np.zeros(grid.shape, dtype=np.int8)
|
||||
binary_grid[grid == wall_char] = Constants.OCCUPIED_CELL
|
||||
binary_grid[grid == symbol] = c.VALUE_OCCUPIED_CELL
|
||||
return binary_grid
|
||||
|
||||
|
||||
def check_position(slice_to_check_against: ArrayLike, position_to_check: Tuple[int, int]):
|
||||
"""
|
||||
Given a slice (2-D Arraylike object)
|
||||
|
||||
:param slice_to_check_against: The slice to check for accessability
|
||||
:type slice_to_check_against: np.typing._array_like.ArrayLike
|
||||
|
||||
:param position_to_check: Position in slice that should be checked. Can be outside of slice boundarys.
|
||||
:type position_to_check: tuple(int, int)
|
||||
|
||||
:return: Whether a position can be moved to.
|
||||
:rtype: bool
|
||||
"""
|
||||
x_pos, y_pos = position_to_check
|
||||
|
||||
# Check if agent colides with grid boundrys
|
||||
valid = not (
|
||||
x_pos < 0 or y_pos < 0
|
||||
or x_pos >= slice_to_check_against.shape[0]
|
||||
or y_pos >= slice_to_check_against.shape[1]
|
||||
)
|
||||
|
||||
# Check for collision with level walls
|
||||
valid = valid and not slice_to_check_against[x_pos, y_pos]
|
||||
return Constants.VALID if valid else Constants.NOT_VALID
|
||||
def is_move(action_name: str):
    """Tell whether `action_name` is one of the movement keys of `MOVEMAP`."""
    return action_name in MOVEMAP
|
||||
|
||||
|
||||
def asset_str(agent):
|
||||
@ -339,18 +196,18 @@ def asset_str(agent):
|
||||
action = step_result['action_name']
|
||||
valid = step_result['action_valid']
|
||||
col_names = [x.name for x in step_result['collisions']]
|
||||
if any(Constants.AGENT in name for name in col_names):
|
||||
if any(c.AGENT in name for name in col_names):
|
||||
return 'agent_collision', 'blank'
|
||||
elif not valid or Constants.LEVEL in col_names or Constants.AGENT in col_names:
|
||||
return Constants.AGENT, 'invalid'
|
||||
elif valid and not EnvActions.is_move(action):
|
||||
return Constants.AGENT, 'valid'
|
||||
elif valid and EnvActions.is_move(action):
|
||||
return Constants.AGENT, 'move'
|
||||
elif not valid or c.LEVEL in col_names or c.AGENT in col_names:
|
||||
return c.AGENT, 'invalid'
|
||||
elif valid and not is_move(action):
|
||||
return c.AGENT, 'valid'
|
||||
elif valid and is_move(action):
|
||||
return c.AGENT, 'move'
|
||||
else:
|
||||
return Constants.AGENT, 'idle'
|
||||
return c.AGENT, 'idle'
|
||||
else:
|
||||
return Constants.AGENT, 'idle'
|
||||
return c.AGENT, 'idle'
|
||||
|
||||
|
||||
def points_to_graph(coordiniates_or_tiles, allow_euclidean_connections=True, allow_manhattan_connections=True):
|
||||
@ -386,3 +243,30 @@ def points_to_graph(coordiniates_or_tiles, allow_euclidean_connections=True, all
|
||||
elif allow_manhattan_connections and not allow_euclidean_connections and diff == 1:
|
||||
graph.add_edge(a, b)
|
||||
return graph
|
||||
|
||||
|
||||
def locate_and_import_class(class_name, folder_path: Union[str, PurePath] = ''):
    """Import the modules below `folder_path` one by one and return the attribute `class_name`.

    Every `*.py` file below `folder_path` (except `__init__` files) is imported under the
    dotted name built from its path parts, so `folder_path` has to be given relative to an
    importable root. The first module that defines `class_name` wins.

    :param class_name: Name of the attribute to look for in each module.
    :param folder_path: Folder that is searched recursively.
    :return: The attribute found under `class_name`.
    :raises AttributeError: If no module defines `class_name`; the message lists the candidate
                            names seen in the imported modules.
    """
    import sys
    # Only register the parent folder once; appending on every call grows `sys.path` unboundedly.
    if ".." not in sys.path:
        sys.path.append("..")
    folder_path = Path(folder_path)
    module_paths = [x for x in folder_path.rglob('*.py') if x.is_file() and '__init__' not in x.name]
    # Names that show up in many modules (imports, base classes) and are no useful suggestion.
    # A comma was missing between 'Floor' and 'TickResult', which silently fused them into
    # one entry so that neither name was excluded.
    excluded_names = {'Entity', 'NamedTuple', 'List', 'Rule', 'Union', 'random', 'Floor',
                      'TickResult', 'ActionResult', 'Action', 'Agent', 'deque',
                      'BoundEntityMixin', 'RenderEntity', 'TemplateRule', 'defaultdict',
                      'is_move', 'Objects', 'PositionMixin', 'IsBoundMixin', 'EnvObject',
                      'EnvObjects'}
    all_found_modules = list()
    for module_path in module_paths:
        mod = importlib.import_module('.'.join([x.replace('.py', '') for x in module_path.parts]))
        all_found_modules.extend(x for x in dir(mod)
                                 if not (x.startswith('__') or len(x) < 2 or x.isupper())
                                 and x not in excluded_names)
        try:
            return getattr(mod, class_name)
        except AttributeError:
            continue
    raise AttributeError(f'Class "{class_name}" was not found!!!"\n'
                         f'Check the {folder_path.name} name.\n'
                         f'Possible Options are:\n{set(all_found_modules)}')
|
55
environment/utils/level_parser.py
Normal file
@ -0,0 +1,55 @@
|
||||
from os import PathLike
|
||||
from pathlib import Path
|
||||
from typing import Dict
|
||||
|
||||
import numpy as np
|
||||
|
||||
from environment.groups.global_entities import Entities
|
||||
from environment.groups.wall_n_floors import Walls, Floors
|
||||
from environment.utils import helpers as h
|
||||
from environment import constants as c
|
||||
|
||||
|
||||
class LevelParser(object):
    """Turn a string-based level file into the initial entity collections."""

    def __init__(self, level_file_path: PathLike, entity_parse_dict: Dict[Entities, dict], pomdp_r=0):
        """
        :param level_file_path: Level file handed to `h.parse_level`.
        :param entity_parse_dict: Per entity name, a dict providing at least 'class' and 'kwargs'.
        :param pomdp_r: View radius; a falsy value makes `size` cover the whole level.
        """
        self.pomdp_r = pomdp_r
        self.e_p_dict = entity_parse_dict
        self._parsed_level = h.parse_level(Path(level_file_path))
        wall_grid = h.one_hot_level(self._parsed_level, c.SYMBOL_WALL)
        self.level_shape = wall_grid.shape
        # NOTE(review): uses the radius squared, not `pomdp_d` squared -- confirm this is intended.
        if self.pomdp_r:
            self.size = self.pomdp_r**2
        else:
            self.size = np.prod(self.level_shape)

    @property
    def pomdp_d(self):
        """Edge length derived from the view radius: `2 * pomdp_r + 1`."""
        return self.pomdp_r * 2 + 1

    def do_init(self):
        """Create walls, floors and all configured entities.

        :return: The populated `Entities` collection.
        :raises ValueError: If an entity class declares a `symbol` that does not occur in the level.
        """
        entities = Entities()

        # Walls and floor share the same wall grid: occupied cells are walls, free cells are floor.
        wall_grid = h.one_hot_level(self._parsed_level, c.SYMBOL_WALL)
        wall_coords = np.argwhere(wall_grid == c.VALUE_OCCUPIED_CELL)
        entities.add_items({c.WALL: Walls.from_coordinates(wall_coords, self.size)})
        floor_coords = np.argwhere(wall_grid == c.VALUE_FREE_CELL)
        entities.add_items({c.FLOOR: Floors.from_coordinates(floor_coords, self.size)})

        # Everything else comes from the parse dict.
        for spec in self.e_p_dict.values():
            e_class = spec['class']
            e_kwargs = spec['kwargs']

            if not hasattr(e_class, 'symbol'):
                # Not placed through the level file; built from its kwargs only.
                instance = e_class(self.size, **e_kwargs)
            else:
                symbol_grid = h.one_hot_level(self._parsed_level, symbol=e_class.symbol)
                if not np.any(symbol_grid):
                    raise ValueError(f'No {e_class} (Symbol: {e_class.symbol}) could be found!\n'
                                     f'Check your level file!')
                coords = np.argwhere(symbol_grid == c.VALUE_OCCUPIED_CELL).tolist()
                instance = e_class.from_coordinates(coords, entities[c.FLOOR], self.size,
                                                    entity_kwargs=e_kwargs)
            entities.add_items({instance.name: instance})
        return entities
|
315
environment/utils/observation_builder.py
Normal file
@ -0,0 +1,315 @@
|
||||
import math
|
||||
from collections import defaultdict
|
||||
from itertools import product
|
||||
from typing import Dict, List
|
||||
|
||||
import numpy as np
|
||||
from numba import njit
|
||||
|
||||
from environment.groups.utils import Combined
|
||||
from environment.utils.states import Gamestate
|
||||
|
||||
from environment import constants as c
|
||||
|
||||
|
||||
class OBSBuilder(object):
    """Assemble per-agent observation arrays from the entities an agent can see."""

    default_obs = [c.WALLS, c.OTHERS]

    @property
    def pomdp_d(self):
        """Edge length of the view window (`2 * pomdp_r + 1`), or 0 if `pomdp_r` is falsy."""
        if self.pomdp_r:
            return (self.pomdp_r * 2) + 1
        else:
            return 0

    def __init__(self, level_shape: np.size, state: Gamestate, pomdp_r: int):
        """
        :param level_shape: Shape of the level; used as observation shape when `pomdp_r` is falsy.
        :param state: Game state whose `entities.obs_pairs` provide the observable objects.
        :param pomdp_r: View radius of the agents.
        """
        self.all_obs = dict()
        self.light_blockers = defaultdict(lambda: False)
        self.positional = defaultdict(lambda: False)
        self.non_positional = defaultdict(lambda: False)
        self.ray_caster = dict()

        self.level_shape = level_shape
        self.pomdp_r = pomdp_r
        self.obs_shape = (self.pomdp_d, self.pomdp_d) if self.pomdp_r else self.level_shape
        self.size = np.prod(self.obs_shape)

        self.obs_layers = dict()

        self.build_structured_obs_block(state)
        self.curr_lightmaps = dict()

    def build_structured_obs_block(self, state):
        """(Re-)register the placeholder layer and every pair from `state.entities.obs_pairs`."""
        self.all_obs[c.PLACEHOLDER] = np.full(self.obs_shape, 0, dtype=float)
        self.all_obs.update({key: obj for key, obj in state.entities.obs_pairs})

    def observation_space(self, state):
        """Build the gymnasium space matching the current observations.

        :return: A single `Box` for one agent, otherwise a `Tuple` of one `Box` per agent.
        """
        from gymnasium.spaces import Tuple, Box
        # `refresh_and_build_for_all` returns `(observations, info)`; only the dict is needed.
        obsn, _ = self.refresh_and_build_for_all(state)
        if len(state[c.AGENT]) == 1:
            space = Box(low=0, high=1, shape=next(x for x in obsn.values()).shape, dtype=np.float32)
        else:
            space = Tuple([Box(low=0, high=1, shape=obs.shape, dtype=np.float32) for obs in obsn.values()])
        return space

    def named_observation_space(self, state):
        # NOTE(review): returns the `(observations, info)` tuple unchanged -- confirm that
        # callers expect this rather than the result of `refresh_and_build_named_for_all`.
        return self.refresh_and_build_for_all(state)

    def refresh_and_build_for_all(self, state) -> (dict, dict):
        """Refresh the registered layers and build every agent's observation.

        :return: `(observations, info)`; observations map agent name to array, info is empty.
        """
        self.build_structured_obs_block(state)
        info = {}
        return {agent.name: self.build_for_agent(agent, state)[0] for agent in state[c.AGENT]}, info

    def refresh_and_build_named_for_all(self, state) -> Dict[str, Dict[str, np.ndarray]]:
        """Like `refresh_and_build_for_all`, but also report the layer names per agent.

        :return: Per agent name a dict with the keys 'observation' and 'names'.
        """
        self.build_structured_obs_block(state)
        named_obs_dict = {}
        for agent in state[c.AGENT]:
            obs, names = self.build_for_agent(agent, state)
            named_obs_dict[agent.name] = {'observation': obs, 'names': names}
        return named_obs_dict

    def _find_obs_source(self, l_name, agent):
        """Resolve a layer name to its registered object.

        Tried in order: the exact name, `'<name>(<agent name>)'`, then the first registered
        key containing both the layer name and the agent name.

        :raises KeyError: If none of the lookups matches.
        """
        for key in (l_name, f'{l_name}({agent.name})'):
            try:
                return self.all_obs[key]
            except KeyError:
                continue
        try:
            key = next(x for x in self.all_obs if l_name in x and agent.name in x)
        except StopIteration:
            raise KeyError(
                f'Check typing!\n{l_name} could not be found in:\n{dict(self.all_obs).keys()}')
        # Iterating the dict yields keys; hand back the registered object, not the key string.
        return self.all_obs[key]

    def build_for_agent(self, agent, state) -> (np.ndarray, List[str]):
        """Build the observation of one agent.

        :return: `(obs, layer_names)`; `obs` has shape `(len(layer_names), pomdp_d, pomdp_d)`.
        :raises KeyError: If a requested layer cannot be resolved.
        :raises AttributeError: If a non-positional source offers neither `encodings` nor `encoding`.
        :raises ValueError: If an encoding does not fit into one layer.
        """
        try:
            agent_want_obs = self.obs_layers[agent.name]
        except KeyError:
            # First request for this agent: resolve its observation config once.
            self._sort_and_name_observation_conf(agent)
            agent_want_obs = self.obs_layers[agent.name]

        # Handle in-grid observations aka visible observations
        visible_entitites = self.ray_caster[agent.name].visible_entities(state.entities)
        pre_sort_obs = defaultdict(lambda: np.zeros((self.pomdp_d, self.pomdp_d)))
        for e in set(visible_entitites):
            # Shift into the agent-centred view window.
            x, y = (e.x - agent.x) + self.pomdp_r, (e.y - agent.y) + self.pomdp_r
            try:
                pre_sort_obs[e.obs_tag][x, y] += e.encoding
            except IndexError:
                # Seemed to be visible but is out of range
                pass

        # Plain dict from here on, so that missing layers raise KeyError below.
        pre_sort_obs = dict(pre_sort_obs)
        obs = np.zeros((len(agent_want_obs), self.pomdp_d, self.pomdp_d))

        for idx, l_name in enumerate(agent_want_obs):
            try:
                obs[idx] = pre_sort_obs[l_name]
                continue
            except KeyError:
                pass

            if c.COMBINED in l_name:
                if combined := [pre_sort_obs[x] for x in self.all_obs[f'{c.COMBINED}({agent.name})'].names
                                if x in pre_sort_obs]:
                    obs[idx] = np.sum(combined, axis=0)
                continue
            if l_name == c.PLACEHOLDER:
                obs[idx] = self.all_obs[c.PLACEHOLDER]
                continue

            e = self._find_obs_source(l_name, agent)
            try:
                positional = e.has_position
            except AttributeError:
                positional = False
            if positional:
                # Positional but not visible right now: the layer stays all zeros.
                continue

            try:
                v = e.encodings
            except AttributeError:
                try:
                    v = e.encoding
                except AttributeError:
                    raise AttributeError('This env. expects Entity-Clases to report their "encoding"')
            try:
                np.put(obs[idx], range(len(v)), v, mode='raise')
            except TypeError:
                # `v` has no length; write it to the first cell.
                np.put(obs[idx], 0, v, mode='raise')
            except IndexError:
                raise ValueError(f'Max(obs.size) for {e.name}: {obs[idx].size}, but was: {len(v)}.')

        try:
            self.curr_lightmaps[agent.name] = pre_sort_obs[c.FLOORS].astype(bool)
        except KeyError:
            # No floor layer among the visible entities: the stored lightmap stays as it is.
            pass
        return obs, self.obs_layers[agent.name]

    def _sort_and_name_observation_conf(self, agent):
        """Create the agent's ray caster and expand its observation config into layer names."""
        self.ray_caster[agent.name] = RayCaster(agent, self.pomdp_r)
        obs_layers = []

        for obs_str in agent.observations:
            if isinstance(obs_str, dict):
                # Only the first key/value pair of a dict entry is read.
                obs_str, vals = next(iter(obs_str.items()))
            else:
                vals = None
            if obs_str == c.SELF:
                obs_layers.append(agent.name)
            elif obs_str == c.DEFAULTS:
                obs_layers.extend(self.default_obs)
            elif obs_str == c.COMBINED:
                if isinstance(vals, str):
                    vals = [vals]
                names = list()
                for val in vals:
                    if val == c.SELF:
                        names.append(agent.name)
                    elif val == c.OTHERS:
                        names.extend([x.name for x in agent.collection if x.name != agent.name])
                    else:
                        names.append(val)
                combined = Combined(names, self.pomdp_r, identifier=agent.name)
                self.all_obs[combined.name] = combined
                obs_layers.append(combined.name)
            elif obs_str == c.OTHERS:
                obs_layers.extend([x for x in self.all_obs if x != agent.name and x.startswith(f'{c.AGENT}[')])
            elif obs_str == c.AGENTS:
                obs_layers.extend([x for x in self.all_obs if x.startswith(f'{c.AGENT}[')])
            else:
                obs_layers.append(obs_str)
        self.obs_layers[agent.name] = obs_layers
        self.curr_lightmaps[agent.name] = np.zeros((self.pomdp_d or self.level_shape[0],
                                                    self.pomdp_d or self.level_shape[1]
                                                    ))
|
||||
|
||||
|
||||
class RayCaster:
    """Cast rays from an agent's position to decide which entities it can see."""

    def __init__(self, agent, pomdp_r, degs=360):
        """
        :param agent: Agent the rays start from; its `pos`, `x` and `y` are read.
        :param pomdp_r: View radius; scales the ray targets.
        :param degs: Opening angle in degrees over which the rays are spread.
        """
        self.agent = agent
        self.pomdp_r = pomdp_r
        self.n_rays = 100  # (self.pomdp_r + 1) * 8
        self.degs = degs
        self.ray_targets = self.build_ray_targets()
        self.obs_shape_cube = np.array([self.pomdp_r, self.pomdp_r])

    def build_ray_targets(self):
        """Rotate the vector `[0, -1] * pomdp_r` through `n_rays` angles.

        :return: Integer array of the unique, rounded end points relative to the agent.
        """
        north = np.array([0, -1])*self.pomdp_r
        thetas = [np.deg2rad(deg) for deg in np.linspace(-self.degs // 2, self.degs // 2, self.n_rays)[::-1]]
        rot_M = [
            [[math.cos(theta), -math.sin(theta)],
             [math.sin(theta), math.cos(theta)]] for theta in thetas
        ]
        rot_M = np.stack(rot_M, 0)
        rot_M = np.unique(np.round(rot_M @ north), axis=0)
        return rot_M.astype(int)

    @staticmethod
    def ray_block_cache(cache_dict, key, callback, ents):
        """Return the cached value for `key`, computing it through `callback` on a miss.

        `ents` is unused but kept so that existing call sites stay valid.
        """
        if key not in cache_dict:
            cache_dict[key] = callback()
        return cache_dict[key]

    def visible_entities(self, entities):
        """Walk every ray outwards from the agent and collect the entities it passes.

        A ray ends at the first position holding a light-blocking entity (those entities are
        still collected) or when a diagonal step is judged blocked by both neighbouring cells.

        :param entities: Collection offering `pos_dict`, a mapping from position to entities.
        :return: List of entities hit; it may contain duplicates.
        """
        visible = list()
        # NOTE(review): the straight and the diagonal check share this cache although their
        # callbacks compute different predicates -- confirm this is intended.
        cache_blocking = {}

        for ray in self.get_rays():
            rx, ry = ray[0]
            for x, y in ray:
                # Step taken since the previous point on this ray.
                cx, cy = x - rx, y - ry

                entities_hit = entities.pos_dict[(x, y)]
                hits = self.ray_block_cache(cache_blocking,
                                            (x, y),
                                            lambda: any(True for e in entities_hit if e.is_blocking_light),
                                            entities)

                diag_hits = all([
                    self.ray_block_cache(
                        cache_blocking,
                        key,
                        # `key=key` pins the position for this callback.
                        lambda key=key: all(False for e in entities.pos_dict[key] if not e.is_blocking_light),
                        entities)
                    for key in ((x, y-cy), (x-cx, y))
                ]) if (cx != 0 and cy != 0) else False

                visible += entities_hit if not diag_hits else []
                if hits or diag_hits:
                    break
                rx, ry = x, y
        return visible

    def get_rays(self):
        """Return, per ray target, the grid points from the agent's position to that target."""
        a_pos = self.agent.pos
        outline = self.ray_targets + a_pos
        return self.bresenham_loop(a_pos, outline)

    # todo do this once and cache the points!
    def get_fov_outline(self) -> np.ndarray:
        """Ray targets shifted to the agent's current position."""
        return self.ray_targets + self.agent.pos

    def get_square_outline(self):
        """Coordinates on the border of the square of radius `pomdp_r` around the agent."""
        agent = self.agent
        x_coords = range(agent.x - self.pomdp_r, agent.x + self.pomdp_r + 1)
        y_coords = range(agent.y - self.pomdp_r, agent.y + self.pomdp_r + 1)
        outline = list(product(x_coords, [agent.y - self.pomdp_r, agent.y + self.pomdp_r])) \
                  + list(product([agent.x - self.pomdp_r, agent.x + self.pomdp_r], y_coords))
        return outline

    @staticmethod
    @njit
    def bresenham_loop(a_pos, points):
        """Rasterise a line from `a_pos` to every end point with Bresenham's algorithm.

        :return: One list of `[x, y]` coordinates per end point, ordered from `a_pos` to the end.
        """
        results = []
        for end in points:
            x1, y1 = a_pos
            x2, y2 = end
            dx = x2 - x1
            dy = y2 - y1

            # Determine how steep the line is
            is_steep = abs(dy) > abs(dx)

            # Rotate line
            if is_steep:
                x1, y1 = y1, x1
                x2, y2 = y2, x2

            # Swap start and end points if necessary and store swap state
            swapped = False
            if x1 > x2:
                x1, x2 = x2, x1
                y1, y2 = y2, y1
                swapped = True

            # Recalculate differentials
            dx = x2 - x1
            dy = y2 - y1

            # Calculate error
            error = int(dx / 2.0)
            ystep = 1 if y1 < y2 else -1

            # Iterate over bounding box generating points between start and end
            y = y1
            line = []  # own name, so the `points` argument is not shadowed
            for x in range(int(x1), int(x2) + 1):
                coord = [y, x] if is_steep else [x, y]
                line.append(coord)
                error -= abs(dy)
                if error < 0:
                    y += ystep
                    error += dx

            # Reverse the list if the coordinates were swapped
            if swapped:
                line.reverse()
            results.append(line)
        return results
|
16
environment/utils/render.py
Normal file
@ -0,0 +1,16 @@
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
@dataclass
class RenderEntity:
    """Plain record describing one thing the renderer should draw."""

    # Required fields.
    name: str
    pos: np.array

    # Optional fields with their defaults.
    value: float = 1
    value_operation: str = 'none'
    state: str = None
    id: int = 0
    aux: Any = None
    real_name: str = 'none'
|
@ -1,32 +1,26 @@
|
||||
import sys
|
||||
import numpy as np
|
||||
|
||||
from pathlib import Path
|
||||
from collections import deque
|
||||
from itertools import product
|
||||
import pygame
|
||||
from typing import NamedTuple, Any
|
||||
from typing import Tuple, Union
|
||||
import time
|
||||
|
||||
import torch
|
||||
|
||||
from environment.utils.render import RenderEntity
|
||||
|
||||
class RenderEntity(NamedTuple):
|
||||
name: str
|
||||
pos: np.array
|
||||
value: float = 1
|
||||
value_operation: str = 'none'
|
||||
state: str = None
|
||||
id: int = 0
|
||||
aux: Any = None
|
||||
|
||||
|
||||
class RenderNames:
|
||||
AGENT: str = 'agent'
|
||||
BLANK: str = 'blank'
|
||||
DOOR: str = 'door'
|
||||
OPACITY: str = 'opacity'
|
||||
SCALE: str = 'scale'
|
||||
rn = RenderNames
|
||||
AGENT: str = 'agent'
|
||||
STATE_IDLE: str = 'idle'
|
||||
STATE_MOVE: str = 'move'
|
||||
STATE_VALID: str = 'valid'
|
||||
STATE_INVALID: str = 'invalid'
|
||||
STATE_COLLISION: str = 'agent_collision'
|
||||
BLANK: str = 'blank'
|
||||
DOOR: str = 'door'
|
||||
OPACITY: str = 'opacity'
|
||||
SCALE: str = 'scale'
|
||||
|
||||
|
||||
class Renderer:
|
||||
@ -34,11 +28,12 @@ class Renderer:
|
||||
WHITE = (223, 230, 233) # (200, 200, 200)
|
||||
AGENT_VIEW_COLOR = (9, 132, 227)
|
||||
ASSETS = Path(__file__).parent.parent / 'assets'
|
||||
MODULE_ASSETS = Path(__file__).parent.parent.parent / 'modules'
|
||||
|
||||
def __init__(self, lvl_shape=(16, 16),
|
||||
lvl_padded_shape=None,
|
||||
cell_size=40, fps=7,
|
||||
grid_lines=True, view_radius=2):
|
||||
def __init__(self, lvl_shape: Tuple[int, int] = (16, 16),
|
||||
lvl_padded_shape: Union[Tuple[int, int], None] = None,
|
||||
cell_size: int = 40, fps: int = 7,
|
||||
grid_lines: bool = True, view_radius: int = 2):
|
||||
self.grid_h, self.grid_w = lvl_shape
|
||||
self.lvl_padded_shape = lvl_padded_shape if lvl_padded_shape is not None else lvl_shape
|
||||
self.cell_size = cell_size
|
||||
@ -49,7 +44,7 @@ class Renderer:
|
||||
self.screen_size = (self.grid_w*cell_size, self.grid_h*cell_size)
|
||||
self.screen = pygame.display.set_mode(self.screen_size)
|
||||
self.clock = pygame.time.Clock()
|
||||
assets = list(self.ASSETS.rglob('*.png'))
|
||||
assets = list(self.ASSETS.rglob('*.png')) + list(self.MODULE_ASSETS.rglob('*.png'))
|
||||
self.assets = {path.stem: self.load_asset(str(path), 1) for path in assets}
|
||||
self.fill_bg()
|
||||
|
||||
@ -75,9 +70,9 @@ class Renderer:
|
||||
r, c = r - offset_r, c-offset_c
|
||||
|
||||
img = self.assets[entity.name.lower()]
|
||||
if entity.value_operation == rn.OPACITY:
|
||||
if entity.value_operation == OPACITY:
|
||||
img.set_alpha(255*entity.value)
|
||||
elif entity.value_operation == rn.SCALE:
|
||||
elif entity.value_operation == SCALE:
|
||||
re = img.get_rect()
|
||||
img = pygame.transform.smoothscale(
|
||||
img, (int(entity.value*re.width), int(entity.value*re.height))
|
||||
@ -116,19 +111,16 @@ class Renderer:
|
||||
sys.exit()
|
||||
self.fill_bg()
|
||||
blits = deque()
|
||||
for entity in [x for x in entities if rn.DOOR in x.name]:
|
||||
for entity in [x for x in entities]:
|
||||
bp = self.blit_params(entity)
|
||||
blits.append(bp)
|
||||
for entity in [x for x in entities if rn.DOOR not in x.name]:
|
||||
bp = self.blit_params(entity)
|
||||
blits.append(bp)
|
||||
if entity.name.lower() == rn.AGENT:
|
||||
if entity.name.lower() == AGENT:
|
||||
if self.view_radius > 0:
|
||||
vis_rects = self.visibility_rects(bp, entity.aux)
|
||||
blits.extendleft(vis_rects)
|
||||
if entity.state != rn.BLANK:
|
||||
if entity.state != BLANK:
|
||||
agent_state_blits = self.blit_params(
|
||||
RenderEntity(entity.state, (entity.pos[0] + 0.12, entity.pos[1]), 0.48, rn.SCALE)
|
||||
RenderEntity(entity.state, (entity.pos[0] + 0.12, entity.pos[1]), 0.48, SCALE)
|
||||
)
|
||||
textsurface = self.font.render(str(entity.id), False, (0, 0, 0))
|
||||
text_blit = dict(source=textsurface, dest=(bp['dest'].center[0]-.07*self.cell_size,
|
||||
@ -146,7 +138,6 @@ class Renderer:
|
||||
|
||||
if __name__ == '__main__':
|
||||
renderer = Renderer(fps=2, cell_size=40)
|
||||
for i in range(15):
|
||||
entity_1 = RenderEntity('agent_collision', [5, i], 1, 'idle', 'idle')
|
||||
for pos_i in range(15):
|
||||
entity_1 = RenderEntity('agent_collision', [5, pos_i], 1, 'idle', 'idle')
|
||||
renderer.render([entity_1])
|
||||
|
48
environment/utils/results.py
Normal file
@ -0,0 +1,48 @@
|
||||
from typing import Union
|
||||
from dataclasses import dataclass, asdict
|
||||
|
||||
from environment.entity.entity import Entity
|
||||
|
||||
TYPE_VALUE = 'value'
|
||||
TYPE_REWARD = 'reward'
|
||||
types = [TYPE_VALUE, TYPE_REWARD]
|
||||
|
||||
@dataclass
class InfoObject:
    """Single named info entry: an identifier, the kind of value, and the value."""

    identifier: str            # name under which the entry is reported
    val_type: str              # kind of value stored in ``value``
    value: Union[float, int]   # the numeric payload
|
||||
|
||||
|
||||
@dataclass
class Result:
    """Outcome of an action or rule: validity plus optional reward, value and entity."""

    identifier: str
    validity: bool
    reward: Union[float, None] = None
    value: Union[float, None] = None
    entity: Union[Entity, None] = None

    def get_infos(self):
        """Return one ``InfoObject`` per entry of ``types`` whose attribute is set.

        Identifiers are prefixed with the entity name, or with ``"Global"``
        when no entity is attached.
        """
        owner = "Global" if self.entity is None else self.entity.name
        infos = []
        for val_type in types:
            current = getattr(self, val_type)
            if current is None:
                continue
            infos.append(InfoObject(identifier=f'{owner}_{self.identifier}_{val_type}',
                                    val_type=val_type, value=current))
        return infos

    def __repr__(self):
        negation = "" if self.validity else "not "
        cls_name = type(self).__name__
        return f'{cls_name}({self.identifier.capitalize()} {negation}valid: {self.reward})'
|
||||
|
||||
|
||||
# repr=False: a plain @dataclass would generate a new __repr__ for this
# subclass and thereby hide the custom __repr__ inherited from Result.
@dataclass(repr=False)
class TickResult(Result):
    """Result subclass with no additional fields or behaviour."""
    pass
|
||||
|
||||
|
||||
# repr=False: a plain @dataclass would generate a new __repr__ for this
# subclass and thereby hide the custom __repr__ inherited from Result.
@dataclass(repr=False)
class ActionResult(Result):
    """Result subclass with no additional fields or behaviour."""
    pass
|
||||
|
||||
|
||||
# repr=False: a plain @dataclass would generate a new __repr__ for this
# subclass and thereby hide the custom __repr__ inherited from Result.
@dataclass(repr=False)
class DoneResult(Result):
    """Result subclass with no additional fields or behaviour."""
    pass
|
112
environment/utils/states.py
Normal file
@ -0,0 +1,112 @@
|
||||
from typing import List, Dict
|
||||
|
||||
import numpy as np
|
||||
|
||||
from environment.entity.wall_floor import Floor
|
||||
from environment.rules import Rule
|
||||
from environment.utils.results import Result
|
||||
from environment import constants as c
|
||||
|
||||
|
||||
class StepRules:
    """Ordered container of rules whose hooks are invoked together."""

    def __init__(self, *args):
        # list(()) is already an empty list, so no special case is needed.
        self.rules = list(args)

    def __repr__(self):
        return f'Rules{[x.name for x in self]}'

    def __iter__(self):
        return iter(self.rules)

    def append(self, item):
        """Add a ``Rule`` instance to the container and return ``True``."""
        assert isinstance(item, Rule)
        self.rules.append(item)
        return True

    def do_all_init(self, state):
        """Call ``on_init`` on every rule; truthy return values are printed via ``state.print``."""
        for rule in self.rules:
            if rule_init_printline := rule.on_init(state):
                state.print(rule_init_printline)
        return c.VALID

    def tick_step_all(self, state):
        """Collect the truthy results of every rule's ``tick_step`` hook."""
        results = list()
        for rule in self.rules:
            if tick_step_result := rule.tick_step(state):
                results.extend(tick_step_result)
        return results

    def tick_pre_step_all(self, state):
        """Collect the truthy results of every rule's ``tick_pre_step`` hook."""
        results = list()
        for rule in self.rules:
            # Fixed: this previously invoked ``tick_post_step``, so pre-step
            # hooks never ran and post-step hooks ran twice per tick.
            if tick_pre_step_result := rule.tick_pre_step(state):
                results.extend(tick_pre_step_result)
        return results

    def tick_post_step_all(self, state):
        """Collect the truthy results of every rule's ``tick_post_step`` hook."""
        results = list()
        for rule in self.rules:
            if tick_post_step_result := rule.tick_post_step(state):
                results.extend(tick_post_step_result)
        return results
|
||||
|
||||
|
||||
class Gamestate(object):
    """Holds the entities, the rule set and step bookkeeping of one environment."""

    @property
    def moving_entites(self):
        """Flat list of all members of the entity collections flagged ``can_move``."""
        return [member for collection in self.entities for member in collection if collection.can_move]

    def __init__(self, entitites, rules: Dict[str, dict], env_seed=69, verbose=False):
        self.entities = entitites
        self.NO_POS_TILE = Floor(c.VALUE_NO_POS)
        self.curr_step = 0
        self.curr_actions = None
        self.verbose = verbose
        self.rng = np.random.default_rng(env_seed)
        # Each rule config provides the class to build and its keyword arguments.
        rule_instances = (config['class'](**config['kwargs']) for config in rules.values())
        self.rules = StepRules(*rule_instances)

    def __getitem__(self, item):
        return self.entities[item]

    def __iter__(self):
        return (collection for collection in self.entities.values())

    def __repr__(self):
        cls_name = self.__class__.__name__
        return f'{cls_name}({len(self.entities)} Entitites @ Step {self.curr_step})'

    def tick(self, actions) -> List[Result]:
        """Advance the state by one step and return all collected results.

        Order: pre-step rules, one action per agent (by index), step rules,
        post-step rules.
        """
        self.curr_step += 1
        collected = list()

        collected.extend(self.rules.tick_pre_step_all(self))

        # Main agent step: the i-th action belongs to the i-th agent.
        for agent_idx, action_idx in enumerate(actions):
            agent = self[c.AGENT][agent_idx].clear_temp_state()
            chosen_action = agent.actions[action_idx]
            outcome = chosen_action.do(agent, self)
            collected.append(outcome)
            agent.set_state(outcome)

        collected.extend(self.rules.tick_step_all(self))
        collected.extend(self.rules.tick_post_step_all(self))
        return collected

    def print(self, string):
        """Print ``string`` only when the state was created with ``verbose=True``."""
        if not self.verbose:
            return
        print(string)

    def check_done(self):
        """Collect the truthy results of every rule's ``on_check_done`` hook."""
        done_results = list()
        for rule in self.rules:
            if rule_done_result := rule.on_check_done(self):
                done_results.extend(rule_done_result)
        return done_results

    def get_all_tiles_with_collisions(self) -> List[Floor]:
        """Return the floor tiles of positions holding more than one colliding entity."""
        colliding_tiles = []
        for pos, occupants in self.entities.pos_dict.items():
            if sum(occupant.can_collide for occupant in occupants) > 1:
                colliding_tiles.append(self[c.FLOOR].by_pos(pos))
        return colliding_tiles
|
27
environment/utils/utility_classes.py
Normal file
@ -0,0 +1,27 @@
|
||||
import gymnasium as gym
|
||||
|
||||
|
||||
class EnvCombiner(object):
    """Combine several environment classes into one dynamically created class."""

    def __init__(self, *envs_cls):
        # Keyed by class name, so classes sharing a name are kept only once.
        self._env_dict = {env_cls.__name__: env_cls for env_cls in envs_cls}

    @staticmethod
    def combine_cls(name, *envs_cls):
        """Create a new class called ``name`` that inherits from all ``envs_cls``."""
        return type(name, envs_cls, {})

    def build(self):
        """Return the combined class.

        Its name is built from the registered class names: each is lowercased,
        stripped of "factory" and capitalized, then "Factory" is appended once.
        """
        # Fixed: str.replace needs the replacement string as second argument.
        name_parts = [x.lower().replace("factory", "").capitalize() for x in self._env_dict.keys()]
        name = f'{"".join(name_parts)}Factory'

        # Fixed: unpack the classes; passing one tuple made ``type`` receive a
        # tuple instance as its only base class.
        return self.combine_cls(name, *self._env_dict.values())
|
||||
|
||||
|
||||
class MarlFrameStack(gym.ObservationWrapper):
    """Observation wrapper that swaps the first two axes of stacked multi-agent frames.

    todo @romue404
    """

    def __init__(self, env):
        super().__init__(env)

    def observation(self, observation):
        # Only a directly wrapped FrameStack env with more than one agent is re-ordered.
        wraps_frame_stack = isinstance(self.env, gym.wrappers.FrameStack)
        if not (wraps_frame_stack and self.env.unwrapped.n_agents > 1):
            return observation
        frames = observation[0:]
        return frames.swapaxes(0, 1)
|
@ -1,28 +0,0 @@
|
||||
def make(env_name, pomdp_r=2, max_steps=400, stack_n_frames=3, n_agents=1, individual_rewards=False):
    """Build a ``DirtFactory`` from ``levels/parameters/<env_name>.yaml`` and enter it.

    The YAML content is merged with the keyword arguments assembled here and
    passed to ``DirtFactory``; the value returned by its ``__enter__`` is
    handed back to the caller.
    """
    import yaml
    from pathlib import Path
    from environments.factory.combined_factories import DirtItemFactory
    from environments.factory.factory_item import ItemFactory
    from environments.factory.additional.item.item_util import ItemProperties
    from environments.factory.factory_dirt import DirtFactory
    from environments.factory.dirt_util import DirtProperties
    from environments.factory.dirt_util import RewardsDirt
    from environments.utility_classes import AgentRenderOptions

    parameter_file = Path(__file__).parent / 'levels' / 'parameters' / f'{env_name}.yaml'
    with parameter_file.open('r') as stream:
        dictionary = yaml.load(stream, Loader=yaml.FullLoader)

    obs_props = {
        'render_agents': AgentRenderOptions.COMBINED,
        'pomdp_r': pomdp_r,
        'indicate_door_area': True,
        'show_global_position_info': False,
        'frames_to_stack': stack_n_frames,
    }

    # dict(**...) is kept on purpose: it raises if the YAML file repeats one
    # of the keys set explicitly below.
    factory_kwargs = dict(**dictionary,
                          n_agents=n_agents,
                          individual_rewards=individual_rewards,
                          max_steps=max_steps,
                          obs_prop=obs_props,
                          verbose=False,
                          )
    factory = DirtFactory(**factory_kwargs)
    return factory.__enter__()
|
@ -1,38 +0,0 @@
|
||||
from typing import Union
|
||||
|
||||
from environments.factory.additional.doors.doors_entities import Door
|
||||
from environments.factory.base.registers import EntityCollection
|
||||
|
||||
from environments.factory.additional.doors.doors_util import Constants as c
|
||||
|
||||
|
||||
class Doors(EntityCollection):
    """Collection of ``Door`` entities; can mark the floor around doors in its array."""

    _accepted_objects = Door

    def __init__(self, *args, indicate_area=False, **kwargs):
        self.indicate_area = indicate_area
        self._area_marked = False
        super(Doors, self).__init__(*args, is_blocking_light=True, can_collide=True, **kwargs)

    def get_near_position(self, position: (int, int)) -> Union[None, Door]:
        """Return the first door whose tile has ``position`` as neighboring floor, else ``None``."""
        adjacent_doors = (door for door in self if position in door.tile.neighboring_floor_pos)
        return next(adjacent_doors, None)

    def tick_doors(self):
        """Call ``tick`` on every door of the collection."""
        for door in self:
            door.tick()

    def as_array(self):
        """Return the parent's array; on first use, queue the door-area markers if requested."""
        if self.indicate_area and not self._area_marked:
            for door in self:
                for tile in door.tile.neighboring_floor:
                    # Nothing is marked when individual slices are in use.
                    if not self._individual_slices:
                        pos = (0, *tile.pos)
                        self._lazy_eval_transforms.append((pos, c.ACCESS_DOOR_CELL))
            self._area_marked = True
        return super(Doors, self).as_array()
|
@ -1,71 +0,0 @@
|
||||
from environments.factory.base.objects import Entity
|
||||
from environments.factory.additional.doors.doors_util import Constants as c
|
||||
|
||||
|
||||
class Template(Entity):
    """Template for new Entity"""

    # How to define / override properties
    @property
    def is_blocking(self):
        return False

    @property
    def can_collide(self):
        return not self.template_attr

    @property
    def encoding(self):
        # NOTE(review): the original comment suggests shadow checks rely on
        # this occupation value - confirm against the observation code.
        if self.is_closed:
            return c.CLOSED_DOOR_CELL
        return c.OPEN_DOOR_CELL

    @property
    def str_state(self):
        if self.is_open:
            return 'open'
        return 'closed'

    def __init__(self, *args, closed_on_init=True, auto_close_interval=10, indicate_area=False, **kwargs):
        super(Template, self).__init__(*args, **kwargs)
        self._state = c.CLOSED_DOOR
        self.indicate_area = indicate_area
        self.auto_close_interval = auto_close_interval
        self.time_to_close = -1
        if not closed_on_init:
            self._open()

    def summarize_state(self):
        """Extend the parent's summary with the state string and the close countdown."""
        summary = super().summarize_state()
        summary.update(state=str(self.str_state), time_to_close=int(self.time_to_close))
        return summary

    @property
    def is_closed(self):
        return self._state == c.CLOSED_DOOR

    @property
    def is_open(self):
        return self._state == c.OPEN_DOOR

    @property
    def status(self):
        return self._state

    def use(self):
        """Toggle between the open and the closed state."""
        if self._state == c.OPEN_DOOR:
            self._close()
        else:
            self._open()

    def tick(self):
        """Count down while open with ``len(self.tile) == 1``; toggle once the countdown hits zero."""
        if not self.is_open or len(self.tile) != 1:
            return
        if self.time_to_close:
            self.time_to_close -= 1
        else:
            self.use()

    def _open(self):
        self._state = c.OPEN_DOOR
        self._collection.notify_change_to_value(self)
        # Restart the auto-close countdown.
        self.time_to_close = self.auto_close_interval

    def _close(self):
        self._state = c.CLOSED_DOOR
        self._collection.notify_change_to_value(self)
|
@ -1,31 +0,0 @@
|
||||
|
||||
from typing import NamedTuple
|
||||
|
||||
from environments.helpers import Constants as BaseConstants, EnvActions as BaseActions
|
||||
|
||||
|
||||
class Constants(BaseConstants):
    """Door-related identifiers and observation values."""

    # Identifiers
    DOOR = 'Door'        # single door entity
    DOORS = 'Doors'      # door collection
    DOOR_SYMBOL = 'D'    # door marker in the string-based map files

    # Cell values used in the observation
    ACCESS_DOOR_CELL = 1 / 3    # cell from which a door can be accessed
    OPEN_DOOR_CELL = 2 / 3      # cell holding an open door
    CLOSED_DOOR_CELL = 3 / 3    # cell holding a closed door

    # State identifiers used for comparisons
    CLOSED_DOOR = 'closed'
    OPEN_DOOR = 'open'
    # ACCESS_DOOR = 'access'  # Identifier to compare access positions
|
||||
|
||||
|
||||
class Actions(BaseActions):
    """Action identifiers added by the door module."""

    USE_DOOR = 'use_door'
|
||||
|
||||
|
||||
class RewardsDoor(NamedTuple):
    """Reward values for the use-door action."""

    USE_DOOR_VALID: float = -0.00   # reward for a valid door use
    USE_DOOR_FAIL: float = -0.01    # reward for a failed door use
|
||||
|
||||
|
||||
class DoorProperties(NamedTuple):
    """Configuration of the door module."""

    # Whether the door area should be indicated in the agents' observation.
    indicate_door_area: bool = True
|
@ -1,196 +0,0 @@
|
||||
import time
|
||||
from typing import List, Union, Dict
|
||||
import random
|
||||
|
||||
import numpy as np
|
||||
|
||||
from environments.factory.additional.doors.doors_collections import Doors
|
||||
from environments.factory.additional.doors.doors_util import DoorProperties, RewardsDoor, Constants, Actions
|
||||
from environments.factory.base.base_factory import BaseFactory
|
||||
from environments.factory.base.objects import Agent, Action
|
||||
from environments.factory.base.registers import Entities
|
||||
|
||||
from environments import helpers as h
|
||||
|
||||
from environments.factory.base.renderer import RenderEntity
|
||||
from environments.utility_classes import ObservationProperties
|
||||
|
||||
|
||||
def softmax(x):
    """Return the softmax of the scores in ``x``, normalized over all elements."""
    # Subtracting the maximum before exponentiating avoids overflow.
    shifted = x - np.max(x)
    exponentials = np.exp(shifted)
    return exponentials / exponentials.sum()
|
||||
|
||||
|
||||
def entropy(x):
    """Return ``-sum(x * log(x + 1e-8))``; the offset keeps ``log`` finite at zero."""
    log_terms = np.log(x + 1e-8)
    weighted = x * log_terms
    return -weighted.sum()
|
||||
|
||||
|
||||
c = Constants
|
||||
a = Actions
|
||||
|
||||
|
||||
# noinspection PyAttributeOutsideInit, PyAbstractClass
|
||||
class DoorFactory(BaseFactory):
    """Factory environment extension that adds doors and a use-door action."""

    @property
    def actions_hook(self) -> Union[Action, List[Action]]:
        """Parent actions plus the use-door action."""
        super_actions = super().actions_hook
        super_actions.append(Action(str_ident=a.USE_DOOR))
        return super_actions

    @property
    def entities_hook(self) -> Dict[(str, Entities)]:
        """Parent entities plus a ``Doors`` collection when the level contains door symbols."""
        super_entities = super().entities_hook

        parsed_doors = h.one_hot_level(self._parsed_level, c.DOOR_SYMBOL)
        # Pad by the observation radius so door positions match the padded level.
        parsed_doors = np.pad(parsed_doors, self.obs_prop.pomdp_r, 'constant', constant_values=0)
        if np.any(parsed_doors):
            door_tiles = [self[c.FLOOR].by_pos(tuple(pos)) for pos in np.argwhere(parsed_doors == c.OCCUPIED_CELL)]
            doors = Doors.from_tiles(door_tiles, self._level_shape, indicate_area=self.obs_prop.indicate_door_area,
                                     entity_kwargs=dict()
                                     )
            super_entities.update(({c.DOORS: doors}))
        return super_entities

    def __init__(self, *args,
                 door_properties: DoorProperties = DoorProperties(), rewards_door: RewardsDoor = RewardsDoor(),
                 env_seed=None, **kwargs):
        """Set up door properties, rewards and the door RNG, then initialize the parent.

        ``door_properties`` and ``rewards_door`` may also be given as dicts.
        ``env_seed=None`` (the default) draws a fresh time-based seed per instance.
        """
        # Fixed: a ``time.time_ns()`` default is evaluated only once, when the
        # function is defined, so every unseeded instance shared one seed.
        if env_seed is None:
            env_seed = time.time_ns()
        if isinstance(door_properties, dict):
            door_properties = DoorProperties(**door_properties)
        if isinstance(rewards_door, dict):
            rewards_door = RewardsDoor(**rewards_door)
        self.door_properties = door_properties
        self.rewards_door = rewards_door
        self._door_rng = np.random.default_rng(env_seed)
        self._doors: Doors
        # Hand the same seed on to the parent classes.
        kwargs.update(env_seed=env_seed)
        # TODO: Reset ---> document this
        super().__init__(*args, **kwargs)

    def render_assets_hook(self, mode='human'):
        """Parent render assets plus one ``RenderEntity`` per door (open or closed)."""
        additional_assets = super().render_assets_hook()
        doors = []
        for i, door in enumerate(self[c.DOORS]):
            name, state = 'door_open' if door.is_open else 'door_closed', 'blank'
            doors.append(RenderEntity(name, door.pos, 1, 'none', state, i + 1))
        additional_assets.extend(doors)
        return additional_assets

    def step_hook(self) -> (List[dict], dict):
        """Run the parent step hook, then tick the doors' close interval."""
        super_reward_info = super().step_hook()
        # Step the door close interval
        # TODO: Maybe move this to self.post_step_hook? May collide with reward calculation.
        if doors := self[c.DOORS]:
            doors.tick_doors()
        return super_reward_info

    def do_additional_actions(self, agent: Agent, action: Action) -> (dict, dict):
        """Handle the use-door action unless a parent class already handled ``action``."""
        action_result = super().do_additional_actions(agent, action)
        if action_result is None:
            if action == a.USE_DOOR:
                return self.use_door_action(agent)
            else:
                return None
        else:
            return action_result

    def use_door_action(self, agent: Agent):
        """Toggle the door near the agent, if any; return ``(valid, reward)``."""
        # Look up a door whose neighboring floor contains the agent position.
        door = self[c.DOORS].get_near_position(agent.pos)
        if door is not None:
            door.use()
            valid = c.VALID
            self.print(f'{agent.name} just used a {door.name} at {door.pos}')
            info_dict = {f'{agent.name}_door_use': 1, f'door_use': 1}
        # When there is none...
        else:
            valid = c.NOT_VALID
            info_dict = {f'{agent.name}_failed_door_use': 1, 'failed_door_use': 1}
            self.print(f'{agent.name} just tried to use a door at {agent.pos}, but there is none.')

        reward = dict(value=self.rewards_door.USE_DOOR_VALID if valid else self.rewards_door.USE_DOOR_FAIL,
                      reason=a.USE_DOOR, info=info_dict)

        return valid, reward

    def reset_hook(self) -> None:
        super().reset_hook()
        # There is nothing to reset.

    def check_additional_done(self) -> (bool, dict):
        super_done, super_dict = super().check_additional_done()
        return super_done, super_dict

    def observations_hook(self) -> Dict[str, np.typing.ArrayLike]:
        """Parent observations plus the doors' array under ``c.DOORS``."""
        additional_observations = super().observations_hook()

        additional_observations.update({c.DOORS: self[c.DOORS].as_array()})
        return additional_observations

    def post_step_hook(self) -> List[Dict[str, int]]:
        super_post_step = super(DoorFactory, self).post_step_hook()
        return super_post_step
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual benchmark: run random-action episodes and report timings.
    from environments.utility_classes import AgentRenderOptions as aro
    render = True

    door_props = DoorProperties(
        indicate_door_area=True
    )

    obs_props = ObservationProperties(render_agents=aro.COMBINED, omit_agent_self=True,
                                      pomdp_r=2, additional_agent_placeholder=None, cast_shadows=True
                                      )

    move_props = {'allow_square_movement': True,
                  'allow_diagonal_movement': False,
                  'allow_no_op': False}
    import time
    global_timings = []
    for i in range(10):

        # NOTE(review): door properties are passed as ``dirt_prop`` although
        # DoorFactory's own parameter is ``door_properties`` - confirm intent.
        factory = DoorFactory(n_agents=10, done_at_collision=False,
                              level_name='rooms', max_steps=1000,
                              obs_prop=obs_props, parse_doors=True,
                              verbose=True,
                              mv_prop=move_props, dirt_prop=door_props,
                              # inject_agents=[TSPDirtAgent],
                              )

        # noinspection DuplicatedCode
        n_actions = factory.action_space.n - 1
        _ = factory.observation_space
        obs_space = factory.observation_space
        obs_space_named = factory.named_observation_space
        action_space_named = factory.named_action_space
        times = []
        for epoch in range(10):
            start_time = time.time()
            # One random action per agent for every possible step.
            random_actions = [[random.randint(0, n_actions) for _
                               in range(factory.n_agents)] for _
                              in range(factory.max_steps+1)]
            env_state = factory.reset()
            if render:
                factory.render()
            # tsp_agent = factory.get_injected_agents()[0]

            rwrd = 0
            for agent_i_action in random_actions:
                # agent_i_action = tsp_agent.predict()
                env_state, step_rwrd, done_bool, info_obj = factory.step(agent_i_action)
                rwrd += step_rwrd
                if render:
                    factory.render()
                if done_bool:
                    break
            times.append(time.time() - start_time)
            # print(f'Factory run {epoch} done, reward is:\n    {r}')
        print('Mean Time Taken: ', sum(times) / len(times))
        global_timings.extend(times)
    print('Mean Time Taken: ', sum(global_timings) / len(global_timings))
    # Fixed: the median must be taken from the sorted timings; the list is
    # in run order, so its middle element is an arbitrary sample.
    print('Median Time Taken: ', sorted(global_timings)[len(global_timings)//2])
|
@ -1,41 +0,0 @@
|
||||
from environments.factory.additional.btry.btry_objects import Battery, ChargePod
|
||||
from environments.factory.base.registers import EnvObjectCollection, EntityCollection
|
||||
|
||||
|
||||
class Batteries(EnvObjectCollection):
    """Collection holding one ``Battery`` per agent."""

    _accepted_objects = Battery

    def __init__(self, *args, **kwargs):
        super(Batteries, self).__init__(*args, individual_slices=True,
                                        is_blocking_light=False, can_be_shadowed=False, **kwargs)
        self.is_observable = True

    def spawn_batteries(self, agents, initial_charge_level):
        """Create a battery with ``initial_charge_level`` for every agent and add them."""
        new_batteries = [self._accepted_objects(initial_charge_level, agent, self) for agent in agents]
        self.add_additional_items(new_batteries)

    # Todo Move this to Mixin!
    def by_entity(self, entity):
        """Return the first battery belonging to ``entity`` or ``None``."""
        owned = (battery for battery in self if battery.belongs_to_entity(entity))
        return next(owned, None)

    def idx_by_entity(self, entity):
        """Return the index of the first battery belonging to ``entity`` or ``None``."""
        for idx, battery in enumerate(self):
            if battery.belongs_to_entity(entity):
                return idx
        return None

    def as_array_by_entity(self, entity):
        """Return the ``_array`` entry at the index of the entity's battery."""
        battery_idx = self.idx_by_entity(entity)
        return self._array[battery_idx]
|
||||
|
||||
|
||||
class ChargePods(EntityCollection):
    """Collection of ``ChargePod`` entities."""

    _accepted_objects = ChargePod
    _stateless_entities = True

    def __repr__(self):
        # Fixed: the result was not returned, so ``repr()`` received ``None``
        # and raised "TypeError: __repr__ returned non-string".
        return super(ChargePods, self).__repr__()
|
@ -1,30 +0,0 @@
|
||||
from typing import NamedTuple, Union
|
||||
|
||||
from environments.helpers import Constants as BaseConstants, EnvActions as BaseActions
|
||||
|
||||
|
||||
class Constants(BaseConstants):
    """Identifiers and values of the battery environment."""

    # Battery Env
    CHARGE_PODS = 'Charge_Pod'           # key of the charge pod collection
    BATTERIES = 'BATTERIES'              # key of the battery collection
    BATTERY_DISCHARGED = 'DISCHARGED'    # reason / info suffix for empty batteries
    CHARGE_POD = 1
|
||||
|
||||
|
||||
class Actions(BaseActions):
    """Action identifiers added by the battery module."""

    CHARGE = 'do_charge_action'
|
||||
|
||||
|
||||
class RewardsBtry(NamedTuple):
    """Reward values of the battery module."""

    CHARGE_VALID: float = 0.1           # successful charge action
    CHARGE_FAIL: float = -0.1           # failed charge action
    BATTERY_DISCHARGED: float = -1.0    # agent's battery is discharged
|
||||
|
||||
|
||||
class BatteryProperties(NamedTuple):
    """Configuration of the battery module."""

    initial_charge: float = 0.8      # charge level batteries are spawned with
    charge_rate: float = 0.4         # forwarded to the charge pods
    charge_locations: int = 20       # upper bound on the number of charge pods
    # Energy cost per step: one float for all actions, or a dict looked up
    # by the agent's last action.
    per_action_costs: Union[dict, float] = 0.02
    done_when_discharged: bool = False   # end the episode once any battery is empty
    multi_charge: bool = False           # forwarded to the charge pods
|
@ -1,139 +0,0 @@
|
||||
from typing import Dict, List
|
||||
|
||||
import numpy as np
|
||||
|
||||
from environments.factory.additional.btry.btry_collections import Batteries, ChargePods
|
||||
from environments.factory.additional.btry.btry_util import Constants, Actions, RewardsBtry, BatteryProperties
|
||||
from environments.factory.base.base_factory import BaseFactory
|
||||
from environments.factory.base.objects import Agent, Action
|
||||
from environments.factory.base.renderer import RenderEntity
|
||||
|
||||
c = Constants
|
||||
a = Actions
|
||||
|
||||
|
||||
class BatteryFactory(BaseFactory):
|
||||
|
||||
def __init__(self, *args, btry_prop=BatteryProperties(), rewards_btry: RewardsBtry = RewardsBtry(),
|
||||
**kwargs):
|
||||
if isinstance(btry_prop, dict):
|
||||
btry_prop = BatteryProperties(**btry_prop)
|
||||
if isinstance(rewards_btry, dict):
|
||||
rewards_btry = RewardsBtry(**rewards_btry)
|
||||
self.btry_prop = btry_prop
|
||||
self.rewards_dest = rewards_btry
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
def per_agent_raw_observations_hook(self, agent) -> Dict[str, np.typing.ArrayLike]:
|
||||
additional_raw_observations = super().per_agent_raw_observations_hook(agent)
|
||||
additional_raw_observations.update({c.BATTERIES: self[c.BATTERIES].as_array_by_entity(agent)})
|
||||
return additional_raw_observations
|
||||
|
||||
def observations_hook(self) -> Dict[str, np.typing.ArrayLike]:
|
||||
additional_observations = super().observations_hook()
|
||||
additional_observations.update({c.CHARGE_PODS: self[c.CHARGE_PODS].as_array()})
|
||||
return additional_observations
|
||||
|
||||
@property
|
||||
def entities_hook(self):
|
||||
super_entities = super().entities_hook
|
||||
|
||||
empty_tiles = self[c.FLOOR].empty_tiles[:self.btry_prop.charge_locations]
|
||||
charge_pods = ChargePods.from_tiles(
|
||||
empty_tiles, self._level_shape,
|
||||
entity_kwargs=dict(charge_rate=self.btry_prop.charge_rate,
|
||||
multi_charge=self.btry_prop.multi_charge)
|
||||
)
|
||||
|
||||
batteries = Batteries(self._level_shape if not self._pomdp_r else ((self.pomdp_diameter,) * 2),
|
||||
)
|
||||
batteries.spawn_batteries(self[c.AGENT], self.btry_prop.initial_charge)
|
||||
super_entities.update({c.BATTERIES: batteries, c.CHARGE_PODS: charge_pods})
|
||||
return super_entities
|
||||
|
||||
def step_hook(self) -> (List[dict], dict):
|
||||
super_reward_info = super(BatteryFactory, self).step_hook()
|
||||
|
||||
# Decharge
|
||||
batteries = self[c.BATTERIES]
|
||||
|
||||
for agent in self[c.AGENT]:
|
||||
if isinstance(self.btry_prop.per_action_costs, dict):
|
||||
energy_consumption = self.btry_prop.per_action_costs[agent.temp_action]
|
||||
else:
|
||||
energy_consumption = self.btry_prop.per_action_costs
|
||||
|
||||
batteries.by_entity(agent).decharge(energy_consumption)
|
||||
|
||||
return super_reward_info
|
||||
|
||||
def do_charge_action(self, agent) -> (dict, dict):
|
||||
if charge_pod := self[c.CHARGE_PODS].by_pos(agent.pos):
|
||||
valid = charge_pod.charge_battery(self[c.BATTERIES].by_entity(agent))
|
||||
if valid:
|
||||
info_dict = {f'{agent.name}_{a.CHARGE}_VALID': 1}
|
||||
self.print(f'{agent.name} just charged batteries at {charge_pod.name}.')
|
||||
else:
|
||||
info_dict = {f'{agent.name}_{a.CHARGE}_FAIL': 1}
|
||||
self.print(f'{agent.name} failed to charged batteries at {charge_pod.name}.')
|
||||
else:
|
||||
valid = c.NOT_VALID
|
||||
info_dict = {f'{agent.name}_{a.CHARGE}_FAIL': 1}
|
||||
# info_dict = {f'{agent.name}_no_charger': 1}
|
||||
self.print(f'{agent.name} failed to charged batteries at {agent.pos}.')
|
||||
reward = dict(value=self.rewards_dest.CHARGE_VALID if valid else self.rewards_dest.CHARGE_FAIL,
|
||||
reason=a.CHARGE, info=info_dict)
|
||||
return valid, reward
|
||||
|
||||
def do_additional_actions(self, agent: Agent, action: Action) -> (bool, dict):
|
||||
action_result = super().do_additional_actions(agent, action)
|
||||
if action_result is None:
|
||||
if action == a.CHARGE:
|
||||
action_result = self.do_charge_action(agent)
|
||||
return action_result
|
||||
else:
|
||||
return None
|
||||
else:
|
||||
return action_result
|
||||
pass
|
||||
|
||||
def reset_hook(self) -> (List[dict], dict):
|
||||
super_reward_info = super(BatteryFactory, self).reset_hook()
|
||||
# There is Nothing to reset.
|
||||
return super_reward_info
|
||||
|
||||
def check_additional_done(self) -> (bool, dict):
|
||||
super_done, super_dict = super(BatteryFactory, self).check_additional_done()
|
||||
if super_done:
|
||||
return super_done, super_dict
|
||||
else:
|
||||
if self.btry_prop.done_when_discharged:
|
||||
if btry_done := any(battery.is_discharged for battery in self[c.BATTERIES]):
|
||||
super_dict.update(DISCHARGE_DONE=1)
|
||||
return btry_done, super_dict
|
||||
else:
|
||||
pass
|
||||
else:
|
||||
pass
|
||||
return super_done, super_dict
|
||||
|
||||
def per_agent_reward_hook(self, agent: Agent) -> List[dict]:
    """Append a reward event when the battery belonging to `agent` is discharged.

    :param agent: The agent whose battery is inspected.
    :return: The parent's reward event list, extended by one event if the
             agent's battery is discharged.
    """
    events = super(BatteryFactory, self).per_agent_reward_hook(agent)
    if not self[c.BATTERIES].by_entity(agent).is_discharged:
        # All fine, nothing to report.
        return events

    self.print(f'{agent.name} Battery is discharged!')
    info_dict = {f'{agent.name}_{c.BATTERY_DISCHARGED}': 1}
    events.append(dict(value=self.rewards_dest.BATTERY_DISCHARGED,
                       reason=c.BATTERY_DISCHARGED,
                       info=info_dict))
    return events
|
||||
|
||||
def render_assets_hook(self):
    """Add one render entity per charge pod to the parent's render assets."""
    # noinspection PyUnresolvedReferences
    assets = super().render_assets_hook()
    pod_assets = []
    for pod in self[c.CHARGE_PODS]:
        pod_assets.append(RenderEntity(c.CHARGE_PODS, pod.tile.pos))
    assets.extend(pod_assets)
    return assets
|
@ -1,82 +0,0 @@
|
||||
import random
|
||||
|
||||
|
||||
# noinspection PyAbstractClass
|
||||
from environments.factory.additional.btry.btry_util import BatteryProperties
|
||||
from environments.factory.additional.btry.factory_battery import BatteryFactory
|
||||
from environments.factory.additional.dest.factory_dest import DestFactory
|
||||
from environments.factory.additional.dirt.dirt_util import DirtProperties
|
||||
from environments.factory.additional.dirt.factory_dirt import DirtFactory
|
||||
from environments.factory.additional.doors.factory_doors import DoorFactory
|
||||
from environments.factory.additional.item.factory_item import ItemFactory
|
||||
|
||||
|
||||
# noinspection PyAbstractClass
|
||||
class DoorDirtFactory(DoorFactory, DirtFactory):
    """Environment combining `DoorFactory` and `DirtFactory` via multiple inheritance."""

    def __init__(self, *args, **kwargs):
        # All arguments are passed along the method resolution order unchanged.
        super().__init__(*args, **kwargs)
|
||||
|
||||
|
||||
# noinspection PyAbstractClass
|
||||
class DirtItemFactory(ItemFactory, DirtFactory):
    """Environment combining `ItemFactory` and `DirtFactory` via multiple inheritance."""

    def __init__(self, *args, **kwargs):
        # All arguments are passed along the method resolution order unchanged.
        super().__init__(*args, **kwargs)
|
||||
|
||||
|
||||
# noinspection PyAbstractClass
|
||||
class DirtBatteryFactory(DirtFactory, BatteryFactory):
    """Environment combining `DirtFactory` and `BatteryFactory` via multiple inheritance."""

    def __init__(self, *args, **kwargs):
        # All arguments are passed along the method resolution order unchanged.
        super().__init__(*args, **kwargs)
|
||||
|
||||
|
||||
# noinspection PyAbstractClass
|
||||
class DirtDestItemFactory(ItemFactory, DirtFactory, DestFactory):
    """Environment combining `ItemFactory`, `DirtFactory` and `DestFactory`."""

    def __init__(self, *args, **kwargs):
        # All arguments are passed along the method resolution order unchanged.
        super().__init__(*args, **kwargs)
|
||||
|
||||
|
||||
# noinspection PyAbstractClass
|
||||
class DestBatteryFactory(BatteryFactory, DestFactory):
    """Environment combining `BatteryFactory` and `DestFactory` via multiple inheritance."""

    def __init__(self, *args, **kwargs):
        # All arguments are passed along the method resolution order unchanged.
        super().__init__(*args, **kwargs)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke run: drive a DoorDirtFactory with random actions for a few episodes.
    from environments.utility_classes import AgentRenderOptions as ARO, ObservationProperties

    render = True

    obs_props = ObservationProperties(
        render_agents=ARO.COMBINED,
        omit_agent_self=True,
        pomdp_r=2,
        additional_agent_placeholder=None,
    )

    move_props = dict(allow_square_movement=True,
                      allow_diagonal_movement=False,
                      allow_no_op=False)

    factory = DoorDirtFactory(
        n_agents=10, done_at_collision=False,
        level_name='rooms', max_steps=400,
        obs_prop=obs_props, parse_doors=True,
        record_episodes=True, verbose=True,
        dirt_prop=DirtProperties(),
        mv_prop=move_props,
    )

    # noinspection DuplicatedCode
    n_actions = factory.action_space.n - 1
    _ = factory.observation_space

    for epoch in range(4):
        # One list of per-agent action indices for every possible step of the episode.
        random_actions = [
            [random.randint(0, n_actions) for _ in range(factory.n_agents)]
            for _ in range(factory.max_steps + 1)
        ]
        env_state = factory.reset()
        r = 0
        for agent_i_action in random_actions:
            env_state, step_r, done_bool, info_obj = factory.step(agent_i_action)
            r += step_r
            if render:
                factory.render()
            if done_bool:
                break
        print(f'Factory run {epoch} done, reward is:\n {r}')
|
@ -1,38 +0,0 @@
|
||||
from environments.factory.base.registers import EntityCollection
|
||||
from environments.factory.additional.dest.dest_util import Constants as c
|
||||
from environments.factory.additional.dest.dest_enitites import Destination
|
||||
|
||||
|
||||
class Destinations(EntityCollection):
    """Collection of `Destination` entities."""

    _accepted_objects = Destination

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.is_blocking_light = False
        self.can_be_shadowed = False

    def as_array(self):
        """Write the encoding of every positioned destination into the array and return it.

        The whole array is first reset to `c.FREE_CELL`; destinations whose position
        equals `c.NO_POS` are left out.
        """
        self._array[:] = c.FREE_CELL
        # ToDo: Switch to the new style array put (np.put on raveled indices).
        for destination in self:
            if destination.pos != c.NO_POS:
                self._array[0, destination.x, destination.y] = destination.encoding
        return self._array

    def __repr__(self):
        return super(Destinations, self).__repr__()
|
||||
|
||||
|
||||
class ReachedDestinations(Destinations):
    """Collection that holds `Destination` entities after they have been reached."""

    _accepted_objects = Destination

    def __init__(self, *args, **kwargs):
        super(ReachedDestinations, self).__init__(*args, **kwargs)
        # Same flags the parent constructor already sets; kept explicit here.
        self.is_blocking_light = False
        self.can_be_shadowed = False

    def __repr__(self):
        return super(ReachedDestinations, self).__repr__()
|
@ -1,41 +0,0 @@
|
||||
from typing import NamedTuple
|
||||
|
||||
from environments.helpers import Constants as BaseConstants, EnvActions as BaseActions
|
||||
|
||||
|
||||
class Constants(BaseConstants):
    """Constants of the destination environment, on top of the base constants."""

    # Destination Env
    DEST = 'Destination'                   # Identifier of the destination collection.
    DESTINATION = 1                        # NOTE(review): presumably an observation value - confirm.
    DESTINATION_DONE = 0.5                 # NOTE(review): presumably an observation value - confirm.
    DEST_REACHED = 'ReachedDestination'    # Identifier of the reached-destination collection.
|
||||
|
||||
|
||||
class Actions(BaseActions):
    """Actions of the destination environment, on top of the base actions."""

    WAIT_ON_DEST = 'WAIT'  # Identifier of the wait-on-destination action.
|
||||
|
||||
|
||||
class RewardsDest(NamedTuple):
    """Reward values handed out by the destination environment."""

    WAIT_VALID: float = 0.1      # Reward for a valid wait action.
    WAIT_FAIL: float = -0.1      # Reward for a failed wait action.
    DEST_REACHED: float = 5.0    # Reward for reaching a destination.
|
||||
|
||||
|
||||
class DestModeOptions(object):
    """Valid values for `DestProperties.spawn_mode`."""

    DONE = 'DONE'
    GROUPED = 'GROUPED'
    PER_DEST = 'PER_DEST'


class _DestPropertiesBase(NamedTuple):
    """Plain field definition of `DestProperties` (no validation)."""

    n_dests: int = 1                        # How many destinations are there
    dwell_time: int = 0                     # How long does the agent need to "wait" on a destination
    spawn_frequency: int = 0
    spawn_in_other_zone: bool = True        #
    spawn_mode: str = DestModeOptions.DONE


class DestProperties(_DestPropertiesBase):
    """Configuration of the destination environment.

    Same fields, order and defaults as before. The value checks used to be
    `assert` statements in the NamedTuple class body, where they only ran once
    against the default values. They are now applied to every constructed
    instance. `_replace` and `_make` bypass the constructor and are not checked.

    :raises ValueError: If `dwell_time`, `spawn_frequency` or `n_dests` is
                        negative, or if `spawn_mode` and `spawn_frequency`
                        contradict each other.
    """

    __slots__ = ()

    def __new__(cls, *args, **kwargs):
        self = super().__new__(cls, *args, **kwargs)
        if self.dwell_time < 0:
            raise ValueError('dwell_time cannot be < 0!')
        if self.spawn_frequency < 0:
            raise ValueError('spawn_frequency cannot be < 0!')
        if self.n_dests < 0:
            raise ValueError('n_destinations cannot be < 0!')
        # Mode DONE means "no timed respawn" and needs frequency 0; every other
        # mode needs a positive frequency.
        if (self.spawn_mode == DestModeOptions.DONE) == bool(self.spawn_frequency):
            raise ValueError('spawn_mode DONE requires spawn_frequency == 0; '
                             'every other spawn_mode requires spawn_frequency > 0!')
        return self
|
@ -1,203 +0,0 @@
|
||||
import time
|
||||
from enum import Enum
|
||||
from typing import List, Union, Dict
|
||||
import numpy as np
|
||||
import random
|
||||
|
||||
from environments.factory.additional.dest.dest_collections import Destinations, ReachedDestinations
|
||||
from environments.factory.additional.dest.dest_enitites import Destination
|
||||
from environments.factory.additional.dest.dest_util import Constants, Actions, RewardsDest, DestModeOptions, \
|
||||
DestProperties
|
||||
from environments.factory.base.base_factory import BaseFactory
|
||||
from environments.factory.base.objects import Agent, Action
|
||||
from environments.factory.base.registers import Entities
|
||||
|
||||
from environments.factory.base.renderer import RenderEntity
|
||||
|
||||
c = Constants
|
||||
a = Actions
|
||||
|
||||
|
||||
# noinspection PyAttributeOutsideInit, PyAbstractClass
|
||||
class DestFactory(BaseFactory):
    """Factory environment extension that adds destinations for the agents.

    Destinations live in the `c.DEST` collection. Once a destination is
    considered reached it is moved to `c.DEST_REACHED`, where the agent standing
    on it is rewarded. Reached destinations respawn according to
    `dest_prop.spawn_frequency` and `dest_prop.spawn_mode`.
    """
    # noinspection PyMissingConstructor

    def __init__(self, *args, dest_prop: DestProperties = DestProperties(), rewards_dest: RewardsDest = RewardsDest(),
                 env_seed: Union[int, None] = None, **kwargs):
        """
        :param dest_prop: Destination configuration, as `DestProperties` or as a dict of its fields.
        :param rewards_dest: Reward configuration, as `RewardsDest` or as a dict of its fields.
        :param env_seed: Seed for this environment's random generator; it is also passed
                         to the parent constructor. None draws a fresh time based seed.
        """
        # The former default `env_seed=time.time_ns()` was evaluated once at
        # definition time, so all instances of one process shared a seed.
        if env_seed is None:
            env_seed = time.time_ns()
        if isinstance(dest_prop, dict):
            dest_prop = DestProperties(**dest_prop)
        if isinstance(rewards_dest, dict):
            rewards_dest = RewardsDest(**rewards_dest)
        self.dest_prop = dest_prop
        self.rewards_dest = rewards_dest
        kwargs.update(env_seed=env_seed)
        self._dest_rng = np.random.default_rng(env_seed)
        super().__init__(*args, **kwargs)

    @property
    def actions_hook(self) -> Union[Action, List[Action]]:
        """The parent's actions, plus the wait action if a dwell time is configured."""
        # noinspection PyUnresolvedReferences
        super_actions = super().actions_hook
        # If targets are considered reached after some time, agents need an action for that.
        if self.dest_prop.dwell_time:
            super_actions.append(Action(enum_ident=a.WAIT_ON_DEST))
        return super_actions

    @property
    def entities_hook(self) -> Dict[Enum, Entities]:
        """The parent's entities, plus the destination and reached-destination collections."""
        # noinspection PyUnresolvedReferences
        super_entities = super().entities_hook

        empty_tiles = self[c.FLOOR].empty_tiles[:self.dest_prop.n_dests]
        destinations = Destinations.from_tiles(
            empty_tiles, self._level_shape,
            entity_kwargs=dict(dwell_time=self.dest_prop.dwell_time)
        )
        reached_destinations = ReachedDestinations(level_shape=self._level_shape)

        super_entities.update({c.DEST: destinations, c.DEST_REACHED: reached_destinations})
        return super_entities

    def do_wait_action(self, agent: Agent) -> (dict, dict):
        """Let `agent` wait on the destination at its position, if there is one.

        :return: Tuple of the validity flag and a reward dict (``value``, ``reason``, ``info``).
        """
        if destination := self[c.DEST].by_pos(agent.pos):
            valid = destination.do_wait_action(agent)
            self.print(f'{agent.name} just waited at {agent.pos}')
            info_dict = {f'{agent.name}_{a.WAIT_ON_DEST}_VALID': 1}
        else:
            valid = c.NOT_VALID
            self.print(f'{agent.name} just tried to do_wait_action do_wait_action at {agent.pos} but failed')
            info_dict = {f'{agent.name}_{a.WAIT_ON_DEST}_FAIL': 1}
        reward = dict(value=self.rewards_dest.WAIT_VALID if valid else self.rewards_dest.WAIT_FAIL,
                      reason=a.WAIT_ON_DEST, info=info_dict)
        return valid, reward

    def do_additional_actions(self, agent: Agent, action: Action) -> (dict, dict):
        """Resolve `action`; the parent is asked first, then the wait action is handled.

        :return: The parent's result if it is not None, the result of `do_wait_action`
                 for the wait action, otherwise None.
        """
        # noinspection PyUnresolvedReferences
        super_action_result = super().do_additional_actions(agent, action)
        if super_action_result is not None:
            return super_action_result
        if action == a.WAIT_ON_DEST:
            return self.do_wait_action(agent)
        return None

    def reset_hook(self) -> None:
        """Reset the parent and clear all pending destination spawn timers."""
        # noinspection PyUnresolvedReferences
        super().reset_hook()
        self._dest_spawn_timer = dict()

    def trigger_destination_spawn(self):
        """Spawn new destinations for every spawn timer that reached `spawn_frequency`.

        In `GROUPED` mode nothing is spawned until the number of due timers
        equals `dest_prop.n_dests`.
        """
        due_names = [name for name, ticks in self._dest_spawn_timer.items()
                     if ticks == self.dest_prop.spawn_frequency]
        if not due_names:
            self.print('No Items are spawning, limit is reached.')
            return

        n_dest_to_spawn = len(due_names)
        is_grouped = self.dest_prop.spawn_mode == DestModeOptions.GROUPED
        if is_grouped and n_dest_to_spawn != self.dest_prop.n_dests:
            self.print(f'{n_dest_to_spawn} new destinations could be spawned, but waiting for all.')
            return

        # Respawned destinations get the configured dwell time, like the initial
        # ones created in `entities_hook`. It used to be omitted here.
        destinations = [Destination(tile, self[c.DEST], dwell_time=self.dest_prop.dwell_time)
                        for tile in self[c.FLOOR].empty_tiles[:n_dest_to_spawn]]
        self[c.DEST].add_additional_items(destinations)
        for name in due_names:
            del self._dest_spawn_timer[name]
        self.print(f'{n_dest_to_spawn} new destinations have been spawned')

    def step_hook(self) -> (List[dict], dict):
        """Advance spawn timers, retire reached destinations and trigger respawns.

        :return: The unmodified result of the parent's `step_hook`.
        """
        # noinspection PyUnresolvedReferences
        super_reward_info = super().step_hook()
        # Timers count up to, but never beyond, the spawn frequency.
        for name, ticks in self._dest_spawn_timer.items():
            self._dest_spawn_timer[name] = min(self.dest_prop.spawn_frequency, ticks + 1)

        for dest in list(self[c.DEST].values()):
            if dest.is_considered_reached:
                dest.change_parent_collection(self[c.DEST_REACHED])
                self._dest_spawn_timer[dest.name] = 0
                self.print(f'{dest.name} is reached now, removing...')
                continue
            # Iterate over a snapshot: `dest.leave` is called inside the loop and
            # may shrink the container behind `currently_dwelling_names`.
            for agent_name in list(dest.currently_dwelling_names):
                agent = self[c.AGENT].by_name(agent_name)
                if agent.pos == dest.pos:
                    self.print(f'{agent.name} is still waiting.')
                else:
                    dest.leave(agent)
                    self.print(f'{agent.name} left the destination early.')
        self.trigger_destination_spawn()
        return super_reward_info

    def observations_hook(self) -> Dict[str, np.typing.ArrayLike]:
        """The parent's observations, plus the destination array under `c.DEST`."""
        additional_observations = super().observations_hook()
        additional_observations.update({c.DEST: self[c.DEST].as_array()})
        return additional_observations

    def per_agent_reward_hook(self, agent: Agent) -> List[dict]:
        """Reward `agent` for every reached destination it currently stands on.

        Each rewarded destination is deleted from the `c.DEST_REACHED` collection.
        """
        # noinspection PyUnresolvedReferences
        reward_event_list = super().per_agent_reward_hook(agent)
        # Iterate over a copy, entries are deleted from the collection inside the loop.
        for reached_dest in list(self[c.DEST_REACHED]):
            if agent.pos == reached_dest.pos:
                self.print(f'{agent.name} just reached destination at {agent.pos}')
                self[c.DEST_REACHED].delete_env_object(reached_dest)
                info_dict = {f'{agent.name}_{c.DEST_REACHED}': 1}
                reward_event_list.append({'value': self.rewards_dest.DEST_REACHED,
                                          'reason': c.DEST_REACHED,
                                          'info': info_dict})
        return reward_event_list

    def render_assets_hook(self, mode='human'):
        """The parent's render assets, plus one render entity per destination."""
        # noinspection PyUnresolvedReferences
        additional_assets = super().render_assets_hook()
        destinations = [RenderEntity(c.DEST, dest.pos) for dest in self[c.DEST]]
        additional_assets.extend(destinations)
        return additional_assets
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke run: drive a DestFactory with random actions for a few episodes.
    from environments.utility_classes import AgentRenderOptions as aro, ObservationProperties

    render = True

    dest_probs = DestProperties(n_dests=2, spawn_frequency=5, spawn_mode=DestModeOptions.GROUPED)

    obs_props = ObservationProperties(render_agents=aro.LEVEL, omit_agent_self=True, pomdp_r=2)

    move_props = dict(allow_square_movement=True,
                      allow_diagonal_movement=False,
                      allow_no_op=False)

    factory = DestFactory(
        n_agents=10, done_at_collision=False,
        level_name='rooms', max_steps=400,
        obs_prop=obs_props, parse_doors=True,
        verbose=True,
        mv_prop=move_props, dest_prop=dest_probs,
    )

    # noinspection DuplicatedCode
    n_actions = factory.action_space.n - 1
    _ = factory.observation_space

    for epoch in range(4):
        # One list of per-agent action indices for every possible step of the episode.
        random_actions = [
            [random.randint(0, n_actions) for _ in range(factory.n_agents)]
            for _ in range(factory.max_steps + 1)
        ]
        env_state = factory.reset()
        r = 0
        for agent_i_action in random_actions:
            env_state, step_r, done_bool, info_obj = factory.step(agent_i_action)
            r += step_r
            if render:
                factory.render()
            if done_bool:
                break
        print(f'Factory run {epoch} done, reward is:\n {r}')
|
@ -1,44 +0,0 @@
|
||||
from environments.factory.additional.dirt.dirt_entity import DirtPile
|
||||
from environments.factory.additional.dirt.dirt_util import DirtProperties
|
||||
from environments.factory.base.objects import Floor
|
||||
from environments.factory.base.registers import EntityCollection
|
||||
from environments.factory.additional.dirt.dirt_util import Constants as c
|
||||
|
||||
|
||||
class DirtPiles(EntityCollection):
    """Collection of `DirtPile` entities that also knows the dirt configuration."""

    _accepted_objects = DirtPile

    def __init__(self, dirt_properties, *args):
        super(DirtPiles, self).__init__(*args)
        self._dirt_properties: DirtProperties = dirt_properties

    @property
    def amount(self):
        """Sum of the amounts of all dirt piles in this collection."""
        return sum(pile.amount for pile in self)

    @property
    def dirt_properties(self):
        return self._dirt_properties

    def spawn_dirt(self, then_dirty_tiles) -> bool:
        """Create new dirt piles on, or add dirt to existing piles on, the given tiles.

        :param then_dirty_tiles: A single `Floor` tile or an iterable of tiles.
        :return: `c.NOT_VALID` as soon as the global amount exceeds
                 `max_global_amount` or a tile without dirt holds more than one
                 guest; tiles handled before that stay modified. Otherwise `c.VALID`.
        """
        props = self.dirt_properties
        tiles = [then_dirty_tiles] if isinstance(then_dirty_tiles, Floor) else then_dirty_tiles
        for tile in tiles:
            if self.amount > props.max_global_amount:
                return c.NOT_VALID
            pile = self.by_pos(tile.pos)
            if pile is not None:
                # Top up the existing pile, capped at the per-tile maximum.
                topped_up = pile.amount + props.max_spawn_amount
                pile.set_new_amount(min(topped_up, props.max_local_amount))
                continue
            if len(tile.guests) > 1:
                return c.NOT_VALID
            self.add_item(DirtPile(tile, self, amount=props.max_spawn_amount))
        return c.VALID

    def __repr__(self):
        # Re-open the parent's representation to append the total amount.
        parent_repr = super(DirtPiles, self).__repr__()
        return f'{parent_repr[:-1]}, {self.amount})'
|
@ -1,26 +0,0 @@
|
||||
from environments.factory.base.objects import Entity
|
||||
|
||||
|
||||
class DirtPile(Entity):
    """Entity that represents an amount of dirt on a tile."""

    def __init__(self, *args, amount=None, **kwargs):
        super(DirtPile, self).__init__(*args, **kwargs)
        self._amount = amount

    @property
    def amount(self):
        return self._amount

    @property
    def encoding(self):
        # The observation value of a pile is its amount; edit this to draw piles differently.
        return self._amount

    def set_new_amount(self, amount):
        """Store the new amount and notify the owning collection about the change."""
        self._amount = amount
        self._collection.notify_change_to_value(self)

    def summarize_state(self):
        """The parent's state summary, extended by the amount as a float."""
        summary = super().summarize_state()
        summary.update(amount=float(self.amount))
        return summary
|
@ -1,30 +0,0 @@
|
||||
from typing import NamedTuple
|
||||
|
||||
from environments.helpers import Constants as BaseConstants, EnvActions as BaseActions
|
||||
|
||||
|
||||
class Constants(BaseConstants):
    """Constants of the dirt environment, on top of the base constants."""

    DIRT = 'DirtPile'  # Identifier of the dirt pile collection.
|
||||
|
||||
|
||||
class Actions(BaseActions):
    """Actions of the dirt environment, on top of the base actions."""

    CLEAN_UP = 'do_cleanup_action'  # Identifier of the clean-up action.
|
||||
|
||||
|
||||
class RewardsDirt(NamedTuple):
    """Reward values handed out by the dirt environment."""

    CLEAN_UP_VALID: float = 0.5         # Reward for a valid clean-up action.
    CLEAN_UP_FAIL: float = -0.1         # Reward for a failed clean-up action.
    CLEAN_UP_LAST_PIECE: float = 4.5    # Extra reward for cleaning up the last dirt pile.
|
||||
|
||||
|
||||
class DirtProperties(NamedTuple):
    """Configuration of the dirt environment."""

    # On INIT, on max how many tiles does the dirt spawn in percent.
    initial_dirt_ratio: float = 0.3
    # How much does the dirt spawn amount vary?
    initial_dirt_spawn_r_var: float = 0.05
    # How much does the robot clean with one action.
    clean_amount: float = 1
    # On max how many tiles does the dirt spawn in percent.
    max_spawn_ratio: float = 0.20
    # How much dirt does spawn per tile at max.
    max_spawn_amount: float = 0.3
    # Spawn frequency in steps.
    spawn_frequency: int = 0
    # Max dirt amount per tile.
    max_local_amount: int = 2
    # Max dirt amount in the whole environment.
    max_global_amount: int = 20
    # Agents smear dirt, when not cleaning up in place.
    dirt_smear_amount: float = 0.2
    # Whether the episode is done as soon as no dirt pile is left.
    done_when_clean: bool = True
|
@ -1,252 +0,0 @@
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import List, Union, Dict
|
||||
import random
|
||||
|
||||
import numpy as np
|
||||
|
||||
from environments.factory.additional.dirt.dirt_collections import DirtPiles
|
||||
from environments.factory.additional.dirt.dirt_entity import DirtPile
|
||||
from environments.factory.additional.dirt.dirt_util import Constants, Actions, RewardsDirt, DirtProperties
|
||||
|
||||
from environments.factory.base.base_factory import BaseFactory
|
||||
from environments.factory.base.objects import Agent, Action
|
||||
from environments.factory.base.registers import Entities
|
||||
|
||||
from environments.factory.base.renderer import RenderEntity
|
||||
from environments.utility_classes import ObservationProperties
|
||||
|
||||
|
||||
def softmax(x):
    """Compute softmax values for each set of scores in x."""
    # Shifting by the maximum leaves the result unchanged and keeps exp() from overflowing.
    exponentials = np.exp(x - np.max(x))
    normaliser = exponentials.sum()
    return exponentials / normaliser
|
||||
|
||||
|
||||
def entropy(x):
    """Return -sum(x * log(x + 1e-8)); the small offset keeps log() finite for zeros."""
    weighted_logs = x * np.log(x + 1e-8)
    return -weighted_logs.sum()
|
||||
|
||||
|
||||
c = Constants
|
||||
a = Actions
|
||||
|
||||
|
||||
# noinspection PyAttributeOutsideInit, PyAbstractClass
|
||||
class DirtFactory(BaseFactory):
    """Factory environment extension that adds dirt piles for the agents to clean up.

    Dirt is spawned on reset and, if configured, every `dirt_prop.spawn_frequency`
    steps. Moving agents smear dirt from their previous tile onto their current one
    when `dirt_prop.dirt_smear_amount` is set.
    """

    @property
    def actions_hook(self) -> Union[Action, List[Action]]:
        """The parent's actions, plus the clean-up action."""
        super_actions = super().actions_hook
        super_actions.append(Action(str_ident=a.CLEAN_UP))
        return super_actions

    @property
    def entities_hook(self) -> Dict[str, Entities]:
        """The parent's entities, plus an empty dirt pile collection under `c.DIRT`."""
        super_entities = super().entities_hook
        dirt_register = DirtPiles(self.dirt_prop, self._level_shape)
        super_entities.update({c.DIRT: dirt_register})
        return super_entities

    def __init__(self, *args,
                 dirt_prop: DirtProperties = DirtProperties(), rewards_dirt: RewardsDirt = RewardsDirt(),
                 env_seed: Union[int, None] = None, **kwargs):
        """
        :param dirt_prop: Dirt configuration, as `DirtProperties` or as a dict of its fields.
        :param rewards_dirt: Reward configuration, as `RewardsDirt` or as a dict of its fields.
        :param env_seed: Seed for this environment's random generator; it is also passed
                         to the parent constructor. None draws a fresh time based seed.
        """
        # The former default `env_seed=time.time_ns()` was evaluated once at
        # definition time, so all instances of one process shared a seed.
        if env_seed is None:
            env_seed = time.time_ns()
        if isinstance(dirt_prop, dict):
            dirt_prop = DirtProperties(**dirt_prop)
        if isinstance(rewards_dirt, dict):
            rewards_dirt = RewardsDirt(**rewards_dirt)
        self.dirt_prop = dirt_prop
        self.rewards_dirt = rewards_dirt
        self._dirt_rng = np.random.default_rng(env_seed)
        self._dirt: DirtPiles
        kwargs.update(env_seed=env_seed)
        # TODO: Reset ---> document this
        super().__init__(*args, **kwargs)

    def render_assets_hook(self, mode='human'):
        """The parent's render assets, plus one scaled render entity per dirt pile."""
        additional_assets = super().render_assets_hook()
        dirt = [RenderEntity('dirt', dirt.tile.pos, min(0.15 + dirt.amount, 1.5), 'scale')
                for dirt in self[c.DIRT]]
        additional_assets.extend(dirt)
        return additional_assets

    def do_cleanup_action(self, agent: Agent) -> (dict, dict):
        """Remove `dirt_prop.clean_amount` of dirt from the pile at the agent's position.

        A pile whose amount would drop to zero or below is deleted.

        :return: Tuple of the validity flag and a reward dict (``value``, ``reason``, ``info``).
        """
        if dirt := self[c.DIRT].by_pos(agent.pos):
            new_dirt_amount = dirt.amount - self.dirt_prop.clean_amount

            if new_dirt_amount <= 0:
                self[c.DIRT].delete_env_object(dirt)
            else:
                dirt.set_new_amount(max(new_dirt_amount, c.FREE_CELL.value))
            valid = c.VALID
            self.print(f'{agent.name} did just clean up some dirt at {agent.pos}.')
            info_dict = {f'{agent.name}_{a.CLEAN_UP}_VALID': 1, 'cleanup_valid': 1}
            reward = self.rewards_dirt.CLEAN_UP_VALID
        else:
            valid = c.NOT_VALID
            self.print(f'{agent.name} just tried to clean up some dirt at {agent.pos}, but failed.')
            info_dict = {f'{agent.name}_{a.CLEAN_UP}_FAIL': 1, 'cleanup_fail': 1}
            reward = self.rewards_dirt.CLEAN_UP_FAIL

        if valid and self.dirt_prop.done_when_clean and (len(self[c.DIRT]) == 0):
            reward += self.rewards_dirt.CLEAN_UP_LAST_PIECE
            self.print(f'{agent.name} picked up the last piece of dirt!')
            # Add to the info dict instead of replacing it; replacing dropped the
            # `_VALID` / `cleanup_valid` entries of this (valid) clean-up.
            info_dict[f'{agent.name}_{a.CLEAN_UP}_LAST_PIECE'] = 1
        return valid, dict(value=reward, reason=a.CLEAN_UP, info=info_dict)

    def trigger_dirt_spawn(self, initial_spawn=False):
        """Spawn dirt on a random share of the floor tiles that may receive dirt.

        A tile may receive dirt when it has no guest, or a dirt pile as its only guest.

        :param initial_spawn: If True, the share is `initial_dirt_ratio` varied by
                              `initial_dirt_spawn_r_var`; otherwise it is drawn
                              uniformly from [0, `max_spawn_ratio`].
        """
        dirt_rng = self._dirt_rng
        free_for_dirt = [x for x in self[c.FLOOR]
                         if len(x.guests) == 0 or (len(x.guests) == 1 and isinstance(next(y for y in x.guests), DirtPile))
                         ]
        self._dirt_rng.shuffle(free_for_dirt)
        if initial_spawn:
            var = self.dirt_prop.initial_dirt_spawn_r_var
            new_spawn = self.dirt_prop.initial_dirt_ratio + dirt_rng.uniform(-var, var)
        else:
            new_spawn = dirt_rng.uniform(0, self.dirt_prop.max_spawn_ratio)
        n_dirt_tiles = max(0, int(new_spawn * len(free_for_dirt)))
        self[c.DIRT].spawn_dirt(free_for_dirt[:n_dirt_tiles])

    def _smear_dirt(self, agent: Agent, smear_amount: float) -> None:
        """Move a share of the dirt on the agent's previous tile to its current tile."""
        step_result = agent.step_result
        if not (step_result['action_valid'] and agent.last_pos != c.NO_POS):
            return
        if not self._actions.is_moving_action(step_result['action_name']):
            return
        old_pos_dirt = self[c.DIRT].by_pos(agent.last_pos)
        if not old_pos_dirt:
            return
        smeared_dirt = round(old_pos_dirt.amount * smear_amount, 2)
        if not smeared_dirt:
            return
        old_pos_dirt.set_new_amount(max(0, old_pos_dirt.amount - smeared_dirt))
        new_pos_dirt = self[c.DIRT].by_pos(agent.pos)
        if not new_pos_dirt and self[c.DIRT].spawn_dirt(agent.tile):
            # No pile on the new tile yet: a fresh one was spawned, look it up.
            new_pos_dirt = self[c.DIRT].by_pos(agent.pos)
        if new_pos_dirt:
            new_pos_dirt.set_new_amount(max(0, new_pos_dirt.amount + smeared_dirt))

    def step_hook(self) -> (List[dict], dict):
        """Smear dirt behind moving agents and run the periodic dirt spawn.

        :return: The unmodified result of the parent's `step_hook`.
        """
        super_reward_info = super().step_hook()
        if smear_amount := self.dirt_prop.dirt_smear_amount:
            for agent in self[c.AGENT]:
                self._smear_dirt(agent, smear_amount)
        if self._next_dirt_spawn < 0:
            pass  # No DirtPile Spawn
        elif not self._next_dirt_spawn:
            self.trigger_dirt_spawn()
            self._next_dirt_spawn = self.dirt_prop.spawn_frequency
        else:
            self._next_dirt_spawn -= 1
        return super_reward_info

    def do_additional_actions(self, agent: Agent, action: Action) -> (dict, dict):
        """Resolve `action`; the parent is asked first, then the clean-up action is handled.

        :return: The parent's result if it is not None, the result of `do_cleanup_action`
                 for the clean-up action, otherwise None.
        """
        action_result = super().do_additional_actions(agent, action)
        if action_result is not None:
            return action_result
        if action == a.CLEAN_UP:
            return self.do_cleanup_action(agent)
        return None

    def reset_hook(self) -> None:
        """Reset the parent, spawn the initial dirt and re-arm the spawn countdown."""
        super().reset_hook()
        self.trigger_dirt_spawn(initial_spawn=True)
        # A negative countdown disables the periodic spawn (see `step_hook`).
        self._next_dirt_spawn = self.dirt_prop.spawn_frequency if self.dirt_prop.spawn_frequency else -1

    def check_additional_done(self) -> (bool, dict):
        """Extend the parent's done check by the all-clean condition.

        :return: Tuple of the done flag and the info dict. If `done_when_clean` is set
                 and no dirt pile is left, the info dict gains ``ALL_CLEAN_DONE`` and
                 the flag is True.
        """
        super_done, super_dict = super().check_additional_done()
        if self.dirt_prop.done_when_clean:
            if all_cleaned := len(self[c.DIRT]) == 0:
                super_dict.update(ALL_CLEAN_DONE=all_cleaned)
                return all_cleaned, super_dict
        return super_done, super_dict

    def observations_hook(self) -> Dict[str, np.typing.ArrayLike]:
        """The parent's observations, plus the dirt array under `c.DIRT`."""
        additional_observations = super().observations_hook()
        additional_observations.update({c.DIRT: self[c.DIRT].as_array()})
        return additional_observations

    def post_step_hook(self) -> List[Dict[str, int]]:
        """Append the current dirt amount and dirty tile count to the parent's post-step info."""
        super_post_step = super(DirtFactory, self).post_step_hook()

        dirt = [dirt.amount for dirt in self[c.DIRT]]
        info_dict = dict(dirt_amount=sum(dirt), dirty_tile_count=len(dirt))

        super_post_step.append(info_dict)
        return super_post_step
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual timing run: drive DirtFactory instances with random actions and
    # report how long the episodes take.
    from environments.utility_classes import AgentRenderOptions as aro
    render = True

    dirt_props = DirtProperties(
        initial_dirt_ratio=0.35,
        initial_dirt_spawn_r_var=0.1,
        clean_amount=0.34,
        max_spawn_amount=0.1,
        max_global_amount=20,
        max_local_amount=1,
        spawn_frequency=0,
        max_spawn_ratio=0.05,
        dirt_smear_amount=0.0
    )

    obs_props = ObservationProperties(render_agents=aro.COMBINED, omit_agent_self=True,
                                      pomdp_r=2, additional_agent_placeholder=None, cast_shadows=True,
                                      indicate_door_area=False)

    move_props = {'allow_square_movement': True,
                  'allow_diagonal_movement': False,
                  'allow_no_op': False}
    # `time` is already imported at the top of this module.
    global_timings = []
    for i in range(10):

        factory = DirtFactory(n_agents=10, done_at_collision=False,
                              level_name='rooms', max_steps=1000,
                              doors_have_area=False,
                              obs_prop=obs_props, parse_doors=True,
                              verbose=True,
                              mv_prop=move_props, dirt_prop=dirt_props,
                              # inject_agents=[TSPDirtAgent],
                              )

        # noinspection DuplicatedCode
        n_actions = factory.action_space.n - 1
        _ = factory.observation_space
        obs_space = factory.observation_space
        obs_space_named = factory.named_observation_space
        action_space_named = factory.named_action_space
        times = []
        for epoch in range(10):
            start_time = time.time()
            random_actions = [[random.randint(0, n_actions) for _
                               in range(factory.n_agents)] for _
                              in range(factory.max_steps+1)]
            env_state = factory.reset()
            if render:
                factory.render()
            # tsp_agent = factory.get_injected_agents()[0]

            rwrd = 0
            for agent_i_action in random_actions:
                # agent_i_action = tsp_agent.predict()
                env_state, step_rwrd, done_bool, info_obj = factory.step(agent_i_action)
                rwrd += step_rwrd
                if render:
                    factory.render()
                if done_bool:
                    break
            times.append(time.time() - start_time)
            # print(f'Factory run {epoch} done, reward is:\n {r}')
        print('Mean Time Taken: ', sum(times) / len(times))
        global_timings.extend(times)
    print('Mean Time Taken: ', sum(global_timings) / len(global_timings))
    # The middle element is only the median once the timings are sorted.
    print('Median Time Taken: ', sorted(global_timings)[len(global_timings)//2])
|
@ -1,38 +0,0 @@
|
||||
from typing import Union
|
||||
|
||||
from environments.factory.additional.doors.doors_entities import Door
|
||||
from environments.factory.base.registers import EntityCollection
|
||||
|
||||
from environments.factory.additional.doors.doors_util import Constants as c
|
||||
|
||||
|
||||
class Doors(EntityCollection):
    """Collection of `Door` entities."""

    _accepted_objects = Door

    def __init__(self, *args, indicate_area=False, **kwargs):
        self.indicate_area = indicate_area
        self._area_marked = False
        super(Doors, self).__init__(*args, is_blocking_light=True, can_collide=True, **kwargs)

    def get_near_position(self, position: (int, int)) -> Union[None, Door]:
        """Return the first door that has `position` among its tile's neighboring floor positions.

        :return: The matching door, or None if there is no such door.
        """
        for door in self:
            if position in door.tile.neighboring_floor_pos:
                return door
        return None

    def tick_doors(self):
        """Call `tick` on every door of the collection."""
        for door in self:
            door.tick()

    def as_array(self):
        """Return the parent's array; on the first call, queue the door area markers.

        The markers are only queued when `indicate_area` is set, and at most once.
        """
        if not self._area_marked and self.indicate_area:
            for door in self:
                for tile in door.tile.neighboring_floor:
                    if not self._individual_slices:
                        marker_pos = (0, *tile.pos)
                        self._lazy_eval_transforms.append((marker_pos, c.ACCESS_DOOR_CELL))
            self._area_marked = True
        return super(Doors, self).as_array()
|
@ -1,69 +0,0 @@
|
||||
from environments.factory.base.objects import Entity
|
||||
from environments.factory.additional.doors.doors_util import Constants as c
|
||||
|
||||
|
||||
class Door(Entity):
    """Door entity that is either open or closed and closes itself after a while."""

    def __init__(self, *args, closed_on_init=True, auto_close_interval=10, indicate_area=False, **kwargs):
        super(Door, self).__init__(*args, **kwargs)
        self._state = c.CLOSED_DOOR
        self.indicate_area = indicate_area
        self.auto_close_interval = auto_close_interval
        self.time_to_close = -1
        if not closed_on_init:
            self._open()

    @property
    def is_closed(self):
        return self._state == c.CLOSED_DOOR

    @property
    def is_open(self):
        return self._state == c.OPEN_DOOR

    @property
    def status(self):
        return self._state

    @property
    def is_blocking(self):
        # A door blocks unless it is open.
        return not self.is_open

    @property
    def can_collide(self):
        # A door can be collided with unless it is open.
        return not self.is_open

    @property
    def encoding(self):
        # This is important, as the shadow is checked by occupation value.
        if self.is_closed:
            return c.CLOSED_DOOR_CELL
        return c.OPEN_DOOR_CELL

    @property
    def str_state(self):
        if self.is_open:
            return 'open'
        return 'closed'

    def summarize_state(self):
        """The parent's state summary, extended by the door state and the close countdown."""
        summary = super().summarize_state()
        summary.update(state=str(self.str_state), time_to_close=int(self.time_to_close))
        return summary

    def use(self):
        """Toggle the door: close it if it is open, open it otherwise."""
        if self._state == c.OPEN_DOOR:
            self._close()
        else:
            self._open()

    def tick(self):
        """Advance the auto-close countdown of an open door.

        Nothing happens unless the door is open and `len(self.tile) == 1`. Then the
        countdown is decremented while it is non-zero; at zero the door is used
        (closed).
        """
        if not self.is_open or len(self.tile) != 1:
            return
        if self.time_to_close:
            self.time_to_close -= 1
        else:
            self.use()

    def _open(self):
        self._state = c.OPEN_DOOR
        self._collection.notify_change_to_value(self)
        # Opening re-arms the auto-close countdown.
        self.time_to_close = self.auto_close_interval

    def _close(self):
        self._state = c.CLOSED_DOOR
        self._collection.notify_change_to_value(self)
|
@ -1,31 +0,0 @@
|
||||
|
||||
from typing import NamedTuple
|
||||
|
||||
from environments.helpers import Constants as BaseConstants, EnvActions as BaseActions
|
||||
|
||||
|
||||
class Constants(BaseConstants):
    """Door-related constants on top of the shared base constants."""

    # Identifiers
    DOOR = 'Door'          # Identifier of single door entities.
    DOORS = 'Doors'        # Identifier of door objects and sets (collections).
    DOOR_SYMBOL = 'D'      # Door identifier for resolving the string based map files.

    # Cell values used in the observation
    ACCESS_DOOR_CELL = 1 / 3   # Cell next to a door (access area).
    OPEN_DOOR_CELL = 2 / 3     # Cell holding an open door.
    CLOSED_DOOR_CELL = 3 / 3   # Cell holding a closed door.

    # Door states
    CLOSED_DOOR = 'closed'     # Identifier to compare the door-is-closed state.
    OPEN_DOOR = 'open'         # Identifier to compare the door-is-open state.
    # ACCESS_DOOR = 'access'   # Identifier to compare access positions
|
||||
|
||||
|
||||
class Actions(BaseActions):
    """Door-related action identifiers on top of the shared base actions."""

    USE_DOOR = 'use_door'  # Identifier of the action that toggles a door.
|
||||
|
||||
|
||||
class RewardsDoor(NamedTuple):
    """Reward values handed out for door usage."""

    # Reward for a successful use-door action.
    USE_DOOR_VALID: float = -0.00
    # Reward for a use-door action that found no door.
    USE_DOOR_FAIL: float = -0.01
|
||||
|
||||
|
||||
class DoorProperties(NamedTuple):
    """Configuration options for the door extension."""

    # Whether the door area should be indicated in the agents' observation.
    indicate_door_area: bool = True
|
@ -1,196 +0,0 @@
|
||||
import time
|
||||
from typing import List, Union, Dict
|
||||
import random
|
||||
|
||||
import numpy as np
|
||||
|
||||
from environments.factory.additional.doors.doors_collections import Doors
|
||||
from environments.factory.additional.doors.doors_util import DoorProperties, RewardsDoor, Constants, Actions
|
||||
from environments.factory.base.base_factory import BaseFactory
|
||||
from environments.factory.base.objects import Agent, Action
|
||||
from environments.factory.base.registers import Entities
|
||||
|
||||
from environments import helpers as h
|
||||
|
||||
from environments.factory.base.renderer import RenderEntity
|
||||
from environments.utility_classes import ObservationProperties
|
||||
|
||||
|
||||
def softmax(x):
    """Return the softmax of ``x``, normalised over all of its elements.

    The maximum of ``x`` is subtracted before exponentiation to keep the
    exponentials numerically stable; the result is unaffected by the shift.
    """
    shifted = x - np.max(x)
    exponentials = np.exp(shifted)
    return exponentials / exponentials.sum()
|
||||
|
||||
|
||||
def entropy(x):
    """Return ``-sum(x * log(x + 1e-8))``; the small offset avoids ``log(0)``."""
    weighted_logs = x * np.log(x + 1e-8)
    return -weighted_logs.sum()
|
||||
|
||||
|
||||
# Short module-level aliases for the door constants and the door actions.
c, a = Constants, Actions
|
||||
|
||||
|
||||
# noinspection PyAttributeOutsideInit, PyAbstractClass
|
||||
class DoorFactory(BaseFactory):
    """Factory environment extension that adds usable doors.

    Doors are parsed from the level via ``c.DOOR_SYMBOL``, registered as the
    ``c.DOORS`` entity collection, ticked once per step and toggled by agents
    through the ``a.USE_DOOR`` action.
    """

    def __init__(self, *args,
                 door_properties: DoorProperties = DoorProperties(), rewards_door: RewardsDoor = RewardsDoor(),
                 env_seed=None, **kwargs):
        """Store the door configuration and forward everything else to the base class.

        :param door_properties: ``DoorProperties`` instance or a dict of its fields.
        :param rewards_door:    ``RewardsDoor`` instance or a dict of its fields.
        :param env_seed:        Seed for the environment; ``None`` draws a fresh
                                seed from ``time.time_ns()`` for this instance.
        """
        # A ``time.time_ns()`` default in the signature is evaluated only once, when the
        # function is defined, so it is resolved here at call time instead.
        if env_seed is None:
            env_seed = time.time_ns()
        if isinstance(door_properties, dict):
            door_properties = DoorProperties(**door_properties)
        if isinstance(rewards_door, dict):
            rewards_door = RewardsDoor(**rewards_door)
        self.door_properties = door_properties
        self.rewards_door = rewards_door
        self._door_rng = np.random.default_rng(env_seed)
        self._doors: Doors
        kwargs.update(env_seed=env_seed)
        # TODO: Reset ---> document this
        super().__init__(*args, **kwargs)

    @property
    def actions_hook(self) -> Union[Action, List[Action]]:
        """Append the use-door action to the actions provided by the parent."""
        super_actions = super().actions_hook
        super_actions.append(Action(str_ident=a.USE_DOOR))
        return super_actions

    @property
    def entities_hook(self) -> Dict[str, Entities]:
        """Add a ``Doors`` collection if the parsed level contains any door symbol."""
        super_entities = super().entities_hook

        parsed_doors = h.one_hot_level(self._parsed_level, c.DOOR_SYMBOL)
        # Pad by the observation radius, as done for the level array itself.
        parsed_doors = np.pad(parsed_doors, self.obs_prop.pomdp_r, 'constant', constant_values=0)
        if np.any(parsed_doors):
            door_tiles = [self[c.FLOOR].by_pos(tuple(pos)) for pos in np.argwhere(parsed_doors == c.OCCUPIED_CELL)]
            # NOTE(review): the flag is read from ``obs_prop`` although ``DoorProperties``
            # (stored as ``self.door_properties``) declares ``indicate_door_area`` - confirm
            # which of the two is intended.
            doors = Doors.from_tiles(door_tiles, self._level_shape, indicate_area=self.obs_prop.indicate_door_area,
                                     entity_kwargs=dict()
                                     )
            super_entities.update(({c.DOORS: doors}))
        return super_entities

    def render_assets_hook(self, mode='human'):
        """Extend the parent's render assets by one render entity per door."""
        additional_assets = super().render_assets_hook()
        doors = []
        for i, door in enumerate(self[c.DOORS]):
            name, state = 'door_open' if door.is_open else 'door_closed', 'blank'
            doors.append(RenderEntity(name, door.pos, 1, 'none', state, i + 1))
        additional_assets.extend(doors)
        return additional_assets

    def step_hook(self) -> (List[dict], dict):
        """Run the parent step hook, then tick all doors."""
        super_reward_info = super().step_hook()
        # Step the door close interval
        # TODO: Maybe move this to self.post_step_hook? May collide with reward calculation.
        if doors := self[c.DOORS]:
            doors.tick_doors()
        return super_reward_info

    def do_additional_actions(self, agent: Agent, action: Action) -> (dict, dict):
        """Resolve ``action`` via the parent first; handle ``a.USE_DOOR`` otherwise.

        Returns ``None`` when neither the parent nor this class knows the action.
        """
        action_result = super().do_additional_actions(agent, action)
        if action_result is not None:
            return action_result
        if action == a.USE_DOOR:
            return self.use_door_action(agent)
        return None

    def use_door_action(self, agent: Agent):
        """Toggle the door found near ``agent`` and build the matching reward dict.

        :return: Tuple of the validity flag and the reward dict.
        """
        # Check whether there is a door near the agent's position:
        door = self[c.DOORS].get_near_position(agent.pos)
        if door is not None:
            door.use()
            valid = c.VALID
            self.print(f'{agent.name} just used a {door.name} at {door.pos}')
            info_dict = {f'{agent.name}_door_use': 1, 'door_use': 1}
        # When there is none...
        else:
            valid = c.NOT_VALID
            info_dict = {f'{agent.name}_failed_door_use': 1, 'failed_door_use': 1}
            self.print(f'{agent.name} just tried to use a door at {agent.pos}, but there is none.')

        reward = dict(value=self.rewards_door.USE_DOOR_VALID if valid else self.rewards_door.USE_DOOR_FAIL,
                      reason=a.USE_DOOR, info=info_dict)

        return valid, reward

    def reset_hook(self) -> None:
        super().reset_hook()
        # There is nothing to reset.

    def check_additional_done(self) -> (bool, dict):
        """Pass the parent's done flag and info dict through unchanged."""
        super_done, super_dict = super().check_additional_done()
        return super_done, super_dict

    def observations_hook(self) -> Dict[str, np.typing.ArrayLike]:
        """Add the door collection array under ``c.DOORS`` to the parent's observations."""
        additional_observations = super().observations_hook()

        additional_observations.update({c.DOORS: self[c.DOORS].as_array()})
        return additional_observations

    def post_step_hook(self) -> List[Dict[str, int]]:
        """Pass the parent's post-step result through unchanged."""
        super_post_step = super(DoorFactory, self).post_step_hook()
        return super_post_step
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke/timing run: random agents in the 'rooms' level with doors.
    from environments.utility_classes import AgentRenderOptions as aro
    render = True

    door_props = DoorProperties(
        indicate_door_area=True
    )

    obs_props = ObservationProperties(render_agents=aro.COMBINED, omit_agent_self=True,
                                      pomdp_r=2, additional_agent_placeholder=None, cast_shadows=True
                                      )

    move_props = {'allow_square_movement': True,
                  'allow_diagonal_movement': False,
                  'allow_no_op': False}
    global_timings = []
    for i in range(10):

        # ``door_properties`` is the keyword DoorFactory actually accepts; the former
        # ``dirt_prop`` ended up in **kwargs and was ignored.
        factory = DoorFactory(n_agents=10, done_at_collision=False,
                              level_name='rooms', max_steps=1000,
                              obs_prop=obs_props, parse_doors=True,
                              verbose=True,
                              mv_prop=move_props, door_properties=door_props,
                              # inject_agents=[TSPDirtAgent],
                              )

        # noinspection DuplicatedCode
        n_actions = factory.action_space.n - 1
        _ = factory.observation_space
        obs_space = factory.observation_space
        obs_space_named = factory.named_observation_space
        action_space_named = factory.named_action_space
        times = []
        for epoch in range(10):
            start_time = time.time()
            random_actions = [[random.randint(0, n_actions) for _
                               in range(factory.n_agents)] for _
                              in range(factory.max_steps+1)]
            env_state = factory.reset()
            if render:
                factory.render()
            # tsp_agent = factory.get_injected_agents()[0]

            rwrd = 0
            for agent_i_action in random_actions:
                # agent_i_action = tsp_agent.predict()
                env_state, step_rwrd, done_bool, info_obj = factory.step(agent_i_action)
                rwrd += step_rwrd
                if render:
                    factory.render()
                if done_bool:
                    break
            times.append(time.time() - start_time)
            # print(f'Factory run {epoch} done, reward is:\n {r}')
        print('Mean Time Taken: ', sum(times) / len(times))
        global_timings.extend(times)
    print('Mean Time Taken: ', sum(global_timings) / len(global_timings))
    # The middle element is only a median once the timings are sorted.
    print('Median Time Taken: ', sorted(global_timings)[len(global_timings)//2])
|
@ -1,193 +0,0 @@
|
||||
import time
|
||||
from typing import List, Union, Dict
|
||||
import numpy as np
|
||||
import random
|
||||
|
||||
from environments.factory.additional.item.item_collections import Items, Inventories, DropOffLocations
|
||||
from environments.factory.additional.item.item_util import Constants, Actions, RewardsItem, ItemProperties
|
||||
from environments.factory.base.base_factory import BaseFactory
|
||||
from environments.factory.base.objects import Agent, Action
|
||||
from environments.factory.base.registers import Entities
|
||||
|
||||
from environments.factory.base.renderer import RenderEntity
|
||||
|
||||
# Short module-level aliases for the item constants and the item actions.
c, a = Constants, Actions
|
||||
|
||||
|
||||
# noinspection PyAttributeOutsideInit, PyAbstractClass
|
||||
class ItemFactory(BaseFactory):
    """Factory environment extension with items, agent inventories and drop-off locations.

    Agents use ``a.ITEM_ACTION`` to pick up the item on their tile or, when
    standing on a drop-off location, to place an item from their inventory.
    """

    # noinspection PyMissingConstructor
    def __init__(self, *args, item_prop: ItemProperties = ItemProperties(), env_seed=None,
                 rewards_item: RewardsItem = RewardsItem(), **kwargs):
        """Store the item configuration and forward everything else to the base class.

        :param item_prop:    ``ItemProperties`` instance or a dict of its fields.
        :param env_seed:     Seed for the environment; ``None`` draws a fresh seed
                             from ``time.time_ns()`` for this instance.
        :param rewards_item: ``RewardsItem`` instance or a dict of its fields.
        """
        # A ``time.time_ns()`` default in the signature is evaluated only once, when the
        # function is defined, so it is resolved here at call time instead.
        if env_seed is None:
            env_seed = time.time_ns()
        if isinstance(item_prop, dict):
            item_prop = ItemProperties(**item_prop)
        if isinstance(rewards_item, dict):
            rewards_item = RewardsItem(**rewards_item)
        self.item_prop = item_prop
        self.rewards_item = rewards_item
        kwargs.update(env_seed=env_seed)
        self._item_rng = np.random.default_rng(env_seed)
        # NOTE(review): this check passes whenever kwargs carries no truthy '_pomdp_r'
        # entry - confirm that the key name is what callers actually pass.
        assert (item_prop.n_items <= ((1 + kwargs.get('_pomdp_r', 0) * 2) ** 2)) or not kwargs.get('_pomdp_r', 0)
        super().__init__(*args, **kwargs)

    @property
    def actions_hook(self) -> Union[Action, List[Action]]:
        """Append the item action to the actions provided by the parent."""
        # noinspection PyUnresolvedReferences
        super_actions = super().actions_hook
        super_actions.append(Action(str_ident=a.ITEM_ACTION))
        return super_actions

    @property
    def entities_hook(self) -> Dict[str, Entities]:
        """Create drop-off locations, items and per-agent inventories."""
        # noinspection PyUnresolvedReferences
        super_entities = super().entities_hook

        empty_tiles = self[c.FLOOR].empty_tiles[:self.item_prop.n_drop_off_locations]
        drop_offs = DropOffLocations.from_tiles(
            empty_tiles, self._level_shape,
            entity_kwargs=dict(
                storage_size_until_full=self.item_prop.max_dropoff_storage_size)
        )
        item_register = Items(self._level_shape)
        empty_tiles = self[c.FLOOR].empty_tiles[:self.item_prop.n_items]
        item_register.spawn_items(empty_tiles)

        inventories = Inventories(self._obs_shape, self._level_shape)
        inventories.spawn_inventories(self[c.AGENT], self.item_prop.max_agent_inventory_capacity)

        super_entities.update({c.DROP_OFF: drop_offs, c.ITEM: item_register, c.INVENTORY: inventories})
        return super_entities

    def per_agent_raw_observations_hook(self, agent) -> Dict[str, np.typing.ArrayLike]:
        """Add the inventory array of ``agent`` to the parent's per-agent observations."""
        additional_raw_observations = super().per_agent_raw_observations_hook(agent)
        additional_raw_observations.update({c.INVENTORY: self[c.INVENTORY].by_entity(agent).as_array()})
        return additional_raw_observations

    def observations_hook(self) -> Dict[str, np.typing.ArrayLike]:
        """Add the item and drop-off arrays to the parent's observations."""
        additional_observations = super().observations_hook()
        additional_observations.update({c.ITEM: self[c.ITEM].as_array()})
        additional_observations.update({c.DROP_OFF: self[c.DROP_OFF].as_array()})
        return additional_observations

    def do_item_action(self, agent: Agent) -> (dict, dict):
        """Drop off or pick up an item at the agent's position.

        A drop-off location on the agent's tile takes precedence over an item
        lying there.

        :return: Tuple of the validity flag and the reward dict.
        """
        inventory = self[c.INVENTORY].by_entity(agent)
        if drop_off := self[c.DROP_OFF].by_pos(agent.pos):
            if inventory:
                # The popped item is handed to the drop-off; its return value decides validity.
                valid = drop_off.place_item(inventory.pop())
            else:
                valid = c.NOT_VALID
            if valid:
                self.print(f'{agent.name} just dropped of an item at {drop_off.pos}.')
                info_dict = {f'{agent.name}_DROPOFF_VALID': 1, 'DROPOFF_VALID': 1}
            else:
                self.print(f'{agent.name} just tried to drop off at {agent.pos}, but failed.')
                info_dict = {f'{agent.name}_DROPOFF_FAIL': 1, 'DROPOFF_FAIL': 1}
            reward = dict(value=self.rewards_item.DROP_OFF_VALID if valid else self.rewards_item.DROP_OFF_FAIL,
                          reason=a.ITEM_ACTION, info=info_dict)
            return valid, reward
        elif item := self[c.ITEM].by_pos(agent.pos):
            # Move the item from the level into the agent's inventory.
            item.change_parent_collection(inventory)
            item.set_tile_to(self._NO_POS_TILE)
            self.print(f'{agent.name} just picked up an item at {agent.pos}')
            info_dict = {f'{agent.name}_{a.ITEM_ACTION}_VALID': 1, f'{a.ITEM_ACTION}_VALID': 1}
            return c.VALID, dict(value=self.rewards_item.PICK_UP_VALID, reason=a.ITEM_ACTION, info=info_dict)
        else:
            self.print(f'{agent.name} just tried to pick up an item at {agent.pos}, but failed.')
            info_dict = {f'{agent.name}_{a.ITEM_ACTION}_FAIL': 1, f'{a.ITEM_ACTION}_FAIL': 1}
            return c.NOT_VALID, dict(value=self.rewards_item.PICK_UP_FAIL, reason=a.ITEM_ACTION, info=info_dict)

    def do_additional_actions(self, agent: Agent, action: Action) -> (dict, dict):
        """Resolve ``action`` via the parent first; handle ``a.ITEM_ACTION`` otherwise.

        Returns ``None`` when neither the parent nor this class knows the action.
        """
        # noinspection PyUnresolvedReferences
        action_result = super().do_additional_actions(agent, action)
        if action_result is not None:
            return action_result
        if action == a.ITEM_ACTION:
            return self.do_item_action(agent)
        return None

    def reset_hook(self) -> None:
        """Reset the spawn countdown and trigger an item spawn."""
        # noinspection PyUnresolvedReferences
        super().reset_hook()
        self._next_item_spawn = self.item_prop.spawn_frequency
        self.trigger_item_spawn()

    def trigger_item_spawn(self):
        """Spawn items on empty tiles until ``n_items`` exist and restart the countdown."""
        if item_to_spawns := max(0, (self.item_prop.n_items - len(self[c.ITEM]))):
            empty_tiles = self[c.FLOOR].empty_tiles[:item_to_spawns]
            self[c.ITEM].spawn_items(empty_tiles)
            self._next_item_spawn = self.item_prop.spawn_frequency
            self.print(f'{item_to_spawns} new items have been spawned; next spawn in {self._next_item_spawn}')
        else:
            self.print('No Items are spawning, limit is reached.')

    def step_hook(self) -> (List[dict], dict):
        """Age items, remove those whose despawn counter hit zero, and handle spawning."""
        # noinspection PyUnresolvedReferences
        super_reward_info = super().step_hook()
        # Iterate over a copy, because items may be deleted from the collection below.
        for item in list(self[c.ITEM].values()):
            if item.auto_despawn >= 1:
                item.set_auto_despawn(item.auto_despawn-1)
            elif not item.auto_despawn:
                self[c.ITEM].delete_env_object(item)
            else:
                pass

        if not self._next_item_spawn:
            self.trigger_item_spawn()
        else:
            self._next_item_spawn = max(0, self._next_item_spawn-1)
        return super_reward_info

    def render_assets_hook(self, mode='human'):
        """Extend the parent's render assets by all placed items and all drop-offs."""
        # noinspection PyUnresolvedReferences
        additional_assets = super().render_assets_hook()
        # Items on the no-position tile (picked up) are not rendered.
        items = [RenderEntity(c.ITEM, item.tile.pos) for item in self[c.ITEM] if item.tile != self._NO_POS_TILE]
        additional_assets.extend(items)
        drop_offs = [RenderEntity(c.DROP_OFF, drop_off.tile.pos) for drop_off in self[c.DROP_OFF]]
        additional_assets.extend(drop_offs)
        return additional_assets
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke run: random agents in the 'rooms' level with items.
    from environments.utility_classes import AgentRenderOptions as aro, ObservationProperties

    render = True

    item_probs = ItemProperties(n_items=30, n_drop_off_locations=6)
    obs_props = ObservationProperties(render_agents=aro.SEPERATE, omit_agent_self=True, pomdp_r=2)
    move_props = dict(allow_square_movement=True,
                      allow_diagonal_movement=True,
                      allow_no_op=False)

    factory = ItemFactory(n_agents=6, done_at_collision=False,
                          level_name='rooms', max_steps=400,
                          obs_prop=obs_props, parse_doors=True,
                          record_episodes=True, verbose=True,
                          mv_prop=move_props, item_prop=item_probs
                          )

    # noinspection DuplicatedCode
    n_actions = factory.action_space.n - 1
    obs_space = factory.observation_space
    obs_space_named = factory.named_observation_space

    for epoch in range(400):
        # One random action per agent for every possible step of the episode.
        random_actions = [[random.randint(0, n_actions) for _ in range(factory.n_agents)]
                          for _ in range(factory.max_steps + 1)]
        env_state = factory.reset()
        rwrd = 0
        for agent_i_action in random_actions:
            env_state, step_r, done_bool, info_obj = factory.step(agent_i_action)
            rwrd += step_r
            if render:
                factory.render()
            if done_bool:
                break
        print(f'Factory run {epoch} done, reward is:\n {rwrd}')
|
@ -1,89 +0,0 @@
|
||||
from typing import List
|
||||
|
||||
import numpy as np
|
||||
|
||||
from environments.factory.base.objects import Floor, Agent
|
||||
from environments.factory.base.registers import EntityCollection, BoundEnvObjCollection, ObjectCollection
|
||||
from environments.factory.additional.item.item_entities import Item, DropOffLocation
|
||||
|
||||
|
||||
class Items(EntityCollection):
    """Collection of ``Item`` entities."""

    _accepted_objects = Item

    def spawn_items(self, tiles: List[Floor]):
        """Create one item per given tile and add them all to this collection."""
        self.add_additional_items([Item(tile, self) for tile in tiles])

    def despawn_items(self, items: List[Item]):
        """Delete the given items from this collection; a single ``Item`` is accepted too."""
        if isinstance(items, Item):
            items = [items]
        for item in items:
            del self[item]
|
||||
|
||||
|
||||
class Inventory(BoundEnvObjCollection):
    """Item collection bound to one agent."""

    def __init__(self, agent: Agent, capacity: int, *args, **kwargs):
        """Bind the inventory to ``agent`` and remember its ``capacity``."""
        super(Inventory, self).__init__(agent, *args, is_blocking_light=False, can_be_shadowed=False, **kwargs)
        self.capacity = capacity

    @property
    def name(self):
        owner_name = self._bound_entity.name
        return f'{self.__class__.__name__}({owner_name})'

    def as_array(self):
        """Return the array representation, creating the backing array on first use."""
        if self._array is None:
            self._array = np.zeros((1, *self._shape))
        return super(Inventory, self).as_array()

    def summarize_states(self, **kwargs):
        """Return a dict of the public attributes, the item summaries, the name and the owner."""
        summary = {attr: value for attr, value in self.__dict__.items()
                   if attr != 'data' and not attr.startswith('_')}
        summary.update(dict(items=[item.summarize_state(**kwargs) for _, item in self.items()]))
        summary.update(dict(name=self.name, belongs_to=self._bound_entity.name))
        return summary

    def pop(self):
        """Remove the item at index 0 from the inventory and return it."""
        first_item = self[0]
        self.delete_env_object(first_item)
        return first_item
|
||||
|
||||
|
||||
class Inventories(ObjectCollection):
    """Collection holding one ``Inventory`` per agent."""

    _accepted_objects = Inventory
    is_blocking_light = False
    can_be_shadowed = False

    def __init__(self, obs_shape, *args, **kwargs):
        """Remember ``obs_shape``; it is handed to every inventory that gets spawned."""
        super(Inventories, self).__init__(*args, is_per_agent=True, individual_slices=True, **kwargs)
        self._obs_shape = obs_shape

    def as_array(self):
        """Stack the arrays of all inventories along a new first axis."""
        return np.stack([inventory.as_array() for inventory in self])

    def spawn_inventories(self, agents, capacity):
        """Create an inventory with the given capacity for every agent."""
        new_inventories = [self._accepted_objects(agent, capacity, self._obs_shape) for agent in agents]
        self.add_additional_items(new_inventories)

    def idx_by_entity(self, entity):
        """Return the index of the first inventory belonging to ``entity``, or None."""
        for idx, inventory in enumerate(self):
            if inventory.belongs_to_entity(entity):
                return idx
        return None

    def by_entity(self, entity):
        """Return the first inventory belonging to ``entity``, or None."""
        for inventory in self:
            if inventory.belongs_to_entity(entity):
                return inventory
        return None

    def summarize_states(self, **kwargs):
        """Return the state summaries of all inventories as a list."""
        return [inventory.summarize_states(**kwargs) for _, inventory in self.items()]
|
||||
|
||||
|
||||
class DropOffLocations(EntityCollection):
    """Collection of ``DropOffLocation`` entities, flagged as stateless."""

    _accepted_objects = DropOffLocation
    _stateless_entities = True
|
||||
|
@ -1,31 +0,0 @@
|
||||
from typing import NamedTuple
|
||||
|
||||
from environments.helpers import Constants as BaseConstants, EnvActions as BaseActions
|
||||
|
||||
|
||||
class Constants(BaseConstants):
    """Item-related constants on top of the shared base constants."""

    NO_ITEM = 0
    ITEM_DROP_OFF = 1

    # Identifiers used by the item environment
    ITEM = 'Item'
    INVENTORY = 'Inventory'
    DROP_OFF = 'Drop_Off'
|
||||
|
||||
|
||||
class Actions(BaseActions):
    """Item-related action identifiers on top of the shared base actions."""

    ITEM_ACTION = 'ITEMACTION'  # Identifier of the combined pick-up / drop-off action.
|
||||
|
||||
|
||||
class RewardsItem(NamedTuple):
    """Reward values handed out for item pick-up and drop-off attempts."""

    # Drop-off outcomes
    DROP_OFF_VALID: float = 0.1
    DROP_OFF_FAIL: float = -0.1
    # Pick-up outcomes
    PICK_UP_FAIL: float = -0.1
    PICK_UP_VALID: float = 0.1
|
||||
|
||||
|
||||
class ItemProperties(NamedTuple):
    """Configuration options for the item extension."""

    # How many items are there at the same time
    n_items: int = 5
    # Spawn frequency in steps
    spawn_frequency: int = 10
    # How many drop-off locations are there at the same time
    n_drop_off_locations: int = 5
    # How many items are needed until the drop-off is full
    max_dropoff_storage_size: int = 0
    # How many items are needed until the agent inventory is full
    max_agent_inventory_capacity: int = 5
|
@ -1,651 +0,0 @@
|
||||
import abc
|
||||
import time
|
||||
from collections import defaultdict
|
||||
from itertools import chain
|
||||
from pathlib import Path
|
||||
from typing import List, Union, Iterable, Dict
|
||||
import numpy as np
|
||||
|
||||
import gym
|
||||
from gym import spaces
|
||||
from gym.wrappers import FrameStack
|
||||
|
||||
from environments.factory.base.shadow_casting import Map
|
||||
from environments import helpers as h
|
||||
from environments.helpers import Constants as c
|
||||
from environments.helpers import EnvActions as a
|
||||
from environments.helpers import RewardsBase
|
||||
from environments.factory.base.objects import Agent, Floor, Action
|
||||
from environments.factory.base.registers import Actions, Entities, Agents, Floors, Walls, PlaceHolders, \
|
||||
GlobalPositions
|
||||
from environments.utility_classes import MovementProperties, ObservationProperties, MarlFrameStack
|
||||
from environments.utility_classes import AgentRenderOptions as a_obs
|
||||
|
||||
import simplejson
|
||||
|
||||
REC_TAC = 'rec_'
|
||||
|
||||
|
||||
# noinspection PyAttributeOutsideInit
|
||||
class BaseFactory(gym.Env):
|
||||
|
||||
@property
def action_space(self):
    """Discrete gym space with one entry per registered action."""
    n_actions = len(self._actions)
    return spaces.Discrete(n_actions)
|
||||
|
||||
@property
def named_action_space(self):
    """Mapping of each action's identifier to its position among the registered actions."""
    mapping = {}
    for position, action in enumerate(self._actions.values()):
        mapping[action.identifier] = position
    return mapping
|
||||
|
||||
@property
def observation_space(self):
    """Box space in [0, 1] shaped like one agent's freshly built observation."""
    obs, _ = self._build_observations()
    # With several agents the first agent's observation defines the shape.
    shape = obs[0].shape if self.n_agents > 1 else obs.shape
    return spaces.Box(low=0, high=1, shape=shape, dtype=np.float32)
|
||||
|
||||
@property
def named_observation_space(self):
    """Named observation layout as produced by ``_build_observations``.

    With more than one agent only the first entry is returned, as all agents
    currently share the same structure.
    """
    _, named_obs = self._build_observations()
    if self.n_agents <= 1:
        return named_obs
    first_key = list(named_obs.keys())[0]
    return named_obs[first_key]
|
||||
|
||||
@property
def pomdp_diameter(self):
    """Side length of the partial view: the radius on both sides plus the centre cell."""
    return 2 * self._pomdp_r + 1
|
||||
|
||||
@property
def movement_actions(self):
    """The ``movement_actions`` of the registered action collection."""
    registered_actions = self._actions
    return registered_actions.movement_actions
|
||||
|
||||
@property
def params(self) -> dict:
    """All instance attributes without a leading underscore, plus the class name."""
    # Names starting with '__' also start with '_', so a single check covers both.
    public_attrs = {}
    for name, value in self.__dict__.items():
        if not name.startswith('_'):
            public_attrs[name] = value
    public_attrs['class_name'] = self.__class__.__name__
    return public_attrs
|
||||
|
||||
@property
def summarize_header(self):
    """State summary including stateless entities, extended by the action summary."""
    header = self._summarize_state(stateless_entities=True)
    action_summary = self._actions.summarize()
    header.update(actions=action_summary)
    return header
|
||||
|
||||
def __enter__(self):
    """Return the env itself, or a frame-stacking wrapper when frames are to be stacked."""
    n_frames = self.obs_prop.frames_to_stack
    if n_frames == 0:
        return self
    return MarlFrameStack(FrameStack(self, n_frames))
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
    """Close the environment when the ``with`` block is left; exceptions are not suppressed."""
    self.close()
|
||||
|
||||
def __init__(self, level_name='simple', n_agents=1, max_steps=int(5e2),
             mv_prop: MovementProperties = MovementProperties(),
             obs_prop: ObservationProperties = ObservationProperties(),
             rewards_base: RewardsBase = RewardsBase(),
             done_at_collision=False, inject_agents: Union[None, List] = None,
             verbose=False, env_seed=None, individual_rewards=False,
             class_name='', **kwargs):
    """Configure the environment, parse the level file and perform an initial reset.

    ``mv_prop``, ``obs_prop`` and ``rewards_base`` may each be given as a dict of
    the respective fields. ``env_seed=None`` draws a fresh seed from
    ``time.time_ns()`` for this instance. Unknown keyword arguments are
    reported on stdout and otherwise ignored.

    :raises AssertionError: if ``obs_prop.frames_to_stack`` is negative or 1.
    """
    # A ``time.time_ns()`` default in the signature is evaluated only once, when the
    # function is defined, so it is resolved here at call time instead.
    if env_seed is None:
        env_seed = time.time_ns()

    if class_name:
        print(f'You loaded parameters for {class_name}', f'this is: {self.__class__.__name__}')

    if isinstance(mv_prop, dict):
        mv_prop = MovementProperties(**mv_prop)
    if isinstance(obs_prop, dict):
        obs_prop = ObservationProperties(**obs_prop)
    if isinstance(rewards_base, dict):
        rewards_base = RewardsBase(**rewards_base)

    assert obs_prop.frames_to_stack != 1 and \
           obs_prop.frames_to_stack >= 0, \
           "'frames_to_stack' cannot be negative or 1."
    if kwargs:
        print(f'Following kwargs were passed, but ignored: {kwargs}')

    # Attribute Assignment
    self.env_seed = env_seed
    self.seed(env_seed)
    self._base_rng = np.random.default_rng(self.env_seed)
    self.mv_prop = mv_prop
    self.obs_prop = obs_prop
    self.rewards_base = rewards_base
    self.level_name = level_name
    self._level_shape = None
    self._obs_shape = None
    self.verbose = verbose
    self._renderer = None  # expensive - don't use it when not required !
    self._entities = Entities()

    self.n_agents = n_agents
    # Level files are looked up by name, relative to this module's location.
    level_filepath = Path(__file__).parent.parent / h.LEVELS_DIR / f'{self.level_name}.txt'
    self._parsed_level = h.parse_level(level_filepath)

    self.max_steps = max_steps
    self._pomdp_r = self.obs_prop.pomdp_r

    self.done_at_collision = done_at_collision
    self._record_episodes = False
    self._injected_agents = inject_agents or []
    self.individual_rewards = individual_rewards

    # TODO: Reset ---> document this
    self.reset()
|
||||
|
||||
def __getitem__(self, item):
    """Look ``item`` up in the environment's entity register."""
    entity_register = self._entities
    return entity_register[item]
|
||||
|
||||
def _base_init_env(self):
    """Build a fresh entity register from the parsed level and return it.

    Registered in this order: walls, floor, agents (spawned and injected), the
    optional agent placeholder, entities from ``entities_hook`` and the
    optional global positions. The action collection is rebuilt as well.
    """
    # Fresh register for all entities
    self._entities = Entities()

    # Level: remember the unpadded shape, then pad by the observation radius with occupied cells.
    level_array = h.one_hot_level(self._parsed_level)
    self._level_init_shape = level_array.shape
    level_array = np.pad(level_array, self.obs_prop.pomdp_r, 'constant', constant_values=c.OCCUPIED_CELL)

    self._level_shape = level_array.shape
    # A partial view is square with the POMDP diameter; otherwise the whole level is observed.
    self._obs_shape = (self.pomdp_diameter, ) * 2 if self.obs_prop.pomdp_r else self._level_shape

    # Walls
    wall_coordinates = np.argwhere(level_array == c.OCCUPIED_CELL)
    walls = Walls.from_argwhere_coordinates(wall_coordinates, self._level_shape)
    self._entities.add_additional_items({c.WALLS: walls})

    # Floor
    floor_coordinates = np.argwhere(level_array == c.FREE_CELL)
    floor = Floors.from_argwhere_coordinates(floor_coordinates, self._level_shape)
    self._entities.add_additional_items({c.FLOOR: floor})

    # Tile used for entities that currently have no position
    self._NO_POS_TILE = Floor(c.NO_POS, None)

    # Actions
    # TODO: Move this to Agent init, so that agents can have individual action sets.
    self._actions = Actions(self.mv_prop)
    if additional_actions := self.actions_hook:
        self._actions.add_additional_items(additional_actions)

    # Agents: injected agents count towards n_agents, the remainder is spawned here.
    agents_to_spawn = self.n_agents-len(self._injected_agents)
    agents_kwargs = dict(individual_slices=self.obs_prop.render_agents == a_obs.SEPERATE,
                         hide_from_obs_builder=self.obs_prop.render_agents in [a_obs.NOT, a_obs.LEVEL],
                         )
    if agents_to_spawn:
        agents = Agents.from_tiles(floor.empty_tiles[:agents_to_spawn], self._level_shape, **agents_kwargs)
    else:
        agents = Agents(self._level_shape, **agents_kwargs)
    if self._injected_agents:
        initialized_injections = list()
        for injection in self._injected_agents:
            agents.add_item(injection(self, floor.empty_tiles[0], agents, static_problem=False))
            initialized_injections.append(agents[-1])
        self._initialized_injections = initialized_injections
    self._entities.add_additional_items({c.AGENT: agents})

    placeholder_value = self.obs_prop.additional_agent_placeholder
    if placeholder_value is not None:
        # TODO: Make this accept Lists for multiple placeholders

        # Empty Observations with either [0, 1, N(0, 1)]
        placeholder = PlaceHolders.from_values(placeholder_value, self._level_shape,
                                               entity_kwargs=dict(fill_value=placeholder_value)
                                               )

        self._entities.add_additional_items({c.AGENT_PLACEHOLDER: placeholder})

    # Additional entities from sub-environments
    if additional_entities := self.entities_hook:
        self._entities.add_additional_items(additional_entities)

    if self.obs_prop.show_global_position_info:
        global_positions = GlobalPositions(self._level_shape)
        # This moved into the GlobalPosition object
        # obs_shape_2d = self._level_shape if not self._pomdp_r else ((self.pomdp_diameter,) * 2)
        global_positions.spawn_global_position_objects(self[c.AGENT])
        self._entities.add_additional_items({c.GLOBAL_POSITION: global_positions})

    return self._entities
|
||||
|
||||
def reset(self) -> np.typing.ArrayLike:
    """Rebuild all entities, run the reset hook and return the initial observation.

    Only the observation is returned (no reward, done flag or info dict); the
    step counter is set back to zero.
    """
    _ = self._base_init_env()
    self.reset_hook()

    self._steps = 0

    obs, _ = self._build_observations()
    return obs
|
||||
|
||||
def step(self, actions):
    """
    Advance the environment by one time step.

    :param actions: One action index per agent. With a single agent, anything that is not a list is
                    converted via ``int`` and wrapped into a one-element list.
    :return: Tuple ``(obs, reward, done, info)``.
    """
    if self.n_agents == 1 and not isinstance(actions, list):
        actions = [int(actions)]

    assert isinstance(actions, Iterable), f'"actions" has to be in [{int, list}]'
    self._steps += 1

    # Pre step Hook for later use
    self.pre_step_hook()

    for action, agent in zip(actions, self[c.AGENT]):
        agent.clear_temp_state()
        action_obj = self._actions[int(action)]
        if a.is_move(action_obj):
            action_valid, reward = self._do_move_action(agent, action_obj)
        elif a.NOOP == action_obj:
            action_valid = c.VALID
            reward = dict(value=self.rewards_base.NOOP, reason=a.NOOP, info={f'{agent.name}_NOOP': 1, 'NOOP': 1})
        else:
            # The tuple assignment fails on purpose when the sub-environment could not resolve the action.
            # noinspection PyTupleAssignmentBalance
            action_valid, reward = self.do_additional_actions(agent, action_obj)
        # The result is attached only after the action has been executed.
        agent.step_result = dict(collisions=[], rewards=[reward], info={},
                                 action_name=action_obj.identifier, action_valid=action_valid)

    # Additional step and Reward, Info Init
    rewards, info = self.step_hook()

    # Todo: Make this faster, so that only tiles of entities that can collide are searched.
    tiles_with_collisions = self.get_all_tiles_with_collisions()
    for tile in tiles_with_collisions:
        guests = tile.guests_that_can_collide
        for i, guest in enumerate(guests):
            if not hasattr(guest, 'step_result'):
                continue
            # Every other colliding guest on this tile is recorded as a collision partner.
            for j, collision in enumerate(guests):
                if j != i:
                    guest.step_result['collisions'].append(collision)

    done = False
    if self.done_at_collision and tiles_with_collisions:
        done = True
        info.update(COLLISION_DONE=True)

    additional_done, additional_done_info = self.check_additional_done()
    done = done or additional_done
    info.update(additional_done_info)

    # Finalize
    reward, reward_info = self.build_reward_result(rewards)
    info.update(reward_info)

    if self._steps >= self.max_steps:
        done = True
    info.update(step_reward=reward, step=self._steps)
    if self._record_episodes:
        info.update(self._summarize_state())

    # Post step Hook for later use
    for post_step_info in self.post_step_hook():
        info.update(post_step_info)

    obs, _ = self._build_observations()
    return obs, reward, done, info
|
||||
|
||||
def _build_observations(self) -> (np.typing.ArrayLike, dict):
    """
    Build the observation array of every agent together with a channel-index explanation.

    :return: ``(obs, explained_idx)``. With a single agent ``obs`` is that agent's array and
             ``explained_idx`` maps observation keys to lists of channel indices. With two or more agents
             the arrays are stacked and ``explained_idx`` maps agent names to such dicts.
    :raises ValueError: If ``n_agents`` is smaller than one.
    """
    per_agent_expl_idx = dict()
    per_agent_obsn = dict()

    # General observations which are shared by all agents
    lvl_obs = self[c.WALLS].as_array()
    if self.obs_prop.render_agents == a_obs.NOT:
        global_agent_obs = None
    elif self.obs_prop.omit_agent_self and self.n_agents == 1:
        global_agent_obs = None
    else:
        global_agent_obs = self[c.AGENT].as_array().copy()
    placeholder_obs = self[c.AGENT_PLACEHOLDER].as_array() if self[c.AGENT_PLACEHOLDER] else None
    add_obs_dict = self.observations_hook()

    for agent_idx, agent in enumerate(self[c.AGENT]):
        obs_dict = dict()

        # Build Agent Observations
        if global_agent_obs is None:
            # Agents are not rendered at all, or the only agent omits itself: nothing to copy.
            agent_obs = None
        elif self.obs_prop.omit_agent_self and self.n_agents >= 2:
            if self.obs_prop.render_agents == a_obs.SEPERATE:
                other_agent_obs_idx = [x for x in range(self.n_agents) if x != agent_idx]
                agent_obs = np.take(global_agent_obs, other_agent_obs_idx, axis=0)
            else:
                agent_obs = global_agent_obs.copy()
                agent_obs[(0, *agent.pos)] -= agent.encoding
        else:
            agent_obs = global_agent_obs.copy()

        # Build Level Observations
        # A per-agent name is used so that one agent's layer never leaks into the next agent's level.
        agent_lvl_obs = lvl_obs
        if self.obs_prop.render_agents == a_obs.LEVEL:
            assert agent_obs is not None
            agent_lvl_obs = lvl_obs.copy()
            agent_lvl_obs += agent_obs

        obs_dict[c.WALLS] = agent_lvl_obs
        if self.obs_prop.render_agents in [a_obs.SEPERATE, a_obs.COMBINED] and agent_obs is not None:
            obs_dict[c.AGENT] = agent_obs[:]
        if self[c.AGENT_PLACEHOLDER] and placeholder_obs is not None:
            obs_dict[c.AGENT_PLACEHOLDER] = placeholder_obs
        obs_dict.update(add_obs_dict)
        obsn = np.vstack(list(obs_dict.values()))
        if self.obs_prop.pomdp_r:
            obsn = self._do_pomdp_cutout(agent, obsn)

        raw_obs = self.per_agent_raw_observations_hook(agent)
        raw_obs = {key: np.expand_dims(val, 0) if val.ndim != 3 else val for key, val in raw_obs.items()}
        obsn = np.vstack((obsn, *raw_obs.values()))

        # Explain which channels of the stacked observation belong to which key.
        keys = list(chain(obs_dict.keys(), raw_obs.keys()))
        channel_counts = [x.shape[0] for x in chain(obs_dict.values(), raw_obs.values())]
        ends = np.cumsum(channel_counts)
        starts = ends - channel_counts
        per_agent_expl_idx[agent.name] = {key: list(range(int(start), int(end)))
                                          for key, start, end in zip(keys, starts, ends)}

        # Shadow Casting
        if agent.step_result is None:
            # No action has been taken yet; only expected directly after a reset.
            assert self._steps == 0
            agent.step_result = {'action_name': a.NOOP, 'action_valid': True,
                                 'collisions': [], 'lightmap': None}
        if self.obs_prop.cast_shadows:
            try:
                light_block_obs = [obs_idx for key, obs_idx in per_agent_expl_idx[agent.name].items()
                                   if self[key].is_blocking_light]
                # Flatten
                light_block_obs = [x for y in light_block_obs for x in y]
                shadowed_obs = [obs_idx for key, obs_idx in per_agent_expl_idx[agent.name].items()
                                if self[key].can_be_shadowed]
                # Flatten
                shadowed_obs = [x for y in shadowed_obs for x in y]
            except AttributeError as e:
                print('Check your Keys! Only use Constants as Keys!')
                print(e)
                raise

            obs_block_light = obsn[light_block_obs] != c.OCCUPIED_CELL
            light_block_map = Map((np.prod(obs_block_light, axis=0) != True).astype(int).squeeze())
            if self._pomdp_r:
                light_block_map = light_block_map.do_fov(self._pomdp_r, self._pomdp_r, max(self._level_shape))
            else:
                light_block_map = light_block_map.do_fov(*agent.pos, max(self._level_shape))

            agent.step_result['lightmap'] = light_block_map

            obsn[shadowed_obs] = ((obsn[shadowed_obs] * light_block_map) + 0.) - (1 - light_block_map)
        else:
            if self._pomdp_r:
                agent.step_result['lightmap'] = np.ones(self._obs_shape)
            else:
                agent.step_result['lightmap'] = None

        per_agent_obsn[agent.name] = obsn

    if self.n_agents == 1:
        agent_name = self[c.AGENT][0].name
        obs, explained_idx = per_agent_obsn[agent_name], per_agent_expl_idx[agent_name]
    elif self.n_agents >= 2:
        obs, explained_idx = np.stack(list(per_agent_obsn.values())), per_agent_expl_idx
    else:
        raise ValueError

    return obs, explained_idx
|
||||
|
||||
def _do_pomdp_cutout(self, agent, obs_to_be_padded):
|
||||
assert obs_to_be_padded.ndim == 3
|
||||
ra, d = self._pomdp_r, self.pomdp_diameter
|
||||
x0, x1 = max(0, agent.x - ra), min(agent.x + ra + 1, self._level_shape[0])
|
||||
y0, y1 = max(0, agent.y - ra), min(agent.y + ra + 1, self._level_shape[1])
|
||||
oobs = obs_to_be_padded[:, x0:x1, y0:y1]
|
||||
if oobs.shape[1:] != (d, d):
|
||||
if xd := oobs.shape[1] % d:
|
||||
if agent.x > ra:
|
||||
x0_pad = 0
|
||||
x1_pad = (d - xd)
|
||||
else:
|
||||
x0_pad = ra - agent.x
|
||||
x1_pad = 0
|
||||
else:
|
||||
x0_pad, x1_pad = 0, 0
|
||||
|
||||
if yd := oobs.shape[2] % d:
|
||||
if agent.y > ra:
|
||||
y0_pad = 0
|
||||
y1_pad = (d - yd)
|
||||
else:
|
||||
y0_pad = ra - agent.y
|
||||
y1_pad = 0
|
||||
else:
|
||||
y0_pad, y1_pad = 0, 0
|
||||
|
||||
oobs = np.pad(oobs, ((0, 0), (x0_pad, x1_pad), (y0_pad, y1_pad)), 'constant')
|
||||
return oobs
|
||||
|
||||
def get_all_tiles_with_collisions(self) -> List[Floor]:
    """
    Collect every floor tile which currently hosts more than one guest that can collide.

    :return: List of the affected tiles, in iteration order of the floor collection.
    :rtype: List[Floor]
    """
    return [tile for tile in self[c.FLOOR] if len(tile.guests_that_can_collide) > 1]
|
||||
|
||||
def _do_move_action(self, agent: Agent, action: Action) -> (bool, dict):
    """
    Try to execute a movement action for ``agent`` and build the matching reward.

    :param agent: The agent that wants to move.
    :param action: The movement action to execute.
    :return: ``(valid, reward)``; ``valid`` is the validity flag of the move and ``reward`` is a dict with
             the keys ``value``, ``reason`` and ``info``.
    """
    info_dict = dict()
    new_tile, valid = self._check_agent_move(agent, action)
    if valid:
        # Does not collide with level boundaries
        valid = agent.move(new_tile)
        if valid:
            # This will spam your logs, beware!
            self.print(f'{agent.name} just moved {action.identifier} from {agent.last_pos} to {agent.pos}.')
            info_dict.update({f'{agent.name}_move': 1, 'move': 1})
        else:
            valid = c.NOT_VALID
            self.print(f'{agent.name} just hit the wall at {agent.pos}. ({action.identifier})')
            info_dict.update({f'{agent.name}_wall_collide': 1, 'wall_collide': 1})
    else:
        # Agent seems to be trying to leave the level
        self.print(f'{agent.name} tried to leave the level {agent.pos}. ({action.identifier})')
        info_dict.update({f'{agent.name}_wall_collide': 1, 'wall_collide': 1})
    reward_value = self.rewards_base.MOVEMENTS_VALID if valid else self.rewards_base.MOVEMENTS_FAIL
    reward = {'value': reward_value, 'reason': action.identifier, 'info': info_dict}
    return valid, reward
|
||||
|
||||
def _check_agent_move(self, agent, action: Action) -> (Floor, bool):
    """
    Resolve the tile an agent would end up on when executing a movement action.

    :param agent: The moving agent; its ``x``, ``y`` and ``tile`` are read.
    :param action: The movement action, resolved to a coordinate offset through its identifier.
    :return: ``(tile, valid)``. If there is no floor tile at the target position, or one of its guests
             is blocking, the agent's current tile is returned instead of the target tile.
    """
    # Actions
    x_diff, y_diff = a.resolve_movement_action_to_coords(action.identifier)
    target_pos = (agent.x + x_diff, agent.y + y_diff)

    new_tile = self[c.FLOOR].by_pos(target_pos)
    if not new_tile or np.any([guest.is_blocking for guest in new_tile.guests]):
        # The flag stays c.VALID here as well; the caller receives the agent's own tile.
        return agent.tile, c.VALID

    return new_tile, c.VALID
|
||||
|
||||
def build_reward_result(self, global_env_rewards: list) -> (int, dict):
    """
    Combine per-agent rewards and global rewards into the step reward and a flat info dict.

    :param global_env_rewards: Rewards that do not belong to a single agent. Entries may be plain numbers
                               or reward dicts, of which the ``value`` entry is summed.
    :return: ``(reward, info)``. ``reward`` is a list with one value per agent if ``individual_rewards``
             is set, otherwise a single summed value.
    """
    # Returns: Reward, Info
    info = defaultdict(lambda: 0.0)

    # Gather additional sub-env rewards and calculate collisions
    for agent in self[c.AGENT]:
        for reward in self.per_agent_reward_hook(agent):
            agent.step_result['rewards'].append(reward)
        if collisions := agent.step_result['collisions']:
            self.print(f't = {self._steps}\t{agent.name} has collisions with {collisions}')
            info[c.COLLISION] += 1
            reward = {'value': self.rewards_base.COLLISION,
                      'reason': c.COLLISION,
                      'info': {f'{agent.name}_{c.COLLISION}': 1}}
            agent.step_result['rewards'].append(reward)

    comb_rewards = {agent.name: sum(x['value'] for x in agent.step_result['rewards']) for agent in self[c.AGENT]}

    # Combine the per_agent_info_dict:
    # NOTE(review): ``update`` overwrites equal keys instead of adding them up - confirm this is intended.
    combined_info_dict = defaultdict(lambda: 0)
    for agent in self[c.AGENT]:
        for reward in agent.step_result['rewards']:
            combined_info_dict.update(reward['info'])

    # Combine Info dicts into a global one
    combined_info_dict = dict(combined_info_dict)
    combined_info_dict.update(info)

    # Reward dicts contribute their 'value'; plain numbers are summed as before.
    global_reward_sum = sum(x['value'] if isinstance(x, dict) else x for x in global_env_rewards)
    if self.individual_rewards:
        self.print(f"rewards are {comb_rewards}")
        reward = [x + global_reward_sum for x in comb_rewards.values()]
        return reward, combined_info_dict
    else:
        reward = sum(comb_rewards.values()) + global_reward_sum
        self.print(f"reward is {reward}")
        return reward, combined_info_dict
|
||||
|
||||
def start_recording(self):
    """
    Switch episode recording on.

    :return: The new value of the recording flag, i.e. ``True``.
    """
    recording = True
    self._record_episodes = recording
    return self._record_episodes
|
||||
|
||||
def stop_recording(self):
    """
    Switch episode recording off.

    :return: ``True``, the negation of the recording flag after it has been cleared.
    """
    recording = False
    self._record_episodes = recording
    return not self._record_episodes
|
||||
|
||||
# noinspection PyGlobalUndefined
|
||||
def render(self, mode='human'):
    """
    Render the current state with the lazily created renderer.

    :param mode: Accepted for interface compatibility; not read by this method.
    :return: Whatever the renderer's ``render`` call returns.
    """
    # The global declaration has to precede any binding of these names inside the function.
    global Renderer, RenderEntity
    if not self._renderer:  # lazy init
        from environments.factory.base.renderer import Renderer, RenderEntity
        self._renderer = Renderer(self._level_shape, view_radius=self._pomdp_r, fps=7)

    # noinspection PyUnboundLocalVariable
    walls = [RenderEntity('wall', wall.pos) for wall in self[c.WALLS]]

    agents = []
    for i, agent in enumerate(self[c.AGENT]):
        name, state = h.asset_str(agent)
        agents.append(RenderEntity(name, agent.pos, 1, 'none', state, i + 1, agent.step_result['lightmap']))

    additional_assets = self.render_assets_hook()

    return self._renderer.render(walls + additional_assets + agents)
|
||||
|
||||
def save_params(self, filepath: Path):
    """
    Write ``self.params`` as indented JSON to ``filepath``; missing parent directories are created.

    :param filepath: Target file, opened in write mode.
    """
    # noinspection PyProtectedMember
    params = self.params
    target_dir = filepath.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    with filepath.open('w') as out_file:
        simplejson.dump(params, out_file, indent=4, namedtuple_as_object=True)
|
||||
|
||||
def get_injected_agents(self) -> list:
    """
    Return the initialised injected agents.

    :return: The ``_initialized_injections`` attribute if it exists, otherwise an empty list.
    """
    return getattr(self, '_initialized_injections', [])
|
||||
|
||||
def _summarize_state(self, stateless_entities=False):
    """
    Summarise the current step and the states of the matching entity groups.

    :param stateless_entities: Only groups whose ``is_stateless`` equals this value are included.
    :return: Dict with the step count and one entry per included entity group; all keys carry the
             ``REC_TAC`` prefix.
    """
    summary = {f'{REC_TAC}step': self._steps}

    for entity_group in self._entities:
        if entity_group.is_stateless != stateless_entities:
            continue
        group_key = f'{REC_TAC}{entity_group.name}'
        summary[group_key] = entity_group.summarize_states()
    return summary
|
||||
|
||||
def print(self, string):
    """
    Print ``string`` to stdout, but only if ``self.verbose`` is truthy.

    :param string: The message to print.
    """
    if not self.verbose:
        return
    print(string)
|
||||
|
||||
# Properties which are called by the base class to extend beyond attributes of the base class
|
||||
@property
@abc.abstractmethod
def actions_hook(self) -> Union[Action, List[Action]]:
    """
    Additional actions of a sub-environment. Classes inheriting from this base class must implement
    this property.

    :return: A list of Action objects holding all additional actions; empty in this base implementation.
    :rtype: List[Action]
    """
    no_additional_actions = []
    return no_additional_actions
|
||||
|
||||
@property
@abc.abstractmethod
def entities_hook(self) -> Dict[(str, Entities)]:
    """
    Additional entity collections of a sub-environment. Classes inheriting from this base class must
    implement this property.

    :return: A dict of additional entity collections; empty in this base implementation.
    :rtype: Dict[str, Entities]
    """
    no_additional_entities = {}
    return no_additional_entities
|
||||
|
||||
# Functions which provide additions to functions of the base class
|
||||
# Always call super!!!!!!
|
||||
@abc.abstractmethod
def reset_hook(self) -> None:
    """Hook which is called by ``reset``; the base implementation does nothing."""
    return None
|
||||
|
||||
@abc.abstractmethod
def pre_step_hook(self) -> None:
    """Hook which is called by ``step`` before any action is executed; the base implementation does nothing."""
    return None
|
||||
|
||||
@abc.abstractmethod
def do_additional_actions(self, agent: Agent, action: Action) -> (bool, dict):
    """
    Execute an action which is neither a movement nor a no-op.

    :param agent: The acting agent.
    :param action: The action to execute.
    :return: Implementations return ``(action_valid, reward)``; this base implementation returns ``None``.
    """
    return
|
||||
|
||||
@abc.abstractmethod
def step_hook(self) -> (List[dict], dict):
    """
    Hook which is called by ``step`` after all agent actions have been executed.

    :return: ``(rewards, info)``; an empty list and an empty dict in this base implementation.
    """
    rewards, info = [], {}
    return rewards, info
|
||||
|
||||
@abc.abstractmethod
def check_additional_done(self) -> (bool, dict):
    """
    Hook for additional termination conditions.

    :return: ``(done, info)``; ``False`` and an empty dict in this base implementation.
    """
    done, info = False, {}
    return done, info
|
||||
|
||||
@abc.abstractmethod
def observations_hook(self) -> Dict[str, np.typing.ArrayLike]:
    """
    Hook for additional observations which are shared by all agents.

    :return: Dict of additional observation arrays; empty in this base implementation.
    """
    additional_observations = {}
    return additional_observations
|
||||
|
||||
@abc.abstractmethod
def per_agent_reward_hook(self, agent: Agent) -> List[dict]:
    """
    Hook for additional rewards of a single agent.

    :param agent: The agent the rewards belong to.
    :return: List of reward dicts; empty in this base implementation.
    """
    additional_rewards = []
    return additional_rewards
|
||||
|
||||
@abc.abstractmethod
def post_step_hook(self) -> List[dict]:
    """
    Hook which is called at the end of ``step``.

    :return: List of info dicts which are merged into the step info; empty in this base implementation.
    """
    post_step_infos = []
    return post_step_infos
|
||||
|
||||
@abc.abstractmethod
def per_agent_raw_observations_hook(self, agent) -> Dict[str, np.typing.ArrayLike]:
    """
    Hook for raw observations which are built for each agent individually.

    If ``obs_prop.show_global_position_info`` is set, a zero array of shape ``_obs_shape`` is added whose
    entries ``[:2, 0]`` hold the encoding of the agent's global position object.

    :param agent: The agent the observations are built for.
    :return: Dict of additional raw observation arrays.
    """
    if not self.obs_prop.show_global_position_info:
        return {}

    global_pos_obs = np.zeros(self._obs_shape)
    global_pos_obs[:2, 0] = self[c.GLOBAL_POSITION].by_entity(agent).encoding
    return {c.GLOBAL_POSITION: global_pos_obs}
|
||||
|
||||
@abc.abstractmethod
def render_assets_hook(self):
    """
    Hook for additional render entities.

    :return: List of additional assets; empty in this base implementation.
    """
    additional_assets = []
    return additional_assets
|
@ -1,338 +0,0 @@
|
||||
from collections import defaultdict
|
||||
from typing import Union, List
|
||||
|
||||
import numpy as np
|
||||
|
||||
from environments import helpers as h
|
||||
from environments.helpers import Constants as c
|
||||
|
||||
##########################################################################
|
||||
# ##################### Base Object Building Blocks ######################### #
|
||||
##########################################################################
|
||||
|
||||
|
||||
# TODO: Missing Documentation
|
||||
class Object:

    """General objects for organisation and maintenance, such as actions etc."""

    # Running counter per class name; used to number instances without a string identifier.
    _u_idx = defaultdict(lambda: 0)

    def __bool__(self):
        return True

    @property
    def name(self):
        return self._name

    @property
    def identifier(self):
        """The string identifier if one was given, the generated name otherwise."""
        if self._str_ident is not None:
            return self._str_ident
        return self._name

    def __init__(self, str_ident: Union[str, None] = None, **kwargs):
        """
        :param str_ident: Optional identifier. If given, the name is ``ClassName[str_ident]``; otherwise it
                          is ``ClassName#<n>`` with a running number per class name.
        :param kwargs: Not used; a message is printed if any are passed.
        """
        self._str_ident = str_ident
        cls_name = self.__class__.__name__

        if self._str_ident is not None:
            self._name = f'{cls_name}[{self._str_ident}]'
        else:
            self._name = f'{cls_name}#{Object._u_idx[cls_name]}'
            Object._u_idx[cls_name] += 1

        if kwargs:
            print(f'Following kwargs were passed, but ignored: {kwargs}')

    def __repr__(self):
        return f'{self.name}'

    def __eq__(self, other) -> bool:
        # Objects compare equal to their identifier, and thereby to objects sharing it.
        return other == self.identifier

    def __hash__(self):
        # Kept consistent with __eq__; defining __eq__ alone would make instances unhashable.
        return hash(self.identifier)
|
||||
# Base
|
||||
|
||||
|
||||
# TODO: Missing Documentation
|
||||
class EnvObject(Object):

    """Objects that hold observable information, but have no position on the env grid (inventories etc.)."""

    _u_idx = defaultdict(lambda: 0)

    def __init__(self, collection, **kwargs):
        """
        :param collection: The collection this object belongs to.
        :param kwargs: Passed on to the parent class.
        """
        super().__init__(**kwargs)
        self._collection = collection

    @property
    def can_collide(self):
        return False

    @property
    def encoding(self):
        return c.OCCUPIED_CELL

    def change_parent_collection(self, other_collection):
        """
        Move this object into ``other_collection``.

        The object is added to the new collection first and then deleted from the old one.

        :return: Result of comparing the stored collection with ``other_collection``.
        """
        previous_collection = self._collection
        other_collection.add_item(self)
        previous_collection.delete_env_object(self)
        self._collection = other_collection
        return self._collection == other_collection
|
||||
# With Rendering
|
||||
|
||||
|
||||
# TODO: Missing Documentation
|
||||
class Entity(EnvObject):
    """Full env entity that lives on the env grid, e.g. doors, items, dirt piles."""

    def __init__(self, tile, *args, **kwargs):
        """
        :param tile: The tile this entity is placed on; the entity enters it on construction.
        """
        super().__init__(*args, **kwargs)
        self._tile = tile
        tile.enter(self)

    @property
    def is_blocking(self):
        return False

    @property
    def can_collide(self):
        return False

    @property
    def tile(self):
        return self._tile

    @property
    def pos(self):
        return self._tile.pos

    @property
    def x(self):
        return self.pos[0]

    @property
    def y(self):
        return self.pos[1]

    def summarize_state(self) -> dict:
        """Return name, position, tile name and collision flag as plain Python types."""
        return {
            'name': str(self.name),
            'x': int(self.x),
            'y': int(self.y),
            'tile': str(self.tile.name),
            'can_collide': bool(self.can_collide),
        }

    def __repr__(self):
        return super().__repr__() + f'(@{self.pos})'
|
||||
|
||||
|
||||
# TODO: Missing Documentation
|
||||
class MoveableEntity(Entity):
    """Entity which can change its tile and remembers the tile it came from."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._last_tile = None

    @property
    def last_tile(self):
        return self._last_tile

    @property
    def last_pos(self):
        """Position of the previous tile, or ``c.NO_POS`` if there is none."""
        if not self._last_tile:
            return c.NO_POS
        return self._last_tile.pos

    @property
    def direction_of_view(self):
        """Difference between the last and the current position, as ``(dx, dy)``."""
        last_x, last_y = self.last_pos
        curr_x, curr_y = self.pos
        return last_x - curr_x, last_y - curr_y

    def move(self, next_tile):
        """
        Move to ``next_tile`` and notify the parent collection about the change.

        :return: ``c.VALID`` if the tile changed, ``c.NOT_VALID`` if ``next_tile`` equals the current tile.
        """
        curr_tile = self.tile
        if curr_tile == next_tile:
            return c.NOT_VALID

        next_tile.enter(self)
        curr_tile.leave(self)
        self._tile = next_tile
        self._last_tile = curr_tile
        self._collection.notify_change_to_value(self)
        return c.VALID
|
||||
# Can Move
|
||||
|
||||
|
||||
# TODO: Missing Documentation
|
||||
class BoundingMixin(Object):
    """Mixin for objects which are bound to another entity."""

    def __init__(self, entity_to_be_bound, *args, **kwargs):
        """
        :param entity_to_be_bound: The entity this object is bound to; must not be ``None``.
        """
        super().__init__(*args, **kwargs)
        assert entity_to_be_bound is not None
        self._bound_entity = entity_to_be_bound

    @property
    def bound_entity(self):
        return self._bound_entity

    @property
    def name(self):
        own_name = super().name
        return f'{own_name}({self._bound_entity.name})'

    def belongs_to_entity(self, entity):
        """Return whether ``entity`` compares equal to the bound entity."""
        return entity == self.bound_entity
|
||||
|
||||
|
||||
##########################################################################
|
||||
# ####################### Objects and Entitys ########################## #
|
||||
##########################################################################
|
||||
|
||||
|
||||
class Action(Object):
    """An action; adds nothing to the behaviour of ``Object``."""

    def __init__(self, *args, **kwargs):
        # Pure pass-through to the parent constructor.
        super(Action, self).__init__(*args, **kwargs)
|
||||
|
||||
|
||||
class PlaceHolder(Object):
    """Object with a fixed name whose encoding is a configurable fill value."""

    def __init__(self, *args, fill_value=0, **kwargs):
        """
        :param fill_value: Value which is returned as encoding.
        """
        super(PlaceHolder, self).__init__(*args, **kwargs)
        self._fill_value = fill_value

    @property
    def name(self):
        return "PlaceHolder"

    @property
    def encoding(self):
        return self._fill_value

    @property
    def can_collide(self):
        return False
|
||||
|
||||
|
||||
class GlobalPosition(BoundingMixin, EnvObject):
    """Exposes the position of its bound entity as encoding, optionally divided by the level shape."""

    def __init__(self, level_shape: (int, int), *args, normalized: bool = True, **kwargs):
        """
        :param level_shape: Shape the position is divided by when ``normalized`` is set.
        :param normalized: Whether the encoding is the position divided by ``level_shape``.
        """
        super().__init__(*args, **kwargs)
        self._level_shape = level_shape
        self._normalized = normalized

    @property
    def encoding(self):
        if not self._normalized:
            return self.bound_entity.pos
        return tuple(np.divide(self._bound_entity.pos, self._level_shape))
|
||||
|
||||
|
||||
class Floor(EnvObject):
    """A tile with a fixed position which keeps track of the guests that are currently on it."""

    def __init__(self, pos, *args, **kwargs):
        """
        :param pos: Position of the tile; stored as a tuple.
        """
        super().__init__(*args, **kwargs)
        self._guests = dict()
        self._pos = tuple(pos)
        self._neighboring_floor: List[Floor] = list()

    @property
    def pos(self):
        return self._pos

    @property
    def x(self):
        return self.pos[0]

    @property
    def y(self):
        return self.pos[1]

    @property
    def encoding(self):
        return c.FREE_CELL

    @property
    def guests(self):
        return self._guests.values()

    @property
    def guests_that_can_collide(self):
        return [guest for guest in self.guests if guest.can_collide]

    @property
    def neighboring_floor(self):
        """Tiles found at the offsets in ``h.POS_MASK`` (the zero offset excluded); looked up once non-empty."""
        if not self._neighboring_floor:
            offsets = [pos for pos in h.POS_MASK.reshape(-1, 2) if not np.all(pos == [0, 0])]
            candidates = [self._collection.by_pos(np.add(self.pos, offset)) for offset in offsets]
            self._neighboring_floor = [tile for tile in candidates if tile]
        return self._neighboring_floor

    @property
    def neighboring_floor_pos(self):
        return [tile.pos for tile in self.neighboring_floor]

    def __len__(self):
        return len(self._guests)

    def is_empty(self):
        return len(self._guests) == 0

    def is_occupied(self):
        return len(self._guests) > 0

    def enter(self, guest):
        """Register ``guest`` under its name; return ``False`` if that name is already registered."""
        if guest.name in self._guests:
            return False
        self._guests[guest.name] = guest
        return True

    def leave(self, guest):
        """Unregister ``guest``; return ``False`` if it could not be removed."""
        try:
            del self._guests[guest.name]
        except (ValueError, KeyError):
            return False
        else:
            return True

    def __repr__(self):
        return f'{self.name}(@{self.pos})'

    def summarize_state(self, **_):
        return {'name': self.name, 'x': int(self.x), 'y': int(self.y)}
|
||||
|
||||
|
||||
class Wall(Floor):
    """Tile which can collide and is encoded as an occupied cell."""

    @property
    def encoding(self):
        return c.OCCUPIED_CELL

    @property
    def can_collide(self):
        return True
|
||||
|
||||
|
||||
class Agent(MoveableEntity):
    """Moveable entity which can collide and carries the result of its last step."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.clear_temp_state()

    @property
    def can_collide(self):
        return True

    # noinspection PyAttributeOutsideInit
    def clear_temp_state(self):
        """Reset the temporary per-step state, i.e. set ``step_result`` to ``None``."""
        self.step_result = None

    def summarize_state(self):
        """Extend the parent summary by validity and name of the last action, read from ``step_result``."""
        state_dict = super().summarize_state()
        last_result = self.step_result
        state_dict.update(valid=bool(last_result['action_valid']), action=str(last_result['action_name']))
        return state_dict
|
@ -1,517 +0,0 @@
|
||||
import numbers
|
||||
import random
|
||||
from abc import ABC
|
||||
from typing import List, Union, Dict, Tuple
|
||||
|
||||
import numpy as np
|
||||
import six
|
||||
|
||||
from environments.factory.base.objects import Entity, Floor, Agent, Action, Wall, PlaceHolder, GlobalPosition, \
|
||||
Object, EnvObject
|
||||
from environments.utility_classes import MovementProperties
|
||||
from environments import helpers as h
|
||||
from environments.helpers import Constants as c
|
||||
|
||||
##########################################################################
|
||||
# ################## Base Collections Definition ####################### #
|
||||
##########################################################################
|
||||
|
||||
|
||||
class ObjectCollection:
    """Name-keyed collection of objects with dict-like access and additional access by position."""

    _accepted_objects = Object
    _stateless_entities = False

    def __init__(self, *args, **kwargs):
        # Positional and keyword arguments are accepted, but not used here.
        self._collection = dict()

    @property
    def is_stateless(self):
        return self._stateless_entities

    @property
    def name(self):
        return f'{self.__class__.__name__}'

    def __len__(self):
        return len(self._collection)

    def __iter__(self):
        return iter(self.values())

    def __repr__(self):
        return f'{self.__class__.__name__}[{self._collection}]'

    def keys(self):
        return self._collection.keys()

    def values(self):
        return self._collection.values()

    def items(self):
        return self._collection.items()

    def add_item(self, other: _accepted_objects):
        """Store ``other`` under its name and return the collection itself."""
        assert isinstance(other, self._accepted_objects), \
            f'All item names have to be of type {self._accepted_objects}, but were {other.__class__}.,'
        self._collection[other.name] = other
        return self

    def add_additional_items(self, others: List[_accepted_objects]):
        """Add every item of ``others`` and return the collection itself."""
        for other in others:
            self.add_item(other)
        return self

    def _get_index(self, item):
        """Return the position of the first stored value comparing equal to ``item``, else ``None``."""
        for index, value in enumerate(self._collection.values()):
            if value == item:
                return index
        return None

    def __getitem__(self, item):
        """
        Look up by position (integers, negative ones counting from the end) or by key.

        :return: The stored object, or ``None`` if nothing is found.
        """
        if isinstance(item, (int, np.int64, np.int32)):
            position = item if item >= 0 else len(self._collection) - abs(item)
            for index, value in enumerate(self._collection.values()):
                if index == position:
                    return value
            return None
        return self._collection.get(item)
|
||||
|
||||
|
||||
class EnvObjectCollection(ObjectCollection):
    """Collection of env objects which maintains an observation array that is updated lazily."""

    _accepted_objects = EnvObject

    def __init__(self, obs_shape: (int, int), *args,
                 individual_slices: bool = False,
                 is_blocking_light: bool = False,
                 can_collide: bool = False,
                 can_be_shadowed: bool = True, **kwargs):
        """
        :param obs_shape: Shape of a single slice of the observation array.
        :param individual_slices: If set, a further slice is stacked for every item added after the first.
        :param is_blocking_light: Stored as attribute.
        :param can_collide: Stored as attribute.
        :param can_be_shadowed: Stored as attribute.
        """
        super().__init__(*args, **kwargs)
        self._shape = obs_shape
        self._array = None
        self._individual_slices = individual_slices
        # Pending (index, value) writes; applied on the next call of as_array.
        self._lazy_eval_transforms = []
        self.is_blocking_light = is_blocking_light
        self.can_be_shadowed = can_be_shadowed
        self.can_collide = can_collide

    @property
    def encodings(self):
        return [obj.encoding for obj in self]

    def add_item(self, other: EnvObject):
        """Add ``other``, create or grow the array if necessary and queue a write of its encoding."""
        super().add_item(other)
        if self._array is None:
            self._array = np.zeros((1, *self._shape))
        elif self._individual_slices:
            self._array = np.vstack((self._array, np.zeros((1, *self._shape))))
        self.notify_change_to_value(other)

    def as_array(self):
        """Apply all pending writes to the array and return it."""
        if self._lazy_eval_transforms:
            idxs, values = zip(*self._lazy_eval_transforms)
            # np.put is relied upon to respect the ordering of the pending writes
            np.put(self._array, idxs, values)
            self._lazy_eval_transforms = []
        return self._array

    def summarize_states(self):
        return [obj.summarize_state() for obj in self.values()]

    def notify_change_to_free(self, env_object: EnvObject):
        self._array_change_notifyer(env_object, value=c.FREE_CELL)

    def notify_change_to_value(self, env_object: EnvObject):
        self._array_change_notifyer(env_object)

    def _array_change_notifyer(self, env_object: EnvObject, value=None):
        """Queue a write for ``env_object``; ``value`` defaults to the object's encoding."""
        index = self._get_index(env_object)
        if value is None:
            value = env_object.encoding
        self._lazy_eval_transforms.append((index, value))
        if self._individual_slices:
            # NOTE(review): idx is a (number, value) tuple, not a plain index - confirm the intended
            # flat index before relying on individual slices.
            idx = (self._get_index(env_object) * np.prod(self._shape[1:]), value)
            self._lazy_eval_transforms.append((idx, value))
        else:
            self._lazy_eval_transforms.append((index, value))

    def _refresh_arrays(self):
        """Queue a write of the encoding of every stored object at its enumeration index."""
        poss, values = zip(*[(idx, obj.encoding) for idx, obj in enumerate(self.values())])
        self._lazy_eval_transforms.extend(zip(poss, values))

    def __delitem__(self, name):
        """Delete the object stored under ``name`` and update the array accordingly."""
        idx, obj = next((i, obj) for i, obj in enumerate(self) if obj.name == name)
        if self._individual_slices:
            self._array = np.delete(self._array, idx, axis=0)
        else:
            self.notify_change_to_free(self._collection[name])
            # Dirty hack to check for not being subclassed. In that case the array has to be refreshed,
            # since positions in the observation array are the result of enumeration and can override
            # each other.
            # Todo: Find a better solution
            cls = self.__class__
            if not issubclass(cls, EntityCollection) and issubclass(cls, EnvObjectCollection):
                self._refresh_arrays()
        del self._collection[name]

    def delete_env_object(self, env_object: EnvObject):
        del self[env_object.name]

    def delete_env_object_by_name(self, name):
        del self[name]
|
||||
|
||||
|
||||
class EntityCollection(EnvObjectCollection, ABC):
    """Collection of entities that have a position and a tile.

    Array writes are queued as ``((slice, x, y), value)`` pairs and applied
    when ``as_array`` is called.
    """

    _accepted_objects = Entity

    @classmethod
    def from_tiles(cls, tiles, *args, entity_kwargs=None, **kwargs):
        """Create a collection holding one new entity per tile.

        :param tiles: Tiles the entities are created on.
        :param entity_kwargs: Extra keyword arguments for every entity.
        :return: The populated collection.
        """
        # objects_name = cls._accepted_objects.__name__
        collection = cls(*args, **kwargs)
        extra_kwargs = {} if entity_kwargs is None else entity_kwargs
        new_entities = []
        for ident, tile in enumerate(tiles):
            new_entities.append(cls._accepted_objects(tile, collection, str_ident=ident, **extra_kwargs))
        collection.add_additional_items(new_entities)
        return collection

    @classmethod
    def from_argwhere_coordinates(cls, positions: [(int, int)], tiles, *args, entity_kwargs=None, **kwargs, ):
        """Create a collection from positions, resolved through ``tiles.by_pos``."""
        resolved_tiles = [tiles.by_pos(position) for position in positions]
        return cls.from_tiles(resolved_tiles, *args, entity_kwargs=entity_kwargs, **kwargs)

    @property
    def positions(self):
        """Positions of all entities, in iteration order."""
        return [entity.pos for entity in self]

    @property
    def tiles(self):
        """Tiles of all entities, in iteration order."""
        return [member.tile for member in self]

    def __init__(self, level_shape, *args, **kwargs):
        super().__init__(level_shape, *args, **kwargs)
        self._lazy_eval_transforms = []

    def __delitem__(self, name):
        """Let the named entity leave its tile, then delete it.

        :raises StopIteration: if no entity carries ``name``.
        """
        _, target = next((i, obj) for i, obj in enumerate(self) if obj.name == name)
        target.tile.leave(target)
        super().__delitem__(name)

    def as_array(self):
        """Apply all queued writes to the array and return it."""
        pending = self._lazy_eval_transforms
        if pending:
            idxs, values = zip(*pending)
            # numpy put respects the ordering of the queued writes.
            # Todo: Export the index building in a separate function
            flat_indices = [np.ravel_multi_index(idx, self._array.shape) for idx in idxs]
            np.put(self._array, flat_indices, values)
            self._lazy_eval_transforms = []
        return self._array

    def _array_change_notifyer(self, entity, pos=None, value=None):
        """Queue a write of ``value`` at ``pos`` for ``entity``.

        ``pos`` defaults to ``entity.pos`` and ``value`` to ``entity.encoding``.
        """
        # Todo: Export the construction in a separate function
        if pos is None:
            pos = entity.pos
        if value is None:
            value = entity.encoding
        x, y = pos
        slice_idx = self._get_index(entity) if self._individual_slices else 0
        self._lazy_eval_transforms.append(((slice_idx, x, y), value))

    def by_pos(self, pos: Tuple[int, int]):
        """Return the first entity located at ``pos``, or ``None``."""
        for item in self:
            if item.pos == tuple(pos):
                return item
        return None
|
||||
|
||||
|
||||
class BoundEnvObjCollection(EnvObjectCollection, ABC):
    """Collection that is itself bound to one entity."""

    def __init__(self, entity_to_be_bound, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._bound_entity = entity_to_be_bound

    def belongs_to_entity(self, entity):
        """Whether this collection is bound to ``entity`` (compared with ``==``)."""
        return self._bound_entity == entity

    def by_entity(self, entity):
        """Return the first member that belongs to ``entity``, or ``None``."""
        for member in self:
            if member.belongs_to_entity(entity):
                return member
        return None

    def idx_by_entity(self, entity):
        """Return the index of the first member belonging to ``entity``, or ``None``."""
        for position, member in enumerate(self):
            if member.belongs_to_entity(entity):
                return position
        return None

    def as_array_by_entity(self, entity):
        """Index the backing array with the result of ``idx_by_entity``."""
        member_index = self.idx_by_entity(entity)
        return self._array[member_index]
|
||||
|
||||
|
||||
class MovingEntityObjectCollection(EntityCollection, ABC):
    """Entity collection that frees an entity's previous cell on updates."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def notify_change_to_value(self, entity):
        """Queue the entity's new value and free its last position.

        The last position is only freed when it differs from ``c.NO_POS``.
        An ``AttributeError`` raised while queueing that write is ignored.
        """
        super().notify_change_to_value(entity)
        has_previous_position = entity.last_pos != c.NO_POS
        if has_previous_position:
            try:
                self._array_change_notifyer(entity, entity.last_pos, value=c.FREE_CELL)
            except AttributeError:
                pass
|
||||
|
||||
|
||||
##########################################################################
|
||||
# ################# Objects and Entity Collection ###################### #
|
||||
##########################################################################
|
||||
|
||||
|
||||
class GlobalPositions(EnvObjectCollection):
    """Collection of per-agent ``GlobalPosition`` objects."""

    _accepted_objects = GlobalPosition

    def __init__(self, *args, **kwargs):
        fixed_flags = dict(is_per_agent=True, individual_slices=True, is_blocking_light=False,
                           can_be_shadowed=False, can_collide=False)
        super().__init__(*args, **fixed_flags, **kwargs)

    def as_array(self):
        """Stack the arrays of all members into one array."""
        # FIXME DEBUG!!! make this lazy?
        member_arrays = [gp.as_array() for gp in self]
        return np.stack(member_arrays)

    def as_array_by_entity(self, entity):
        """Return the stacked arrays of all members.

        NOTE(review): ``entity`` is not used, so the result is not filtered
        per entity -- confirm whether that is intended.
        """
        # FIXME DEBUG!!! make this lazy?
        member_arrays = [gp.as_array() for gp in self]
        return np.stack(member_arrays)

    def spawn_global_position_objects(self, agents):
        """Create one global-position object per agent and add them."""
        # Todo, change to 'from xy'-form
        spawned = []
        for agent in agents:
            spawned.append(self._accepted_objects(self._shape, agent, self))
        # noinspection PyTypeChecker
        self.add_additional_items(spawned)

    def idx_by_entity(self, entity):
        """Return the index of the first member belonging to ``entity``, or ``None``."""
        for position, member in enumerate(self):
            if member.belongs_to_entity(entity):
                return position
        return None

    def by_entity(self, entity):
        """Return the first member belonging to ``entity``, or ``None``."""
        for member in self:
            if member.belongs_to_entity(entity):
                return member
        return None
|
||||
|
||||
|
||||
class PlaceHolders(EnvObjectCollection):
    """Collection of ``PlaceHolder`` objects that fill array slices with a value."""

    _accepted_objects = PlaceHolder

    def __init__(self, *args, **kwargs):
        # The message used to claim the keyword is "True", although it is
        # forced to False right below.
        assert 'individual_slices' not in kwargs, \
            'Keyword - "individual_slices": is fixed to "False" and must not be altered'
        kwargs.update(individual_slices=False)
        super().__init__(*args, **kwargs)

    @classmethod
    def from_values(cls, values: Union[str, numbers.Number, List[Union[str, numbers.Number]]],
                    *args, object_kwargs=None, **kwargs):
        """Create a collection holding one placeholder per fill value.

        :param values: A single fill value (string or number) or a list of them.
        :param object_kwargs: Extra keyword arguments for every placeholder.
        :return: The populated collection.
        """
        # objects_name = cls._accepted_objects.__name__
        if isinstance(values, (str, numbers.Number)):
            values = [values]
        collection = cls(*args, **kwargs)
        extra_kwargs = object_kwargs if object_kwargs is not None else {}
        objects = [cls._accepted_objects(collection, str_ident=i, fill_value=value, **extra_kwargs)
                   for i, value in enumerate(values)]
        collection.add_additional_items(objects)
        return collection

    # noinspection DuplicatedCode
    def as_array(self):
        """Fill the array according to each placeholder and return it.

        :raises ValueError: for a string fill value other than "normal" / "n"
            (case-insensitive).
        :raises TypeError: if a fill value is neither a number nor a string.
        """
        for idx, placeholder in enumerate(self):
            # NOTE(review): the numeric check reads `encoding`, while the value
            # written is `fill_value` -- presumably both are the same; verify.
            if isinstance(placeholder.encoding, numbers.Number):
                self._array[idx][:] = placeholder.fill_value
            elif isinstance(placeholder.fill_value, str):
                if placeholder.fill_value.lower() in ['normal', 'n']:
                    # Overwrites the whole array with fresh normal noise.
                    self._array[:] = np.random.normal(size=self._array.shape)
                else:
                    raise ValueError('Choose one of: ["normal", "N"]')
            else:
                raise TypeError('Objects of type "str" or "number" is required here.')

        return self._array
|
||||
|
||||
|
||||
class Entities(ObjectCollection):
    """Mapping-like container of entity collections, keyed by name."""

    _accepted_objects = EntityCollection

    @property
    def arrays(self):
        """Dict of ``key -> collection.as_array()`` for every stored collection."""
        result = {}
        for key, val in self.items():
            result[key] = val.as_array()
        return result

    @property
    def names(self):
        """Keys of the stored collections as a list."""
        return [*self._collection.keys()]

    def __init__(self):
        super().__init__()

    def iter_individual_entitites(self):
        """Iterate over every member of every stored collection."""
        return (member for group in self.values() for member in group)

    def add_item(self, other: dict):
        """Merge ``other`` into this container and return ``self``.

        Asserts that none of the new keys is already present.
        """
        already_present = [key for key in other.keys() if key in self.keys()]
        assert not any(already_present), \
            "This group of entities has already been added!"
        self._collection.update(other)
        return self

    def add_additional_items(self, others: Dict):
        """Alias for ``add_item``."""
        return self.add_item(others)

    def by_pos(self, pos: (int, int)):
        """Collect the ``by_pos`` hits of all collections that offer ``by_pos``."""
        hits = (group.by_pos(pos) for group in self.values() if hasattr(group, 'by_pos'))
        return [hit for hit in hits if hit is not None]
|
||||
|
||||
|
||||
class Walls(EntityCollection):
    """Stateless collection of ``Wall`` objects drawn into a single slice."""

    _accepted_objects = Wall
    _stateless_entities = True

    def as_array(self):
        """Return the array, filling it first while it is still all-zero."""
        if np.any(self._array):
            return self._array
        # Which is Faster?
        # indices = [x.pos for x in cls]
        # np.put(cls._array, [np.ravel_multi_index((0, *x), cls._array.shape) for x in indices], cls.encodings)
        xs, ys = zip(*[member.pos for member in self])
        self._array[0, xs, ys] = self._value
        return self._array

    def __init__(self, *args, is_blocking_light=True, **kwargs):
        super().__init__(*args, individual_slices=False,
                         can_collide=True,
                         is_blocking_light=is_blocking_light, **kwargs)
        self._value = c.OCCUPIED_CELL

    @classmethod
    def from_argwhere_coordinates(cls, argwhere_coordinates, *args, **kwargs):
        """Create a collection with one object per coordinate."""
        tiles = cls(*args, **kwargs)
        members = [cls._accepted_objects(pos, tiles) for pos in argwhere_coordinates]
        # noinspection PyTypeChecker
        tiles.add_additional_items(members)
        return tiles

    @classmethod
    def from_tiles(cls, tiles, *args, **kwargs):
        """Not supported for this collection; always raises ``RuntimeError``."""
        raise RuntimeError()
|
||||
|
||||
|
||||
class Floors(Walls):
    """Stateless collection of ``Floor`` tiles."""

    _accepted_objects = Floor
    _stateless_entities = True

    def __init__(self, *args, is_blocking_light=False, **kwargs):
        super().__init__(*args, is_blocking_light=is_blocking_light, **kwargs)
        self._value = c.FREE_CELL

    @property
    def occupied_tiles(self):
        """All tiles reporting ``is_occupied()``, in random order."""
        occupied = []
        for tile in self:
            if tile.is_occupied():
                occupied.append(tile)
        random.shuffle(occupied)
        return occupied

    @property
    def empty_tiles(self) -> List[Floor]:
        """All tiles reporting ``is_empty()``, in random order."""
        empty = []
        for tile in self:
            if tile.is_empty():
                empty.append(tile)
        random.shuffle(empty)
        return empty

    @classmethod
    def from_tiles(cls, tiles, *args, **kwargs):
        """Not supported for this collection; always raises ``RuntimeError``."""
        raise RuntimeError()
|
||||
|
||||
|
||||
class Agents(MovingEntityObjectCollection):
    """Collection of ``Agent`` entities; always created with ``can_collide=True``."""

    _accepted_objects = Agent

    def __init__(self, *args, **kwargs):
        super().__init__(*args, can_collide=True, **kwargs)

    @property
    def positions(self):
        """Positions of all agents, in iteration order."""
        return [member.pos for member in self]

    def replace_agent(self, key, agent):
        """Swap the agent stored under ``key`` for ``agent``.

        The old agent leaves its tile, and the new agent takes over the old
        agent's name and is stored under that name.
        """
        old_agent = self[key]
        departing = self[key]
        self[key].tile.leave(departing)
        agent._name = old_agent.name
        self._collection[agent.name] = agent
|
||||
|
||||
|
||||
class Actions(ObjectCollection):
    """Collection of the actions enabled by a ``MovementProperties`` setting."""

    _accepted_objects = Action

    @property
    def movement_actions(self):
        """Snapshot of the collection taken after the movement actions were added."""
        return self._movement_actions

    # noinspection PyTypeChecker
    def __init__(self, movement_properties: MovementProperties):
        self.allow_no_op = movement_properties.allow_no_op
        self.allow_diagonal_movement = movement_properties.allow_diagonal_movement
        self.allow_square_movement = movement_properties.allow_square_movement
        super().__init__()

        # Move this to Baseclass, Env init?
        make_action = self._accepted_objects
        if self.allow_square_movement:
            square_moves = [make_action(str_ident=direction) for direction in h.EnvActions.square_move()]
            self.add_additional_items(square_moves)
        if self.allow_diagonal_movement:
            diagonal_moves = [make_action(str_ident=direction) for direction in h.EnvActions.diagonal_move()]
            self.add_additional_items(diagonal_moves)
        # Taken before the no-op is added, so it only holds movement actions.
        self._movement_actions = self._collection.copy()
        if self.allow_no_op:
            self.add_additional_items([make_action(str_ident=h.EnvActions.NOOP)])

    def is_moving_action(self, action: Union[int]):
        """Whether ``action`` is one of the stored movement actions."""
        known_moves = self.movement_actions.values()
        return action in known_moves

    def summarize(self):
        """Return one ``{'name': identifier}`` dict per action."""
        return [{'name': action.identifier} for action in self]
|
||||
|
||||
|
||||
class Zones(ObjectCollection):
    """Zone slices of a parsed level.

    The constructor is currently disabled: it raises ``NotImplementedError``
    as its first statement, so everything after that line is unreachable.
    """

    @property
    def accounting_zones(self):
        """Zone slices for every item whose value is not ``c.DANGER_ZONE``."""
        return [self[idx] for idx, name in self.items() if name != c.DANGER_ZONE]

    def __init__(self, parsed_level):
        # Deliberately disabled until the class is reworked; the code below
        # is kept for reference and never runs.
        raise NotImplementedError('This needs a Rework')
        super(Zones, self).__init__()
        slices = list()
        self._accounting_zones = list()
        self._danger_zones = list()
        for symbol in np.unique(parsed_level):
            if symbol == c.WALL:
                # Walls do not form a zone.
                continue
            elif symbol == c.DANGER_ZONE:
                self + symbol
                slices.append(h.one_hot_level(parsed_level, symbol))
                self._danger_zones.append(symbol)
            else:
                self + symbol
                slices.append(h.one_hot_level(parsed_level, symbol))
                self._accounting_zones.append(symbol)

        self._zone_slices = np.stack(slices)

    def __getitem__(self, item):
        """Index into the stacked zone slices."""
        return self._zone_slices[item]

    def add_additional_items(self, other: Union[str, List[str]]):
        """Adding zones at runtime is not allowed; always raises ``AttributeError``."""
        raise AttributeError('You are not allowed to add additional Zones in runtime.')
|
@ -1,85 +0,0 @@
|
||||
import numpy as np
|
||||
|
||||
from environments.helpers import Constants as c
|
||||
|
||||
# Multipliers for transforming coordinates to other octants.
# Each of the 8 columns belongs to one octant; rows 0-3 are passed to
# `Map._cast_light` as `xx`, `xy`, `yx` and `yy` respectively.
mult_array = np.asarray([
    [1, 0, 0, -1, -1, 0, 0, 1],
    [0, 1, -1, 0, 0, -1, 1, 0],
    [0, 1, 1, 0, 0, -1, -1, 0],
    [1, 0, 0, 1, -1, 0, 0, -1]
])
|
||||
|
||||
|
||||
class Map(object):
    """Field-of-view calculation on a 2D map array via recursive light casting.

    Lit cells are tracked in ``self.light`` by writing the current value of
    ``self.flag``. ``do_fov`` increments the flag before each run, so cells
    from earlier runs no longer compare equal to it.
    """

    def __init__(self, map_array: np.typing.ArrayLike, diamond_slope: float = 0.9):
        """
        :param map_array: 2D array; cells equal to ``c.OCCUPIED_CELL`` block light.
        :param diamond_slope: Offset used when computing the left and right
            slopes of a cell in ``_cast_light``.
        """
        self.data = map_array
        self.width, self.height = map_array.shape
        self.light = np.full_like(self.data, c.FREE_CELL)
        self.flag = c.FREE_CELL
        self.d_slope = diamond_slope

    def blocked(self, x, y):
        """Return True if (x, y) lies outside the map or holds ``c.OCCUPIED_CELL``."""
        return (x < 0 or y < 0
                or x >= self.width or y >= self.height
                or self.data[x, y] == c.OCCUPIED_CELL)

    def lit(self, x, y):
        """Return whether (x, y) carries the current flag. No bounds check is done."""
        return self.light[x, y] == self.flag

    def set_lit(self, x, y):
        """Mark (x, y) with the current flag; coordinates outside the map are ignored."""
        if 0 <= x < self.width and 0 <= y < self.height:
            self.light[x, y] = self.flag

    def _cast_light(self, cx, cy, row, start, end, radius, xx, xy, yx, yy, id):
        """Recursive lightcasting function

        Scans rows ``row`` .. ``radius`` of one octant around (cx, cy) between
        the slopes ``start`` and ``end``. ``xx, xy, yx, yy`` translate the
        octant-local offsets into map coordinates. ``id`` is only passed on
        (incremented) to child scans.
        """
        if start < end:
            return
        radius_squared = radius*radius
        new_start = None
        for j in range(row, radius+1):
            dx, dy = -j-1, -j
            blocked = False
            while dx <= 0:
                dx += 1
                # Translate the dx, dy coordinates into map coordinates:
                X, Y = cx + dx * xx + dy * xy, cy + dx * yx + dy * yy
                # l_slope and r_slope store the slopes of the left and right
                # extremities of the square_move we're considering:
                l_slope, r_slope = (dx-self.d_slope)/(dy+self.d_slope), (dx+self.d_slope)/(dy-self.d_slope)
                if start < r_slope:
                    continue
                elif end > l_slope:
                    break
                else:
                    # Our light beam is touching this square_move; light it:
                    if dx*dx + dy*dy < radius_squared:
                        self.set_lit(X, Y)
                    if blocked:
                        # we're scanning a row of blocked squares:
                        if self.blocked(X, Y):
                            new_start = r_slope
                            continue
                        else:
                            blocked = False
                            start = new_start
                    else:
                        if self.blocked(X, Y) and j < radius:
                            # This is a blocking square_move, start a child scan:
                            blocked = True
                            self._cast_light(cx, cy, j+1, start, l_slope,
                                             radius, xx, xy, yx, yy, id+1)
                            new_start = r_slope
            # Row is scanned; do next row unless last square_move was blocked:
            if blocked:
                break

    def do_fov(self, x, y, radius):
        """Calculate lit squares from the given location and radius

        Increments the flag, casts light into all 8 octants, marks the origin
        as lit and returns ``self.light``.
        """
        self.flag += 1
        for oct in range(8):
            self._cast_light(x, y, 1, 1.0, 0.0, radius,
                             mult_array[0, oct], mult_array[1, oct],
                             mult_array[2, oct], mult_array[3, oct], 0)
        self.light[x, y] = self.flag
        return self.light
|
@ -1,59 +0,0 @@
|
||||
from typing import Dict, List, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
from environments.factory.base.objects import Agent, Entity, Action
|
||||
from environments.factory.factory_dirt import DirtFactory
|
||||
from environments.factory.additional.dirt.dirt_collections import DirtPiles
|
||||
from environments.factory.additional.dirt.dirt_entity import DirtPile
|
||||
from environments.factory.base.objects import Floor
|
||||
from environments.factory.base.registers import Floors, Entities, EntityCollection
|
||||
|
||||
|
||||
class Machines(EntityCollection):
    """Entity collection for machines; adds nothing beyond ``EntityCollection``."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
|
||||
|
||||
|
||||
class Machine(Entity):
    """Machine entity; adds nothing beyond ``Entity``."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
|
||||
|
||||
|
||||
class StationaryMachinesDirtFactory(DirtFactory):
    """``DirtFactory`` variant with fixed machine coordinates.

    Most hooks are placeholders that return ``None``; the rest forward to the
    parent implementation.
    """

    def __init__(self, *args, **kwargs):
        # Set before the parent constructor runs.
        self._machine_coords = [(6, 6), (12, 13)]
        super().__init__(*args, **kwargs)

    def entities_hook(self) -> Dict[(str, Entities)]:
        """Return the parent's entities unchanged."""
        return super().entities_hook()

    def reset_hook(self) -> None:
        """Placeholder; does nothing."""
        return None

    def observations_hook(self) -> Dict[str, np.typing.ArrayLike]:
        """Placeholder; returns ``None``."""
        return None

    def actions_hook(self) -> Union[Action, List[Action]]:
        """Placeholder; returns ``None``."""
        return None

    def step_hook(self) -> (List[dict], dict):
        """Placeholder; returns ``None``."""
        return None

    def per_agent_raw_observations_hook(self, agent) -> Dict[str, np.typing.ArrayLike]:
        """Return the parent's raw observations for ``agent`` unchanged."""
        return super().per_agent_raw_observations_hook(agent)

    def per_agent_reward_hook(self, agent: Agent) -> List[dict]:
        """Return the parent's reward entries for ``agent`` unchanged."""
        return super().per_agent_reward_hook(agent)

    def pre_step_hook(self) -> None:
        """Placeholder; does nothing."""
        return None

    def post_step_hook(self) -> dict:
        """Placeholder; returns ``None``."""
        return None
|
@ -1,30 +0,0 @@
|
||||
parse_doors: True
|
||||
doors_have_area: True
|
||||
done_at_collision: False
|
||||
level_name: "rooms"
|
||||
mv_prop:
|
||||
allow_diagonal_movement: True
|
||||
allow_square_movement: True
|
||||
allow_no_op: False
|
||||
dirt_prop:
|
||||
initial_dirt_ratio: 0.35
|
||||
initial_dirt_spawn_r_var : 0.1
|
||||
clean_amount: 0.34
|
||||
max_spawn_amount: 0.1
|
||||
max_global_amount: 20
|
||||
max_local_amount: 1
|
||||
spawn_frequency: 0
|
||||
max_spawn_ratio: 0.05
|
||||
dirt_smear_amount: 0.0
|
||||
done_when_clean: True
|
||||
rewards_base:
|
||||
MOVEMENTS_VALID: 0
|
||||
MOVEMENTS_FAIL: 0
|
||||
NOOP: 0
|
||||
USE_DOOR_VALID: 0
|
||||
USE_DOOR_FAIL: 0
|
||||
COLLISION: 0
|
||||
rewards_dirt:
|
||||
CLEAN_UP_VALID: 1
|
||||
CLEAN_UP_FAIL: 0
|
||||
CLEAN_UP_LAST_PIECE: 5
|
@ -1,106 +0,0 @@
|
||||
from typing import NamedTuple, Union
|
||||
import gym
|
||||
from gym.wrappers.frame_stack import FrameStack
|
||||
|
||||
|
||||
class EnvCombiner(object):
    """Build one environment class that inherits from several environment classes."""

    def __init__(self, *envs_cls):
        """
        :param envs_cls: Classes to combine; they are keyed by ``__name__``.
        """
        self._env_dict = {env_cls.__name__: env_cls for env_cls in envs_cls}

    @staticmethod
    def combine_cls(name, *envs_cls):
        """Create a new class called ``name`` with ``envs_cls`` as its bases."""
        return type(name, envs_cls, {})

    def build(self):
        """Return the combined class.

        The class name is made of each source class name lower-cased, with
        "factory" removed and the rest capitalised, followed by "Factory".
        """
        # `str.replace` needs the replacement argument; it was missing before.
        prefix = "".join(cls_name.lower().replace("factory", "").capitalize()
                         for cls_name in self._env_dict)
        # Unpack the classes: `combine_cls` takes them as varargs, and a single
        # tuple argument would end up as an (invalid) base class.
        return self.combine_cls(f'{prefix}Factory', *self._env_dict.values())
|
||||
|
||||
|
||||
class AgentRenderOptions(object):
    """
    Available options for how agents are represented in the env observation.

    SEPERATE:
        Each agent is represented in a separate slice as Constant.OCCUPIED_CELL value (one hot).

    COMBINED:
        For all agents, the value of Constant.OCCUPIED_CELL is added to a zero-value slice at the
        agent's position (sum(SEPERATE)).

    LEVEL:
        The combined slice is added to the LEVEL slice (agents appear as obstacle / wall).

    NOT:
        The position of individual agents cannot be read from the observation.
    """

    # The string values (including the spelling 'seperate') are runtime
    # identifiers and must not be changed.
    SEPERATE = 'seperate'
    COMBINED = 'combined'
    LEVEL = 'lvl'
    NOT = 'not'
|
||||
|
||||
|
||||
class MovementProperties(NamedTuple):
    """
    Property holder; for setting multiple related parameters through a single parameter. Comes with default values.
    """

    # Allow Manhattan-style movement on the grid (move to cells connected by
    # square edges).
    allow_square_movement: bool = True

    # Allow diagonal movement on the grid (move to cells connected by square
    # corners).
    allow_diagonal_movement: bool = False

    # Allow the agent to do nothing and stay in place (NO-OP).
    allow_no_op: bool = False
|
||||
|
||||
|
||||
class ObservationProperties(NamedTuple):
    """
    Property holder; for setting multiple related parameters through a single parameter. Comes with default values.
    """

    # How to represent agents in the observation space. This may also alter
    # the observation shape.
    render_agents: AgentRenderOptions = AgentRenderOptions.SEPERATE

    # Observations are built per agent; whether the current agent should be
    # represented in its own observation.
    omit_agent_self: bool = True

    # There might be cases where you want to modify the agent's observation
    # space so that it can be used with additional observations. The
    # additional slice can be filled with any number.
    additional_agent_placeholder: Union[None, str, int] = None

    # Whether to cast shadows (make floor tiles and items hidden).
    cast_shadows: bool = True

    # Frame stacking is a method to give some temporal information to the
    # agents. This parameter controls how many "old frames" are stacked.
    frames_to_stack: int = 0

    # Radius (_r) of the agent's field of view. The agent's own grid cell is
    # not counted, so the resulting field-of-view diameter is
    # `pomdp_r * 2 + 1`. A 'pomdp_r' of 0 always returns the full env
    # (no partial observability).
    pomdp_r: int = 2

    # Whether to place a visual encoding on walkable tiles around the doors.
    # This is helpful when doors can be operated from their surrounding area,
    # so the agent can more easily learn where to choose the door option.
    # It is not necessary, however.
    indicate_door_area: bool = False

    # Whether to add the agent's normalized global position as float values
    # (2, 1) to a separate information slice. More optional information is
    # to come.
    show_global_position_info: bool = False
|
||||
|
||||
|
||||
class MarlFrameStack(gym.ObservationWrapper):
    """todo @romue404

    Observation wrapper that swaps the first two axes of the observation when
    the wrapped env is a ``FrameStack`` with more than one agent.
    """

    def __init__(self, env):
        super().__init__(env)

    def observation(self, observation):
        """Return ``observation``, with axes 0 and 1 swapped in the stacked multi-agent case."""
        stacked_multi_agent = isinstance(self.env, FrameStack) and self.env.unwrapped.n_agents > 1
        if not stacked_multi_agent:
            return observation
        # The `[0:]` slice is kept on purpose; it is applied before `swapaxes`.
        return observation[0:].swapaxes(0, 1)
|
11
modules/_template/constants.py
Normal file
@ -0,0 +1,11 @@
|
||||
TEMPLATE = '#'  # TEMPLATE _identifier. Define your own!

# Movements
# String identifiers for the four straight directions ...
NORTH = 'north'
EAST = 'east'
SOUTH = 'south'
WEST = 'west'
# ... and for the four diagonal directions.
NORTHEAST = 'north_east'
SOUTHEAST = 'south_east'
SOUTHWEST = 'south_west'
NORTHWEST = 'north_west'
|
24
modules/_template/rules.py
Normal file
@ -0,0 +1,24 @@
|
||||
from typing import List
|
||||
from environment.rules import Rule
|
||||
from environment.utils.results import TickResult, DoneResult
|
||||
|
||||
|
||||
class TemplateRule(Rule):
    """Rule skeleton to copy from; every hook is a placeholder returning ``None``."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def on_init(self, state):
        """Placeholder for the ``on_init`` hook."""
        return None

    def tick_pre_step(self, state) -> List[TickResult]:
        """Placeholder for the ``tick_pre_step`` hook."""
        return None

    def tick_step(self, state) -> List[TickResult]:
        """Placeholder for the ``tick_step`` hook."""
        return None

    def tick_post_step(self, state) -> List[TickResult]:
        """Placeholder for the ``tick_post_step`` hook."""
        return None

    def on_check_done(self, state) -> List[DoneResult]:
        """Placeholder for the ``on_check_done`` hook."""
        return None
|
26
modules/batteries/actions.py
Normal file
@ -0,0 +1,26 @@
|
||||
from typing import Union
|
||||
|
||||
from environment.actions import Action
|
||||
from environment.utils.results import ActionResult
|
||||
|
||||
from modules.batteries import constants as b, rewards as r
|
||||
from environment import constants as c
|
||||
|
||||
|
||||
class BtryCharge(Action):
    """Action that charges the acting entity's battery at a charge pod."""

    def __init__(self):
        super().__init__(b.CHARGE)

    def do(self, entity, state) -> Union[None, ActionResult]:
        """Try to charge ``entity``'s battery at the pod on its position.

        The action is invalid if no charge pod is found at ``entity.pos``;
        otherwise validity is whatever the pod's ``charge_battery`` reports.

        :return: ``ActionResult`` with reward ``r.CHARGE_VALID`` or ``r.CHARGE_FAIL``.
        """
        charge_pod = state[b.CHARGE_PODS].by_pos(entity.pos)
        if not charge_pod:
            valid = c.NOT_VALID
            message = f'{entity.name} failed to charged batteries at {entity.pos}.'
        else:
            valid = charge_pod.charge_battery(state[b.BATTERIES].by_entity(entity))
            if valid:
                message = f'{entity.name} just charged batteries at {charge_pod.name}.'
            else:
                message = f'{entity.name} failed to charged batteries at {charge_pod.name}.'
        state.print(message)

        reward = r.CHARGE_VALID if valid else r.CHARGE_FAIL
        return ActionResult(entity=entity, identifier=self._identifier, validity=valid,
                            reward=reward)
|
19
modules/batteries/constants.py
Normal file
@ -0,0 +1,19 @@
|
||||
from typing import NamedTuple, Union
|
||||
|
||||
# Battery Env
# Names under which the charge-pod and battery groups are looked up in the state.
CHARGE_PODS = 'ChargePods'
BATTERIES = 'Batteries'
BATTERY_DISCHARGED = 'DISCHARGED'
CHARGE_POD_SYMBOL = 1


# Identifier of the charge action.
CHARGE = 'do_charge_action'
|
||||
|
||||
|
||||
class BatteryProperties(NamedTuple):
    """Property holder bundling the battery-related parameters with their defaults."""
    initial_charge: float = 0.8  # charge level a battery starts with
    charge_rate: float = 0.4  # presumably the amount charged per charge action -- TODO confirm
    charge_locations: int = 20  # presumably the number of charge pods to place -- TODO confirm
    per_action_costs: Union[dict, float] = 0.02  # one cost for all actions, or a dict of per-action costs
    done_when_discharged: bool = False
    multi_charge: bool = False
|
@ -1,21 +1,31 @@
|
||||
from environments import helpers as h
|
||||
from environments.factory.base.objects import BoundingMixin, EnvObject, Entity
|
||||
from environments.factory.additional.btry.btry_util import Constants as c
|
||||
from environment.entity.mixin import BoundEntityMixin
|
||||
from environment.entity.object import EnvObject
|
||||
from environment.entity.entity import Entity
|
||||
from environment import constants as c
|
||||
from environment.utils.render import RenderEntity
|
||||
|
||||
from modules.batteries import constants as b
|
||||
|
||||
|
||||
class Battery(BoundingMixin, EnvObject):
|
||||
class Battery(BoundEntityMixin, EnvObject):
|
||||
|
||||
@property
|
||||
def is_discharged(self):
|
||||
return self.charge_level == 0
|
||||
|
||||
def __init__(self, initial_charge_level: float, *args, **kwargs):
|
||||
super(Battery, self).__init__(*args, **kwargs)
|
||||
self.charge_level = initial_charge_level
|
||||
@property
|
||||
def obs_tag(self):
|
||||
return self.name
|
||||
|
||||
@property
|
||||
def encoding(self):
|
||||
return self.charge_level
|
||||
|
||||
def __init__(self, initial_charge_level: float, owner: Entity, *args, **kwargs):
|
||||
super(Battery, self).__init__(*args, **kwargs)
|
||||
self.charge_level = initial_charge_level
|
||||
self.bind_to(owner)
|
||||
|
||||
def do_charge_action(self, amount):
|
||||
if self.charge_level < 1:
|
||||
# noinspection PyTypeChecker
|
||||
@ -24,11 +34,10 @@ class Battery(BoundingMixin, EnvObject):
|
||||
else:
|
||||
return c.NOT_VALID
|
||||
|
||||
def decharge(self, amount) -> c:
|
||||
def decharge(self, amount) -> float:
|
||||
if self.charge_level != 0:
|
||||
# noinspection PyTypeChecker
|
||||
self.charge_level = max(0, amount + self.charge_level)
|
||||
self._collection.notify_change_to_value(self)
|
||||
return c.VALID
|
||||
else:
|
||||
return c.NOT_VALID
|
||||
@ -38,12 +47,15 @@ class Battery(BoundingMixin, EnvObject):
|
||||
attr_dict.update(dict(name=self.name, belongs_to=self._bound_entity.name))
|
||||
return attr_dict
|
||||
|
||||
def render(self):
|
||||
return None
|
||||
|
||||
|
||||
class ChargePod(Entity):
|
||||
|
||||
@property
|
||||
def encoding(self):
|
||||
return c.CHARGE_POD
|
||||
return b.CHARGE_POD_SYMBOL
|
||||
|
||||
def __init__(self, *args, charge_rate: float = 0.4,
|
||||
multi_charge: bool = False, **kwargs):
|
||||
@ -58,3 +70,6 @@ class ChargePod(Entity):
|
||||
return c.NOT_VALID
|
||||
valid = battery.do_charge_action(self.charge_rate)
|
||||
return valid
|
||||
|
||||
def render(self):
|
||||
return RenderEntity(b.CHARGE_PODS, self.pos)
|
36
modules/batteries/groups.py
Normal file
@ -0,0 +1,36 @@
|
||||
from environment.groups.env_objects import EnvObjects
|
||||
from environment.groups.mixins import PositionMixin, HasBoundedMixin
|
||||
from modules.batteries.entitites import ChargePod, Battery
|
||||
|
||||
|
||||
class Batteries(HasBoundedMixin, EnvObjects):
    """Group of ``Battery`` objects, one per agent once spawned."""

    _entity = Battery
    is_blocking_light: bool = False
    can_collide: bool = False

    @property
    def obs_tag(self):
        """Name of this group's class."""
        return type(self).__name__

    @property
    def obs_pairs(self):
        """List of ``(battery.name, battery)`` pairs."""
        pairs = []
        for battery in self:
            pairs.append((battery.name, battery))
        return pairs

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def spawn_batteries(self, agents, initial_charge_level):
        """Create one battery per agent with ``initial_charge_level`` and add them."""
        spawned = [self._entity(initial_charge_level, agent) for agent in agents]
        self.add_items(spawned)
|
||||
|
||||
|
||||
class ChargePods(PositionMixin, EnvObjects):
    """Group of ``ChargePod`` entities; constructor and repr forward to the parents."""

    _entity = ChargePod

    def __init__(self, *args, **kwargs):
        super(ChargePods, self).__init__(*args, **kwargs)

    def __repr__(self):
        return super(ChargePods, self).__repr__()
|
3
modules/batteries/rewards.py
Normal file
@ -0,0 +1,3 @@
|
||||
# Rewards of the battery module.
CHARGE_VALID: float = 0.1  # reward for a valid charge action
CHARGE_FAIL: float = -0.1  # reward for a failed charge action
BATTERY_DISCHARGED: float = -1.0  # reward when a battery is discharged
|
61
modules/batteries/rules.py
Normal file
@ -0,0 +1,61 @@
|
||||
from typing import List, Union
|
||||
from environment.rules import Rule
|
||||
from environment.utils.results import TickResult, DoneResult
|
||||
|
||||
from environment import constants as c
|
||||
from modules.batteries import constants as b, rewards as r
|
||||
|
||||
|
||||
class Btry(Rule):
    """Rule that spawns batteries, applies per-step costs and reports discharged ones."""

    def __init__(self, initial_charge: float = 0.8, per_action_costs: Union[dict, float] = 0.02):
        """
        :param initial_charge: Charge level every spawned battery starts with.
        :param per_action_costs: One cost for all actions, or a dict keyed by
            the agent's last action.
        """
        super().__init__()
        self.per_action_costs = per_action_costs
        self.initial_charge = initial_charge

    def on_init(self, state):
        """Spawn one battery per agent."""
        state[b.BATTERIES].spawn_batteries(state[c.AGENT], self.initial_charge)

    def tick_pre_step(self, state) -> List[TickResult]:
        """Nothing to do before the step; returns ``None``."""
        return None

    def tick_step(self, state) -> List[TickResult]:
        """Call ``decharge`` on every agent's battery with this step's cost.

        :return: One valid zero-reward ``TickResult`` per agent.
        """
        # Decharge
        batteries = state[b.BATTERIES]
        results = []

        for agent in state[c.AGENT]:
            costs = self.per_action_costs
            energy_consumption = costs[agent.step_result()['action']] if isinstance(costs, dict) else costs

            batteries.by_entity(agent).decharge(energy_consumption)

            results.append(TickResult(self.name, reward=0, entity=agent, validity=c.VALID))

        return results

    def tick_post_step(self, state) -> List[TickResult]:
        """Report every discharged battery with the ``r.BATTERY_DISCHARGED`` reward."""
        results = []
        for btry in state[b.BATTERIES]:
            if not btry.is_discharged:
                continue
            state.print(f'Battery of {btry.bound_entity.name} is discharged!')
            discharged_result = TickResult(self.name, entity=btry.bound_entity,
                                           reward=r.BATTERY_DISCHARGED, validity=c.VALID)
            results.append(discharged_result)
        return results
|
||||
|
||||
|
||||
class BtryDoneAtDischarge(Rule):
    """Done-condition that triggers as soon as any battery is discharged."""

    def __init__(self):
        super().__init__()

    def on_check_done(self, state) -> List[DoneResult]:
        """Return a single ``DoneResult``: valid if any battery is discharged."""
        any_discharged = any(battery.is_discharged for battery in state[b.BATTERIES])
        if not any_discharged:
            return [DoneResult(self.name, validity=c.NOT_VALID, reward=0)]
        return [DoneResult(self.name, validity=c.VALID, reward=r.BATTERY_DISCHARGED)]
|
||||
|
36
modules/clean_up/actions.py
Normal file
@ -0,0 +1,36 @@
|
||||
from typing import Union
|
||||
|
||||
from environment.actions import Action
|
||||
from environment.utils.results import ActionResult
|
||||
|
||||
from modules.clean_up import constants as d, rewards as r
|
||||
|
||||
from environment import constants as c
|
||||
|
||||
|
||||
class CleanUp(Action):
    """Action that removes dirt at the acting entity's position."""

    def __init__(self):
        super().__init__(d.CLEAN_UP)

    def do(self, entity, state) -> Union[None, ActionResult]:
        """Clean the dirt pile under ``entity``.

        The pile's amount is reduced by the dirt group's ``clean_amount``; a
        pile that would drop to zero or below is deleted. Without dirt at the
        position the action is invalid.

        :return: ``ActionResult`` describing validity and reward.
        """
        dirt = state[d.DIRT].by_pos(entity.pos)
        if not dirt:
            valid = c.NOT_VALID
            state.print(f'{entity.name} just tried to clean up some dirt at {entity.pos}, but failed.')
            reward = r.CLEAN_UP_FAIL
            identifier = d.CLEAN_UP_FAIL
        else:
            remaining = dirt.amount - state[d.DIRT].clean_amount

            if remaining > 0:
                dirt.set_new_amount(max(remaining, c.VALUE_FREE_CELL))
            else:
                state[d.DIRT].delete_env_object(dirt)
            valid = c.VALID
            state.print(f'{entity.name} did just clean up some dirt at {entity.pos}.')
            reward = r.CLEAN_UP_VALID
            identifier = d.CLEAN_UP

        return ActionResult(identifier=identifier, validity=valid, reward=reward, entity=entity)
|
7
modules/clean_up/constants.py
Normal file
@ -0,0 +1,7 @@
|
||||
# Name under which the dirt group is looked up in the state.
DIRT = 'DirtPiles'

# Identifier of the clean-up action.
CLEAN_UP = 'do_cleanup_action'

# Result identifiers of the clean_up module.
CLEAN_UP_VALID = 'clean_up_valid'
CLEAN_UP_FAIL = 'clean_up_fail'
CLEAN_UP_ALL = 'all_cleaned_up'
|
Before Width: | Height: | Size: 38 KiB After Width: | Height: | Size: 38 KiB |