new rules, new spawn logic, small fixes, default and narrow corridor debugged

This commit is contained in:
Steffen Illium
2023-11-09 17:50:20 +01:00
parent 9b9c6e0385
commit 06a5130b25
67 changed files with 768 additions and 921 deletions

View File

@@ -1,4 +1,4 @@
from .actions import BtryCharge
from .entitites import Pod, Battery
from .entitites import ChargePod, Battery
from .groups import ChargePods, Batteries
from .rules import DoneAtBatteryDischarge, BatteryDecharge

View File

@@ -6,6 +6,7 @@ from marl_factory_grid.utils.results import ActionResult
from marl_factory_grid.modules.batteries import constants as b
from marl_factory_grid.environment import constants as c
from marl_factory_grid.utils import helpers as h
class BtryCharge(Action):
@@ -14,8 +15,8 @@ class BtryCharge(Action):
super().__init__(b.ACTION_CHARGE)
def do(self, entity, state) -> Union[None, ActionResult]:
if charge_pod := state[b.CHARGE_PODS].by_pos(entity.pos):
valid = charge_pod.charge_battery(state[b.BATTERIES].by_entity(entity))
if charge_pod := h.get_first(state[b.CHARGE_PODS].by_pos(entity.pos)):
valid = h.get_first(charge_pod.charge_battery(state[b.BATTERIES].by_entity(entity)))
if valid:
state.print(f'{entity.name} just charged batteries at {charge_pod.name}.')
else:

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.9 KiB

View File

@@ -50,7 +50,7 @@ class Battery(_Object):
return summary
class Pod(Entity):
class ChargePod(Entity):
@property
def encoding(self):
@@ -58,7 +58,7 @@ class Pod(Entity):
def __init__(self, *args, charge_rate: float = 0.4,
multi_charge: bool = False, **kwargs):
super(Pod, self).__init__(*args, **kwargs)
super(ChargePod, self).__init__(*args, **kwargs)
self.charge_rate = charge_rate
self.multi_charge = multi_charge

View File

@@ -1,52 +1,36 @@
from typing import Union, List, Tuple
from marl_factory_grid.environment import constants as c
from marl_factory_grid.environment.groups.collection import Collection
from marl_factory_grid.modules.batteries.entitites import Pod, Battery
from marl_factory_grid.modules.batteries.entitites import ChargePod, Battery
from marl_factory_grid.utils.results import Result
class Batteries(Collection):
_entity = Battery
@property
def var_is_blocking_light(self):
return False
@property
def var_can_collide(self):
return False
@property
def var_can_move(self):
return False
@property
def var_has_position(self):
return False
@property
def var_can_be_bound(self):
return True
var_has_position = False
var_can_be_bound = True
@property
def obs_tag(self):
return self.__class__.__name__
def __init__(self, *args, **kwargs):
super(Batteries, self).__init__(*args, **kwargs)
def __init__(self, size, initial_charge_level: float=1.0, *args, **kwargs):
super(Batteries, self).__init__(size, *args, **kwargs)
self.initial_charge_level = initial_charge_level
def spawn(self, agents, initial_charge_level):
batteries = [self._entity(initial_charge_level, agent) for _, agent in enumerate(agents)]
def spawn(self, coords_or_quantity: Union[int, List[Tuple[(int, int)]]], agents, *entity_args, **entity_kwargs):
batteries = [self._entity(self.initial_charge_level, agent) for _, agent in enumerate(agents)]
self.add_items(batteries)
# def spawn(self, coords_or_quantity: Union[int, List[Tuple[(int, int)]]], *entity_args): has no position
# agents = entity_args[0]
# initial_charge_level = entity_args[1]
# batteries = [self._entity(initial_charge_level, agent) for _, agent in enumerate(agents)]
# self.add_items(batteries)
def trigger_spawn(self, state, *entity_args, coords_or_quantity=None, **entity_kwargs):
self.spawn(0, state[c.AGENT])
return Result(identifier=f'{self.name}_spawn', validity=c.VALID, value=len(self))
class ChargePods(Collection):
_entity = Pod
_entity = ChargePod
def __init__(self, *args, **kwargs):
super(ChargePods, self).__init__(*args, **kwargs)

View File

@@ -49,10 +49,6 @@ class BatteryDecharge(Rule):
self.per_action_costs = per_action_costs
self.initial_charge = initial_charge
def on_init(self, state, lvl_map): # on reset?
assert len(state[c.AGENT]), "There are no agents, did you already spawn them?"
state[b.BATTERIES].spawn(state[c.AGENT], self.initial_charge)
def tick_step(self, state) -> List[TickResult]:
# Decharge
batteries = state[b.BATTERIES]
@@ -66,7 +62,7 @@ class BatteryDecharge(Rule):
batteries.by_entity(agent).decharge(energy_consumption)
results.append(TickResult(self.name, reward=0, entity=agent, validity=c.VALID))
results.append(TickResult(self.name, entity=agent, validity=c.VALID))
return results
@@ -82,13 +78,13 @@ class BatteryDecharge(Rule):
if self.paralyze_agents_on_discharge:
btry.bound_entity.paralyze(self.name)
results.append(
TickResult("Paralyzed", entity=btry.bound_entity, reward=0, validity=c.VALID)
TickResult("Paralyzed", entity=btry.bound_entity, validity=c.VALID)
)
state.print(f'{btry.bound_entity.name} has just been paralyzed!')
if btry.bound_entity.var_is_paralyzed and not btry.is_discharged:
btry.bound_entity.de_paralyze(self.name)
results.append(
TickResult("De-Paralyzed", entity=btry.bound_entity, reward=0, validity=c.VALID)
TickResult("De-Paralyzed", entity=btry.bound_entity, validity=c.VALID)
)
state.print(f'{btry.bound_entity.name} has just been de-paralyzed!')
return results
@@ -132,7 +128,7 @@ class DoneAtBatteryDischarge(BatteryDecharge):
if any_discharged or all_discharged:
return [DoneResult(self.name, validity=c.VALID, reward=self.reward_discharge_done)]
else:
return [DoneResult(self.name, validity=c.NOT_VALID, reward=0)]
return [DoneResult(self.name, validity=c.NOT_VALID)]
class SpawnChargePods(Rule):
@@ -155,7 +151,7 @@ class SpawnChargePods(Rule):
def on_init(self, state, lvl_map):
pod_collection = state[b.CHARGE_PODS]
empty_positions = state.entities.empty_positions()
empty_positions = state.entities.empty_positions
pods = pod_collection.from_coordinates(empty_positions, entity_kwargs=dict(
multi_charge=self.multi_charge, charge_rate=self.charge_rate)
)

View File

@@ -1,4 +1,4 @@
from .actions import CleanUp
from .entitites import DirtPile
from .groups import DirtPiles
from .rules import SpawnDirt, EntitiesSmearDirtOnMove, DoneOnAllDirtCleaned
from .rules import EntitiesSmearDirtOnMove, DoneOnAllDirtCleaned

View File

@@ -7,22 +7,6 @@ from marl_factory_grid.modules.clean_up import constants as d
class DirtPile(Entity):
@property
def var_can_collide(self):
return False
@property
def var_can_move(self):
return False
@property
def var_is_blocking_light(self):
return False
@property
def var_has_position(self):
return True
@property
def amount(self):
return self._amount

View File

@@ -9,68 +9,55 @@ from marl_factory_grid.modules.clean_up.entitites import DirtPile
class DirtPiles(Collection):
_entity = DirtPile
@property
def var_is_blocking_light(self):
return False
var_is_blocking_light = False
var_can_collide = False
var_can_move = False
var_has_position = True
@property
def var_can_collide(self):
return False
@property
def var_can_move(self):
return False
@property
def var_has_position(self):
return True
@property
def amount(self):
def global_amount(self):
return sum([dirt.amount for dirt in self])
def __init__(self, *args,
max_local_amount=5,
clean_amount=1,
max_global_amount: int = 20, **kwargs):
max_global_amount: int = 20,
coords_or_quantity=10,
initial_amount=2,
amount_var=0.2,
n_var=0.2,
**kwargs):
super(DirtPiles, self).__init__(*args, **kwargs)
self.amount_var = amount_var
self.n_var = n_var
self.clean_amount = clean_amount
self.max_global_amount = max_global_amount
self.max_local_amount = max_local_amount
self.coords_or_quantity = coords_or_quantity
self.initial_amount = initial_amount
def spawn(self, coords_or_quantity: Union[int, List[Tuple[(int, int)]]], *entity_args):
amount_s = entity_args[0]
def trigger_spawn(self, state, coords_or_quantity=0, amount=0) -> [Result]:
coords_or_quantity = coords_or_quantity if coords_or_quantity else self.coords_or_quantity
n_new = int(abs(coords_or_quantity + (state.rng.uniform(-self.n_var, self.n_var))))
n_new = state.get_n_random_free_positions(n_new)
amounts = [amount if amount else (self.initial_amount + state.rng.uniform(-self.amount_var, self.amount_var))
for _ in range(coords_or_quantity)]
spawn_counter = 0
for idx, pos in enumerate(coords_or_quantity):
if not self.amount > self.max_global_amount:
amount = amount_s[idx] if isinstance(amount_s, list) else amount_s
for idx, (pos, a) in enumerate(zip(n_new, amounts)):
if not self.global_amount > self.max_global_amount:
if dirt := self.by_pos(pos):
dirt = next(dirt.iter())
new_value = dirt.amount + amount
new_value = dirt.amount + a
dirt.set_new_amount(new_value)
else:
dirt = DirtPile(pos, amount=amount)
self.add_item(dirt)
super().spawn([pos], amount=a)
spawn_counter += 1
else:
return Result(identifier=f'{self.name}_spawn', validity=c.NOT_VALID, reward=0,
value=spawn_counter)
return Result(identifier=f'{self.name}_spawn', validity=c.VALID, reward=0, value=spawn_counter)
return Result(identifier=f'{self.name}_spawn', validity=c.NOT_VALID, value=spawn_counter)
def trigger_dirt_spawn(self, n, amount, state, n_var=0.2, amount_var=0.2) -> Result:
free_for_dirt = [x for x in state.entities.floorlist if len(state.entities.pos_dict[x]) == 0 or (
len(state.entities.pos_dict[x]) >= 1 and isinstance(next(y for y in x), DirtPile))]
# free_for_dirt = [x for x in state[c.FLOOR]
# if len(x.guests) == 0 or (
# len(x.guests) == 1 and
# isinstance(next(y for y in x.guests), DirtPile))]
state.rng.shuffle(free_for_dirt)
new_spawn = int(abs(n + (state.rng.uniform(-n_var, n_var))))
new_amount_s = [abs(amount + (amount*state.rng.uniform(-amount_var, amount_var))) for _ in range(new_spawn)]
n_dirty_positions = free_for_dirt[:new_spawn]
return self.spawn(n_dirty_positions, new_amount_s)
return Result(identifier=f'{self.name}_spawn', validity=c.VALID, value=spawn_counter)
def __repr__(self):
s = super(DirtPiles, self).__repr__()
return f'{s[:-1]}, {self.amount})'
return f'{s[:-1]}, {self.global_amount}]'

View File

@@ -22,58 +22,37 @@ class DoneOnAllDirtCleaned(Rule):
def on_check_done(self, state) -> [DoneResult]:
if len(state[d.DIRT]) == 0 and state.curr_step:
return [DoneResult(validity=c.VALID, identifier=self.name, reward=self.reward)]
return [DoneResult(validity=c.NOT_VALID, identifier=self.name, reward=0)]
return [DoneResult(validity=c.NOT_VALID, identifier=self.name)]
class SpawnDirt(Rule):
class RespawnDirt(Rule):
def __init__(self, initial_n: int = 5, initial_amount: float = 1.3,
respawn_n: int = 3, respawn_amount: float = 0.8,
n_var: float = 0.2, amount_var: float = 0.2, spawn_freq: int = 15):
def __init__(self, respawn_freq: int = 15, respawn_n: int = 5, respawn_amount: float = 1.0):
"""
Defines the spawn pattern of initial and additional 'Dirt'-entities.
First chooses positions, then tries to spawn dirt until 'respawn_n' or the maximal global amount is reached.
If there is already some, it is topped up to min(max_local_amount, amount).
:type spawn_freq: int
:parameter spawn_freq: In which frequency should this Rule try to spawn new 'Dirt'?
:type respawn_freq: int
:parameter respawn_freq: In which frequency should this Rule try to spawn new 'Dirt'?
:type respawn_n: int
:parameter respawn_n: How many respawn positions are considered.
:type initial_n: int
:parameter initial_n: How much initial positions are considered.
:type amount_var: float
:parameter amount_var: Variance of amount to spawn.
:type n_var: float
:parameter n_var: Variance of n to spawn.
:type respawn_amount: float
:parameter respawn_amount: Defines how much dirt 'amount' is placed every 'respawn_freq' ticks.
:type initial_amount: float
:parameter initial_amount: Defines how much dirt 'amount' is initially placed.
"""
super().__init__()
self.amount_var = amount_var
self.n_var = n_var
self.respawn_amount = respawn_amount
self.respawn_n = respawn_n
self.initial_amount = initial_amount
self.initial_n = initial_n
self.spawn_freq = spawn_freq
self._next_dirt_spawn = spawn_freq
def on_init(self, state, lvl_map) -> str:
result = state[d.DIRT].trigger_dirt_spawn(self.initial_n, self.initial_amount, state,
n_var=self.n_var, amount_var=self.amount_var)
state.print(f'Initial Dirt was spawned on: {[x.pos for x in state[d.DIRT]]}')
return result
self.respawn_amount = respawn_amount
self.respawn_freq = respawn_freq
self._next_dirt_spawn = respawn_freq
def tick_step(self, state):
collection = state[d.DIRT]
if self._next_dirt_spawn < 0:
pass # No DirtPile Spawn
elif not self._next_dirt_spawn:
result = [state[d.DIRT].trigger_dirt_spawn(self.respawn_n, self.respawn_amount, state,
n_var=self.n_var, amount_var=self.amount_var)]
self._next_dirt_spawn = self.spawn_freq
result = [collection.trigger_spawn(state, coords_or_quantity=self.respawn_n, amount=self.respawn_amount)]
self._next_dirt_spawn = self.respawn_freq
else:
self._next_dirt_spawn -= 1
result = []
@@ -99,8 +78,8 @@ class EntitiesSmearDirtOnMove(Rule):
for entity in state.moving_entites:
if is_move(entity.state.identifier) and entity.state.validity == c.VALID:
if old_pos_dirt := state[d.DIRT].by_pos(entity.last_pos):
old_pos_dirt = next(iter(old_pos_dirt))
if smeared_dirt := round(old_pos_dirt.amount * self.smear_ratio, 2):
if state[d.DIRT].spawn(entity.pos, amount=smeared_dirt):
results.append(TickResult(identifier=self.name, entity=entity,
reward=0, validity=c.VALID))
results.append(TickResult(identifier=self.name, entity=entity, validity=c.VALID))
return results

View File

@@ -1,4 +1,7 @@
from .actions import DestAction
from .entitites import Destination
from .groups import Destinations
from .rules import DoneAtDestinationReachAll, SpawnDestinations
from .rules import (DoneAtDestinationReachAll,
DoneAtDestinationReachAny,
SpawnDestinationsPerAgent,
DestinationReachReward)

View File

@@ -9,30 +9,6 @@ from marl_factory_grid.utils.utility_classes import RenderEntity
class Destination(Entity):
@property
def var_can_move(self):
return False
@property
def var_can_collide(self):
return False
@property
def var_has_position(self):
return True
@property
def var_is_blocking_pos(self):
return False
@property
def var_is_blocking_light(self):
return False
@property
def var_can_be_bound(self):
return True
def was_reached(self):
return self._was_reached

View File

@@ -7,37 +7,14 @@ from marl_factory_grid.modules.destinations import constants as d
class Destinations(Collection):
_entity = Destination
@property
def var_is_blocking_light(self):
return False
@property
def var_can_collide(self):
return False
@property
def var_can_move(self):
return False
@property
def var_has_position(self):
return True
var_is_blocking_light = False
var_can_collide = False
var_can_move = False
var_has_position = True
var_can_be_bound = True
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def __repr__(self):
return super(Destinations, self).__repr__()
@staticmethod
def trigger_destination_spawn(n_dests, state):
coordinates = state.entities.floorlist[:n_dests]
if destinations := [Destination(pos) for pos in coordinates]:
state[d.DESTINATION].add_items(destinations)
state.print(f'{n_dests} new destinations have been spawned')
return c.VALID
else:
state.print('No Destiantions are spawning, limit is reached.')
return c.NOT_VALID

View File

@@ -2,8 +2,8 @@ import ast
from random import shuffle
from typing import List, Dict, Tuple
import marl_factory_grid.modules.destinations.constants
from marl_factory_grid.environment.rules import Rule
from marl_factory_grid.utils import helpers as h
from marl_factory_grid.utils.results import TickResult, DoneResult
from marl_factory_grid.environment import constants as c
@@ -54,7 +54,7 @@ class DoneAtDestinationReachAll(DestinationReachReward):
"""
This rule triggers and sets the done flag if ALL Destinations have been reached.
:type reward_at_done: object
:type reward_at_done: float
:param reward_at_done: Specifies the reward agents get when all destinations are reached.
:type dest_reach_reward: float
:param dest_reach_reward: Specify the reward, agents get when reaching a single destination.
@@ -65,7 +65,7 @@ class DoneAtDestinationReachAll(DestinationReachReward):
def on_check_done(self, state) -> List[DoneResult]:
if all(x.was_reached() for x in state[d.DESTINATION]):
return [DoneResult(self.name, validity=c.VALID, reward=self.reward)]
return [DoneResult(self.name, validity=c.NOT_VALID, reward=0)]
return [DoneResult(self.name, validity=c.NOT_VALID)]
class DoneAtDestinationReachAny(DestinationReachReward):
@@ -75,7 +75,7 @@ class DoneAtDestinationReachAny(DestinationReachReward):
This rule triggers and sets the done flag if ANY Destination has been reached.
!!! IMPORTANT: 'reward_at_done' is shared between the agents; 'dest_reach_reward' is bound to a specific one.
:type reward_at_done: object
:type reward_at_done: float
:param reward_at_done: Specifies the reward all agents get when any destination has been reached.
Default {d.REWARD_DEST_DONE}
:type dest_reach_reward: float
@@ -87,67 +87,29 @@ class DoneAtDestinationReachAny(DestinationReachReward):
def on_check_done(self, state) -> List[DoneResult]:
if any(x.was_reached() for x in state[d.DESTINATION]):
return [DoneResult(self.name, validity=c.VALID, reward=marl_factory_grid.modules.destinations.constants.REWARD_DEST_REACHED)]
return [DoneResult(self.name, validity=c.VALID, reward=d.REWARD_DEST_REACHED)]
return []
class SpawnDestinations(Rule):
def __init__(self, n_dests: int = 1, spawn_mode: str = d.MODE_GROUPED):
f"""
Defines how destinations are initially spawned and respawned in addition.
!!! This rule introduces no kind of reward or Env.-Done condition!
:type n_dests: int
:param n_dests: How many destiantions should be maintained (and initally spawnewd) on the map?
:type spawn_mode: str
:param spawn_mode: One of {d.SPAWN_MODES}. {d.MODE_GROUPED}: Always wait for all Dstiantions do be gone,
then respawn after the given time. {d.MODE_SINGLE}: Just spawn every destination,
that has been reached, after the given time
"""
super(SpawnDestinations, self).__init__()
self.n_dests = n_dests
self.spawn_mode = spawn_mode
def on_init(self, state, lvl_map):
# noinspection PyAttributeOutsideInit
state[d.DESTINATION].trigger_destination_spawn(self.n_dests, state)
pass
def tick_pre_step(self, state) -> List[TickResult]:
pass
def tick_step(self, state) -> List[TickResult]:
if n_dest_spawn := max(0, self.n_dests - len(state[d.DESTINATION])):
if self.spawn_mode == d.MODE_GROUPED and n_dest_spawn == self.n_dests:
validity = state[d.DESTINATION].trigger_destination_spawn(n_dest_spawn, state)
return [TickResult(self.name, validity=validity, entity=None, value=n_dest_spawn)]
elif self.spawn_mode == d.MODE_SINGLE and n_dest_spawn:
validity = state[d.DESTINATION].trigger_destination_spawn(n_dest_spawn, state)
return [TickResult(self.name, validity=validity, entity=None, value=n_dest_spawn)]
else:
pass
class SpawnDestinationsPerAgent(Rule):
def __init__(self, per_agent_positions: Dict[str, List[Tuple[int, int]]]):
def __init__(self, coords_or_quantity: Dict[str, List[Tuple[int, int]]]):
"""
Special rule that spawns destinations, each bound to a single agent, on a fixed set of positions.
Useful for introducing specialists, etc.
!!! This rule does not introduce any reward or done condition.
:type per_agent_positions: Dict[str, List[Tuple[int, int]]
:param per_agent_positions: Please provide a dictionary with agent names as keys; and a list of possible
:type coords_or_quantity: Dict[str, List[Tuple[int, int]]]
:param coords_or_quantity: Please provide a dictionary with agent names as keys and a list of possible
destination coords as value. Example: {Wolfgang: [(0, 0), (1, 1), ...]}
"""
super(Rule, self).__init__()
self.per_agent_positions = {key: [ast.literal_eval(x) for x in val] for key, val in per_agent_positions.items()}
self.per_agent_positions = {key: [ast.literal_eval(x) for x in val] for key, val in coords_or_quantity.items()}
def on_init(self, state, lvl_map):
for (agent_name, position_list) in self.per_agent_positions.items():
agent = next(x for x in state[c.AGENT] if agent_name in x.name) # Fixme: Ugly AF
agent = h.get_first(state[c.AGENT], lambda x: agent_name in x.name)
assert agent
position_list = position_list.copy()
shuffle(position_list)
while True:
@@ -155,7 +117,7 @@ class SpawnDestinationsPerAgent(Rule):
pos = position_list.pop()
except IndexError:
print(f"Could not spawn Destinations at: {self.per_agent_positions[agent_name]}")
print(f'Check your agent palcement: {state[c.AGENT]} ... Exit ...')
print(f'Check your agent placement: {state[c.AGENT]} ... Exit ...')
exit(9999)
if (not pos == agent.pos) and (not state[d.DESTINATION].by_pos(pos)):
destination = Destination(pos, bind_to=agent)

View File

@@ -1,4 +1,5 @@
from marl_factory_grid.environment.entity.entity import Entity
from marl_factory_grid.utils import Result
from marl_factory_grid.utils.utility_classes import RenderEntity
from marl_factory_grid.environment import constants as c
@@ -41,21 +42,6 @@ class Door(Entity):
def str_state(self):
return 'open' if self.is_open else 'closed'
def __init__(self, *args, closed_on_init=True, auto_close_interval=10, **kwargs):
self._status = d.STATE_CLOSED
super(Door, self).__init__(*args, **kwargs)
self.auto_close_interval = auto_close_interval
self.time_to_close = 0
if not closed_on_init:
self._open()
else:
self._close()
def summarize_state(self):
state_dict = super().summarize_state()
state_dict.update(state=str(self.str_state), time_to_close=int(self.time_to_close))
return state_dict
@property
def is_closed(self):
return self._status == d.STATE_CLOSED
@@ -68,6 +54,25 @@ class Door(Entity):
def status(self):
return self._status
@property
def time_to_close(self):
return self._time_to_close
def __init__(self, *args, closed_on_init=True, auto_close_interval=10, **kwargs):
self._status = d.STATE_CLOSED
super(Door, self).__init__(*args, **kwargs)
self._auto_close_interval = auto_close_interval
self._time_to_close = 0
if not closed_on_init:
self._open()
else:
self._close()
def summarize_state(self):
state_dict = super().summarize_state()
state_dict.update(state=str(self.str_state), time_to_close=self.time_to_close)
return state_dict
def render(self):
name, state = 'door_open' if self.is_open else 'door_closed', 'blank'
return RenderEntity(name, self.pos, 1, 'none', state, self.u_int + 1)
@@ -80,18 +85,35 @@ class Door(Entity):
return c.VALID
def tick(self, state):
if self.is_open and len(state.entities.pos_dict[self.pos]) == 2 and self.time_to_close:
self.time_to_close -= 1
return c.NOT_VALID
elif self.is_open and not self.time_to_close and len(state.entities.pos_dict[self.pos]) == 2:
self.use()
return c.VALID
# Check if no entity is standing in the door
if len(state.entities.pos_dict[self.pos]) <= 2:
if self.is_open and self.time_to_close:
self._decrement_timer()
return Result(f"{d.DOOR}_tick", c.VALID, entity=self)
elif self.is_open and not self.time_to_close:
self.use()
return Result(f"{d.DOOR}_closed", c.VALID, entity=self)
else:
# No one is in door, but it is closed... Nothing to do....
return None
else:
return c.NOT_VALID
# Entity is standing in the door, reset timer
self._reset_timer()
return Result(f"{d.DOOR}_reset", c.VALID, entity=self)
def _open(self):
self._status = d.STATE_OPEN
self.time_to_close = self.auto_close_interval
self._reset_timer()
return True
def _close(self):
self._status = d.STATE_CLOSED
return True
def _decrement_timer(self):
self._time_to_close -= 1
return True
def _reset_timer(self):
self._time_to_close = self._auto_close_interval
return True

View File

@@ -18,8 +18,10 @@ class Doors(Collection):
super(Doors, self).__init__(*args, can_collide=True, **kwargs)
def tick_doors(self, state):
result_dict = dict()
results = list()
for door in self:
did_tick = door.tick(state)
result_dict.update({door.name: did_tick})
return result_dict
tick_result = door.tick(state)
if tick_result is not None:
results.append(tick_result)
# TODO: Should return a single Result object, not a plain list of per-door results.
return results

View File

@@ -19,10 +19,10 @@ class DoorAutoClose(Rule):
def tick_step(self, state):
if doors := state[d.DOORS]:
doors_tick_result = doors.tick_doors(state)
doors_that_ticked = [key for key, val in doors_tick_result.items() if val]
state.print(f'{doors_that_ticked} were auto-closed'
if doors_that_ticked else 'No Doors were auto-closed')
doors_tick_results = doors.tick_doors(state)
doors_that_closed = [x.entity.name for x in doors_tick_results if 'closed' in x.identifier]
door_str = doors_that_closed if doors_that_closed else "No Doors"
state.print(f'{door_str} were auto-closed')
return [TickResult(self.name, validity=c.VALID, value=1)]
state.print('There are no doors, but you loaded the corresponding Module')
return []

View File

@@ -1,4 +1,3 @@
from .actions import ItemAction
from .entitites import Item, DropOffLocation
from .groups import DropOffLocations, Items, Inventory, Inventories
from .rules import ItemRules

View File

@@ -29,7 +29,7 @@ class ItemAction(Action):
elif items := state[i.ITEM].by_pos(entity.pos):
item = items[0]
item.change_parent_collection(inventory)
item.set_pos_to(c.VALUE_NO_POS)
item.set_pos(c.VALUE_NO_POS)
state.print(f'{entity.name} just picked up an item at {entity.pos}')
return ActionResult(entity=entity, identifier=self._identifier, validity=c.VALID, reward=r.PICK_UP_VALID)

View File

@@ -8,16 +8,11 @@ from marl_factory_grid.modules.items import constants as i
class Item(Entity):
@property
def var_can_collide(self):
return False
def render(self):
return RenderEntity(i.ITEM, self.pos) if self.pos != c.VALUE_NO_POS else None
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self._auto_despawn = -1
@property
def auto_despawn(self):
@@ -31,9 +26,6 @@ class Item(Entity):
def set_auto_despawn(self, auto_despawn):
self._auto_despawn = auto_despawn
def set_pos_to(self, no_pos):
self._pos = no_pos
def summarize_state(self) -> dict:
super_summarization = super(Item, self).summarize_state()
super_summarization.update(dict(auto_despawn=self.auto_despawn))
@@ -42,21 +34,6 @@ class Item(Entity):
class DropOffLocation(Entity):
@property
def var_can_collide(self):
return False
@property
def var_can_move(self):
return False
@property
def var_is_blocking_light(self):
return False
@property
def var_has_position(self):
return True
def render(self):
return RenderEntity(i.DROP_OFF, self.pos)

View File

@@ -8,6 +8,7 @@ from marl_factory_grid.environment.groups.objects import _Objects
from marl_factory_grid.environment.groups.mixins import IsBoundMixin
from marl_factory_grid.environment.entity.agent import Agent
from marl_factory_grid.modules.items.entitites import Item, DropOffLocation
from marl_factory_grid.utils.results import Result
class Items(Collection):
@@ -15,7 +16,7 @@ class Items(Collection):
@property
def var_has_position(self):
return False
return True
@property
def is_blocking_light(self):
@@ -28,18 +29,18 @@ class Items(Collection):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
@staticmethod
def trigger_item_spawn(state, n_items, spawn_frequency):
if item_to_spawns := max(0, (n_items - len(state[i.ITEM]))):
position_list = [x for x in state.entities.floorlist]
shuffle(position_list)
position_list = state.entities.floorlist[:item_to_spawns]
state[i.ITEM].spawn(position_list)
state.print(f'{item_to_spawns} new items have been spawned; next spawn in {spawn_frequency}')
return len(position_list)
def trigger_spawn(self, state, *entity_args, coords_or_quantity=None, **entity_kwargs) -> [Result]:
coords_or_quantity = coords_or_quantity if coords_or_quantity else self._coords_or_quantity
assert coords_or_quantity
if item_to_spawns := max(0, (coords_or_quantity - len(self))):
return super().trigger_spawn(state,
*entity_args,
coords_or_quantity=item_to_spawns,
**entity_kwargs)
else:
state.print('No Items are spawning, limit is reached.')
return 0
return Result(identifier=f'{self.name}_spawn', validity=c.NOT_VALID, value=coords_or_quantity)
class Inventory(IsBoundMixin, Collection):
@@ -76,9 +77,15 @@ class Inventory(IsBoundMixin, Collection):
class Inventories(_Objects):
_entity = Inventory
var_can_move = False
var_has_position = False
symbol = None
@property
def var_can_move(self):
return False
def spawn_rule(self):
return {c.SPAWN_ENTITY_RULE: dict(collection=self, coords_or_quantity=None)}
def __init__(self, size: int, *args, **kwargs):
super(Inventories, self).__init__(*args, **kwargs)
@@ -86,10 +93,12 @@ class Inventories(_Objects):
self._obs = None
self._lazy_eval_transforms = []
def spawn(self, agents):
inventories = [self._entity(agent, self.size, )
for _, agent in enumerate(agents)]
self.add_items(inventories)
def spawn(self, agents, *args, **kwargs):
self.add_items([self._entity(agent, self.size, *args, **kwargs) for _, agent in enumerate(agents)])
return [Result(identifier=f'{self.name}_spawn', validity=c.VALID, value=len(self))]
def trigger_spawn(self, state, *args, **kwargs) -> [Result]:
return self.spawn(state[c.AGENT], *args, **kwargs)
def idx_by_entity(self, entity):
try:
@@ -106,9 +115,6 @@ class Inventories(_Objects):
def summarize_states(self, **kwargs):
return [val.summarize_states(**kwargs) for key, val in self.items()]
@staticmethod
def trigger_inventory_spawn(state):
state[i.INVENTORY].spawn(state[c.AGENT])
class DropOffLocations(Collection):
@@ -135,7 +141,7 @@ class DropOffLocations(Collection):
@staticmethod
def trigger_drop_off_location_spawn(state, n_locations):
empty_positions = state.entities.empty_positions()[:n_locations]
empty_positions = state.entities.empty_positions[:n_locations]
do_entites = state[i.DROP_OFF]
drop_offs = [DropOffLocation(pos) for pos in empty_positions]
do_entites.add_items(drop_offs)

View File

@@ -6,52 +6,28 @@ from marl_factory_grid.utils.results import TickResult
from marl_factory_grid.modules.items import constants as i
class ItemRules(Rule):
class RespawnItems(Rule):
def __init__(self, n_items: int = 5, spawn_frequency: int = 15,
n_locations: int = 5, max_dropoff_storage_size: int = 0):
def __init__(self, n_items: int = 5, respawn_freq: int = 15, n_locations: int = 5):
super().__init__()
self.spawn_frequency = spawn_frequency
self._next_item_spawn = spawn_frequency
self.spawn_frequency = respawn_freq
self._next_item_spawn = respawn_freq
self.n_items = n_items
self.max_dropoff_storage_size = max_dropoff_storage_size
self.n_locations = n_locations
def on_init(self, state, lvl_map):
state[i.DROP_OFF].trigger_drop_off_location_spawn(state, self.n_locations)
self._next_item_spawn = self.spawn_frequency
state[i.INVENTORY].trigger_inventory_spawn(state)
state[i.ITEM].trigger_item_spawn(state, self.n_items, self.spawn_frequency)
def tick_step(self, state):
for item in list(state[i.ITEM].values()):
if item.auto_despawn >= 1:
item.set_auto_despawn(item.auto_despawn - 1)
elif not item.auto_despawn:
state[i.ITEM].delete_env_object(item)
else:
pass
if not self._next_item_spawn:
state[i.ITEM].trigger_item_spawn(state, self.n_items, self.spawn_frequency)
state[i.ITEM].trigger_spawn(state, self.n_items, self.spawn_frequency)
else:
self._next_item_spawn = max(0, self._next_item_spawn - 1)
return []
def tick_post_step(self, state) -> List[TickResult]:
for item in list(state[i.ITEM].values()):
if item.auto_despawn >= 1:
item.set_auto_despawn(item.auto_despawn-1)
elif not item.auto_despawn:
state[i.ITEM].delete_env_object(item)
else:
pass
if not self._next_item_spawn:
if spawned_items := state[i.ITEM].trigger_item_spawn(state, self.n_items, self.spawn_frequency):
return [TickResult(self.name, validity=c.VALID, value=spawned_items, entity=None)]
if spawned_items := state[i.ITEM].trigger_spawn(state, self.n_items, self.spawn_frequency):
return [TickResult(self.name, validity=c.VALID, value=spawned_items.value)]
else:
return [TickResult(self.name, validity=c.NOT_VALID, value=0, entity=None)]
return [TickResult(self.name, validity=c.NOT_VALID, value=0)]
else:
self._next_item_spawn = max(0, self._next_item_spawn-1)
return []

View File

@@ -1,3 +1,2 @@
from .entitites import Machine
from .groups import Machines
from .rules import MachineRule

View File

@@ -5,6 +5,7 @@ from marl_factory_grid.utils.results import ActionResult
from marl_factory_grid.modules.machines import constants as m, rewards as r
from marl_factory_grid.environment import constants as c
from marl_factory_grid.utils import helpers as h
class MachineAction(Action):
@@ -13,13 +14,10 @@ class MachineAction(Action):
super().__init__(m.MACHINE_ACTION)
def do(self, entity, state) -> Union[None, ActionResult]:
    """Maintain the machine located at the entity's position.

    :param entity: The acting entity; its ``pos`` selects the machine.
    :param state: Game state; ``state[m.MACHINES]`` is queried by position.
    :return: An ``ActionResult`` whose validity is the result of ``machine.maintain()``
             (reward ``r.MAINTAIN_VALID`` if truthy, else ``r.MAINTAIN_FAIL``), or a
             ``c.NOT_VALID`` result with ``r.MAINTAIN_FAIL`` when no machine is found.
    """
    # Single lookup through `h.get_first`. The body used to have a second, body-less
    # `if machine := ...by_pos(...)` line directly above this one.
    machine = h.get_first(state[m.MACHINES].by_pos(entity.pos))
    if not machine:
        return ActionResult(entity=entity, identifier=self._identifier,
                            validity=c.NOT_VALID, reward=r.MAINTAIN_FAIL)

    valid = machine.maintain()
    reward = r.MAINTAIN_VALID if valid else r.MAINTAIN_FAIL
    return ActionResult(entity=entity, identifier=self._identifier, validity=valid, reward=reward)

View File

@@ -8,22 +8,6 @@ from . import constants as m
class Machine(Entity):
@property
def var_can_collide(self):
    """Collision flag of a machine; constant ``False``."""
    return False
@property
def var_can_move(self):
    """Movement flag of a machine; constant ``False``."""
    return False
@property
def var_is_blocking_light(self):
    """Light-blocking flag of a machine; constant ``False``."""
    return False
@property
def var_has_position(self):
    """Position flag of a machine; constant ``True``."""
    return True
@property
def encoding(self):
    """Return the entry of ``_encodings`` that belongs to the current ``status``."""
    encodings = self._encodings
    return encodings[self.status]
@@ -46,12 +30,12 @@ class Machine(Entity):
else:
return c.NOT_VALID
def tick(self):
def tick(self, state):
# if self.status == m.STATE_MAINTAIN and any([c.AGENT in x.name for x in self.tile.guests]):
if self.status == m.STATE_MAINTAIN and any([c.AGENT in x.name for x in self.state.entities.pos_dict[self.pos]]):
return TickResult(identifier=self.name, validity=c.VALID, reward=0, entity=self)
if self.status == m.STATE_MAINTAIN and any([c.AGENT in x.name for x in state.entities.pos_dict[self.pos]]):
return TickResult(identifier=self.name, validity=c.VALID, entity=self)
# elif self.status == m.STATE_MAINTAIN and not any([c.AGENT in x.name for x in self.tile.guests]):
elif self.status == m.STATE_MAINTAIN and not any([c.AGENT in x.name for x in self.state.entities.pos_dict[self.pos]]):
elif self.status == m.STATE_MAINTAIN and not any([c.AGENT in x.name for x in state.entities.pos_dict[self.pos]]):
self.status = m.STATE_WORK
self.reset_counter()
return None

View File

@@ -1,28 +0,0 @@
from typing import List
from marl_factory_grid.environment.rules import Rule
from marl_factory_grid.utils.results import TickResult, DoneResult
from marl_factory_grid.environment import constants as c
from marl_factory_grid.modules.machines import constants as m
from marl_factory_grid.modules.machines.entitites import Machine
class MachineRule(Rule):
    """Rule that spawns machines when the environment is initialised.

    All tick hooks and the done check are no-ops that report nothing.
    """

    def __init__(self, n_machines: int = 2):
        """:param n_machines: Stored as ``n_machines``."""
        super(MachineRule, self).__init__()
        self.n_machines = n_machines

    def on_init(self, state, lvl_map):
        """Spawn machines on the positions returned by ``state.entities.empty_positions()``."""
        # NOTE(review): `n_machines` is not applied here, so every returned position is
        # handed to `spawn` -- confirm whether the list should be limited.
        state[m.MACHINES].spawn(state.entities.empty_positions())

    # The hooks below are annotated as returning lists, but used to fall through with
    # `pass` and hand back None. They now return the empty list the annotation promises.
    def tick_pre_step(self, state) -> List[TickResult]:
        return []

    def tick_step(self, state) -> List[TickResult]:
        return []

    def tick_post_step(self, state) -> List[TickResult]:
        return []

    def on_check_done(self, state) -> List[DoneResult]:
        return []

View File

@@ -1,48 +1,35 @@
from random import shuffle
import networkx as nx
import numpy as np
from ...algorithms.static.utils import points_to_graph
from ...environment import constants as c
from ...environment.actions import Action, ALL_BASEACTIONS
from ...environment.entity.entity import Entity
from ..doors import constants as do
from ..maintenance import constants as mi
from ...utils.helpers import MOVEMAP
from ...utils.utility_classes import RenderEntity
from ...utils.states import Gamestate
from ...utils import helpers as h
from ...utils.utility_classes import RenderEntity, Floor
from ..doors import DoorUse
class Maintainer(Entity):
@property
def var_can_collide(self):
    """Collision flag of a maintainer; constant ``True``."""
    return True
@property
def var_can_move(self):
    """Movement flag of a maintainer; constant ``False``."""
    return False
@property
def var_is_blocking_light(self):
    """Light-blocking flag of a maintainer; constant ``False``."""
    return False
@property
def var_has_position(self):
    """Position flag of a maintainer; constant ``True``."""
    return True
def __init__(self, objective: str, action: Action, *args, **kwargs):
    """Set up a maintainer that services entities of one collection.

    :param objective: Key of the collection in the game state whose entities are visited.
    :param action: Action executed when an objective is found at the maintainer's position.
    :param args: Forwarded to the parent constructor.
    :param kwargs: Forwarded to the parent constructor.
    """
    super().__init__(*args, **kwargs)
    self.action = action
    # Base actions plus door usage. The list used to be assigned twice in a row, the
    # first time without `DoorUse`.
    self.actions = [x() for x in ALL_BASEACTIONS] + [DoorUse()]
    self.objective = objective
    self._path = None
    self._next = []
    self._last = []
    self._last_serviced = 'None'
    # The graph is built lazily by `get_move_action`. The eager
    # `points_to_graph(state.entities.floorlist)` call read a `state` that this
    # signature does not provide.
    self._floortile_graph = None
def tick(self, state):
if found_objective := state[self.objective].by_pos(self.pos):
if found_objective := h.get_first(state[self.objective].by_pos(self.pos)):
if found_objective.name != self._last_serviced:
self.action.do(self, state)
self._last_serviced = found_objective.name
@@ -54,24 +41,27 @@ class Maintainer(Entity):
return action.do(self, state)
def get_move_action(self, state) -> Action:
if not self._floortile_graph:
state.print("Generating Floorgraph....")
self._floortile_graph = points_to_graph(state.entities.floorlist)
if self._path is None or not self._path:
if not self._next:
self._next = list(state[self.objective].values())
self._next = list(state[self.objective].values()) + [Floor(*state.random_free_position)]
shuffle(self._next)
self._last = []
self._last.append(self._next.pop())
state.print("Calculating shortest path....")
self._path = self.calculate_route(self._last[-1])
if door := self._door_is_close(state):
if door.is_closed:
# Translate the action_object to an integer to have the same output as any other model
action = do.ACTION_DOOR_USE
else:
action = self._predict_move(state)
if door := self._closed_door_in_path(state):
state.print(f"{self} found {door} that is closed. Attempt to open.")
# Translate the action_object to an integer to have the same output as any other model
action = do.ACTION_DOOR_USE
else:
action = self._predict_move(state)
# Translate the action_object to an integer to have the same output as any other model
try:
action_obj = next(x for x in self.actions if x.name == action)
action_obj = h.get_first(self.actions, lambda x: x.name == action)
except (StopIteration, UnboundLocalError):
print('Will not happen')
raise EnvironmentError
@@ -81,11 +71,10 @@ class Maintainer(Entity):
route = nx.shortest_path(self._floortile_graph, self.pos, entity.pos)
return route[1:]
def _door_is_close(self, state):
    """Return the first door-named entity on a neighbouring position, or ``None``.

    Entities are taken from ``state.entities.pos_dict`` for every position yielded by
    ``state.entities.neighboring_positions``; an entity counts as a door when
    ``do.DOOR`` occurs in its name.
    """
    # NOTE(review): the lookup is anchored at `self.state.pos`, while the rest of the
    # class uses `self.pos` -- confirm which position is meant.
    nearby_positions = state.entities.neighboring_positions(self.state.pos)
    # `next` with a default replaces the `except StopIteration:` clause, which had no body.
    door = next((y for x in nearby_positions for y in state.entities.pos_dict[x] if do.DOOR in y.name),
                None)
    if door is not None:
        # Only announce a door once one was actually found.
        state.print("Found a door that is close.")
    return door
def _closed_door_in_path(self, state):
    """Look for a closed door on the first position of the current path.

    :return: ``None`` when there is no path; otherwise the result of ``h.get_first``
             over the doors at ``self._path[0]``, filtered by ``is_closed``.
    """
    if not self._path:
        return None
    doors_ahead = state[do.DOORS].by_pos(self._path[0])
    return h.get_first(doors_ahead, lambda x: x.is_closed)
def _predict_move(self, state):
@@ -96,7 +85,7 @@ class Maintainer(Entity):
next_pos = self._path.pop(0)
diff = np.subtract(next_pos, self.pos)
# Retrieve action based on the pos dif (like in: What do I have to do to get there?)
action = next(action for action, pos_diff in MOVEMAP.items() if np.all(diff == pos_diff))
action = next(action for action, pos_diff in h.MOVEMAP.items() if np.all(diff == pos_diff))
return action
def render(self):

View File

@@ -1,4 +1,4 @@
from typing import Union, List, Tuple
from typing import Union, List, Tuple, Dict
from marl_factory_grid.environment.groups.collection import Collection
from .entities import Maintainer
@@ -10,25 +10,21 @@ from ...utils.states import Gamestate
class Maintainers(Collection):
    """Collection holding ``Maintainer`` entities."""
    _entity = Maintainer

    # Each flag is declared once, as a plain class attribute. The class used to define
    # every flag a second time as a read-only property.
    var_can_collide = True
    var_can_move = True
    var_is_blocking_light = False
    var_has_position = True

    def __init__(self, size, *args, coords_or_quantity: Union[None, int] = None,
                 spawnrule: Union[None, Dict[str, dict]] = None,
                 **kwargs):
        """Store the spawn configuration of the collection.

        :param size: Stored as ``size``.
        :param coords_or_quantity: Stored as ``_coords_or_quantity``.
        :param spawnrule: Stored as ``_spawnrule``.
        :param args: Forwarded to the parent constructor.
        :param kwargs: Forwarded to the parent constructor.
        """
        # This is the only constructor left. A second, pass-through `__init__` defined
        # further down used to replace it silently.
        # NOTE(review): `super(Collection, self)` starts the lookup behind `Collection`,
        # so `Collection.__init__` itself is not run -- confirm this is intended.
        super(Collection, self).__init__(*args, **kwargs)
        self._coords_or_quantity = coords_or_quantity
        self.size = size
        self._spawnrule = spawnrule

    def spawn(self, coords_or_quantity: Union[int, List[Tuple[(int, int)]]], *entity_args):
        """Create one maintainer per entry of ``coords_or_quantity`` and add them all.

        Every maintainer gets ``mc.MACHINES`` as objective and a fresh ``MachineAction``.
        ``entity_args`` is accepted but not used.
        """
        # NOTE(review): the argument is iterated, so a bare int would fail here.
        # Entities are added once. The body used to call `add_items` twice, the first
        # time passing a `state` argument that `Maintainer.__init__` no longer accepts.
        self.add_items([self._entity(mc.MACHINES, MachineAction(), pos) for pos in coords_or_quantity])

View File

@@ -4,29 +4,24 @@ from marl_factory_grid.utils.results import TickResult, DoneResult
from marl_factory_grid.environment import constants as c
from . import rewards as r
from . import constants as M
from marl_factory_grid.utils.states import Gamestate
class MoveMaintainers(Rule):
    """Rule that lets every maintainer act once per step."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to the parent constructor."""
        # Single constructor. An earlier `__init__` called `super(MaintenanceRule, self)`
        # and set `n_maintainer`, but this one replaced it, so the `on_init` spawn that
        # read `self.n_maintainer` could not work. Both were dropped together with the
        # body-less `class MaintenanceRule` header.
        # NOTE(review): maintainers are no longer spawned by this rule -- confirm that
        # spawning is configured elsewhere.
        super().__init__(*args, **kwargs)

    def tick_step(self, state) -> List[TickResult]:
        """Call ``tick(state)`` on every maintainer in ``state[M.MAINTAINERS]``.

        :return: Always an empty list.
        """
        for maintainer in state[M.MAINTAINERS]:
            maintainer.tick(state)
        # Todo: Return a Result Object.
        return []
class DoneAtMaintainerCollision(Rule):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def on_check_done(self, state) -> List[DoneResult]:
agents = list(state[c.AGENT].values())

View File

@@ -1,8 +1,8 @@
from random import choices, choice
from . import constants as z, Zone
from .. import Destination
from ..destinations import constants as d
from ... import Destination
from ...environment.rules import Rule
from ...environment import constants as c