mirror of
https://github.com/illiumst/marl-factory-grid.git
synced 2025-05-22 14:56:43 +02:00
Merge remote-tracking branch 'origin/main'
This commit is contained in:
commit
0aa7afbe7e
@ -58,7 +58,7 @@ General:
|
||||
individual_rewards: true
|
||||
level_name: large
|
||||
pomdp_r: 3
|
||||
verbose: True
|
||||
verbose: false
|
||||
tests: false
|
||||
|
||||
Rules:
|
||||
|
@ -11,6 +11,10 @@ from marl_factory_grid.utils import helpers as h
|
||||
class Charge(Action):
|
||||
|
||||
def __init__(self):
|
||||
"""
|
||||
Checks if a charge pod is present at the entity's position.
|
||||
If found, it attempts to charge the battery using the charge pod.
|
||||
"""
|
||||
super().__init__(b.ACTION_CHARGE, b.REWARD_CHARGE_VALID, b.Reward_CHARGE_FAIL)
|
||||
|
||||
def do(self, entity, state) -> Union[None, ActionResult]:
|
||||
|
@ -13,7 +13,12 @@ class Battery(Object):
|
||||
return True
|
||||
|
||||
@property
|
||||
def is_discharged(self):
|
||||
def is_discharged(self) -> bool:
|
||||
"""
|
||||
Indicates whether the Battery's charge level is at 0 or not.
|
||||
|
||||
:return: Whether this battery is empty.
|
||||
"""
|
||||
return self.charge_level == 0
|
||||
|
||||
@property
|
||||
@ -24,12 +29,27 @@ class Battery(Object):
|
||||
def encoding(self):
|
||||
return self.charge_level
|
||||
|
||||
def __init__(self, initial_charge_level: float, owner: Entity, *args, **kwargs):
|
||||
def __init__(self, initial_charge_level, owner, *args, **kwargs):
|
||||
"""
|
||||
Represents a battery entity in the environment that can be bound to an agent and charged at chargepods.
|
||||
|
||||
:param initial_charge_level: The current charge level of the battery, ranging from 0 to 1.
|
||||
:type initial_charge_level: float
|
||||
|
||||
:param owner: The entity to which the battery is bound.
|
||||
:type owner: Entity
|
||||
"""
|
||||
super(Battery, self).__init__(*args, **kwargs)
|
||||
self.charge_level = initial_charge_level
|
||||
self.bind_to(owner)
|
||||
|
||||
def do_charge_action(self, amount):
|
||||
def do_charge_action(self, amount) -> bool:
|
||||
"""
|
||||
Updates the Battery's charge level accordingly.
|
||||
|
||||
:param amount: Amount added to the Battery's charge level.
|
||||
:returns: whether the battery could be charged. if not, it was already fully charged.
|
||||
"""
|
||||
if self.charge_level < 1:
|
||||
# noinspection PyTypeChecker
|
||||
self.charge_level = min(1, amount + self.charge_level)
|
||||
@ -37,7 +57,10 @@ class Battery(Object):
|
||||
else:
|
||||
return c.NOT_VALID
|
||||
|
||||
def decharge(self, amount) -> float:
|
||||
def decharge(self, amount) -> bool:
|
||||
"""
|
||||
Decreases the charge value of a battery. Currently only triggered by the battery-decharge rule.
|
||||
"""
|
||||
if self.charge_level != 0:
|
||||
# noinspection PyTypeChecker
|
||||
self.charge_level = max(0, amount + self.charge_level)
|
||||
@ -57,13 +80,27 @@ class ChargePod(Entity):
|
||||
def encoding(self):
|
||||
return b.CHARGE_POD_SYMBOL
|
||||
|
||||
def __init__(self, *args, charge_rate: float = 0.4,
|
||||
multi_charge: bool = False, **kwargs):
|
||||
def __init__(self, *args, charge_rate: float = 0.4, multi_charge: bool = False, **kwargs):
|
||||
"""
|
||||
Represents a charging pod for batteries in the environment.
|
||||
|
||||
:param charge_rate: The rate at which the charging pod charges batteries. Default is 0.4.
|
||||
:type charge_rate: float
|
||||
|
||||
:param multi_charge: Indicates whether the charging pod supports charging multiple batteries simultaneously.
|
||||
Default is False.
|
||||
:type multi_charge: bool
|
||||
"""
|
||||
super(ChargePod, self).__init__(*args, **kwargs)
|
||||
self.charge_rate = charge_rate
|
||||
self.multi_charge = multi_charge
|
||||
|
||||
def charge_battery(self, entity, state):
|
||||
def charge_battery(self, entity, state) -> bool:
|
||||
"""
|
||||
Checks whether the battery can be charged. If so, triggers the charge action.
|
||||
|
||||
:returns: whether the action was successful (valid) or not.
|
||||
"""
|
||||
battery = state[b.BATTERIES].by_entity(entity)
|
||||
if battery.charge_level >= 1.0:
|
||||
return c.NOT_VALID
|
||||
@ -76,6 +113,6 @@ class ChargePod(Entity):
|
||||
return RenderEntity(b.CHARGE_PODS, self.pos)
|
||||
|
||||
def summarize_state(self) -> dict:
|
||||
summery = super().summarize_state()
|
||||
summery.update(charge_rate=self.charge_rate)
|
||||
return summery
|
||||
summary = super().summarize_state()
|
||||
summary.update(charge_rate=self.charge_rate)
|
||||
return summary
|
||||
|
@ -9,18 +9,32 @@ from marl_factory_grid.utils.results import Result
|
||||
class Batteries(Collection):
|
||||
_entity = Battery
|
||||
|
||||
var_has_position = False
|
||||
var_can_be_bound = True
|
||||
@property
|
||||
def var_has_position(self):
|
||||
return False
|
||||
|
||||
def __init__(self, size, initial_charge_level: float=1.0, *args, **kwargs):
|
||||
@property
|
||||
def var_can_be_bound(self):
|
||||
return True
|
||||
|
||||
def __init__(self, size, initial_charge_level=1.0, *args, **kwargs):
|
||||
"""
|
||||
A collection of batteries that can spawn batteries.
|
||||
|
||||
:param size: The maximum allowed size of the collection. Ensures that the collection does not exceed this size.
|
||||
:type size: int
|
||||
|
||||
:param initial_charge_level: The initial charge level of the battery.
|
||||
:type initial_charge_level: float
|
||||
"""
|
||||
super(Batteries, self).__init__(size, *args, **kwargs)
|
||||
self.initial_charge_level = initial_charge_level
|
||||
|
||||
def spawn(self, coords_or_quantity: Union[int, List[Tuple[(int, int)]]], agents, *entity_args, **entity_kwargs):
|
||||
batteries = [self._entity(self.initial_charge_level, agent) for _, agent in enumerate(agents)]
|
||||
def spawn(self, coords_or_quantity: Union[int, List[Tuple[(int, int)]]], *entity_args, **entity_kwargs):
|
||||
batteries = [self._entity(self.initial_charge_level, agent) for _, agent in enumerate(entity_args[0])]
|
||||
self.add_items(batteries)
|
||||
|
||||
def trigger_spawn(self, state, *entity_args, coords_or_quantity=None, **entity_kwargs):
|
||||
def trigger_spawn(self, state, *entity_args, coords_or_quantity=None, **entity_kwargs):
|
||||
self.spawn(0, state[c.AGENT])
|
||||
return Result(identifier=f'{self.name}_spawn', validity=c.VALID, value=len(self))
|
||||
|
||||
@ -29,6 +43,9 @@ class ChargePods(Collection):
|
||||
_entity = ChargePod
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
"""
|
||||
A collection of charge pods in the environment.
|
||||
"""
|
||||
super(ChargePods, self).__init__(*args, **kwargs)
|
||||
|
||||
def __repr__(self):
|
||||
|
@ -24,16 +24,16 @@ class BatteryDecharge(Rule):
|
||||
2. float: each action "costs" the same.
|
||||
----
|
||||
!!! Does not introduce any Env.-Done condition.
|
||||
!!! Batterys can only be charged if agent posses the "Charge(Action.
|
||||
!!! Batterys can only be charged if there are "Charpods" and they are spawned!
|
||||
!!! Batteries can only be charged if the agent possesses the "Charge" Action.
|
||||
!!! Batteries can only be charged if there are "Charge Pods" and they are spawned!
|
||||
----
|
||||
:type initial_charge: float
|
||||
:param initial_charge: How much juice they have.
|
||||
:type battery_discharge_reward: float
|
||||
:param battery_discharge_reward: Negativ reward, when agents let their batters discharge.
|
||||
:param battery_discharge_reward: Negative reward, when agents let their batteries discharge.
|
||||
Default: {b.REWARD_BATTERY_DISCHARGED}
|
||||
:type battery_failed_reward: float
|
||||
:param battery_failed_reward: Negativ reward, when agent cannot charge, but do (overcharge, not on station).
|
||||
:param battery_failed_reward: Negative reward, when agent cannot charge, but do (overcharge, not on station).
|
||||
Default: {b.Reward_CHARGE_FAIL}
|
||||
:type battery_charge_reward: float
|
||||
:param battery_charge_reward: Positive reward, when agent actually charge their battery.
|
||||
@ -48,7 +48,6 @@ class BatteryDecharge(Rule):
|
||||
self.initial_charge = initial_charge
|
||||
|
||||
def tick_step(self, state) -> List[TickResult]:
|
||||
# Decharge
|
||||
batteries = state[b.BATTERIES]
|
||||
results = []
|
||||
|
||||
@ -104,13 +103,13 @@ class DoneAtBatteryDischarge(BatteryDecharge):
|
||||
:type initial_charge: float
|
||||
:param initial_charge: How much juice they have.
|
||||
:type reward_discharge_done: float
|
||||
:param reward_discharge_done: Global negativ reward, when agents let their batters discharge.
|
||||
:param reward_discharge_done: Global negative reward, when agents let their batteries discharge.
|
||||
Default: {b.REWARD_BATTERY_DISCHARGED}
|
||||
:type battery_discharge_reward: float
|
||||
:param battery_discharge_reward: Negativ reward, when agents let their batters discharge.
|
||||
:param battery_discharge_reward: Negative reward, when agents let their batteries discharge.
|
||||
Default: {b.REWARD_BATTERY_DISCHARGED}
|
||||
:type battery_failed_reward: float
|
||||
:param battery_failed_reward: Negativ reward, when agent cannot charge, but do (overcharge, not on station).
|
||||
:param battery_failed_reward: Negative reward, when agent cannot charge, but do (overcharge, not on station).
|
||||
Default: {b.Reward_CHARGE_FAIL}
|
||||
:type battery_charge_reward: float
|
||||
:param battery_charge_reward: Positive reward, when agent actually charge their battery.
|
||||
|
@ -3,7 +3,7 @@ from typing import Union
|
||||
from marl_factory_grid.environment.actions import Action
|
||||
from marl_factory_grid.utils.results import ActionResult
|
||||
|
||||
from marl_factory_grid.modules.clean_up import constants as d, rewards as r
|
||||
from marl_factory_grid.modules.clean_up import constants as d
|
||||
|
||||
from marl_factory_grid.environment import constants as c
|
||||
|
||||
@ -11,7 +11,10 @@ from marl_factory_grid.environment import constants as c
|
||||
class Clean(Action):
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(d.CLEAN_UP, r.CLEAN_UP_VALID, r.CLEAN_UP_FAIL)
|
||||
"""
|
||||
Attempts to reduce dirt amount on entity's position.
|
||||
"""
|
||||
super().__init__(d.CLEAN_UP, d.REWARD_CLEAN_UP_VALID, d.REWARD_CLEAN_UP_FAIL)
|
||||
|
||||
def do(self, entity, state) -> Union[None, ActionResult]:
|
||||
if dirt := next((x for x in state.entities.pos_dict[entity.pos] if "dirt" in x.name.lower()), None):
|
||||
|
@ -5,3 +5,7 @@ CLEAN_UP = 'do_cleanup_action'
|
||||
CLEAN_UP_VALID = 'clean_up_valid'
|
||||
CLEAN_UP_FAIL = 'clean_up_fail'
|
||||
CLEAN_UP_ALL = 'all_cleaned_up'
|
||||
|
||||
REWARD_CLEAN_UP_VALID: float = 0.5
|
||||
REWARD_CLEAN_UP_FAIL: float = -0.1
|
||||
REWARD_CLEAN_UP_ALL: float = 4.5
|
||||
|
@ -7,19 +7,33 @@ class DirtPile(Entity):
|
||||
|
||||
@property
|
||||
def amount(self):
|
||||
"""
|
||||
Internal Usage
|
||||
"""
|
||||
return self._amount
|
||||
|
||||
@property
|
||||
def encoding(self):
|
||||
# Edit this if you want items to be drawn in the ops differently
|
||||
return self._amount
|
||||
|
||||
def __init__(self, *args, amount=2, max_local_amount=5, **kwargs):
|
||||
"""
|
||||
Represents a pile of dirt at a specific position in the environment.
|
||||
|
||||
:param amount: The amount of dirt in the pile.
|
||||
:type amount: float
|
||||
|
||||
:param max_local_amount: The maximum amount of dirt allowed in a single pile at one position.
|
||||
:type max_local_amount: float
|
||||
"""
|
||||
super(DirtPile, self).__init__(*args, **kwargs)
|
||||
self._amount = amount
|
||||
self.max_local_amount = max_local_amount
|
||||
|
||||
def set_new_amount(self, amount):
|
||||
"""
|
||||
Internal Usage
|
||||
"""
|
||||
self._amount = min(amount, self.max_local_amount)
|
||||
|
||||
def summarize_state(self):
|
||||
|
@ -7,24 +7,56 @@ from marl_factory_grid.utils.results import Result
|
||||
class DirtPiles(Collection):
|
||||
_entity = DirtPile
|
||||
|
||||
var_is_blocking_light = False
|
||||
var_can_collide = False
|
||||
var_can_move = False
|
||||
var_has_position = True
|
||||
@property
|
||||
def var_is_blocking_light(self):
|
||||
return False
|
||||
|
||||
@property
|
||||
def global_amount(self):
|
||||
def var_can_collide(self):
|
||||
return False
|
||||
|
||||
@property
|
||||
def var_can_move(self):
|
||||
return False
|
||||
|
||||
@property
|
||||
def var_has_position(self):
|
||||
return True
|
||||
|
||||
@property
|
||||
def global_amount(self) -> float:
|
||||
"""
|
||||
Internal Usage
|
||||
"""
|
||||
return sum([dirt.amount for dirt in self])
|
||||
|
||||
def __init__(self, *args,
|
||||
max_local_amount=5,
|
||||
clean_amount=1,
|
||||
max_global_amount: int = 20,
|
||||
coords_or_quantity=10,
|
||||
initial_amount=2,
|
||||
amount_var=0.2,
|
||||
n_var=0.2,
|
||||
**kwargs):
|
||||
def __init__(self, *args, max_local_amount=5, clean_amount=1, max_global_amount: int = 20, coords_or_quantity=10,
|
||||
initial_amount=2, amount_var=0.2, n_var=0.2, **kwargs):
|
||||
"""
|
||||
A Collection of dirt piles that triggers their spawn.
|
||||
|
||||
:param max_local_amount: The maximum amount of dirt allowed in a single pile at one position.
|
||||
:type max_local_amount: int
|
||||
|
||||
:param clean_amount: The amount of dirt removed by a single cleaning action.
|
||||
:type clean_amount: int
|
||||
|
||||
:param max_global_amount: The maximum total amount of dirt allowed in the environment.
|
||||
:type max_global_amount: int
|
||||
|
||||
:param coords_or_quantity: Determines whether to use coordinates or quantity when triggering dirt pile spawn.
|
||||
:type coords_or_quantity: Union[Tuple[int, int], int]
|
||||
|
||||
:param initial_amount: The initial amount of dirt in each newly spawned pile.
|
||||
:type initial_amount: int
|
||||
|
||||
:param amount_var: The variability in the initial amount of dirt in each pile.
|
||||
:type amount_var: float
|
||||
|
||||
:param n_var: The variability in the number of new dirt piles spawned.
|
||||
:type n_var: float
|
||||
|
||||
"""
|
||||
super(DirtPiles, self).__init__(*args, **kwargs)
|
||||
self.amount_var = amount_var
|
||||
self.n_var = n_var
|
||||
|
@ -1,3 +0,0 @@
|
||||
CLEAN_UP_VALID: float = 0.5
|
||||
CLEAN_UP_FAIL: float = -0.1
|
||||
CLEAN_UP_ALL: float = 4.5
|
@ -1,4 +1,4 @@
|
||||
from marl_factory_grid.modules.clean_up import constants as d, rewards as r
|
||||
from marl_factory_grid.modules.clean_up import constants as d
|
||||
from marl_factory_grid.environment import constants as c
|
||||
|
||||
from marl_factory_grid.environment.rules import Rule
|
||||
@ -9,9 +9,9 @@ from marl_factory_grid.utils.results import DoneResult
|
||||
|
||||
class DoneOnAllDirtCleaned(Rule):
|
||||
|
||||
def __init__(self, reward: float = r.CLEAN_UP_ALL):
|
||||
def __init__(self, reward: float = d.REWARD_CLEAN_UP_ALL):
|
||||
"""
|
||||
Defines a 'Done'-condition which tirggers, when there is no more 'Dirt' in the environment.
|
||||
Defines a 'Done'-condition which triggers, when there is no more 'Dirt' in the environment.
|
||||
|
||||
:type reward: float
|
||||
:parameter reward: Given reward when condition triggers.
|
||||
@ -29,9 +29,9 @@ class RespawnDirt(Rule):
|
||||
|
||||
def __init__(self, respawn_freq: int = 15, respawn_n: int = 5, respawn_amount: float = 1.0):
|
||||
"""
|
||||
Defines the spawn pattern of intial and additional 'Dirt'-entitites.
|
||||
First chooses positions, then trys to spawn dirt until 'respawn_n' or the maximal global amount is reached.
|
||||
If there is allready some, it is topped up to min(max_local_amount, amount).
|
||||
Defines the spawn pattern of initial and additional 'Dirt'-entities.
|
||||
First chooses positions, then tries to spawn dirt until 'respawn_n' or the maximal global amount is reached.
|
||||
If there is already some, it is topped up to min(max_local_amount, amount).
|
||||
|
||||
:type respawn_freq: int
|
||||
:parameter respawn_freq: In which frequency should this Rule try to spawn new 'Dirt'?
|
||||
|
@ -1,16 +1,17 @@
|
||||
from typing import Union
|
||||
|
||||
import marl_factory_grid.modules.destinations.constants
|
||||
from marl_factory_grid.environment.actions import Action
|
||||
from marl_factory_grid.utils.results import ActionResult
|
||||
|
||||
from marl_factory_grid.modules.destinations import constants as d
|
||||
from marl_factory_grid.environment import constants as c
|
||||
from marl_factory_grid.environment.actions import Action
|
||||
from marl_factory_grid.modules.destinations import constants as d
|
||||
from marl_factory_grid.utils.results import ActionResult
|
||||
|
||||
|
||||
class DestAction(Action):
|
||||
|
||||
def __init__(self):
|
||||
"""
|
||||
Attempts to wait at destination.
|
||||
"""
|
||||
super().__init__(d.DESTINATION, d.REWARD_WAIT_VALID, d.REWARD_WAIT_FAIL)
|
||||
|
||||
def do(self, entity, state) -> Union[None, ActionResult]:
|
||||
|
@ -9,24 +9,37 @@ from marl_factory_grid.utils.utility_classes import RenderEntity
|
||||
|
||||
class Destination(Entity):
|
||||
|
||||
def was_reached(self):
|
||||
return self._was_reached
|
||||
|
||||
@property
|
||||
def encoding(self):
|
||||
return d.DEST_SYMBOL
|
||||
|
||||
def __init__(self, *args, action_counts=0, **kwargs):
|
||||
"""
|
||||
Represents a destination in the environment that agents aim to reach.
|
||||
|
||||
"""
|
||||
super(Destination, self).__init__(*args, **kwargs)
|
||||
self._was_reached = False
|
||||
self.action_counts = action_counts
|
||||
self._per_agent_actions = defaultdict(lambda: 0)
|
||||
|
||||
def do_wait_action(self, agent: Agent):
|
||||
def do_wait_action(self, agent) -> bool:
|
||||
"""
|
||||
Performs a wait action for the given agent at the destination.
|
||||
|
||||
:param agent: The agent performing the wait action.
|
||||
:type agent: Agent
|
||||
|
||||
:return: Whether the action was valid or not.
|
||||
:rtype: bool
|
||||
"""
|
||||
self._per_agent_actions[agent.name] += 1
|
||||
return c.VALID
|
||||
|
||||
def has_just_been_reached(self, state):
|
||||
"""
|
||||
Checks if the destination has just been reached based on the current state.
|
||||
"""
|
||||
if self.was_reached():
|
||||
return False
|
||||
agent_at_position = any(state[c.AGENT].by_pos(self.pos))
|
||||
@ -38,6 +51,9 @@ class Destination(Entity):
|
||||
return agent_at_position or any(x >= self.action_counts for x in self._per_agent_actions.values())
|
||||
|
||||
def agent_did_action(self, agent: Agent):
|
||||
"""
|
||||
Internal usage, currently no usage.
|
||||
"""
|
||||
return self._per_agent_actions[agent.name] >= self.action_counts
|
||||
|
||||
def summarize_state(self) -> dict:
|
||||
@ -57,3 +73,6 @@ class Destination(Entity):
|
||||
|
||||
def unmark_as_reached(self):
|
||||
self._was_reached = False
|
||||
|
||||
def was_reached(self) -> bool:
|
||||
return self._was_reached
|
||||
|
@ -5,13 +5,30 @@ from marl_factory_grid.modules.destinations.entitites import Destination
|
||||
class Destinations(Collection):
|
||||
_entity = Destination
|
||||
|
||||
var_is_blocking_light = False
|
||||
var_can_collide = False
|
||||
var_can_move = False
|
||||
var_has_position = True
|
||||
var_can_be_bound = True
|
||||
@property
|
||||
def var_is_blocking_light(self):
|
||||
return False
|
||||
|
||||
@property
|
||||
def var_can_collide(self):
|
||||
return False
|
||||
|
||||
@property
|
||||
def var_can_move(self):
|
||||
return False
|
||||
|
||||
@property
|
||||
def var_has_position(self):
|
||||
return True
|
||||
|
||||
@property
|
||||
def var_can_be_bound(self):
|
||||
return True
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
"""
|
||||
A collection of destinations.
|
||||
"""
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
def __repr__(self):
|
||||
|
@ -11,18 +11,17 @@ from marl_factory_grid.modules.destinations import constants as d
|
||||
from marl_factory_grid.modules.destinations.entitites import Destination
|
||||
from marl_factory_grid.utils.states import Gamestate
|
||||
|
||||
|
||||
ANY = 'any'
|
||||
ALL = 'all'
|
||||
SIMULTANOIUS = 'simultanious'
|
||||
CONDITIONS =[ALL, ANY, SIMULTANOIUS]
|
||||
ANY = 'any'
|
||||
ALL = 'all'
|
||||
SIMULTANEOUS = 'simultanious'
|
||||
CONDITIONS = [ALL, ANY, SIMULTANEOUS]
|
||||
|
||||
|
||||
class DestinationReachReward(Rule):
|
||||
|
||||
def __init__(self, dest_reach_reward=d.REWARD_DEST_REACHED):
|
||||
"""
|
||||
This rule introduces the basic functionality, so that targts (Destinations) can be reached and marked as such.
|
||||
This rule introduces the basic functionality, so that targets (Destinations) can be reached and marked as such.
|
||||
Additionally, rewards are reported.
|
||||
|
||||
:type dest_reach_reward: float
|
||||
@ -62,7 +61,7 @@ class DoneAtDestinationReach(DestinationReachReward):
|
||||
This rule triggers and sets the done flag if ALL Destinations have been reached.
|
||||
|
||||
:type reward_at_done: float
|
||||
:param reward_at_done: Specifies the reward, agent get, whenn all destinations are reached.
|
||||
:param reward_at_done: Specifies the reward agents get when all destinations are reached.
|
||||
:type dest_reach_reward: float
|
||||
:param dest_reach_reward: Specify the reward, agents get when reaching a single destination.
|
||||
"""
|
||||
@ -78,7 +77,7 @@ class DoneAtDestinationReach(DestinationReachReward):
|
||||
elif self.condition == ALL:
|
||||
if all(x.was_reached() for x in state[d.DESTINATION]):
|
||||
return [DoneResult(self.name, validity=c.VALID, reward=self.reward)]
|
||||
elif self.condition == SIMULTANOIUS:
|
||||
elif self.condition == SIMULTANEOUS:
|
||||
if all(x.was_reached() for x in state[d.DESTINATION]):
|
||||
return [DoneResult(self.name, validity=c.VALID, reward=self.reward)]
|
||||
else:
|
||||
@ -101,13 +100,13 @@ class DoneAtDestinationReach(DestinationReachReward):
|
||||
class SpawnDestinationsPerAgent(Rule):
|
||||
def __init__(self, coords_or_quantity: Dict[str, List[Tuple[int, int] | int]]):
|
||||
"""
|
||||
Special rule, that spawn distinations, that are bound to a single agent a fixed set of positions.
|
||||
Usefull for introducing specialists, etc. ..
|
||||
Special rule that spawns destinations bound to a single agent at a fixed set of positions.
|
||||
Useful for introducing specialists, etc. ..
|
||||
|
||||
!!! This rule does not introduce any reward or done condition.
|
||||
|
||||
:param coords_or_quantity: Please provide a dictionary with agent names as keys; and a list of possible
|
||||
destiantion coords as value. Example: {Wolfgang: [(0, 0), (1, 1), ...]}
|
||||
destination coords as value. Example: {Wolfgang: [(0, 0), (1, 1), ...]}
|
||||
"""
|
||||
super(Rule, self).__init__()
|
||||
self.per_agent_positions = dict()
|
||||
|
@ -1,16 +1,18 @@
|
||||
from typing import Union
|
||||
|
||||
from marl_factory_grid.environment.actions import Action
|
||||
from marl_factory_grid.modules.doors import constants as d
|
||||
from marl_factory_grid.modules.doors.entitites import Door
|
||||
from marl_factory_grid.modules.doors import constants as d, rewards as r
|
||||
from marl_factory_grid.environment import constants as c
|
||||
from marl_factory_grid.utils.results import ActionResult
|
||||
|
||||
|
||||
class DoorUse(Action):
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
super().__init__(d.ACTION_DOOR_USE, r.USE_DOOR_VALID, r.USE_DOOR_FAIL, **kwargs)
|
||||
"""
|
||||
Attempts to interact with door (open/close it) and returns an action result if successful.
|
||||
"""
|
||||
super().__init__(d.ACTION_DOOR_USE, d.REWARD_USE_DOOR_VALID, d.REWARD_USE_DOOR_FAIL, **kwargs)
|
||||
|
||||
def do(self, entity, state) -> Union[None, ActionResult]:
|
||||
# Check if agent really is standing on a door:
|
||||
@ -26,6 +28,6 @@ class DoorUse(Action):
|
||||
except AttributeError:
|
||||
pass
|
||||
if not valid:
|
||||
# When he doesn't stand necxxt to a door tell me.
|
||||
# When he doesn't stand next to a door tell me.
|
||||
state.print(f'{entity.name} just tried to use a door at {entity.pos}, but there is none.')
|
||||
return self.get_result(valid, entity)
|
||||
|
@ -16,3 +16,7 @@ STATE_OPEN = 'open' # Identifier to compare door-is-
|
||||
|
||||
# Actions
|
||||
ACTION_DOOR_USE = 'use_door' # Identifier for door-action
|
||||
|
||||
# Rewards
|
||||
REWARD_USE_DOOR_VALID: float = -0.00 # Reward for successful door use
|
||||
REWARD_USE_DOOR_FAIL: float = -0.01 # Reward for unsuccessful door use
|
||||
|
@ -1,3 +1,5 @@
|
||||
from typing import Union
|
||||
|
||||
from marl_factory_grid.environment.entity.entity import Entity
|
||||
from marl_factory_grid.utils import Result
|
||||
from marl_factory_grid.utils.utility_classes import RenderEntity
|
||||
@ -16,6 +18,9 @@ class DoorIndicator(Entity):
|
||||
return []
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
"""
|
||||
Is added around a door for agents to see.
|
||||
"""
|
||||
super().__init__(*args, **kwargs)
|
||||
self.__delattr__('move')
|
||||
|
||||
@ -39,22 +44,38 @@ class Door(Entity):
|
||||
return d.VALUE_CLOSED_DOOR if self.is_closed else d.VALUE_OPEN_DOOR
|
||||
|
||||
@property
|
||||
def str_state(self):
|
||||
def str_state(self) -> str:
|
||||
"""
|
||||
Internal Usage
|
||||
"""
|
||||
return 'open' if self.is_open else 'closed'
|
||||
|
||||
@property
|
||||
def is_closed(self):
|
||||
def is_closed(self) -> bool:
|
||||
return self._state == d.STATE_CLOSED
|
||||
|
||||
@property
|
||||
def is_open(self):
|
||||
def is_open(self) -> bool:
|
||||
return self._state == d.STATE_OPEN
|
||||
|
||||
@property
|
||||
def time_to_close(self):
|
||||
"""
|
||||
:returns: The time it takes for the door to close.
|
||||
:rtype: float
|
||||
"""
|
||||
return self._time_to_close
|
||||
|
||||
def __init__(self, *args, closed_on_init=True, auto_close_interval=10, **kwargs):
|
||||
"""
|
||||
A door entity that can be opened or closed by agents or rules.
|
||||
|
||||
:param closed_on_init: Whether the door spawns as open or closed.
|
||||
:type closed_on_init: bool
|
||||
|
||||
:param auto_close_interval: After how many steps the door should automatically close itself.
|
||||
:type auto_close_interval: int
|
||||
"""
|
||||
self._state = d.STATE_CLOSED
|
||||
super(Door, self).__init__(*args, **kwargs)
|
||||
self._auto_close_interval = auto_close_interval
|
||||
@ -73,14 +94,17 @@ class Door(Entity):
|
||||
name, state = 'door_open' if self.is_open else 'door_closed', 'blank'
|
||||
return RenderEntity(name, self.pos, 1, 'none', state, self.u_int + 1)
|
||||
|
||||
def use(self):
|
||||
def use(self) -> bool:
|
||||
"""
|
||||
Internal usage
|
||||
"""
|
||||
if self._state == d.STATE_OPEN:
|
||||
self._close()
|
||||
else:
|
||||
self._open()
|
||||
return c.VALID
|
||||
|
||||
def tick(self, state):
|
||||
def tick(self, state) -> Union[Result, None]:
|
||||
# Check if no entity is standing in the door
|
||||
if len(state.entities.pos_dict[self.pos]) <= 2:
|
||||
if self.is_open and self.time_to_close:
|
||||
@ -97,23 +121,38 @@ class Door(Entity):
|
||||
self._reset_timer()
|
||||
return Result(f"{d.DOOR}_reset", c.VALID, entity=self)
|
||||
|
||||
def _open(self):
|
||||
def _open(self) -> bool:
|
||||
"""
|
||||
Internal Usage
|
||||
"""
|
||||
self._state = d.STATE_OPEN
|
||||
self._reset_timer()
|
||||
return True
|
||||
|
||||
def _close(self):
|
||||
def _close(self) -> bool:
|
||||
"""
|
||||
Internal Usage
|
||||
"""
|
||||
self._state = d.STATE_CLOSED
|
||||
return True
|
||||
|
||||
def _decrement_timer(self):
|
||||
def _decrement_timer(self) -> bool:
|
||||
"""
|
||||
Internal Usage
|
||||
"""
|
||||
self._time_to_close -= 1
|
||||
return True
|
||||
|
||||
def _reset_timer(self):
|
||||
def _reset_timer(self) -> bool:
|
||||
"""
|
||||
Internal Usage
|
||||
"""
|
||||
self._time_to_close = self._auto_close_interval
|
||||
return True
|
||||
|
||||
def reset(self):
|
||||
"""
|
||||
Internal Usage
|
||||
"""
|
||||
self._close()
|
||||
self._reset_timer()
|
||||
|
@ -1,6 +1,9 @@
|
||||
from typing import List
|
||||
|
||||
from marl_factory_grid.environment.groups.collection import Collection
|
||||
from marl_factory_grid.modules.doors import constants as d
|
||||
from marl_factory_grid.modules.doors.entitites import Door
|
||||
from marl_factory_grid.utils import Result
|
||||
|
||||
|
||||
class Doors(Collection):
|
||||
@ -13,16 +16,18 @@ class Doors(Collection):
|
||||
return True
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
"""
|
||||
A collection of doors that can tick and reset all doors.
|
||||
"""
|
||||
super(Doors, self).__init__(*args, can_collide=True, **kwargs)
|
||||
|
||||
def tick_doors(self, state):
|
||||
def tick_doors(self, state) -> List[Result]:
|
||||
results = list()
|
||||
for door in self:
|
||||
assert isinstance(door, Door)
|
||||
tick_result = door.tick(state)
|
||||
if tick_result is not None:
|
||||
results.append(tick_result)
|
||||
# TODO: Should return a Result object, not a random dict.
|
||||
return results
|
||||
|
||||
def reset(self):
|
||||
|
@ -1,2 +0,0 @@
|
||||
USE_DOOR_VALID: float = -0.00
|
||||
USE_DOOR_FAIL: float = -0.01
|
@ -3,18 +3,38 @@ from typing import Union
|
||||
from marl_factory_grid.environment.actions import Action
|
||||
from marl_factory_grid.utils.results import ActionResult
|
||||
|
||||
from marl_factory_grid.modules.items import constants as i, rewards as r
|
||||
from marl_factory_grid.modules.items import constants as i
|
||||
from marl_factory_grid.environment import constants as c
|
||||
|
||||
|
||||
class ItemAction(Action):
|
||||
|
||||
def __init__(self, failed_dropoff_reward: float | None = None, valid_dropoff_reward: float | None = None, **kwargs):
|
||||
super().__init__(i.ITEM_ACTION, r.PICK_UP_FAIL, r.PICK_UP_VALID, **kwargs)
|
||||
self.failed_drop_off_reward = failed_dropoff_reward if failed_dropoff_reward is not None else r.DROP_OFF_FAIL
|
||||
self.valid_drop_off_reward = valid_dropoff_reward if valid_dropoff_reward is not None else r.DROP_OFF_FAIL
|
||||
"""
|
||||
Allows an entity to pick up or drop off items in the environment.
|
||||
|
||||
def get_dropoff_result(self, validity, entity):
|
||||
:param failed_dropoff_reward: The reward assigned when a drop-off action fails. Default is None.
|
||||
:type failed_dropoff_reward: float | None
|
||||
:param valid_dropoff_reward: The reward assigned when a drop-off action is successful. Default is None.
|
||||
:type valid_dropoff_reward: float | None
|
||||
"""
|
||||
super().__init__(i.ITEM_ACTION, i.REWARD_PICK_UP_FAIL, i.REWARD_PICK_UP_VALID, **kwargs)
|
||||
self.failed_drop_off_reward = failed_dropoff_reward if failed_dropoff_reward is not None else i.REWARD_DROP_OFF_FAIL
|
||||
self.valid_drop_off_reward = valid_dropoff_reward if valid_dropoff_reward is not None else i.REWARD_DROP_OFF_VALID
|
||||
|
||||
def get_dropoff_result(self, validity, entity) -> ActionResult:
|
||||
"""
|
||||
Generates an ActionResult for a drop-off action based on its validity.
|
||||
|
||||
:param validity: Whether the drop-off action is valid.
|
||||
:type validity: bool
|
||||
|
||||
:param entity: The entity performing the action.
|
||||
:type entity: Entity
|
||||
|
||||
:return: ActionResult for the drop-off action.
|
||||
:rtype: ActionResult
|
||||
"""
|
||||
reward = self.valid_drop_off_reward if validity else self.failed_drop_off_reward
|
||||
return ActionResult(self.__name__, validity, reward=reward, entity=entity)
|
||||
|
||||
|
@ -6,3 +6,9 @@ INVENTORY = 'Inventories'
|
||||
DROP_OFF = 'DropOffLocations'
|
||||
|
||||
ITEM_ACTION = 'ITEMACTION'
|
||||
|
||||
# Rewards
|
||||
REWARD_DROP_OFF_VALID: float = 0.1
|
||||
REWARD_DROP_OFF_FAIL: float = -0.1
|
||||
REWARD_PICK_UP_FAIL: float = -0.1
|
||||
REWARD_PICK_UP_VALID: float = 0.1
|
||||
|
@ -8,39 +8,52 @@ from marl_factory_grid.modules.items import constants as i
|
||||
|
||||
class Item(Entity):
|
||||
|
||||
@property
|
||||
def encoding(self):
|
||||
return 1
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
"""
|
||||
An item that can be picked up or dropped by agents. If picked up, it enters the agent's inventory.
|
||||
"""
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
def render(self):
|
||||
return RenderEntity(i.ITEM, self.pos) if self.pos != c.VALUE_NO_POS else None
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
@property
|
||||
def encoding(self):
|
||||
# Edit this if you want items to be drawn in the ops differently
|
||||
return 1
|
||||
|
||||
|
||||
class DropOffLocation(Entity):
|
||||
|
||||
def render(self):
|
||||
return RenderEntity(i.DROP_OFF, self.pos)
|
||||
|
||||
@property
|
||||
def encoding(self):
|
||||
return i.SYMBOL_DROP_OFF
|
||||
|
||||
def __init__(self, *args, storage_size_until_full: int = 5, **kwargs):
|
||||
@property
|
||||
def is_full(self) -> bool:
|
||||
"""
|
||||
Checks whether the drop-off location is full or whether another item can be dropped here.
|
||||
"""
|
||||
return False if not self.storage.maxlen else self.storage.maxlen == len(self.storage)
|
||||
|
||||
def __init__(self, *args, storage_size_until_full=5, **kwargs):
|
||||
"""
|
||||
Represents a drop-off location in the environment that agents aim to drop items at.
|
||||
|
||||
:param storage_size_until_full: The number of items that can be dropped here until it is considered full.
|
||||
:type storage_size_until_full: int
|
||||
"""
|
||||
super(DropOffLocation, self).__init__(*args, **kwargs)
|
||||
self.storage = deque(maxlen=storage_size_until_full or None)
|
||||
|
||||
def place_item(self, item: Item):
|
||||
def place_item(self, item: Item) -> bool:
|
||||
"""
|
||||
If the storage of the drop-off location is not full, the item is placed. Otherwise, a RuntimeWarning is raised.
|
||||
"""
|
||||
if self.is_full:
|
||||
raise RuntimeWarning("There is currently no way to clear the storage or make it unfull.")
|
||||
return bc.NOT_VALID
|
||||
return c.NOT_VALID
|
||||
else:
|
||||
self.storage.append(item)
|
||||
return c.VALID
|
||||
|
||||
@property
|
||||
def is_full(self):
|
||||
return False if not self.storage.maxlen else self.storage.maxlen == len(self.storage)
|
||||
def render(self):
|
||||
return RenderEntity(i.DROP_OFF, self.pos)
|
||||
|
@ -1,3 +1,5 @@
|
||||
from typing import Dict, Any
|
||||
|
||||
from marl_factory_grid.environment import constants as c
|
||||
from marl_factory_grid.environment.entity.agent import Agent
|
||||
from marl_factory_grid.environment.groups.collection import Collection
|
||||
@ -16,14 +18,17 @@ class Items(Collection):
|
||||
return True
|
||||
|
||||
@property
|
||||
def is_blocking_light(self):
|
||||
def var_is_blocking_light(self):
|
||||
return False
|
||||
|
||||
@property
|
||||
def can_collide(self):
|
||||
def var_can_collide(self):
|
||||
return False
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
"""
|
||||
A collection of items that triggers their spawn.
|
||||
"""
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
def trigger_spawn(self, state, *entity_args, coords_or_quantity=None, **entity_kwargs) -> [Result]:
|
||||
@ -55,7 +60,13 @@ class Inventory(IsBoundMixin, Collection):
|
||||
def name(self):
|
||||
return f'{self.__class__.__name__}[{self._bound_entity.name}]'
|
||||
|
||||
def __init__(self, agent: Agent, *args, **kwargs):
|
||||
def __init__(self, agent, *args, **kwargs):
|
||||
"""
|
||||
An inventory that can hold items picked up by the agent this is bound to.
|
||||
|
||||
:param agent: The agent this inventory is bound to and belongs to.
|
||||
:type agent: Agent
|
||||
"""
|
||||
super(Inventory, self).__init__(*args, **kwargs)
|
||||
self._collection = None
|
||||
self.bind(agent)
|
||||
@ -69,38 +80,57 @@ class Inventory(IsBoundMixin, Collection):
|
||||
attr_dict.update(dict(name=self.name, belongs_to=self._bound_entity.name))
|
||||
return attr_dict
|
||||
|
||||
def pop(self):
|
||||
def pop(self) -> Item:
|
||||
"""
|
||||
Removes and returns the first item in the inventory.
|
||||
"""
|
||||
item_to_pop = self[0]
|
||||
self.delete_env_object(item_to_pop)
|
||||
return item_to_pop
|
||||
|
||||
def set_collection(self, collection):
|
||||
"""
|
||||
No usage
|
||||
"""
|
||||
self._collection = collection
|
||||
|
||||
def clear_temp_state(self):
|
||||
# Entites need this, but inventories have no state....
|
||||
"""
|
||||
Entities need this, but inventories have no state.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class Inventories(Objects):
|
||||
_entity = Inventory
|
||||
|
||||
var_can_move = False
|
||||
var_has_position = False
|
||||
|
||||
symbol = None
|
||||
|
||||
@property
|
||||
def spawn_rule(self):
|
||||
def var_can_move(self):
|
||||
return False
|
||||
|
||||
@property
|
||||
def var_has_position(self):
|
||||
return False
|
||||
|
||||
@property
|
||||
def spawn_rule(self) -> dict[Any, dict[str, Any]]:
|
||||
"""
|
||||
:returns: a dict containing the specified spawn rule and its arguments.
|
||||
:rtype: dict(dict(collection=self, coords_or_quantity=None))
|
||||
"""
|
||||
return {c.SPAWN_ENTITY_RULE: dict(collection=self, coords_or_quantity=None)}
|
||||
|
||||
def __init__(self, size: int, *args, **kwargs):
|
||||
"""
|
||||
TODO
|
||||
"""
|
||||
super(Inventories, self).__init__(*args, **kwargs)
|
||||
self.size = size
|
||||
self._obs = None
|
||||
self._lazy_eval_transforms = []
|
||||
|
||||
def spawn(self, agents, *args, **kwargs):
|
||||
def spawn(self, agents, *args, **kwargs) -> [Result]:
|
||||
self.add_items([self._entity(agent, self.size, *args, **kwargs) for _, agent in enumerate(agents)])
|
||||
return [Result(identifier=f'{self.name}_spawn', validity=c.VALID, value=len(self))]
|
||||
|
||||
@ -137,6 +167,9 @@ class DropOffLocations(Collection):
|
||||
return True
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
"""
|
||||
A Collection of Drop-off locations that can trigger their spawn.
|
||||
"""
|
||||
super(DropOffLocations, self).__init__(*args, **kwargs)
|
||||
|
||||
@staticmethod
|
||||
|
@ -1,4 +0,0 @@
|
||||
DROP_OFF_VALID: float = 0.1
|
||||
DROP_OFF_FAIL: float = -0.1
|
||||
PICK_UP_FAIL: float = -0.1
|
||||
PICK_UP_VALID: float = 0.1
|
@ -9,6 +9,16 @@ from marl_factory_grid.modules.items import constants as i
|
||||
class RespawnItems(Rule):
|
||||
|
||||
def __init__(self, n_items: int = 5, respawn_freq: int = 15, n_locations: int = 5):
|
||||
"""
|
||||
Defines the respawning behaviour of items.
|
||||
|
||||
:param n_items: Specifies how many items should respawn.
|
||||
:type n_items: int
|
||||
:param respawn_freq: Specifies how often items should respawn.
|
||||
:type respawn_freq: int
|
||||
:param n_locations: Specifies at how many locations items should be able to respawn.
|
||||
:type n_locations: int
|
||||
"""
|
||||
super().__init__()
|
||||
self.spawn_frequency = respawn_freq
|
||||
self._next_item_spawn = respawn_freq
|
||||
|
@ -1,17 +1,18 @@
|
||||
from typing import Union
|
||||
|
||||
import marl_factory_grid.modules.machines.constants
|
||||
from marl_factory_grid.environment.actions import Action
|
||||
from marl_factory_grid.utils.results import ActionResult
|
||||
|
||||
from marl_factory_grid.modules.machines import constants as m
|
||||
from marl_factory_grid.environment import constants as c
|
||||
from marl_factory_grid.environment.actions import Action
|
||||
from marl_factory_grid.modules.machines import constants as m
|
||||
from marl_factory_grid.utils import helpers as h
|
||||
from marl_factory_grid.utils.results import ActionResult
|
||||
|
||||
|
||||
class MachineAction(Action):
|
||||
|
||||
def __init__(self):
|
||||
"""
|
||||
Attempts to maintain the machine and returns an action result if successful.
|
||||
"""
|
||||
super().__init__(m.MACHINE_ACTION, m.MAINTAIN_VALID, m.MAINTAIN_FAIL)
|
||||
|
||||
def do(self, entity, state) -> Union[None, ActionResult]:
|
||||
|
@ -13,6 +13,14 @@ class Machine(Entity):
|
||||
return self._encodings[self.status]
|
||||
|
||||
def __init__(self, *args, work_interval: int = 10, pause_interval: int = 15, **kwargs):
|
||||
"""
|
||||
Represents a machine entity that the maintainer will try to maintain.
|
||||
|
||||
:param work_interval: How long should the machine work before pausing.
|
||||
:type work_interval: int
|
||||
:param pause_interval: How long should the machine pause before continuing to work.
|
||||
:type pause_interval: int
|
||||
"""
|
||||
super(Machine, self).__init__(*args, **kwargs)
|
||||
self._intervals = dict({m.STATE_IDLE: pause_interval, m.STATE_WORK: work_interval})
|
||||
self._encodings = dict({m.STATE_IDLE: pause_interval, m.STATE_WORK: work_interval})
|
||||
@ -21,7 +29,10 @@ class Machine(Entity):
|
||||
self.health = 100
|
||||
self._counter = 0
|
||||
|
||||
def maintain(self):
|
||||
def maintain(self) -> bool:
|
||||
"""
|
||||
Attempts to maintain the machine by increasing its health.
|
||||
"""
|
||||
if self.status == m.STATE_WORK:
|
||||
return c.NOT_VALID
|
||||
if self.health <= 98:
|
||||
@ -31,6 +42,15 @@ class Machine(Entity):
|
||||
return c.NOT_VALID
|
||||
|
||||
def tick(self, state):
|
||||
"""
|
||||
Updates the machine's mode (work, pause) depending on its current counter and whether an agent is currently on
|
||||
its position. If no agent is standing on the machine's position, it decrements its own health.
|
||||
|
||||
:param state: The current game state.
|
||||
:type state: GameState
|
||||
:return: The result of the tick operation on the machine.
|
||||
:rtype: TickResult | None
|
||||
"""
|
||||
others = state.entities.pos_dict[self.pos]
|
||||
if self.status == m.STATE_MAINTAIN and any([c.AGENT in x.name for x in others]):
|
||||
return TickResult(identifier=self.name, validity=c.VALID, entity=self)
|
||||
@ -48,6 +68,9 @@ class Machine(Entity):
|
||||
return None
|
||||
|
||||
def reset_counter(self):
|
||||
"""
|
||||
Internal Usage
|
||||
"""
|
||||
self._counter = self._intervals[self.status]
|
||||
|
||||
def render(self):
|
||||
|
@ -20,5 +20,8 @@ class Machines(Collection):
|
||||
return True
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
"""
|
||||
A Collection of Machines.
|
||||
"""
|
||||
super(Machines, self).__init__(*args, **kwargs)
|
||||
|
||||
|
@ -15,7 +15,15 @@ from ..doors import DoorUse
|
||||
|
||||
class Maintainer(Entity):
|
||||
|
||||
def __init__(self, objective: str, action: Action, *args, **kwargs):
|
||||
def __init__(self, objective, action, *args, **kwargs):
|
||||
"""
|
||||
Represents the maintainer entity that aims to maintain machines.
|
||||
|
||||
:param objective: The maintainer's objective, e.g., "Machines".
|
||||
:type objective: str
|
||||
:param action: The default action to be performed by the maintainer.
|
||||
:type action: Action
|
||||
"""
|
||||
super().__init__(*args, **kwargs)
|
||||
self.action = action
|
||||
self.actions = [x() for x in ALL_BASEACTIONS] + [DoorUse()]
|
||||
@ -26,6 +34,16 @@ class Maintainer(Entity):
|
||||
self._last_serviced = 'None'
|
||||
|
||||
def tick(self, state):
|
||||
"""
|
||||
If there is an objective at the current position, the maintainer performs its action on the objective.
|
||||
If the objective has changed since the last servicing, the maintainer performs the action and updates
|
||||
the last serviced objective. Otherwise, it calculates a move action and performs it.
|
||||
|
||||
:param state: The current game state.
|
||||
:type state: GameState
|
||||
:return: The result of the action performed by the maintainer.
|
||||
:rtype: ActionResult
|
||||
"""
|
||||
if found_objective := h.get_first(state[self.objective].by_pos(self.pos)):
|
||||
if found_objective.name != self._last_serviced:
|
||||
result = self.action.do(self, state)
|
||||
@ -40,9 +58,24 @@ class Maintainer(Entity):
|
||||
return result
|
||||
|
||||
def set_state(self, action_result):
|
||||
"""
|
||||
Updates the maintainer's own status with an action result.
|
||||
"""
|
||||
self._status = action_result
|
||||
|
||||
def get_move_action(self, state) -> Action:
|
||||
"""
|
||||
Retrieves the next move action for the agent.
|
||||
|
||||
If a path is not already determined, the agent calculates the shortest path to its objective, considering doors
|
||||
and obstacles. If a closed door is found in the calculated path, the agent attempts to open it.
|
||||
|
||||
:param state: The current state of the environment.
|
||||
:type state: GameState
|
||||
|
||||
:return: The chosen move action for the agent.
|
||||
:rtype: Action
|
||||
"""
|
||||
if self._path is None or not len(self._path):
|
||||
if not self._next:
|
||||
self._next = list(state[self.objective].values()) + [Floor(*state.random_free_position)]
|
||||
@ -70,17 +103,27 @@ class Maintainer(Entity):
|
||||
raise EnvironmentError
|
||||
return action_obj
|
||||
|
||||
def calculate_route(self, entity, floortile_graph):
|
||||
def calculate_route(self, entity, floortile_graph) -> list:
|
||||
"""
|
||||
:returns: The shortest path to the entity's position, excluding the maintainer's current position (the source) but including the target position.
|
||||
:rtype: list
|
||||
"""
|
||||
route = nx.shortest_path(floortile_graph, self.pos, entity.pos)
|
||||
return route[1:]
|
||||
|
||||
def _closed_door_in_path(self, state):
|
||||
"""
|
||||
Internal Use
|
||||
"""
|
||||
if self._path:
|
||||
return h.get_first(state[do.DOORS].by_pos(self._path[0]), lambda x: x.is_closed)
|
||||
else:
|
||||
return None
|
||||
|
||||
def _predict_move(self, state):
|
||||
def _predict_move(self, state) -> Action:
|
||||
"""
|
||||
Internal Use
|
||||
"""
|
||||
next_pos = self._path[0]
|
||||
if any(x for x in state.entities.pos_dict[next_pos] if x.var_can_collide) > 0:
|
||||
action = c.NOOP
|
||||
|
@ -9,12 +9,26 @@ from ..machines.actions import MachineAction
|
||||
class Maintainers(Collection):
|
||||
_entity = Maintainer
|
||||
|
||||
var_can_collide = True
|
||||
var_can_move = True
|
||||
var_is_blocking_light = False
|
||||
var_has_position = True
|
||||
@property
|
||||
def var_can_collide(self):
|
||||
return True
|
||||
|
||||
@property
|
||||
def var_can_move(self):
|
||||
return True
|
||||
|
||||
@property
|
||||
def var_is_blocking_light(self):
|
||||
return False
|
||||
|
||||
@property
|
||||
def var_has_position(self):
|
||||
return True
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
"""
|
||||
A collection of maintainers
|
||||
"""
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
def spawn(self, coords_or_quantity: Union[int, List[Tuple[(int, int)]]], *entity_args):
|
||||
|
@ -9,6 +9,9 @@ from . import constants as M
|
||||
class MoveMaintainers(Rule):
|
||||
|
||||
def __init__(self):
|
||||
"""
|
||||
This rule is responsible for moving the maintainers at every step of the environment.
|
||||
"""
|
||||
super().__init__()
|
||||
|
||||
def tick_step(self, state) -> List[TickResult]:
|
||||
@ -21,6 +24,9 @@ class MoveMaintainers(Rule):
|
||||
class DoneAtMaintainerCollision(Rule):
|
||||
|
||||
def __init__(self):
|
||||
"""
|
||||
When active, this rule stops the environment after a maintainer reports a collision with another entity.
|
||||
"""
|
||||
super().__init__()
|
||||
|
||||
def on_check_done(self, state) -> List[DoneResult]:
|
||||
|
@ -1,8 +1,7 @@
|
||||
import importlib
|
||||
|
||||
from collections import defaultdict
|
||||
from pathlib import PurePath, Path
|
||||
from typing import Union, Dict, List, Iterable, Callable
|
||||
from typing import Union, Dict, List, Iterable, Callable, Any
|
||||
|
||||
import numpy as np
|
||||
from numpy.typing import ArrayLike
|
||||
@ -21,23 +20,22 @@ This file is used for:
|
||||
In this file they are defined to be used across the entire package.
|
||||
"""
|
||||
|
||||
LEVELS_DIR = 'levels' # for use in studies and experiments
|
||||
STEPS_START = 1 # Define where to the stepcount; which is the first step
|
||||
|
||||
LEVELS_DIR = 'levels' # for use in studies and experiments
|
||||
STEPS_START = 1 # Define where to the stepcount; which is the first step
|
||||
|
||||
IGNORED_DF_COLUMNS = ['Episode', 'Run', # For plotting, which values are ignored when loading monitor files
|
||||
IGNORED_DF_COLUMNS = ['Episode', 'Run', # For plotting, which values are ignored when loading monitor files
|
||||
'train_step', 'step', 'index', 'dirt_amount', 'dirty_pos_count', 'terminal_observation',
|
||||
'episode']
|
||||
|
||||
POS_MASK = np.asarray([[[-1, -1], [0, -1], [1, -1]],
|
||||
[[-1, 0], [0, 0], [1, 0]],
|
||||
[[-1, 1], [0, 1], [1, 1]]])
|
||||
[[-1, 0], [0, 0], [1, 0]],
|
||||
[[-1, 1], [0, 1], [1, 1]]])
|
||||
|
||||
MOVEMAP = defaultdict(lambda: (0, 0),
|
||||
MOVEMAP = defaultdict(lambda: (0, 0),
|
||||
{c.NORTH: (-1, 0), c.NORTHEAST: (-1, 1),
|
||||
c.EAST: (0, 1), c.SOUTHEAST: (1, 1),
|
||||
c.SOUTH: (1, 0), c.SOUTHWEST: (1, -1),
|
||||
c.WEST: (0, -1), c.NORTHWEST: (-1, -1)
|
||||
c.EAST: (0, 1), c.SOUTHEAST: (1, 1),
|
||||
c.SOUTH: (1, 0), c.SOUTHWEST: (1, -1),
|
||||
c.WEST: (0, -1), c.NORTHWEST: (-1, -1)
|
||||
}
|
||||
)
|
||||
|
||||
@ -80,7 +78,19 @@ class ObservationTranslator:
|
||||
self._this_named_obs_space = this_named_observation_space
|
||||
self._per_agent_named_obs_space = list(per_agent_named_obs_spaces)
|
||||
|
||||
def translate_observation(self, agent_idx: int, obs):
|
||||
def translate_observation(self, agent_idx, obs) -> ArrayLike:
|
||||
"""
|
||||
Translates the observation of the given agent.
|
||||
|
||||
:param agent_idx: Agent identifier.
|
||||
:type agent_idx: int
|
||||
|
||||
:param obs: The observation to be translated.
|
||||
:type obs: ArrayLike
|
||||
|
||||
:return: The translated observation.
|
||||
:rtype: ArrayLike
|
||||
"""
|
||||
target_obs_space = self._per_agent_named_obs_space[agent_idx]
|
||||
translation = dict()
|
||||
for name, idxs in target_obs_space.items():
|
||||
@ -98,7 +108,10 @@ class ObservationTranslator:
|
||||
translation = dict(sorted(translation.items()))
|
||||
return np.concatenate(list(translation.values()), axis=-3)
|
||||
|
||||
def translate_observations(self, observations: List[ArrayLike]):
|
||||
def translate_observations(self, observations) -> List[ArrayLike]:
|
||||
"""
|
||||
Internal Usage
|
||||
"""
|
||||
return [self.translate_observation(idx, observation) for idx, observation in enumerate(observations)]
|
||||
|
||||
def __call__(self, observations):
|
||||
@ -129,11 +142,26 @@ class ActionTranslator:
|
||||
self._per_agent_idx_actions = [{idx: a for a, idx in x.items()} for x in self._per_agent_named_action_space]
|
||||
|
||||
def translate_action(self, agent_idx: int, action: int):
|
||||
"""
|
||||
Translates the action of the given agent.
|
||||
|
||||
:param agent_idx: Agent identifier.
|
||||
:type agent_idx: int
|
||||
|
||||
:param action: The action to be translated.
|
||||
:type action: int
|
||||
|
||||
:return: The translated action.
|
||||
:rtype: int
|
||||
"""
|
||||
named_action = self._per_agent_idx_actions[agent_idx][action]
|
||||
translated_action = self._target_named_action_space[named_action]
|
||||
return translated_action
|
||||
|
||||
def translate_actions(self, actions: List[int]):
|
||||
"""
|
||||
Internal Usage
|
||||
"""
|
||||
return [self.translate_action(idx, action) for idx, action in enumerate(actions)]
|
||||
|
||||
def __call__(self, actions):
|
||||
@ -179,6 +207,13 @@ def one_hot_level(level, symbol: str):
|
||||
|
||||
|
||||
def is_move(action_name: str):
|
||||
"""
|
||||
Check if the given action name corresponds to a movement action.
|
||||
|
||||
:param action_name: The name of the action to check.
|
||||
:type action_name: str
|
||||
:return: True if the action is a movement action, False otherwise.
|
||||
"""
|
||||
return action_name in MOVEMAP.keys()
|
||||
|
||||
|
||||
@ -208,7 +243,18 @@ def asset_str(agent):
|
||||
|
||||
|
||||
def locate_and_import_class(class_name, folder_path: Union[str, PurePath] = ''):
|
||||
"""Locate an object by name or dotted path, importing as necessary."""
|
||||
"""
|
||||
Locate an object by name or dotted path.
|
||||
|
||||
:param class_name: The class name to be imported
|
||||
:type class_name: str
|
||||
|
||||
:param folder_path: The path to the module containing the class.
|
||||
:type folder_path: Union[str, PurePath]
|
||||
|
||||
:return: The imported module class.
|
||||
:raises AttributeError: If the specified class is not found in the provided folder path.
|
||||
"""
|
||||
import sys
|
||||
sys.path.append("../../environment")
|
||||
folder_path = Path(folder_path).resolve()
|
||||
@ -220,15 +266,15 @@ def locate_and_import_class(class_name, folder_path: Union[str, PurePath] = ''):
|
||||
for module_path in module_paths:
|
||||
module_parts = [x.replace('.py', '') for idx, x in enumerate(module_path.parts) if idx >= package_pos]
|
||||
mod = importlib.import_module('.'.join(module_parts))
|
||||
all_found_modules.extend([x for x in dir(mod) if (not(x.startswith('__') or len(x) <= 2) and x.istitle())
|
||||
and x not in ['Entity', 'NamedTuple', 'List', 'Rule', 'Union',
|
||||
all_found_modules.extend([x for x in dir(mod) if (not (x.startswith('__') or len(x) <= 2) and x.istitle())
|
||||
and x not in ['Entity', 'NamedTuple', 'List', 'Rule', 'Union',
|
||||
'TickResult', 'ActionResult', 'Action', 'Agent',
|
||||
'RenderEntity', 'TemplateRule', 'Objects', 'PositionMixin',
|
||||
'IsBoundMixin', 'EnvObject', 'EnvObjects', 'Dict', 'Any', 'Factory',
|
||||
'Move8']])
|
||||
try:
|
||||
model_class = mod.__getattribute__(class_name)
|
||||
return model_class
|
||||
module_class = mod.__getattribute__(class_name)
|
||||
return module_class
|
||||
except AttributeError:
|
||||
continue
|
||||
raise AttributeError(f'Class "{class_name}" was not found in "{folder_path.name}"', list(set(all_found_modules)))
|
||||
@ -244,16 +290,33 @@ def add_pos_name(name_str, bound_e):
|
||||
return name_str
|
||||
|
||||
|
||||
def get_first(iterable: Iterable, filter_by: Callable[[any], bool] = lambda _: True):
|
||||
def get_first(iterable: Iterable, filter_by: Callable[[any], bool] = lambda _: True) -> Any | None:
|
||||
"""
|
||||
Get the first element from an iterable that satisfies the specified condition.
|
||||
|
||||
:param iterable: The iterable to search.
|
||||
:type iterable: Iterable
|
||||
|
||||
:param filter_by: A function that filters elements, defaults to lambda _: True.
|
||||
:type filter_by: Callable[[Any], bool]
|
||||
|
||||
:return: The first element that satisfies the condition, or None if none is found.
|
||||
:rtype: Any
|
||||
"""
|
||||
return next((x for x in iterable if filter_by(x)), None)
|
||||
|
||||
|
||||
def get_first_index(iterable: Iterable, filter_by: Callable[[any], bool] = lambda _: True):
|
||||
def get_first_index(iterable: Iterable, filter_by: Callable[[any], bool] = lambda _: True) -> int | None:
|
||||
"""
|
||||
todo
|
||||
Get the index of the first element from an iterable that satisfies the specified condition.
|
||||
|
||||
:param iterable:
|
||||
:param filter_by:
|
||||
:return:
|
||||
:param iterable: The iterable to search.
|
||||
:type iterable: Iterable
|
||||
|
||||
:param filter_by: A function that filters elements, defaults to lambda _: True.
|
||||
:type filter_by: Callable[[Any], bool]
|
||||
|
||||
:return: The index of the first element that satisfies the condition, or None if none is found.
|
||||
:rtype: Optional[int]
|
||||
"""
|
||||
return next((idx for idx, x in enumerate(iterable) if filter_by(x)), None)
|
||||
|
@ -15,9 +15,24 @@ class LevelParser(object):
|
||||
|
||||
@property
|
||||
def pomdp_d(self):
|
||||
"""
|
||||
Internal Usage
|
||||
"""
|
||||
return self.pomdp_r * 2 + 1
|
||||
|
||||
def __init__(self, level_file_path: PathLike, entity_parse_dict: Dict[Entities, dict], pomdp_r=0):
|
||||
"""
|
||||
Parses a level file and creates the initial state of the environment.
|
||||
|
||||
:param level_file_path: Path to the level file.
|
||||
:type level_file_path: PathLike
|
||||
|
||||
:param entity_parse_dict: Dictionary specifying how to parse different entities.
|
||||
:type entity_parse_dict: Dict[Entities, dict]
|
||||
|
||||
:param pomdp_r: The POMDP radius. Defaults to 0.
|
||||
:type pomdp_r: int
|
||||
"""
|
||||
self.pomdp_r = pomdp_r
|
||||
self.e_p_dict = entity_parse_dict
|
||||
self._parsed_level = h.parse_level(Path(level_file_path))
|
||||
@ -25,14 +40,30 @@ class LevelParser(object):
|
||||
self.level_shape = level_array.shape
|
||||
self.size = self.pomdp_r ** 2 if self.pomdp_r else np.prod(self.level_shape)
|
||||
|
||||
def get_coordinates_for_symbol(self, symbol, negate=False):
|
||||
def get_coordinates_for_symbol(self, symbol, negate=False) -> np.ndarray:
|
||||
"""
|
||||
Get the coordinates for a given symbol in the parsed level.
|
||||
|
||||
:param symbol: The symbol to search for.
|
||||
:param negate: If True, get coordinates not matching the symbol. Defaults to False.
|
||||
|
||||
:return: Array of coordinates.
|
||||
:rtype: np.ndarray
|
||||
"""
|
||||
level_array = h.one_hot_level(self._parsed_level, symbol)
|
||||
if negate:
|
||||
return np.argwhere(level_array != c.VALUE_OCCUPIED_CELL)
|
||||
else:
|
||||
return np.argwhere(level_array == c.VALUE_OCCUPIED_CELL)
|
||||
|
||||
def do_init(self):
|
||||
def do_init(self) -> Entities:
|
||||
"""
|
||||
Initialize the environment map state by creating entities such as Walls, Agents or Machines according to the
|
||||
entity parse dict.
|
||||
|
||||
:return: A dict of all parsed entities with their positions.
|
||||
:rtype: Entities
|
||||
"""
|
||||
# Global Entities
|
||||
list_of_all_positions = ([tuple(f) for f in self.get_coordinates_for_symbol(c.SYMBOL_WALL, negate=True)])
|
||||
entities = Entities(list_of_all_positions)
|
||||
|
@ -3,7 +3,7 @@ from dataclasses import dataclass
|
||||
|
||||
from marl_factory_grid.environment.entity.object import Object
|
||||
|
||||
TYPE_VALUE = 'value'
|
||||
TYPE_VALUE = 'value'
|
||||
TYPE_REWARD = 'reward'
|
||||
TYPES = [TYPE_VALUE, TYPE_REWARD]
|
||||
|
||||
@ -11,10 +11,7 @@ TYPES = [TYPE_VALUE, TYPE_REWARD]
|
||||
@dataclass
|
||||
class InfoObject:
|
||||
"""
|
||||
TODO
|
||||
|
||||
|
||||
:return:
|
||||
Data class representing information about an entity or the global environment.
|
||||
"""
|
||||
identifier: str
|
||||
val_type: str
|
||||
@ -24,10 +21,14 @@ class InfoObject:
|
||||
@dataclass
|
||||
class Result:
|
||||
"""
|
||||
TODO
|
||||
A generic result class representing outcomes of operations or actions.
|
||||
|
||||
|
||||
:return:
|
||||
Attributes:
|
||||
- identifier: A unique identifier for the result.
|
||||
- validity: A boolean indicating whether the operation or action was successful.
|
||||
- reward: The reward associated with the result, if applicable.
|
||||
- value: The value associated with the result, if applicable.
|
||||
- entity: The entity associated with the result, if applicable.
|
||||
"""
|
||||
identifier: str
|
||||
validity: bool
|
||||
@ -36,6 +37,11 @@ class Result:
|
||||
entity: Object = None
|
||||
|
||||
def get_infos(self):
|
||||
"""
|
||||
Get information about the result.
|
||||
|
||||
:return: A list of InfoObject representing different types of information.
|
||||
"""
|
||||
n = self.entity.name if self.entity is not None else "Global"
|
||||
# Return multiple Info Dicts
|
||||
return [InfoObject(identifier=f'{n}_{self.identifier}',
|
||||
@ -50,32 +56,30 @@ class Result:
|
||||
return f'{self.__class__.__name__}({self.identifier.capitalize()} {valid}valid{reward}{value}{entity})'
|
||||
|
||||
|
||||
@dataclass
|
||||
class TickResult(Result):
|
||||
"""
|
||||
TODO
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
@dataclass
|
||||
class ActionResult(Result):
|
||||
"""
|
||||
TODO
|
||||
A specific Result class representing outcomes of actions.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
@dataclass
|
||||
class ActionResult(Result):
|
||||
pass
|
||||
|
||||
@dataclass
|
||||
class State(Result):
|
||||
# TODO: change identifiert to action/last_action
|
||||
pass
|
||||
|
||||
|
||||
@dataclass
|
||||
class DoneResult(Result):
|
||||
"""
|
||||
A specific Result class representing the completion of an action or operation.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
@dataclass
|
||||
class State(Result):
|
||||
# TODO: change identifier to action/last_action
|
||||
pass
|
||||
|
||||
@dataclass
|
||||
class TickResult(Result):
|
||||
"""
|
||||
A specific Result class representing outcomes of tick operations.
|
||||
"""
|
||||
pass
|
||||
|
Loading…
x
Reference in New Issue
Block a user