Merge branch 'main' into refactor_rename

# Conflicts:
#	marl_factory_grid/modules/clean_up/groups.py
#	marl_factory_grid/modules/clean_up/rules.py
#	marl_factory_grid/modules/destinations/rules.py
This commit is contained in:
Chanumask
2023-10-27 13:12:54 +02:00
31 changed files with 332 additions and 105 deletions

View File

@ -60,7 +60,7 @@ Just define what your environment needs in a *yaml*-configfile like:
done_at_collisions: !!bool True
ItemRespawn:
spawn_freq: 5
DoorAutoClose: {}
DoDoorAutoClose: {}
Assets:
- Defaults

View File

@ -20,6 +20,14 @@ class Agent(Entity):
def var_can_move(self):
return True
@property
def var_is_paralyzed(self):
    """Number of reasons currently paralyzing this agent; 0 (falsy) when there are none."""
    reason_count = len(self._paralyzed)
    return reason_count
@property
def paralyze_reasons(self):
    """Return a fresh list holding every reason this agent is currently paralyzed for."""
    return list(self._paralyzed)
@property
def var_is_blocking_pos(self):
return False
@ -57,6 +65,7 @@ class Agent(Entity):
def __init__(self, actions: List[Action], observations: List[str], *args, **kwargs):
super(Agent, self).__init__(*args, **kwargs)
self._paralyzed = set()
self.step_result = dict()
self._actions = actions
self._observations = observations
@ -75,6 +84,17 @@ class Agent(Entity):
def set_state(self, action_result):
self._state = action_result
def paralyze(self, reason):
    """
    Register `reason` as a cause for this agent being paralyzed.

    :param reason: Hashable marker identifying who/what paralyzed the agent.
    :return: Always c.VALID.
    """
    # In-place union keeps the same set object, exactly like set.add().
    self._paralyzed |= {reason}
    return c.VALID
def de_paralyze(self, reason):
    """
    Withdraw `reason` from the set of causes paralyzing this agent.

    :param reason: The marker that was previously passed to `paralyze`.
    :return: c.VALID if the reason was registered and has been removed, c.NOT_VALID otherwise.
    """
    if reason not in self._paralyzed:
        return c.NOT_VALID
    self._paralyzed.remove(reason)
    return c.VALID
def render(self):
i = next(idx for idx, x in enumerate(self._collection) if x.name == self.name)
curr_state = self.state

View File

@ -99,7 +99,7 @@ class Entity(_Object, abc.ABC):
try:
self.bind_to(bind_to)
except AttributeError:
print(f'Objects of {self.__class__.__name__} can not be bound to other entities.')
print(f'Objects of class "{self.__class__.__name__}" can not be bound to other entities.')
exit()
def summarize_state(self) -> dict:

View File

@ -37,7 +37,7 @@ class Entities(_Objects):
return[x for val in self.pos_dict[pos] for x in val if x.var_can_collide]
def empty_positions(self):
empty_positions= [key for key in self.floorlist if self.pos_dict[key]]
empty_positions = [key for key in self.floorlist if not self.pos_dict[key]]
shuffle(empty_positions)
return empty_positions

View File

@ -1,4 +1,4 @@
from .actions import BtryCharge
from .entitites import Pod, Battery
from .groups import ChargePods, Batteries
from .rules import BtryDoneAtDischarge, Btry
from .rules import BtryDoneAtDischarge, BatteryDecharge

View File

@ -1,16 +1,17 @@
from typing import Union
import marl_factory_grid.modules.batteries.constants
from marl_factory_grid.environment.actions import Action
from marl_factory_grid.utils.results import ActionResult
from marl_factory_grid.modules.batteries import constants as b, rewards as r
from marl_factory_grid.modules.batteries import constants as b
from marl_factory_grid.environment import constants as c
class BtryCharge(Action):
def __init__(self):
super().__init__(b.CHARGE)
super().__init__(b.ACTION_CHARGE)
def do(self, entity, state) -> Union[None, ActionResult]:
if charge_pod := state[b.CHARGE_PODS].by_pos(entity.pos):
@ -23,4 +24,4 @@ class BtryCharge(Action):
valid = c.NOT_VALID
state.print(f'{entity.name} failed to charged batteries at {entity.pos}.')
return ActionResult(entity=entity, identifier=self._identifier, validity=valid,
reward=r.CHARGE_VALID if valid else r.CHARGE_FAIL)
reward=marl_factory_grid.modules.batteries.constants.REWARD_CHARGE_VALID if valid else marl_factory_grid.modules.batteries.constants.Reward_CHARGE_FAIL)

View File

@ -4,5 +4,14 @@ BATTERIES = 'Batteries'
BATTERY_DISCHARGED = 'DISCHARGED'
CHARGE_POD_SYMBOL = 1
ACTION_CHARGE = 'do_charge_action'
CHARGE = 'do_charge_action'
REWARD_CHARGE_VALID: float = 0.1
Reward_CHARGE_FAIL: float = -0.1
# Upper-case alias matching the other REWARD_* names; the mixed-case name stays for existing references.
REWARD_CHARGE_FAIL: float = Reward_CHARGE_FAIL
REWARD_BATTERY_DISCHARGED: float = -1.0
REWARD_DISCHARGE_DONE: float = -1.0

# Modes of the discharge done-condition; each name now carries its own (not the other's) string value.
GROUPED = "grouped"
SINGLE = "single"
MODES = [GROUPED, SINGLE]

View File

@ -1,3 +0,0 @@
CHARGE_VALID: float = 0.1
CHARGE_FAIL: float = -0.1
BATTERY_DISCHARGED: float = -1.0

View File

@ -1,24 +1,58 @@
from typing import List, Union
import marl_factory_grid.modules.batteries.constants
from marl_factory_grid.environment.rules import Rule
from marl_factory_grid.utils.results import TickResult, DoneResult
from marl_factory_grid.environment import constants as c
from marl_factory_grid.modules.batteries import constants as b, rewards as r
from marl_factory_grid.modules.batteries import constants as b
class Btry(Rule):
class BatteryDecharge(Rule):
def __init__(self, initial_charge: float = 0.8, per_action_costs: Union[dict, float] = 0.02):
def __init__(self, initial_charge: float = 0.8, per_action_costs: Union[dict, float] = 0.02,
             battery_charge_reward: float = b.REWARD_CHARGE_VALID,
             battery_failed_reward: float = b.Reward_CHARGE_FAIL,
             battery_discharge_reward: float = b.REWARD_BATTERY_DISCHARGED,
             paralyze_agents_on_discharge: bool = False):
    """
    Enables the battery charge/discharge functionality.

    ----
    !!! Does not introduce any Env.-Done condition.
    !!! Batteries can only be charged if the agent possesses the "Charge" action.
    !!! Batteries can only be charged if there are charge pods and they are spawned!
    ----

    :type initial_charge: float
    :param initial_charge: Charge level every battery is spawned with.

    :type per_action_costs: Union[dict, float]
    :param per_action_costs: 1. dict: with an action name as key, provide a value for each
                                (maybe walking is less tedious than opening a door? Just saying...).
                             2. float: each action "costs" the same.

    :type battery_charge_reward: float
    :param battery_charge_reward: Positive reward, when agents actually charge their battery.
                                  Default: b.REWARD_CHARGE_VALID

    :type battery_failed_reward: float
    :param battery_failed_reward: Negative reward, when agents try to charge but cannot
                                  (overcharge, not on a station). Default: b.Reward_CHARGE_FAIL

    :type battery_discharge_reward: float
    :param battery_discharge_reward: Negative reward, when agents let their battery discharge.
                                     Default: b.REWARD_BATTERY_DISCHARGED

    :type paralyze_agents_on_discharge: bool
    :param paralyze_agents_on_discharge: Whether agents get paralyzed (i.e. can no longer perform actions)
                                         while their battery is discharged.
    """
    # NOTE: this must be a plain string literal. An f-string in this position is an ordinary expression,
    #       so it would neither become __doc__ nor be free of cost (it would be formatted on every call).
    super().__init__()
    self.paralyze_agents_on_discharge = paralyze_agents_on_discharge
    self.battery_discharge_reward = battery_discharge_reward
    self.battery_failed_reward = battery_failed_reward
    self.battery_charge_reward = battery_charge_reward
    self.per_action_costs = per_action_costs
    self.initial_charge = initial_charge
def on_init(self, state, lvl_map):
assert len(state[c.AGENT]), "There are no agents, did you already spawn them?"
state[b.BATTERIES].spawn(state[c.AGENT], self.initial_charge)
def tick_pre_step(self, state) -> List[TickResult]:
pass
def tick_step(self, state) -> List[TickResult]:
# Decharge
batteries = state[b.BATTERIES]
@ -42,27 +76,78 @@ class Btry(Rule):
if btry.is_discharged:
state.print(f'Battery of {btry.bound_entity.name} is discharged!')
results.append(
TickResult(self.name, entity=btry.bound_entity, reward=r.BATTERY_DISCHARGED, validity=c.VALID))
else:
pass
TickResult(self.name, entity=btry.bound_entity, reward=self.battery_discharge_reward,
validity=c.VALID)
)
if self.paralyze_agents_on_discharge:
btry.bound_entity.paralyze(self.name)
results.append(
TickResult("Paralyzed", entity=btry.bound_entity, reward=0, validity=c.VALID)
)
state.print(f'{btry.bound_entity.name} has just been paralyzed!')
if btry.bound_entity.var_is_paralyzed and not btry.is_discharged:
btry.bound_entity.de_paralyze(self.name)
results.append(
TickResult("De-Paralyzed", entity=btry.bound_entity, reward=0, validity=c.VALID)
)
state.print(f'{btry.bound_entity.name} has just been de-paralyzed!')
return results
class BtryDoneAtDischarge(Rule):
class BtryDoneAtDischarge(BatteryDecharge):

    def __init__(self, reward_discharge_done=b.REWARD_DISCHARGE_DONE, mode: str = b.SINGLE, **kwargs):
        """
        Enables the battery charge/discharge functionality and additionally introduces a Done-condition
        that depends on discharged batteries.

        :type mode: str
        :param mode: b.SINGLE: the rule triggers as soon as any battery is discharged.
                     b.GROUPED: the rule triggers only once all batteries are discharged.

        :type reward_discharge_done: float
        :param reward_discharge_done: Global negative reward, reported when the Done-condition triggers.
                                      Default: b.REWARD_DISCHARGE_DONE

        All remaining keyword arguments (initial_charge, per_action_costs, battery_charge_reward,
        battery_failed_reward, battery_discharge_reward, paralyze_agents_on_discharge) are forwarded
        unchanged to BatteryDecharge.
        """
        super().__init__(**kwargs)
        self.mode = mode
        self.reward_discharge_done = reward_discharge_done

    def on_check_done(self, state) -> List[DoneResult]:
        """
        Report whether the discharge Done-condition is met for the configured mode.

        :param state: Game state; `state[b.BATTERIES]` is iterated for its `is_discharged` flags.
        :return: A single DoneResult; valid (carrying `reward_discharge_done`) when the condition is met,
                 otherwise not valid with reward 0.
        """
        batteries = state[b.BATTERIES]
        any_discharged = self.mode == b.SINGLE and any(battery.is_discharged for battery in batteries)
        # The "all batteries" check belongs to the GROUPED mode; testing SINGLE here made it unreachable.
        all_discharged = self.mode == b.GROUPED and all(battery.is_discharged for battery in batteries)
        if any_discharged or all_discharged:
            return [DoneResult(self.name, validity=c.VALID, reward=self.reward_discharge_done)]
        return [DoneResult(self.name, validity=c.NOT_VALID, reward=0)]
class PodRules(Rule):
class SpawnChargePods(Rule):
def __init__(self, n_pods: int, charge_rate: float = 0.4, multi_charge: bool = False):
"""
Spawn Chargepods in accordance to the given parameters.
:type n_pods: int
:param n_pods: How many charge pods are there?
:type charge_rate: float
:param charge_rate: How much juice does each use of the charge action top up?
:type multi_charge: bool
:param multi_charge: Whether multiple agents are able to charge at the same time.
"""
super().__init__()
self.multi_charge = multi_charge
self.charge_rate = charge_rate
@ -73,5 +158,5 @@ class PodRules(Rule):
empty_positions = state.entities.empty_positions()
pods = pod_collection.from_coordinates(empty_positions, entity_kwargs=dict(
multi_charge=self.multi_charge, charge_rate=self.charge_rate)
)
)
pod_collection.add_items(pods)

View File

@ -1,4 +1,4 @@
from .actions import CleanUp
from .entitites import DirtPile
from .groups import DirtPiles
from .rules import DirtRespawnRule, DirtSmearOnMove, DirtAllCleanDone
from .rules import SpawnDirt, EntitiesSmearDirtOnMove, DoneOnAllDirtCleaned

View File

@ -32,11 +32,9 @@ class DirtPile(Entity):
# Edit this if you want items to be drawn in the ops differntly
return self._amount
def __init__(self, *args, max_local_amount=5, initial_amount=2, spawn_variation=0.05, **kwargs):
def __init__(self, *args, amount=2, max_local_amount=5, **kwargs):
super(DirtPile, self).__init__(*args, **kwargs)
self._amount = abs(initial_amount + (
random.normal(loc=0, scale=spawn_variation, size=1).item() * initial_amount)
)
self._amount = amount
self.max_local_amount = max_local_amount
def set_new_amount(self, amount):

View File

@ -1,4 +1,5 @@
from marl_factory_grid.environment import constants as c
from marl_factory_grid.utils.results import Result
from marl_factory_grid.environment.groups.collection import Collection
from marl_factory_grid.modules.clean_up.entitites import DirtPile
@ -27,42 +28,44 @@ class DirtPiles(Collection):
return sum([dirt.amount for dirt in self])
def __init__(self, *args,
initial_amount=2,
initial_dirt_ratio=0.05,
dirt_spawn_r_var=0.1,
max_local_amount=5,
clean_amount=1,
max_global_amount: int = 20, **kwargs):
super(DirtPiles, self).__init__(*args, **kwargs)
self.clean_amount = clean_amount
self.initial_amount = initial_amount
self.initial_dirt_ratio = initial_dirt_ratio
self.dirt_spawn_r_var = dirt_spawn_r_var
self.max_global_amount = max_global_amount
self.max_local_amount = max_local_amount
def spawn(self, then_dirty_positions, amount) -> bool:
for pos in then_dirty_positions:
def spawn(self, then_dirty_positions, amount_s) -> Result:
spawn_counter = 0
for idx, pos in enumerate(then_dirty_positions):
if not self.amount > self.max_global_amount:
amount = amount_s[idx] if isinstance(amount_s, list) else amount_s
if dirt := self.by_pos(pos):
new_value = dirt.amount + amount
dirt.set_new_amount(new_value)
else:
dirt = DirtPile(pos, initial_amount=amount, spawn_variation=self.dirt_spawn_r_var)
dirt = DirtPile(pos, amount=amount)
self.add_item(dirt)
spawn_counter += 1
else:
return c.NOT_VALID
return c.VALID
return Result(identifier=f'{self.name}_spawn', validity=c.NOT_VALID, reward=0,
value=spawn_counter)
return Result(identifier=f'{self.name}_spawn', validity=c.VALID, reward=0, value=spawn_counter)
def trigger_dirt_spawn(self, state, initial_spawn=False) -> bool:
def trigger_dirt_spawn(self, n, amount, state, n_var=0.2, amount_var=0.2) -> Result:
    """
    Pick random positions that may receive dirt and spawn (or top up) dirt piles there.

    :param n: Base number of positions to spawn dirt on.
    :param amount: Base amount of dirt per position.
    :param state: Game state; provides `entities.floorlist`, `entities.pos_dict` and the `rng`.
    :param n_var: `n` is shifted by a uniform sample from [-n_var, n_var] before truncation to int.
    :param amount_var: Every amount is scaled by (1 + uniform sample from [-amount_var, amount_var]).
    :return: The Result reported by `self.spawn`.
    """
    pos_dict = state.entities.pos_dict

    def is_free_for_dirt(pos) -> bool:
        entities_here = pos_dict[pos]
        if len(entities_here) == 1:
            return True
        # Inspect the entities AT the position; iterating the position key itself only yields coordinates,
        # which can never be a DirtPile.
        return len(entities_here) == 2 and any(isinstance(y, DirtPile) for y in entities_here)

    free_for_dirt = [x for x in state.entities.floorlist if is_free_for_dirt(x)]
    # free_for_dirt = [x for x in state[c.FLOOR]
    #                  if len(x.guests) == 0 or (
    #                          len(x.guests) == 1 and
    #                          isinstance(next(y for y in x.guests), DirtPile))]
    state.rng.shuffle(free_for_dirt)

    new_spawn = int(abs(n + (state.rng.uniform(-n_var, n_var))))
    new_amount_s = [abs(amount + (amount*state.rng.uniform(-amount_var, amount_var))) for _ in range(new_spawn)]
    # Slicing silently yields fewer positions when less than `new_spawn` are free.
    n_dirty_positions = free_for_dirt[:new_spawn]
    return self.spawn(n_dirty_positions, new_amount_s)
def __repr__(self):
s = super(DirtPiles, self).__repr__()

View File

@ -7,53 +7,98 @@ from marl_factory_grid.utils.results import TickResult
from marl_factory_grid.utils.results import DoneResult
class DirtAllCleanDone(Rule):
class DoneOnAllDirtCleaned(Rule):
def __init__(self):
def __init__(self, reward: float = r.CLEAN_UP_ALL):
"""
Defines a 'Done'-condition which tirggers, when there is no more 'Dirt' in the environment.
:type reward: float
:parameter reward: Given reward when condition triggers.
"""
super().__init__()
self.reward = reward
def on_check_done(self, state) -> [DoneResult]:
if len(state[d.DIRT]) == 0 and state.curr_step:
return [DoneResult(validity=c.VALID, identifier=self.name, reward=r.CLEAN_UP_ALL)]
return [DoneResult(validity=c.VALID, identifier=self.name, reward=self.reward)]
return [DoneResult(validity=c.NOT_VALID, identifier=self.name, reward=0)]
class DirtRespawnRule(Rule):
class SpawnDirt(Rule):
def __init__(self, spawn_freq=15):
def __init__(self, initial_n: int, initial_amount: float, respawn_n: int, respawn_amount: float,
n_var: float = 0.2, amount_var: float = 0.2, spawn_freq: int = 15):
"""
Defines the spawn pattern of intial and additional 'Dirt'-entitites.
First chooses positions, then trys to spawn dirt until 'respawn_n' or the maximal global amount is reached.
If there is allready some, it is topped up to min(max_local_amount, amount).
:type spawn_freq: int
:parameter spawn_freq: In which frequency should this Rule try to spawn new 'Dirt'?
:type respawn_n: int
:parameter respawn_n: How many respawn positions are considered.
:type initial_n: int
:parameter initial_n: How much initial positions are considered.
:type amount_var: float
:parameter amount_var: Variance of amount to spawn.
:type n_var: float
:parameter n_var: Variance of n to spawn.
:type respawn_amount: float
:parameter respawn_amount: Defines how much dirt 'amount' is placed every 'spawn_freq' ticks.
:type initial_amount: float
:parameter initial_amount: Defines how much dirt 'amount' is initially placed.
"""
super().__init__()
self.amount_var = amount_var
self.n_var = n_var
self.respawn_amount = respawn_amount
self.respawn_n = respawn_n
self.initial_amount = initial_amount
self.initial_n = initial_n
self.spawn_freq = spawn_freq
self._next_dirt_spawn = spawn_freq
def on_init(self, state, lvl_map) -> str:
state[d.DIRT].trigger_dirt_spawn(state, initial_spawn=True)
return f'Initial Dirt was spawned on: {[x.pos for x in state[d.DIRT]]}'
result = state[d.DIRT].trigger_dirt_spawn(self.initial_n, self.initial_amount, state,
n_var=self.n_var, amount_var=self.amount_var)
state.print(f'Initial Dirt was spawned on: {[x.pos for x in state[d.DIRT]]}')
return result
def tick_step(self, state):
    """
    Count down to the next dirt respawn and trigger it once the counter reaches zero.

    :param state: Game state; `state[d.DIRT]` is asked to spawn dirt when the countdown has elapsed.
    :return: A list holding the spawn result when dirt was (re)spawned this tick, otherwise an empty list.
    """
    # Bound up front so that every branch (including the disabled one) has something to return.
    result = []
    if self._next_dirt_spawn < 0:
        pass  # No DirtPile Spawn
    elif not self._next_dirt_spawn:
        result = [state[d.DIRT].trigger_dirt_spawn(self.respawn_n, self.respawn_amount, state,
                                                   n_var=self.n_var, amount_var=self.amount_var)]
        self._next_dirt_spawn = self.spawn_freq
    else:
        self._next_dirt_spawn -= 1
    return result
class DirtSmearOnMove(Rule):
class EntitiesSmearDirtOnMove(Rule):
def __init__(self, smear_amount: float = 0.2):
def __init__(self, smear_ratio: float = 0.2):
"""
Enables 'smearing'. Entities that move through dirt, will leave a trail behind them.
They take dirt * smear_ratio of it with them to their next position.
:type smear_ratio: float
:parameter smear_ratio: How much percent dirt is smeared by entities to their next position.
"""
assert smear_ratio < 1, "'Smear Amount' must be smaller than 1"
super().__init__()
self.smear_amount = smear_amount
self.smear_ratio = smear_ratio
def tick_post_step(self, state):
results = list()
for entity in state.moving_entites:
if is_move(entity.state.identifier) and entity.state.validity == c.VALID:
if old_pos_dirt := state[d.DIRT].by_pos(entity.last_pos):
if smeared_dirt := round(old_pos_dirt.amount * self.smear_amount, 2):
if smeared_dirt := round(old_pos_dirt.amount * self.smear_ratio, 2):
if state[d.DIRT].spawn(entity.pos, amount=smeared_dirt):
results.append(TickResult(identifier=self.name, entity=entity,
reward=0, validity=c.VALID))

View File

@ -1,4 +1,4 @@
from .actions import DestAction
from .entitites import Destination
from .groups import Destinations
from .rules import DestinationReachAll, DestinationSpawn
from .rules import DoneAtDestinationReachAll, SpawnDestinations

View File

@ -1,9 +1,10 @@
from typing import Union
import marl_factory_grid.modules.destinations.constants
from marl_factory_grid.environment.actions import Action
from marl_factory_grid.utils.results import ActionResult
from marl_factory_grid.modules.destinations import constants as d, rewards as r
from marl_factory_grid.modules.destinations import constants as d
from marl_factory_grid.environment import constants as c
@ -20,4 +21,4 @@ class DestAction(Action):
valid = c.NOT_VALID
state.print(f'{entity.name} just tried to do_wait_action do_wait_action at {entity.pos} but failed')
return ActionResult(entity=entity, identifier=self._identifier, validity=valid,
reward=r.WAIT_VALID if valid else r.WAIT_FAIL)
reward=marl_factory_grid.modules.destinations.constants.REWARD_WAIT_VALID if valid else marl_factory_grid.modules.destinations.constants.REWARD_WAIT_FAIL)

View File

@ -3,10 +3,11 @@
DESTINATION = 'Destinations'
DEST_SYMBOL = 1
WAIT_ON_DEST = 'WAIT'
MODE_SINGLE = 'SINGLE'
MODE_GROUPED = 'GROUPED'
SPAWN_MODES = [MODE_SINGLE, MODE_GROUPED]
DONE_ALL = 'DONE_ALL'
DONE_SINGLE = 'DONE_SINGLE'
REWARD_WAIT_VALID: float = 0.1
REWARD_WAIT_FAIL: float = -0.1
REWARD_DEST_REACHED: float = 1.0
REWARD_DEST_DONE: float = 5.0

View File

@ -1,3 +0,0 @@
WAIT_VALID: float = 0.1
WAIT_FAIL: float = -0.1
DEST_REACHED: float = 5.0

View File

@ -1,18 +1,29 @@
import ast
from random import shuffle
from typing import List, Dict, Tuple
import marl_factory_grid.modules.destinations.constants
from marl_factory_grid.environment.rules import Rule
from marl_factory_grid.utils.results import TickResult, DoneResult
from marl_factory_grid.environment import constants as c
from marl_factory_grid.modules.destinations import constants as d, rewards as r
from marl_factory_grid.modules.destinations import constants as d
from marl_factory_grid.modules.destinations.entitites import Destination
class DestinationReachAll(Rule):
class DestinationReachReward(Rule):
def __init__(self):
super(DestinationReachAll, self).__init__()
def __init__(self, dest_reach_reward=marl_factory_grid.modules.destinations.constants.REWARD_DEST_REACHED):
"""
This rule introduces the basic functionality, so that targts (Destinations) can be reached and marked as such.
Additionally, rewards are reported.
:type dest_reach_reward: float
:param dest_reach_reward: Specifies the reward, agents get at destination reach.
"""
super(DestinationReachReward, self).__init__()
self.reward = dest_reach_reward
def tick_step(self, state) -> List[TickResult]:
results = []
@ -33,31 +44,69 @@ class DestinationReachAll(Rule):
if reached:
state.print(f'{dest.name} is reached now, mark as reached...')
dest.mark_as_reached()
results.append(TickResult(self.name, validity=c.VALID, reward=r.DEST_REACHED, entity=agent))
results.append(TickResult(self.name, validity=c.VALID, reward=self.reward, entity=agent))
return results
class DoneAtDestinationReachAll(DestinationReachReward):

    def __init__(self, reward_at_done=marl_factory_grid.modules.destinations.constants.REWARD_DEST_DONE, **kwargs):
        """
        This rule triggers and sets the done flag if ALL Destinations have been reached.

        :type reward_at_done: float
        :param reward_at_done: Specifies the reward agents get when all destinations are reached.

        :type dest_reach_reward: float
        :param dest_reach_reward: Specifies the reward agents get when reaching a single destination
                                  (forwarded to DestinationReachReward).
        """
        super(DoneAtDestinationReachAll, self).__init__(**kwargs)
        # Stored separately: `self.reward` is the per-destination reward handed out by the inherited
        # tick_step and must not be overwritten with the done reward.
        self.reward_at_done = reward_at_done

    def on_check_done(self, state) -> List[DoneResult]:
        """
        Report a valid DoneResult carrying `reward_at_done` once every destination was reached,
        otherwise a not-valid DoneResult with reward 0.
        """
        if all(x.was_reached() for x in state[d.DESTINATION]):
            return [DoneResult(self.name, validity=c.VALID, reward=self.reward_at_done)]
        return [DoneResult(self.name, validity=c.NOT_VALID, reward=0)]
class DestinationReachAny(DestinationReachAll):
class DoneAtDestinationReachAny(DestinationReachReward):

    def __init__(self, reward_at_done=d.REWARD_DEST_DONE, **kwargs):
        """
        This rule triggers and sets the done flag if ANY Destination has been reached.
        !!! IMPORTANT: 'reward_at_done' is shared between the agents; 'dest_reach_reward' is bound to a specific one.

        :type reward_at_done: float
        :param reward_at_done: Specifies the reward all agents get, when any destination has been reached.
                               Default: d.REWARD_DEST_DONE

        :type dest_reach_reward: float
        :param dest_reach_reward: Specifies a single agent's reward for reaching a single destination
                                  (forwarded to DestinationReachReward). Default: d.REWARD_DEST_REACHED
        """
        super(DoneAtDestinationReachAny, self).__init__(**kwargs)
        # Stored separately: `self.reward` is the per-destination reward handed out by the inherited
        # tick_step and must not be overwritten with the done reward.
        self.reward_at_done = reward_at_done

    def on_check_done(self, state) -> List[DoneResult]:
        """
        Report a valid DoneResult carrying the configured `reward_at_done` as soon as any destination
        was reached; otherwise report nothing (empty list).
        """
        if any(x.was_reached() for x in state[d.DESTINATION]):
            return [DoneResult(self.name, validity=c.VALID, reward=self.reward_at_done)]
        return []
class DestinationSpawn(Rule):
class SpawnDestinations(Rule):
def __init__(self, n_dests: int = 1, spawn_frequency: int = 5,
spawn_mode: str = d.MODE_GROUPED):
super(DestinationSpawn, self).__init__()
def __init__(self, n_dests: int = 1, spawn_mode: str = d.MODE_GROUPED):
f"""
Defines how destinations are initially spawned and respawned in addition.
!!! This rule introduces no kind of reward or Env.-Done condition!
:type n_dests: int
:param n_dests: How many destiantions should be maintained (and initally spawnewd) on the map?
:type spawn_mode: str
:param spawn_mode: One of {d.SPAWN_MODES}. {d.MODE_GROUPED}: Always wait for all Dstiantions do be gone,
then respawn after the given time. {d.MODE_SINGLE}: Just spawn every destination,
that has been reached, after the given time
"""
super(SpawnDestinations, self).__init__()
self.n_dests = n_dests
self.spawn_mode = spawn_mode
@ -81,8 +130,18 @@ class DestinationSpawn(Rule):
pass
class FixedDestinationSpawn(Rule):
class SpawnDestinationsPerAgent(Rule):
def __init__(self, per_agent_positions: Dict[str, List[Tuple[int, int]]]):
    """
    Special rule that spawns destinations which are bound to a single agent, on a fixed set of positions.
    Useful for introducing specialists, etc.

    !!! This rule does not introduce any reward or done condition.

    :type per_agent_positions: Dict[str, List[Tuple[int, int]]]
    :param per_agent_positions: Please provide a dictionary with agent names as keys and a list of possible
                                destination coords as value. Example: {Wolfgang: [(0, 0), (1, 1), ...]}
                                Every coordinate is parsed with `ast.literal_eval`, i.e. it is expected in
                                its literal text form.
    """
    # Zero-argument super() runs Rule.__init__; `super(Rule, self)` would start the lookup AFTER Rule
    # and therefore skip it.
    super().__init__()
    self.per_agent_positions = {key: [ast.literal_eval(x) for x in val] for key, val in per_agent_positions.items()}

View File

@ -1,4 +1,4 @@
from .actions import DoorUse
from .entitites import Door, DoorIndicator
from .groups import Doors
from .rules import DoorAutoClose, DoorIndicateArea
from .rules import DoDoorAutoClose, IndicateDoorAreaInObservation

View File

@ -1,13 +1,19 @@
from marl_factory_grid.environment.rules import Rule
from marl_factory_grid.environment import constants as c
from marl_factory_grid.environment.rules import Rule
from marl_factory_grid.utils.results import TickResult
from . import constants as d
from .entitites import DoorIndicator
class DoorAutoClose(Rule):
class DoDoorAutoClose(Rule):
def __init__(self, close_frequency: int = 10):
"""
This rule closes doors, that have been opened automatically, when no entity is blocking the position.
:type close_frequency: int
:param close_frequency: How many ticks after opening, should the door close?
"""
super().__init__()
self.close_frequency = close_frequency
@ -22,9 +28,16 @@ class DoorAutoClose(Rule):
return []
class DoorIndicateArea(Rule):
class IndicateDoorAreaInObservation(Rule):
def __init__(self):
"""
Whether to add an indicator aka place a value around doors (within the door tile), for agents to see.
This could improve learning.
"""
# Todo: Discuss if this rather be a single entity with a standalone observation.
# Could then be combined with the "Combine"-approach.
super().__init__()
def on_init(self, state, lvl_map):

View File

@ -27,9 +27,3 @@ class MachineRule(Rule):
def on_check_done(self, state) -> List[DoneResult]:
pass
class DoneOnBreakRule(Rule):
def on_check_done(self, state) -> List[DoneResult]:
pass

View File

@ -22,7 +22,7 @@ This file is used for:
"""
LEVELS_DIR = 'modules/levels' # for use in studies and experiments
LEVELS_DIR = 'levels' # for use in studies and experiments
STEPS_START = 1 # Define where to the stepcount; which is the first step
IGNORED_DF_COLUMNS = ['Episode', 'Run', # For plotting, which values are ignored when loading monitor files

View File

@ -88,11 +88,15 @@ class Gamestate(object):
results.extend(self.rules.tick_pre_step_all(self))
for idx, action_int in enumerate(actions):
agent = self[c.AGENT][idx].clear_temp_state()
action = agent.actions[action_int]
action_result = action.do(agent, self)
results.append(action_result)
agent.set_state(action_result)
if not agent.var_is_paralyzed:
agent = self[c.AGENT][idx].clear_temp_state()
action = agent.actions[action_int]
action_result = action.do(agent, self)
results.append(action_result)
agent.set_state(action_result)
else:
self.print(f"{agent.name} is paralied because of: {agent.paralyze_reasons}")
continue
results.extend(self.rules.tick_step_all(self))
results.extend(self.rules.tick_post_step_all(self))