Merge branch 'main' into refactor_rename

# Conflicts: # marl_factory_grid/modules/clean_up/groups.py # marl_factory_grid/modules/clean_up/rules.py # marl_factory_grid/modules/destinations/rules.py
2025-07-07 01:51:35 +02:00 · 2023-10-27 13:12:54 +02:00
parent b13dff925b ac557232a1
commit 0e5b18c812
31 changed files with 332 additions and 105 deletions
--- a/README.md
+++ b/README.md
@ -60,7 +60,7 @@ Just define what your environment needs in a *yaml*-configfile like:
            done_at_collisions: !!bool True
        ItemRespawn:
            spawn_freq: 5
-        DoorAutoClose: {}
+        DoDoorAutoClose: {}

    Assets:
    - Defaults
--- a/marl_factory_grid/environment/entity/agent.py
+++ b/marl_factory_grid/environment/entity/agent.py
@ -20,6 +20,14 @@ class Agent(Entity):
    def var_can_move(self):
        return True

+    @property
+    def var_is_paralyzed(self):
+        return len(self._paralyzed)
+
+    @property
+    def paralyze_reasons(self):
+        return [x for x in self._paralyzed]
+
    @property
    def var_is_blocking_pos(self):
        return False
@ -57,6 +65,7 @@ class Agent(Entity):

    def __init__(self, actions: List[Action], observations: List[str], *args, **kwargs):
        super(Agent, self).__init__(*args, **kwargs)
+        self._paralyzed = set()
        self.step_result = dict()
        self._actions = actions
        self._observations = observations
@ -75,6 +84,17 @@ class Agent(Entity):
    def set_state(self, action_result):
        self._state = action_result

+    def paralyze(self, reason):
+        self._paralyzed.add(reason)
+        return c.VALID
+
+    def de_paralyze(self, reason):
+        try:
+            self._paralyzed.remove(reason)
+            return c.VALID
+        except KeyError:
+            return c.NOT_VALID
+
    def render(self):
        i = next(idx for idx, x in enumerate(self._collection) if x.name == self.name)
        curr_state = self.state
--- a/marl_factory_grid/environment/entity/entity.py
+++ b/marl_factory_grid/environment/entity/entity.py
@ -99,7 +99,7 @@ class Entity(_Object, abc.ABC):
            try:
                self.bind_to(bind_to)
            except AttributeError:
-                print(f'Objects of {self.__class__.__name__} can not be bound to other entities.')
+                print(f'Objects of class "{self.__class__.__name__}" can not be bound to other entities.')
                exit()

    def summarize_state(self) -> dict:
--- a/marl_factory_grid/environment/groups/global_entities.py
+++ b/marl_factory_grid/environment/groups/global_entities.py
@ -37,7 +37,7 @@ class Entities(_Objects):
        return[x for val in self.pos_dict[pos] for x in val if x.var_can_collide]

    def empty_positions(self):
-        empty_positions= [key for key in self.floorlist if self.pos_dict[key]]
+        empty_positions = [key for key in self.floorlist if not self.pos_dict[key]]
        shuffle(empty_positions)
        return empty_positions

--- a/marl_factory_grid/modules/levels/init.py
+++ b/marl_factory_grid/modules/levels/init.py
--- a/marl_factory_grid/modules/levels/large.txt
+++ b/marl_factory_grid/modules/levels/large.txt
--- a/marl_factory_grid/modules/levels/large_qquad.txt
+++ b/marl_factory_grid/modules/levels/large_qquad.txt
--- a/marl_factory_grid/modules/levels/narrow_corridor.txt
+++ b/marl_factory_grid/modules/levels/narrow_corridor.txt
--- a/marl_factory_grid/modules/levels/rooms.txt
+++ b/marl_factory_grid/modules/levels/rooms.txt
--- a/marl_factory_grid/modules/levels/shelves.txt
+++ b/marl_factory_grid/modules/levels/shelves.txt
--- a/marl_factory_grid/modules/levels/simple.txt
+++ b/marl_factory_grid/modules/levels/simple.txt
--- a/marl_factory_grid/modules/levels/two_rooms.txt
+++ b/marl_factory_grid/modules/levels/two_rooms.txt
--- a/marl_factory_grid/modules/batteries/init.py
+++ b/marl_factory_grid/modules/batteries/init.py
@ -1,4 +1,4 @@
 from .actions import BtryCharge
 from .entitites import Pod, Battery
 from .groups import ChargePods, Batteries
-from .rules import BtryDoneAtDischarge, Btry
+from .rules import BtryDoneAtDischarge, BatteryDecharge
--- a/marl_factory_grid/modules/batteries/actions.py
+++ b/marl_factory_grid/modules/batteries/actions.py
@ -1,16 +1,17 @@
 from typing import Union

+import marl_factory_grid.modules.batteries.constants
 from marl_factory_grid.environment.actions import Action
 from marl_factory_grid.utils.results import ActionResult

-from marl_factory_grid.modules.batteries import constants as b, rewards as r
+from marl_factory_grid.modules.batteries import constants as b
 from marl_factory_grid.environment import constants as c


 class BtryCharge(Action):

    def __init__(self):
-        super().__init__(b.CHARGE)
+        super().__init__(b.ACTION_CHARGE)

    def do(self, entity, state) -> Union[None, ActionResult]:
        if charge_pod := state[b.CHARGE_PODS].by_pos(entity.pos):
@ -23,4 +24,4 @@ class BtryCharge(Action):
            valid = c.NOT_VALID
            state.print(f'{entity.name} failed to charged batteries at {entity.pos}.')
        return ActionResult(entity=entity, identifier=self._identifier, validity=valid,
-                            reward=r.CHARGE_VALID if valid else r.CHARGE_FAIL)
+                            reward=marl_factory_grid.modules.batteries.constants.REWARD_CHARGE_VALID if valid else marl_factory_grid.modules.batteries.constants.Reward_CHARGE_FAIL)
--- a/marl_factory_grid/modules/batteries/constants.py
+++ b/marl_factory_grid/modules/batteries/constants.py
@ -4,5 +4,14 @@ BATTERIES            = 'Batteries'
 BATTERY_DISCHARGED   = 'DISCHARGED'
 CHARGE_POD_SYMBOL    = 1

+ACTION_CHARGE                    = 'do_charge_action'

-CHARGE              = 'do_charge_action'
+REWARD_CHARGE_VALID: float       = 0.1
+Reward_CHARGE_FAIL: float        = -0.1
+REWARD_BATTERY_DISCHARGED: float = -1.0
+REWARD_DISCHARGE_DONE: float     = -1.0
+
+
+GROUPED = "grouped"  # done only when all batteries are discharged
+SINGLE  = "single"   # done as soon as any battery is discharged
+MODES = [GROUPED, SINGLE]
--- a/marl_factory_grid/modules/batteries/rewards.py
+++ b/marl_factory_grid/modules/batteries/rewards.py
@ -1,3 +0,0 @@
-CHARGE_VALID: float = 0.1
-CHARGE_FAIL: float = -0.1
-BATTERY_DISCHARGED: float = -1.0
--- a/marl_factory_grid/modules/batteries/rules.py
+++ b/marl_factory_grid/modules/batteries/rules.py
@ -1,24 +1,58 @@
 from typing import List, Union
+
+import marl_factory_grid.modules.batteries.constants
 from marl_factory_grid.environment.rules import Rule
 from marl_factory_grid.utils.results import TickResult, DoneResult

 from marl_factory_grid.environment import constants as c
-from marl_factory_grid.modules.batteries import constants as b, rewards as r
+from marl_factory_grid.modules.batteries import constants as b


-class Btry(Rule):
+class BatteryDecharge(Rule):

-    def __init__(self, initial_charge: float = 0.8, per_action_costs: Union[dict, float] = 0.02):
+    def __init__(self, initial_charge: float = 0.8, per_action_costs: Union[dict, float] = 0.02,
+                 battery_charge_reward: float = b.REWARD_CHARGE_VALID,
+                 battery_failed_reward: float = b.Reward_CHARGE_FAIL,
+                 battery_discharge_reward: float = b.REWARD_BATTERY_DISCHARGED,
+                 paralyze_agents_on_discharge: bool = False):
+        f"""
+        Enables the Battery Charge/Discharge functionality.
+
+        :type paralyze_agents_on_discharge: bool
+        :param paralyze_agents_on_discharge: Whether agents get paralyzed when their battery is discharged.
+        :type per_action_costs: Union[dict, float] = 0.02
+        :param per_action_costs: 1. dict: with an action name as key, provide a value for each 
+                                    (maybe walking is less tedious as opening a door? Just saying...).
+                                 2. float: each action "costs" the same.
+        ----                         
+         !!! Does not introduce any Env.-Done condition. 
+         !!! Batteries can only be charged if the agent possesses the "BtryCharge" action.
+         !!! Batteries can only be charged if there are "ChargePods" and they are spawned!
+        ----                         
+        :type initial_charge: float
+        :param initial_charge: How much juice they have.
+        :type battery_discharge_reward: float
+        :param battery_discharge_reward: Negative reward when agents let their batteries discharge.
+                                         Default: {b.REWARD_BATTERY_DISCHARGED}
+        :type battery_failed_reward: float
+        :param battery_failed_reward: Negative reward when agents try to charge but cannot (overcharge, not on station).
+                                       Default: {b.Reward_CHARGE_FAIL}
+        :type battery_charge_reward: float
+        :param battery_charge_reward: Positive reward when agents actually charge their battery.
+                                       Default: {b.REWARD_CHARGE_VALID}
+        """
        super().__init__()
+        self.paralyze_agents_on_discharge = paralyze_agents_on_discharge
+        self.battery_discharge_reward = battery_discharge_reward
+        self.battery_failed_reward = battery_failed_reward
+        self.battery_charge_reward = battery_charge_reward
        self.per_action_costs = per_action_costs
        self.initial_charge = initial_charge

    def on_init(self, state, lvl_map):
+        assert len(state[c.AGENT]), "There are no agents, did you already spawn them?"
        state[b.BATTERIES].spawn(state[c.AGENT], self.initial_charge)

-    def tick_pre_step(self, state) -> List[TickResult]:
-        pass
-
    def tick_step(self, state) -> List[TickResult]:
        # Decharge
        batteries = state[b.BATTERIES]
@ -42,27 +76,78 @@ class Btry(Rule):
            if btry.is_discharged:
                state.print(f'Battery of {btry.bound_entity.name} is discharged!')
                results.append(
-                    TickResult(self.name, entity=btry.bound_entity, reward=r.BATTERY_DISCHARGED, validity=c.VALID))
-        else:
-            pass
+                    TickResult(self.name, entity=btry.bound_entity, reward=self.battery_discharge_reward,
+                               validity=c.VALID)
+                )
+                if self.paralyze_agents_on_discharge:
+                    btry.bound_entity.paralyze(self.name)
+                    results.append(
+                        TickResult("Paralyzed", entity=btry.bound_entity, reward=0, validity=c.VALID)
+                    )
+                    state.print(f'{btry.bound_entity.name} has just been paralyzed!')
+            if btry.bound_entity.var_is_paralyzed and not btry.is_discharged:
+                btry.bound_entity.de_paralyze(self.name)
+                results.append(
+                    TickResult("De-Paralyzed", entity=btry.bound_entity, reward=0, validity=c.VALID)
+                )
+                state.print(f'{btry.bound_entity.name} has just been de-paralyzed!')
        return results


-class BtryDoneAtDischarge(Rule):
+class BtryDoneAtDischarge(BatteryDecharge):

-    def __init__(self):
-        super().__init__()
+    def __init__(self, reward_discharge_done=b.REWARD_DISCHARGE_DONE, mode: str = b.SINGLE, **kwargs):
+        f"""
+        Enables the Battery Charge/Discharge functionality. Additionally, introduces a Done condition on discharge.
+
+        :type mode: str
+        :param mode: Does this Done rule trigger, when any battery is or all batteries are discharged? 
+        :type per_action_costs: Union[dict, float] = 0.02
+        :param per_action_costs: 1. dict: with an action name as key, provide a value for each 
+                                    (maybe walking is less tedious as opening a door? Just saying...).
+                                 2. float: each action "costs" the same.
+                                 
+        :type initial_charge: float
+        :param initial_charge: How much juice they have.
+        :type reward_discharge_done: float
+        :param reward_discharge_done: Global negative reward when the discharge Done condition triggers.
+                                         Default: {b.REWARD_DISCHARGE_DONE}
+        :type battery_discharge_reward: float
+        :param battery_discharge_reward: Negative reward when agents let their batteries discharge.
+                                         Default: {b.REWARD_BATTERY_DISCHARGED}
+        :type battery_failed_reward: float
+        :param battery_failed_reward: Negative reward when agents try to charge but cannot (overcharge, not on station).
+                                       Default: {b.Reward_CHARGE_FAIL}
+        :type battery_charge_reward: float
+        :param battery_charge_reward: Positive reward when agents actually charge their battery.
+                                       Default: {b.REWARD_CHARGE_VALID}
+        """
+        super().__init__(**kwargs)
+        self.mode = mode
+        self.reward_discharge_done = reward_discharge_done

    def on_check_done(self, state) -> List[DoneResult]:
-        if btry_done := any(battery.is_discharged for battery in state[b.BATTERIES]):
-            return [DoneResult(self.name, validity=c.VALID, reward=r.BATTERY_DISCHARGED)]
+        any_discharged = (self.mode == b.SINGLE and any(battery.is_discharged for battery in state[b.BATTERIES]))  # SINGLE: any battery
+        all_discharged = (self.mode == b.GROUPED and all(battery.is_discharged for battery in state[b.BATTERIES]))  # GROUPED: all batteries
+        if any_discharged or all_discharged:
+            return [DoneResult(self.name, validity=c.VALID, reward=self.reward_discharge_done)]
        else:
            return [DoneResult(self.name, validity=c.NOT_VALID, reward=0)]


-class PodRules(Rule):
+class SpawnChargePods(Rule):

    def __init__(self, n_pods: int, charge_rate: float = 0.4, multi_charge: bool = False):
+        """
+        Spawn Chargepods in accordance to the given parameters.
+
+        :type n_pods: int
+        :param n_pods: How many charge pods are there?
+        :type charge_rate: float
+        :param charge_rate: How much juice does each use of the charge action top up?
+        :type multi_charge: bool
+        :param multi_charge: Whether multiple agents are able to charge at the same time.
+        """
        super().__init__()
        self.multi_charge = multi_charge
        self.charge_rate = charge_rate
@ -73,5 +158,5 @@ class PodRules(Rule):
        empty_positions = state.entities.empty_positions()
        pods = pod_collection.from_coordinates(empty_positions, entity_kwargs=dict(
            multi_charge=self.multi_charge, charge_rate=self.charge_rate)
-                                         )
+                                               )
        pod_collection.add_items(pods)
--- a/marl_factory_grid/modules/clean_up/init.py
+++ b/marl_factory_grid/modules/clean_up/init.py
@ -1,4 +1,4 @@
 from .actions import CleanUp
 from .entitites import DirtPile
 from .groups import DirtPiles
-from .rules import DirtRespawnRule, DirtSmearOnMove, DirtAllCleanDone
+from .rules import SpawnDirt, EntitiesSmearDirtOnMove, DoneOnAllDirtCleaned
--- a/marl_factory_grid/modules/clean_up/entitites.py
+++ b/marl_factory_grid/modules/clean_up/entitites.py
@ -32,11 +32,9 @@ class DirtPile(Entity):
        # Edit this if you want items to be drawn in the ops differntly
        return self._amount

-    def __init__(self, *args, max_local_amount=5, initial_amount=2, spawn_variation=0.05, **kwargs):
+    def __init__(self, *args, amount=2, max_local_amount=5, **kwargs):
        super(DirtPile, self).__init__(*args, **kwargs)
-        self._amount = abs(initial_amount + (
-                random.normal(loc=0, scale=spawn_variation, size=1).item() * initial_amount)
-                           )
+        self._amount = amount
        self.max_local_amount = max_local_amount

    def set_new_amount(self, amount):
--- a/marl_factory_grid/modules/clean_up/groups.py
+++ b/marl_factory_grid/modules/clean_up/groups.py
@ -1,4 +1,5 @@
 from marl_factory_grid.environment import constants as c
+from marl_factory_grid.utils.results import Result
 from marl_factory_grid.environment.groups.collection import Collection
 from marl_factory_grid.modules.clean_up.entitites import DirtPile

@ -27,42 +28,44 @@ class DirtPiles(Collection):
        return sum([dirt.amount for dirt in self])

    def __init__(self, *args,
-                 initial_amount=2,
-                 initial_dirt_ratio=0.05,
-                 dirt_spawn_r_var=0.1,
                 max_local_amount=5,
                 clean_amount=1,
                 max_global_amount: int = 20, **kwargs):
        super(DirtPiles, self).__init__(*args, **kwargs)
        self.clean_amount = clean_amount
-        self.initial_amount = initial_amount
-        self.initial_dirt_ratio = initial_dirt_ratio
-        self.dirt_spawn_r_var = dirt_spawn_r_var
        self.max_global_amount = max_global_amount
        self.max_local_amount = max_local_amount

-    def spawn(self, then_dirty_positions, amount) -> bool:
-        for pos in then_dirty_positions:
+    def spawn(self, then_dirty_positions, amount_s) -> Result:
+        spawn_counter = 0
+        for idx, pos in enumerate(then_dirty_positions):
            if not self.amount > self.max_global_amount:
+                amount = amount_s[idx] if isinstance(amount_s, list) else amount_s
                if dirt := self.by_pos(pos):
                    new_value = dirt.amount + amount
                    dirt.set_new_amount(new_value)
                else:
-                    dirt = DirtPile(pos, initial_amount=amount, spawn_variation=self.dirt_spawn_r_var)
+                    dirt = DirtPile(pos, amount=amount)
                    self.add_item(dirt)
+                    spawn_counter += 1
            else:
-                return c.NOT_VALID
-        return c.VALID
+                return Result(identifier=f'{self.name}_spawn', validity=c.NOT_VALID, reward=0,
+                              value=spawn_counter)
+        return Result(identifier=f'{self.name}_spawn', validity=c.VALID, reward=0, value=spawn_counter)

-    def trigger_dirt_spawn(self, state, initial_spawn=False) -> bool:
+    def trigger_dirt_spawn(self, n, amount, state, n_var=0.2, amount_var=0.2) -> Result:
        free_for_dirt = [x for x in state.entities.floorlist if len(state.entities.pos_dict[x]) == 1 or (
-                    len(state.entities.pos_dict[x]) == 2 and isinstance(next(y for y in x), DirtPile))]
+                len(state.entities.pos_dict[x]) == 2 and isinstance(next(y for y in x), DirtPile))]
+        # free_for_dirt = [x for x in state[c.FLOOR]
+        #                  if len(x.guests) == 0 or (
+        #                          len(x.guests) == 1 and
+        #                          isinstance(next(y for y in x.guests), DirtPile))]
        state.rng.shuffle(free_for_dirt)

-        var = self.dirt_spawn_r_var
-        new_spawn = abs(self.initial_dirt_ratio + (state.rng.uniform(-var, var) if initial_spawn else 0))
-        n_dirty_positions = max(0, int(new_spawn * len(free_for_dirt)))
-        return self.spawn(free_for_dirt[:n_dirty_positions], self.initial_amount)
+        new_spawn = int(abs(n + (state.rng.uniform(-n_var, n_var))))
+        new_amount_s = [abs(amount + (amount*state.rng.uniform(-amount_var, amount_var))) for _ in range(new_spawn)]
+        n_dirty_positions = free_for_dirt[:new_spawn]
+        return self.spawn(n_dirty_positions, new_amount_s)

    def __repr__(self):
        s = super(DirtPiles, self).__repr__()
--- a/marl_factory_grid/modules/clean_up/rules.py
+++ b/marl_factory_grid/modules/clean_up/rules.py
@ -7,53 +7,98 @@ from marl_factory_grid.utils.results import TickResult
 from marl_factory_grid.utils.results import DoneResult


-class DirtAllCleanDone(Rule):
+class DoneOnAllDirtCleaned(Rule):

-    def __init__(self):
+    def __init__(self, reward: float = r.CLEAN_UP_ALL):
+        """
+        Defines a 'Done'-condition which triggers when there is no more 'Dirt' in the environment.
+
+        :type reward: float
+        :parameter reward: Given reward when condition triggers.
+        """
        super().__init__()
+        self.reward = reward

    def on_check_done(self, state) -> [DoneResult]:
        if len(state[d.DIRT]) == 0 and state.curr_step:
-            return [DoneResult(validity=c.VALID, identifier=self.name, reward=r.CLEAN_UP_ALL)]
+            return [DoneResult(validity=c.VALID, identifier=self.name, reward=self.reward)]
        return [DoneResult(validity=c.NOT_VALID, identifier=self.name, reward=0)]


-class DirtRespawnRule(Rule):
+class SpawnDirt(Rule):

-    def __init__(self, spawn_freq=15):
+    def __init__(self, initial_n: int, initial_amount: float, respawn_n: int, respawn_amount: float,
+                 n_var: float = 0.2, amount_var: float = 0.2, spawn_freq: int = 15):
+        """
+        Defines the spawn pattern of initial and additional 'Dirt'-entities.
+        First chooses positions, then tries to spawn dirt until 'respawn_n' or the maximal global amount is reached.
+        If there is already some, it is topped up to min(max_local_amount, amount).
+
+        :type spawn_freq: int
+        :parameter spawn_freq: In which frequency should this Rule try to spawn new 'Dirt'?
+        :type respawn_n: int
+        :parameter respawn_n: How many respawn positions are considered.
+        :type initial_n: int
+        :parameter initial_n: How much initial positions are considered.
+        :type amount_var: float
+        :parameter amount_var: Variance of amount to spawn.
+        :type n_var: float
+        :parameter n_var: Variance of n to spawn.
+        :type respawn_amount: float
+        :parameter respawn_amount: Defines how much dirt 'amount' is placed every 'spawn_freq' ticks.
+        :type initial_amount: float
+        :parameter initial_amount: Defines how much dirt 'amount' is initially placed.
+
+        """
        super().__init__()
+        self.amount_var = amount_var
+        self.n_var = n_var
+        self.respawn_amount = respawn_amount
+        self.respawn_n = respawn_n
+        self.initial_amount = initial_amount
+        self.initial_n = initial_n
        self.spawn_freq = spawn_freq
        self._next_dirt_spawn = spawn_freq

    def on_init(self, state, lvl_map) -> str:
-        state[d.DIRT].trigger_dirt_spawn(state, initial_spawn=True)
-        return f'Initial Dirt was spawned on: {[x.pos for x in state[d.DIRT]]}'
+        result = state[d.DIRT].trigger_dirt_spawn(self.initial_n, self.initial_amount, state,
+                                                  n_var=self.n_var, amount_var=self.amount_var)
+        state.print(f'Initial Dirt was spawned on: {[x.pos for x in state[d.DIRT]]}')
+        return result

    def tick_step(self, state):
        if self._next_dirt_spawn < 0:
            pass  # No DirtPile Spawn
        elif not self._next_dirt_spawn:
-            validity = state[d.DIRT].trigger_dirt_spawn(state)
-
-            return [TickResult(entity=None, validity=validity, identifier=self.name, reward=0)]
+            result = [state[d.DIRT].trigger_dirt_spawn(self.respawn_n, self.respawn_amount, state,
+                                                       n_var=self.n_var, amount_var=self.amount_var)]
            self._next_dirt_spawn = self.spawn_freq
        else:
            self._next_dirt_spawn -= 1
-        return []
+            result = []
+        return result


-class DirtSmearOnMove(Rule):
+class EntitiesSmearDirtOnMove(Rule):

-    def __init__(self, smear_amount: float = 0.2):
+    def __init__(self, smear_ratio: float = 0.2):
+        """
+        Enables 'smearing'. Entities that move through dirt, will leave a trail behind them.
+        They take dirt * smear_ratio of it with them to their next position.
+
+        :type smear_ratio: float
+        :parameter smear_ratio: How much percent dirt is smeared by entities to their next position.
+        """
+        assert smear_ratio < 1, "'Smear Amount' must be smaller than 1"
        super().__init__()
-        self.smear_amount = smear_amount
+        self.smear_ratio = smear_ratio

    def tick_post_step(self, state):
        results = list()
        for entity in state.moving_entites:
            if is_move(entity.state.identifier) and entity.state.validity == c.VALID:
                if old_pos_dirt := state[d.DIRT].by_pos(entity.last_pos):
-                    if smeared_dirt := round(old_pos_dirt.amount * self.smear_amount, 2):
+                    if smeared_dirt := round(old_pos_dirt.amount * self.smear_ratio, 2):
                        if state[d.DIRT].spawn(entity.pos, amount=smeared_dirt):
                            results.append(TickResult(identifier=self.name, entity=entity,
                                                      reward=0, validity=c.VALID))
--- a/marl_factory_grid/modules/destinations/init.py
+++ b/marl_factory_grid/modules/destinations/init.py
@ -1,4 +1,4 @@
 from .actions import DestAction
 from .entitites import Destination
 from .groups import Destinations
-from .rules import DestinationReachAll, DestinationSpawn
+from .rules import DoneAtDestinationReachAll, SpawnDestinations
--- a/marl_factory_grid/modules/destinations/actions.py
+++ b/marl_factory_grid/modules/destinations/actions.py
@ -1,9 +1,10 @@
 from typing import Union

+import marl_factory_grid.modules.destinations.constants
 from marl_factory_grid.environment.actions import Action
 from marl_factory_grid.utils.results import ActionResult

-from marl_factory_grid.modules.destinations import constants as d, rewards as r
+from marl_factory_grid.modules.destinations import constants as d
 from marl_factory_grid.environment import constants as c


@ -20,4 +21,4 @@ class DestAction(Action):
            valid = c.NOT_VALID
            state.print(f'{entity.name} just tried to do_wait_action do_wait_action at {entity.pos} but failed')
        return ActionResult(entity=entity, identifier=self._identifier, validity=valid,
-                            reward=r.WAIT_VALID if valid else r.WAIT_FAIL)
+                            reward=marl_factory_grid.modules.destinations.constants.REWARD_WAIT_VALID if valid else marl_factory_grid.modules.destinations.constants.REWARD_WAIT_FAIL)
--- a/marl_factory_grid/modules/destinations/constants.py
+++ b/marl_factory_grid/modules/destinations/constants.py
@ -3,10 +3,11 @@
 DESTINATION      = 'Destinations'
 DEST_SYMBOL      = 1

-WAIT_ON_DEST     = 'WAIT'
-
 MODE_SINGLE      = 'SINGLE'
 MODE_GROUPED     = 'GROUPED'
+SPAWN_MODES      = [MODE_SINGLE, MODE_GROUPED]

-DONE_ALL         = 'DONE_ALL'
-DONE_SINGLE      = 'DONE_SINGLE'
+REWARD_WAIT_VALID:      float  = 0.1
+REWARD_WAIT_FAIL:       float  = -0.1
+REWARD_DEST_REACHED:    float  = 1.0
+REWARD_DEST_DONE:       float  = 5.0
--- a/marl_factory_grid/modules/destinations/rewards.py
+++ b/marl_factory_grid/modules/destinations/rewards.py
@ -1,3 +0,0 @@
-WAIT_VALID: float = 0.1
-WAIT_FAIL: float = -0.1
-DEST_REACHED: float = 5.0
--- a/marl_factory_grid/modules/destinations/rules.py
+++ b/marl_factory_grid/modules/destinations/rules.py
@ -1,18 +1,29 @@
 import ast
 from random import shuffle
 from typing import List, Dict, Tuple
+
+import marl_factory_grid.modules.destinations.constants
 from marl_factory_grid.environment.rules import Rule
 from marl_factory_grid.utils.results import TickResult, DoneResult
 from marl_factory_grid.environment import constants as c

-from marl_factory_grid.modules.destinations import constants as d, rewards as r
+from marl_factory_grid.modules.destinations import constants as d
 from marl_factory_grid.modules.destinations.entitites import Destination


-class DestinationReachAll(Rule):
+class DestinationReachReward(Rule):

-    def __init__(self):
-        super(DestinationReachAll, self).__init__()
+    def __init__(self, dest_reach_reward=marl_factory_grid.modules.destinations.constants.REWARD_DEST_REACHED):
+        """
+        This rule introduces the basic functionality, so that targets (Destinations) can be reached and marked as such.
+        Additionally, rewards are reported.
+
+        :type dest_reach_reward: float
+        :param dest_reach_reward: Specifies the reward, agents get at destination reach.
+
+        """
+        super(DestinationReachReward, self).__init__()
+        self.reward = dest_reach_reward

    def tick_step(self, state) -> List[TickResult]:
        results = []
@ -33,31 +44,69 @@ class DestinationReachAll(Rule):
            if reached:
                state.print(f'{dest.name} is reached now, mark as reached...')
                dest.mark_as_reached()
-                results.append(TickResult(self.name, validity=c.VALID, reward=r.DEST_REACHED, entity=agent))
+                results.append(TickResult(self.name, validity=c.VALID, reward=self.reward, entity=agent))
        return results

+
+class DoneAtDestinationReachAll(DestinationReachReward):
+
+    def __init__(self, reward_at_done=marl_factory_grid.modules.destinations.constants.REWARD_DEST_DONE, **kwargs):
+        """
+        This rule triggers and sets the done flag if ALL Destinations have been reached.
+
+        :type reward_at_done: object
+        :param reward_at_done: Specifies the reward agents get when all destinations are reached.
+        :type dest_reach_reward: float
+        :param dest_reach_reward: Specify the reward, agents get when reaching a single destination.
+        """
+        super(DoneAtDestinationReachAll, self).__init__(**kwargs)
+        self.reward = reward_at_done
+
    def on_check_done(self, state) -> List[DoneResult]:
        if all(x.was_reached() for x in state[d.DESTINATION]):
-            return [DoneResult(self.name, validity=c.VALID, reward=r.DEST_REACHED)]
+            return [DoneResult(self.name, validity=c.VALID, reward=self.reward)]
        return [DoneResult(self.name, validity=c.NOT_VALID, reward=0)]


-class DestinationReachAny(DestinationReachAll):
+class DoneAtDestinationReachAny(DestinationReachReward):

-    def __init__(self):
-        super(DestinationReachAny, self).__init__()
+    def __init__(self, reward_at_done=d.REWARD_DEST_DONE, **kwargs):
+        f"""
+        This rule triggers and sets the done flag if ANY Destinations has been reached.
+        !!! IMPORTANT: 'reward_at_done' is shared between the agents; 'dest_reach_reward' is bound to a specific one.
+                
+        :type reward_at_done: object
+        :param reward_at_done: Specifies the reward, all agent get, when any destinations has been reached. 
+                                Default {d.REWARD_DEST_DONE}
+        :type dest_reach_reward: float
+        :param dest_reach_reward: Specify a single agent's reward for reaching a single destination.
+                                   Default {d.REWARD_DEST_REACHED}
+        """
+        super(DoneAtDestinationReachAny, self).__init__(**kwargs)
+        self.reward = reward_at_done

    def on_check_done(self, state) -> List[DoneResult]:
        if any(x.was_reached() for x in state[d.DESTINATION]):
-            return [DoneResult(self.name, validity=c.VALID, reward=r.DEST_REACHED)]
+            return [DoneResult(self.name, validity=c.VALID, reward=self.reward)]  # configured reward_at_done, as in DoneAtDestinationReachAll
        return []


-class DestinationSpawn(Rule):
+class SpawnDestinations(Rule):

-    def __init__(self, n_dests: int = 1, spawn_frequency: int = 5,
-                 spawn_mode: str = d.MODE_GROUPED):
-        super(DestinationSpawn, self).__init__()
+    def __init__(self, n_dests: int = 1, spawn_mode: str = d.MODE_GROUPED):
+        f"""
+        Defines how destinations are initially spawned and respawned in addition.
+        !!! This rule introduces no kind of reward or Env.-Done condition!
+                
+        :type n_dests: int
+        :param n_dests: How many destinations should be maintained (and initially spawned) on the map?
+        :type spawn_mode: str
+        :param spawn_mode: One of {d.SPAWN_MODES}. {d.MODE_GROUPED}: Always wait for all Destinations to be gone,
+                           then respawn after the given time. {d.MODE_SINGLE}: Just spawn every destination
+                           that has been reached, after the given time.
+                            
+        """
+        super(SpawnDestinations, self).__init__()
        self.n_dests = n_dests
        self.spawn_mode = spawn_mode

@ -81,8 +130,18 @@ class DestinationSpawn(Rule):
                pass


-class FixedDestinationSpawn(Rule):
+class SpawnDestinationsPerAgent(Rule):
    def __init__(self, per_agent_positions: Dict[str, List[Tuple[int, int]]]):
+        """
+        Special rule that spawns destinations, each bound to a single agent, at a fixed set of positions.
+        Useful for introducing specialists, etc.
+
+        !!! This rule does not introduce any reward or done condition.
+
+        :type per_agent_positions:  Dict[str, List[Tuple[int, int]]]
+        :param per_agent_positions: Please provide a dictionary with agent names as keys; and a list of possible
+                                     destination coords as value. Example: {Wolfgang: [(0, 0), (1, 1), ...]}
+        """
        super(Rule, self).__init__()
        self.per_agent_positions = {key: [ast.literal_eval(x) for x in val] for key, val in per_agent_positions.items()}

--- a/marl_factory_grid/modules/doors/init.py
+++ b/marl_factory_grid/modules/doors/init.py
@ -1,4 +1,4 @@
 from .actions import DoorUse
 from .entitites import Door, DoorIndicator
 from .groups import Doors
-from .rules import DoorAutoClose, DoorIndicateArea
+from .rules import DoDoorAutoClose, IndicateDoorAreaInObservation
--- a/marl_factory_grid/modules/doors/rules.py
+++ b/marl_factory_grid/modules/doors/rules.py
@ -1,13 +1,19 @@
-from marl_factory_grid.environment.rules import Rule
 from marl_factory_grid.environment import constants as c
+from marl_factory_grid.environment.rules import Rule
 from marl_factory_grid.utils.results import TickResult
 from . import constants as d
 from .entitites import DoorIndicator


-class DoorAutoClose(Rule):
+class DoDoorAutoClose(Rule):

    def __init__(self, close_frequency: int = 10):
+        """
+        This rule automatically closes opened doors when no entity is blocking the position.
+
+        :type close_frequency: int
+        :param close_frequency: How many ticks after opening should the door close?
+        """
        super().__init__()
        self.close_frequency = close_frequency

@ -22,9 +28,16 @@ class DoorAutoClose(Rule):
        return []


-class DoorIndicateArea(Rule):
+class IndicateDoorAreaInObservation(Rule):

    def __init__(self):
+        """
+        Whether to add an indicator aka place a value around doors (within the door tile), for agents to see.
+        This could improve learning.
+
+        """
+        # Todo: Discuss whether this should rather be a single entity with a standalone observation.
+        #  Could then be combined with the "Combine"-approach.
        super().__init__()

    def on_init(self, state, lvl_map):
--- a/marl_factory_grid/modules/machines/rules.py
+++ b/marl_factory_grid/modules/machines/rules.py
@ -27,9 +27,3 @@ class MachineRule(Rule):

    def on_check_done(self, state) -> List[DoneResult]:
        pass
-
-
-class DoneOnBreakRule(Rule):
-
-    def on_check_done(self, state) -> List[DoneResult]:
-        pass
--- a/marl_factory_grid/utils/helpers.py
+++ b/marl_factory_grid/utils/helpers.py
@ -22,7 +22,7 @@ This file is used for:
 """


-LEVELS_DIR = 'modules/levels'                    # for use in studies and experiments
+LEVELS_DIR = 'levels'                    # for use in studies and experiments
 STEPS_START = 1                                  # Define where to the stepcount; which is the first step

 IGNORED_DF_COLUMNS = ['Episode', 'Run',          # For plotting, which values are ignored when loading monitor files
--- a/marl_factory_grid/utils/states.py
+++ b/marl_factory_grid/utils/states.py
@ -88,11 +88,15 @@ class Gamestate(object):
        results.extend(self.rules.tick_pre_step_all(self))

        for idx, action_int in enumerate(actions):
-            agent = self[c.AGENT][idx].clear_temp_state()
-            action = agent.actions[action_int]
-            action_result = action.do(agent, self)
-            results.append(action_result)
-            agent.set_state(action_result)
+            agent = self[c.AGENT][idx]  # Look the agent up first; the paralysis check below needs it.
+            if agent.var_is_paralyzed:  # Paralyzed agents skip their action; temp state is left untouched.
+                self.print(f"{agent.name} is paralied because of: {agent.paralyze_reasons}")
+                continue
+            agent = agent.clear_temp_state()
+            action = agent.actions[action_int]
+            action_result = action.do(agent, self)
+            results.append(action_result)
+            agent.set_state(action_result)

        results.extend(self.rules.tick_step_all(self))
        results.extend(self.rules.tick_post_step_all(self))