Battery module: optional agent paralysis on discharge; rewards moved into constants.

This patch is stored one patch line per source line. Leading whitespace and
blank context lines were reconstructed from Python syntax and the hunk line
counts, so treat exact column positions (notably the ")" realignment in the
last rules.py hunk) as best effort.

Corrections applied to the added lines, relative to the original change:
  * utils/states.py: the agent is looked up before `var_is_paralyzed` is read;
    previously `agent` was referenced before assignment inside the loop.
  * batteries/constants.py: GROUPED and SINGLE carried each other's value.
  * batteries/rules.py: on_check_done tested b.SINGLE in both branches, so the
    "all batteries discharged" branch now tests b.GROUPED.
  * batteries/rules.py: the __init__ docstrings were f-strings, which Python
    does not store as docstrings; they are plain strings now.
  * batteries/actions.py: rewards are read through the existing alias `b`.
  * environment/entity/agent.py: var_is_paralyzed returns a bool.

The blob hashes on the `index` lines are those of the original change and do
not reflect the corrections above.

diff --git a/marl_factory_grid/environment/entity/agent.py b/marl_factory_grid/environment/entity/agent.py
index 61e33d3..285c8d2 100644
--- a/marl_factory_grid/environment/entity/agent.py
+++ b/marl_factory_grid/environment/entity/agent.py
@@ -20,6 +20,14 @@ class Agent(Entity):
     def var_can_move(self):
         return True
 
+    @property
+    def var_is_paralyzed(self):
+        return bool(self._paralyzed)
+
+    @property
+    def paralyze_reasons(self):
+        return list(self._paralyzed)
+
     @property
     def var_is_blocking_pos(self):
         return False
@@ -57,6 +65,7 @@ class Agent(Entity):
 
     def __init__(self, actions: List[Action], observations: List[str], *args, **kwargs):
         super(Agent, self).__init__(*args, **kwargs)
+        self._paralyzed = set()
         self.step_result = dict()
         self._actions = actions
         self._observations = observations
@@ -75,6 +84,17 @@ class Agent(Entity):
     def set_state(self, action_result):
         self._state = action_result
 
+    def paralyze(self, reason):
+        self._paralyzed.add(reason)
+        return c.VALID
+
+    def de_paralyze(self, reason):
+        try:
+            self._paralyzed.remove(reason)
+            return c.VALID
+        except KeyError:
+            return c.NOT_VALID
+
     def render(self):
         i = next(idx for idx, x in enumerate(self._collection) if x.name == self.name)
         curr_state = self.state
diff --git a/marl_factory_grid/modules/batteries/__init__.py b/marl_factory_grid/modules/batteries/__init__.py
index 08d5626..f4f765f 100644
--- a/marl_factory_grid/modules/batteries/__init__.py
+++ b/marl_factory_grid/modules/batteries/__init__.py
@@ -1,4 +1,4 @@
 from .actions import BtryCharge
 from .entitites import Pod, Battery
 from .groups import ChargePods, Batteries
-from .rules import BtryDoneAtDischarge, Btry
+from .rules import BtryDoneAtDischarge, BatteryDecharge
diff --git a/marl_factory_grid/modules/batteries/actions.py b/marl_factory_grid/modules/batteries/actions.py
index 5006464..343bbcc 100644
--- a/marl_factory_grid/modules/batteries/actions.py
+++ b/marl_factory_grid/modules/batteries/actions.py
@@ -1,16 +1,17 @@
 from typing import Union
 
+import marl_factory_grid.modules.batteries.constants
 from marl_factory_grid.environment.actions import Action
 from marl_factory_grid.utils.results import ActionResult
 
-from marl_factory_grid.modules.batteries import constants as b, rewards as r
+from marl_factory_grid.modules.batteries import constants as b
 from marl_factory_grid.environment import constants as c
 
 
 class BtryCharge(Action):
 
     def __init__(self):
-        super().__init__(b.CHARGE)
+        super().__init__(b.ACTION_CHARGE)
 
     def do(self, entity, state) -> Union[None, ActionResult]:
         if charge_pod := state[b.CHARGE_PODS].by_pos(entity.pos):
@@ -23,4 +24,4 @@ class BtryCharge(Action):
             valid = c.NOT_VALID
             state.print(f'{entity.name} failed to charged batteries at {entity.pos}.')
         return ActionResult(entity=entity, identifier=self._identifier, validity=valid,
-                            reward=r.CHARGE_VALID if valid else r.CHARGE_FAIL)
+                            reward=b.REWARD_CHARGE_VALID if valid else b.Reward_CHARGE_FAIL)
diff --git a/marl_factory_grid/modules/batteries/constants.py b/marl_factory_grid/modules/batteries/constants.py
index 77bd920..cbf3be0 100644
--- a/marl_factory_grid/modules/batteries/constants.py
+++ b/marl_factory_grid/modules/batteries/constants.py
@@ -6,5 +6,14 @@ BATTERIES = 'Batteries'
 BATTERY_DISCHARGED = 'DISCHARGED'
 CHARGE_POD_SYMBOL = 1
 
+ACTION_CHARGE = 'do_charge_action'
 
-CHARGE = 'do_charge_action'
+REWARD_CHARGE_VALID: float = 0.1
+Reward_CHARGE_FAIL: float = -0.1
+REWARD_BATTERY_DISCHARGED: float = -1.0
+REWARD_DISCHARGE_DONE: float = -1.0
+
+
+GROUPED = "grouped"
+SINGLE = "single"
+MODES = [GROUPED, SINGLE]
diff --git a/marl_factory_grid/modules/batteries/rewards.py b/marl_factory_grid/modules/batteries/rewards.py
deleted file mode 100644
index 310725d..0000000
--- a/marl_factory_grid/modules/batteries/rewards.py
+++ /dev/null
@@ -1,3 +0,0 @@
-CHARGE_VALID: float = 0.1
-CHARGE_FAIL: float = -0.1
-BATTERY_DISCHARGED: float = -1.0
\ No newline at end of file
diff --git a/marl_factory_grid/modules/batteries/rules.py b/marl_factory_grid/modules/batteries/rules.py
index 3ed9cfa..6fea262 100644
--- a/marl_factory_grid/modules/batteries/rules.py
+++ b/marl_factory_grid/modules/batteries/rules.py
@@ -1,15 +1,51 @@
 from typing import List, Union
+
+import marl_factory_grid.modules.batteries.constants
 from marl_factory_grid.environment.rules import Rule
 from marl_factory_grid.utils.results import TickResult, DoneResult
 
 from marl_factory_grid.environment import constants as c
-from marl_factory_grid.modules.batteries import constants as b, rewards as r
+from marl_factory_grid.modules.batteries import constants as b
 
 
-class Btry(Rule):
+class BatteryDecharge(Rule):
 
-    def __init__(self, initial_charge: float = 0.8, per_action_costs: Union[dict, float] = 0.02):
+    def __init__(self, initial_charge: float = 0.8, per_action_costs: Union[dict, float] = 0.02,
+                 battery_charge_reward: float = b.REWARD_CHARGE_VALID,
+                 battery_failed_reward: float = b.Reward_CHARGE_FAIL,
+                 battery_discharge_reward: float = b.REWARD_BATTERY_DISCHARGED,
+                 paralyze_agents_on_discharge: bool = False):
+        """
+        Enables the Battery Charge/Discharge functionality.
+
+        :type paralyze_agents_on_discharge: bool
+        :param paralyze_agents_on_discharge: Whether agents are paralyzed (their actions are skipped) while discharged.
+        :type per_action_costs: Union[dict, float] = 0.02
+        :param per_action_costs: 1. dict: with an action name as key, provide a value for each
+                                 (maybe walking is less tedious as opening a door? Just saying...).
+                                 2. float: each action "costs" the same.
+        ----
+        !!! Does not introduce any Env.-Done condition.
+        !!! Batteries can only be charged if the agent possesses the "Charge" action.
+        !!! Batteries can only be charged if there are "ChargePods" and they are spawned!
+        ----
+        :type initial_charge: float
+        :param initial_charge: How much juice they have.
+        :type battery_discharge_reward: float
+        :param battery_discharge_reward: Negative reward, when agents let their batteries discharge.
+                                         Default: b.REWARD_BATTERY_DISCHARGED
+        :type battery_failed_reward: float
+        :param battery_failed_reward: Negative reward, when agents cannot charge, but try (overcharge, not on station).
+                                      Default: b.Reward_CHARGE_FAIL
+        :type battery_charge_reward: float
+        :param battery_charge_reward: Positive reward, when agents actually charge their battery.
+                                      Default: b.REWARD_CHARGE_VALID
+        """
         super().__init__()
+        self.paralyze_agents_on_discharge = paralyze_agents_on_discharge
+        self.battery_discharge_reward = battery_discharge_reward
+        self.battery_failed_reward = battery_failed_reward
+        self.battery_charge_reward = battery_charge_reward
         self.per_action_costs = per_action_costs
         self.initial_charge = initial_charge
 
@@ -17,9 +53,6 @@ class Btry(Rule):
         assert len(state[c.AGENT]), "There are no agents, did you already spawn them?"
         state[b.BATTERIES].spawn(state[c.AGENT], self.initial_charge)
 
-    def tick_pre_step(self, state) -> List[TickResult]:
-        pass
-
     def tick_step(self, state) -> List[TickResult]:
         # Decharge
         batteries = state[b.BATTERIES]
@@ -43,27 +76,78 @@ class Btry(Rule):
             if btry.is_discharged:
                 state.print(f'Battery of {btry.bound_entity.name} is discharged!')
                 results.append(
-                    TickResult(self.name, entity=btry.bound_entity, reward=r.BATTERY_DISCHARGED, validity=c.VALID))
-            else:
-                pass
+                    TickResult(self.name, entity=btry.bound_entity, reward=self.battery_discharge_reward,
+                               validity=c.VALID)
+                )
+                if self.paralyze_agents_on_discharge:
+                    btry.bound_entity.paralyze(self.name)
+                    results.append(
+                        TickResult("Paralyzed", entity=btry.bound_entity, reward=0, validity=c.VALID)
+                    )
+                    state.print(f'{btry.bound_entity.name} has just been paralyzed!')
+            if btry.bound_entity.var_is_paralyzed and not btry.is_discharged:
+                btry.bound_entity.de_paralyze(self.name)
+                results.append(
+                    TickResult("De-Paralyzed", entity=btry.bound_entity, reward=0, validity=c.VALID)
+                )
+                state.print(f'{btry.bound_entity.name} has just been de-paralyzed!')
         return results
 
 
-class BtryDoneAtDischarge(Rule):
+class BtryDoneAtDischarge(BatteryDecharge):
 
-    def __init__(self):
-        super().__init__()
+    def __init__(self, reward_discharge_done=b.REWARD_DISCHARGE_DONE, mode: str = b.SINGLE, **kwargs):
+        """
+        Enables the Battery Charge/Discharge functionality. Additionally reports a Done condition on discharge.
+
+        :type mode: str
+        :param mode: b.SINGLE: done as soon as any battery is discharged; b.GROUPED: done when all batteries are.
+        :type per_action_costs: Union[dict, float] = 0.02
+        :param per_action_costs: 1. dict: with an action name as key, provide a value for each
+                                 (maybe walking is less tedious as opening a door? Just saying...).
+                                 2. float: each action "costs" the same.
+
+        :type initial_charge: float
+        :param initial_charge: How much juice they have.
+        :type reward_discharge_done: float
+        :param reward_discharge_done: Global negative reward, when agents let their batteries discharge.
+                                      Default: b.REWARD_DISCHARGE_DONE
+        :type battery_discharge_reward: float
+        :param battery_discharge_reward: Negative reward, when agents let their batteries discharge.
+                                         Default: b.REWARD_BATTERY_DISCHARGED
+        :type battery_failed_reward: float
+        :param battery_failed_reward: Negative reward, when agents cannot charge, but try (overcharge, not on station).
+                                      Default: b.Reward_CHARGE_FAIL
+        :type battery_charge_reward: float
+        :param battery_charge_reward: Positive reward, when agents actually charge their battery.
+                                      Default: b.REWARD_CHARGE_VALID
+        """
+        super().__init__(**kwargs)
+        self.mode = mode
+        self.reward_discharge_done = reward_discharge_done
 
     def on_check_done(self, state) -> List[DoneResult]:
-        if btry_done := any(battery.is_discharged for battery in state[b.BATTERIES]):
-            return [DoneResult(self.name, validity=c.VALID, reward=r.BATTERY_DISCHARGED)]
+        any_discharged = (self.mode == b.SINGLE and any(battery.is_discharged for battery in state[b.BATTERIES]))
+        all_discharged = (self.mode == b.GROUPED and all(battery.is_discharged for battery in state[b.BATTERIES]))
+        if any_discharged or all_discharged:
+            return [DoneResult(self.name, validity=c.VALID, reward=self.reward_discharge_done)]
         else:
             return [DoneResult(self.name, validity=c.NOT_VALID, reward=0)]
 
 
-class PodRules(Rule):
+class SpawnChargePods(Rule):
 
     def __init__(self, n_pods: int, charge_rate: float = 0.4, multi_charge: bool = False):
+        """
+        Spawn Chargepods in accordance to the given parameters.
+
+        :type n_pods: int
+        :param n_pods: How many charge pods are there?
+        :type charge_rate: float
+        :param charge_rate: How much juice does each use of the charge action top up?
+        :type multi_charge: bool
+        :param multi_charge: Whether multiple agents are able to charge at the same time.
+        """
         super().__init__()
         self.multi_charge = multi_charge
         self.charge_rate = charge_rate
@@ -74,5 +158,5 @@ class PodRules(Rule):
         empty_positions = state.entities.empty_positions()
         pods = pod_collection.from_coordinates(empty_positions, entity_kwargs=dict(
             multi_charge=self.multi_charge, charge_rate=self.charge_rate)
-                                               )
+                                                )
         pod_collection.add_items(pods)
diff --git a/marl_factory_grid/utils/states.py b/marl_factory_grid/utils/states.py
index 83dbcf9..bcc8e3f 100644
--- a/marl_factory_grid/utils/states.py
+++ b/marl_factory_grid/utils/states.py
@@ -88,11 +88,15 @@ class Gamestate(object):
         results.extend(self.rules.tick_pre_step_all(self))
 
         for idx, action_int in enumerate(actions):
             agent = self[c.AGENT][idx].clear_temp_state()
-            action = agent.actions[action_int]
-            action_result = action.do(agent, self)
-            results.append(action_result)
-            agent.set_state(action_result)
+            if not agent.var_is_paralyzed:
+                action = agent.actions[action_int]
+                action_result = action.do(agent, self)
+                results.append(action_result)
+                agent.set_state(action_result)
+            else:
+                self.print(f"{agent.name} is paralied because of: {agent.paralyze_reasons}")
+                continue
 
         results.extend(self.rules.tick_step_all(self))
         results.extend(self.rules.tick_post_step_all(self))