mirror of
https://github.com/illiumst/marl-factory-grid.git
synced 2025-09-13 22:44:00 +02:00
Merge branch 'main' into refactor_rename
# Conflicts: # marl_factory_grid/modules/clean_up/groups.py # marl_factory_grid/modules/clean_up/rules.py # marl_factory_grid/modules/destinations/rules.py
This commit is contained in:
@@ -60,7 +60,7 @@ Just define what your environment needs in a *yaml*-configfile like:
|
|||||||
done_at_collisions: !!bool True
|
done_at_collisions: !!bool True
|
||||||
ItemRespawn:
|
ItemRespawn:
|
||||||
spawn_freq: 5
|
spawn_freq: 5
|
||||||
DoorAutoClose: {}
|
DoDoorAutoClose: {}
|
||||||
|
|
||||||
Assets:
|
Assets:
|
||||||
- Defaults
|
- Defaults
|
||||||
|
@@ -20,6 +20,14 @@ class Agent(Entity):
|
|||||||
def var_can_move(self):
|
def var_can_move(self):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@property
|
||||||
|
def var_is_paralyzed(self):
|
||||||
|
return len(self._paralyzed)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def paralyze_reasons(self):
|
||||||
|
return [x for x in self._paralyzed]
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def var_is_blocking_pos(self):
|
def var_is_blocking_pos(self):
|
||||||
return False
|
return False
|
||||||
@@ -57,6 +65,7 @@ class Agent(Entity):
|
|||||||
|
|
||||||
def __init__(self, actions: List[Action], observations: List[str], *args, **kwargs):
|
def __init__(self, actions: List[Action], observations: List[str], *args, **kwargs):
|
||||||
super(Agent, self).__init__(*args, **kwargs)
|
super(Agent, self).__init__(*args, **kwargs)
|
||||||
|
self._paralyzed = set()
|
||||||
self.step_result = dict()
|
self.step_result = dict()
|
||||||
self._actions = actions
|
self._actions = actions
|
||||||
self._observations = observations
|
self._observations = observations
|
||||||
@@ -75,6 +84,17 @@ class Agent(Entity):
|
|||||||
def set_state(self, action_result):
|
def set_state(self, action_result):
|
||||||
self._state = action_result
|
self._state = action_result
|
||||||
|
|
||||||
|
def paralyze(self, reason):
|
||||||
|
self._paralyzed.add(reason)
|
||||||
|
return c.VALID
|
||||||
|
|
||||||
|
def de_paralyze(self, reason):
|
||||||
|
try:
|
||||||
|
self._paralyzed.remove(reason)
|
||||||
|
return c.VALID
|
||||||
|
except KeyError:
|
||||||
|
return c.NOT_VALID
|
||||||
|
|
||||||
def render(self):
|
def render(self):
|
||||||
i = next(idx for idx, x in enumerate(self._collection) if x.name == self.name)
|
i = next(idx for idx, x in enumerate(self._collection) if x.name == self.name)
|
||||||
curr_state = self.state
|
curr_state = self.state
|
||||||
|
@@ -99,7 +99,7 @@ class Entity(_Object, abc.ABC):
|
|||||||
try:
|
try:
|
||||||
self.bind_to(bind_to)
|
self.bind_to(bind_to)
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
print(f'Objects of {self.__class__.__name__} can not be bound to other entities.')
|
print(f'Objects of class "{self.__class__.__name__}" can not be bound to other entities.')
|
||||||
exit()
|
exit()
|
||||||
|
|
||||||
def summarize_state(self) -> dict:
|
def summarize_state(self) -> dict:
|
||||||
|
@@ -37,7 +37,7 @@ class Entities(_Objects):
|
|||||||
return[x for val in self.pos_dict[pos] for x in val if x.var_can_collide]
|
return[x for val in self.pos_dict[pos] for x in val if x.var_can_collide]
|
||||||
|
|
||||||
def empty_positions(self):
|
def empty_positions(self):
|
||||||
empty_positions= [key for key in self.floorlist if self.pos_dict[key]]
|
empty_positions = [key for key in self.floorlist if not self.pos_dict[key]]
|
||||||
shuffle(empty_positions)
|
shuffle(empty_positions)
|
||||||
return empty_positions
|
return empty_positions
|
||||||
|
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
from .actions import BtryCharge
|
from .actions import BtryCharge
|
||||||
from .entitites import Pod, Battery
|
from .entitites import Pod, Battery
|
||||||
from .groups import ChargePods, Batteries
|
from .groups import ChargePods, Batteries
|
||||||
from .rules import BtryDoneAtDischarge, Btry
|
from .rules import BtryDoneAtDischarge, BatteryDecharge
|
||||||
|
@@ -1,16 +1,17 @@
|
|||||||
from typing import Union
|
from typing import Union
|
||||||
|
|
||||||
|
import marl_factory_grid.modules.batteries.constants
|
||||||
from marl_factory_grid.environment.actions import Action
|
from marl_factory_grid.environment.actions import Action
|
||||||
from marl_factory_grid.utils.results import ActionResult
|
from marl_factory_grid.utils.results import ActionResult
|
||||||
|
|
||||||
from marl_factory_grid.modules.batteries import constants as b, rewards as r
|
from marl_factory_grid.modules.batteries import constants as b
|
||||||
from marl_factory_grid.environment import constants as c
|
from marl_factory_grid.environment import constants as c
|
||||||
|
|
||||||
|
|
||||||
class BtryCharge(Action):
|
class BtryCharge(Action):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__(b.CHARGE)
|
super().__init__(b.ACTION_CHARGE)
|
||||||
|
|
||||||
def do(self, entity, state) -> Union[None, ActionResult]:
|
def do(self, entity, state) -> Union[None, ActionResult]:
|
||||||
if charge_pod := state[b.CHARGE_PODS].by_pos(entity.pos):
|
if charge_pod := state[b.CHARGE_PODS].by_pos(entity.pos):
|
||||||
@@ -23,4 +24,4 @@ class BtryCharge(Action):
|
|||||||
valid = c.NOT_VALID
|
valid = c.NOT_VALID
|
||||||
state.print(f'{entity.name} failed to charged batteries at {entity.pos}.')
|
state.print(f'{entity.name} failed to charged batteries at {entity.pos}.')
|
||||||
return ActionResult(entity=entity, identifier=self._identifier, validity=valid,
|
return ActionResult(entity=entity, identifier=self._identifier, validity=valid,
|
||||||
reward=r.CHARGE_VALID if valid else r.CHARGE_FAIL)
|
reward=marl_factory_grid.modules.batteries.constants.REWARD_CHARGE_VALID if valid else marl_factory_grid.modules.batteries.constants.Reward_CHARGE_FAIL)
|
||||||
|
@@ -4,5 +4,14 @@ BATTERIES = 'Batteries'
|
|||||||
BATTERY_DISCHARGED = 'DISCHARGED'
|
BATTERY_DISCHARGED = 'DISCHARGED'
|
||||||
CHARGE_POD_SYMBOL = 1
|
CHARGE_POD_SYMBOL = 1
|
||||||
|
|
||||||
|
ACTION_CHARGE = 'do_charge_action'
|
||||||
|
|
||||||
CHARGE = 'do_charge_action'
|
REWARD_CHARGE_VALID: float = 0.1
|
||||||
|
Reward_CHARGE_FAIL: float = -0.1
|
||||||
|
REWARD_BATTERY_DISCHARGED: float = -1.0
|
||||||
|
REWARD_DISCHARGE_DONE: float = -1.0
|
||||||
|
|
||||||
|
|
||||||
|
GROUPED = "single"
|
||||||
|
SINGLE = "grouped"
|
||||||
|
MODES = [GROUPED, SINGLE]
|
||||||
|
@@ -1,3 +0,0 @@
|
|||||||
CHARGE_VALID: float = 0.1
|
|
||||||
CHARGE_FAIL: float = -0.1
|
|
||||||
BATTERY_DISCHARGED: float = -1.0
|
|
@@ -1,24 +1,58 @@
|
|||||||
from typing import List, Union
|
from typing import List, Union
|
||||||
|
|
||||||
|
import marl_factory_grid.modules.batteries.constants
|
||||||
from marl_factory_grid.environment.rules import Rule
|
from marl_factory_grid.environment.rules import Rule
|
||||||
from marl_factory_grid.utils.results import TickResult, DoneResult
|
from marl_factory_grid.utils.results import TickResult, DoneResult
|
||||||
|
|
||||||
from marl_factory_grid.environment import constants as c
|
from marl_factory_grid.environment import constants as c
|
||||||
from marl_factory_grid.modules.batteries import constants as b, rewards as r
|
from marl_factory_grid.modules.batteries import constants as b
|
||||||
|
|
||||||
|
|
||||||
class Btry(Rule):
|
class BatteryDecharge(Rule):
|
||||||
|
|
||||||
def __init__(self, initial_charge: float = 0.8, per_action_costs: Union[dict, float] = 0.02):
|
def __init__(self, initial_charge: float = 0.8, per_action_costs: Union[dict, float] = 0.02,
|
||||||
|
battery_charge_reward: float = b.REWARD_CHARGE_VALID,
|
||||||
|
battery_failed_reward: float = b.Reward_CHARGE_FAIL,
|
||||||
|
battery_discharge_reward: float = b.REWARD_BATTERY_DISCHARGED,
|
||||||
|
paralyze_agents_on_discharge: bool = False):
|
||||||
|
f"""
|
||||||
|
Enables the Battery Charge/Discharge functionality.
|
||||||
|
|
||||||
|
:type paralyze_agents_on_discharge: bool
|
||||||
|
:param paralyze_agents_on_discharge: Whether agents are paralyzed while their battery is discharged.
|
||||||
|
:type per_action_costs: Union[dict, float] = 0.02
|
||||||
|
:param per_action_costs: 1. dict: with an action name as key, provide a value for each
|
||||||
|
(maybe walking is less tedious as opening a door? Just saying...).
|
||||||
|
2. float: each action "costs" the same.
|
||||||
|
----
|
||||||
|
!!! Does not introduce any Env.-Done condition.
|
||||||
|
!!! Batteries can only be charged if the agent possesses the "Charge" action.
|
||||||
|
!!! Batteries can only be charged if there are "ChargePods" and they are spawned!
|
||||||
|
----
|
||||||
|
:type initial_charge: float
|
||||||
|
:param initial_charge: How much juice they have.
|
||||||
|
:type battery_discharge_reward: float
|
||||||
|
:param battery_discharge_reward: Negative reward, when agents let their batteries discharge.
|
||||||
|
Default: {b.REWARD_BATTERY_DISCHARGED}
|
||||||
|
:type battery_failed_reward: float
|
||||||
|
:param battery_failed_reward: Negative reward, when an agent tries to charge but cannot (overcharge, not on station).
|
||||||
|
Default: {b.Reward_CHARGE_FAIL}
|
||||||
|
:type battery_charge_reward: float
|
||||||
|
:param battery_charge_reward: Positive reward, when an agent actually charges its battery.
|
||||||
|
Default: {b.REWARD_CHARGE_VALID}
|
||||||
|
"""
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
self.paralyze_agents_on_discharge = paralyze_agents_on_discharge
|
||||||
|
self.battery_discharge_reward = battery_discharge_reward
|
||||||
|
self.battery_failed_reward = battery_failed_reward
|
||||||
|
self.battery_charge_reward = battery_charge_reward
|
||||||
self.per_action_costs = per_action_costs
|
self.per_action_costs = per_action_costs
|
||||||
self.initial_charge = initial_charge
|
self.initial_charge = initial_charge
|
||||||
|
|
||||||
def on_init(self, state, lvl_map):
|
def on_init(self, state, lvl_map):
|
||||||
|
assert len(state[c.AGENT]), "There are no agents, did you already spawn them?"
|
||||||
state[b.BATTERIES].spawn(state[c.AGENT], self.initial_charge)
|
state[b.BATTERIES].spawn(state[c.AGENT], self.initial_charge)
|
||||||
|
|
||||||
def tick_pre_step(self, state) -> List[TickResult]:
|
|
||||||
pass
|
|
||||||
|
|
||||||
def tick_step(self, state) -> List[TickResult]:
|
def tick_step(self, state) -> List[TickResult]:
|
||||||
# Decharge
|
# Decharge
|
||||||
batteries = state[b.BATTERIES]
|
batteries = state[b.BATTERIES]
|
||||||
@@ -42,27 +76,78 @@ class Btry(Rule):
|
|||||||
if btry.is_discharged:
|
if btry.is_discharged:
|
||||||
state.print(f'Battery of {btry.bound_entity.name} is discharged!')
|
state.print(f'Battery of {btry.bound_entity.name} is discharged!')
|
||||||
results.append(
|
results.append(
|
||||||
TickResult(self.name, entity=btry.bound_entity, reward=r.BATTERY_DISCHARGED, validity=c.VALID))
|
TickResult(self.name, entity=btry.bound_entity, reward=self.battery_discharge_reward,
|
||||||
else:
|
validity=c.VALID)
|
||||||
pass
|
)
|
||||||
|
if self.paralyze_agents_on_discharge:
|
||||||
|
btry.bound_entity.paralyze(self.name)
|
||||||
|
results.append(
|
||||||
|
TickResult("Paralyzed", entity=btry.bound_entity, reward=0, validity=c.VALID)
|
||||||
|
)
|
||||||
|
state.print(f'{btry.bound_entity.name} has just been paralyzed!')
|
||||||
|
if btry.bound_entity.var_is_paralyzed and not btry.is_discharged:
|
||||||
|
btry.bound_entity.de_paralyze(self.name)
|
||||||
|
results.append(
|
||||||
|
TickResult("De-Paralyzed", entity=btry.bound_entity, reward=0, validity=c.VALID)
|
||||||
|
)
|
||||||
|
state.print(f'{btry.bound_entity.name} has just been de-paralyzed!')
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
class BtryDoneAtDischarge(Rule):
|
class BtryDoneAtDischarge(BatteryDecharge):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, reward_discharge_done=b.REWARD_DISCHARGE_DONE, mode: str = b.SINGLE, **kwargs):
|
||||||
super().__init__()
|
f"""
|
||||||
|
Enables the Battery Charge/Discharge functionality. Additionally introduces a Done condition on battery discharge.
|
||||||
|
|
||||||
|
:type mode: str
|
||||||
|
:param mode: Does this Done rule trigger, when any battery is or all batteries are discharged?
|
||||||
|
:type per_action_costs: Union[dict, float] = 0.02
|
||||||
|
:param per_action_costs: 1. dict: with an action name as key, provide a value for each
|
||||||
|
(maybe walking is less tedious as opening a door? Just saying...).
|
||||||
|
2. float: each action "costs" the same.
|
||||||
|
|
||||||
|
:type initial_charge: float
|
||||||
|
:param initial_charge: How much juice they have.
|
||||||
|
:type reward_discharge_done: float
|
||||||
|
:param reward_discharge_done: Global negative reward, when agents let their batteries discharge.
|
||||||
|
Default: {b.REWARD_DISCHARGE_DONE}
|
||||||
|
:type battery_discharge_reward: float
|
||||||
|
:param battery_discharge_reward: Negative reward, when agents let their batteries discharge.
|
||||||
|
Default: {b.REWARD_BATTERY_DISCHARGED}
|
||||||
|
:type battery_failed_reward: float
|
||||||
|
:param battery_failed_reward: Negative reward, when an agent tries to charge but cannot (overcharge, not on station).
|
||||||
|
Default: {b.Reward_CHARGE_FAIL}
|
||||||
|
:type battery_charge_reward: float
|
||||||
|
:param battery_charge_reward: Positive reward, when an agent actually charges its battery.
|
||||||
|
Default: {b.REWARD_CHARGE_VALID}
|
||||||
|
"""
|
||||||
|
super().__init__(**kwargs)
|
||||||
|
self.mode = mode
|
||||||
|
self.reward_discharge_done = reward_discharge_done
|
||||||
|
|
||||||
def on_check_done(self, state) -> List[DoneResult]:
|
def on_check_done(self, state) -> List[DoneResult]:
|
||||||
if btry_done := any(battery.is_discharged for battery in state[b.BATTERIES]):
|
any_discharged = (self.mode == b.SINGLE and any(battery.is_discharged for battery in state[b.BATTERIES]))
|
||||||
return [DoneResult(self.name, validity=c.VALID, reward=r.BATTERY_DISCHARGED)]
|
all_discharged = (self.mode == b.SINGLE and all(battery.is_discharged for battery in state[b.BATTERIES]))
|
||||||
|
if any_discharged or all_discharged:
|
||||||
|
return [DoneResult(self.name, validity=c.VALID, reward=self.reward_discharge_done)]
|
||||||
else:
|
else:
|
||||||
return [DoneResult(self.name, validity=c.NOT_VALID, reward=0)]
|
return [DoneResult(self.name, validity=c.NOT_VALID, reward=0)]
|
||||||
|
|
||||||
|
|
||||||
class PodRules(Rule):
|
class SpawnChargePods(Rule):
|
||||||
|
|
||||||
def __init__(self, n_pods: int, charge_rate: float = 0.4, multi_charge: bool = False):
|
def __init__(self, n_pods: int, charge_rate: float = 0.4, multi_charge: bool = False):
|
||||||
|
"""
|
||||||
|
Spawn Chargepods in accordance to the given parameters.
|
||||||
|
|
||||||
|
:type n_pods: int
|
||||||
|
:param n_pods: How many charge pods are there?
|
||||||
|
:type charge_rate: float
|
||||||
|
:param charge_rate: How much juice does each use of the charge action top up?
|
||||||
|
:type multi_charge: bool
|
||||||
|
:param multi_charge: Whether multiple agents are able to charge at the same time.
|
||||||
|
"""
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.multi_charge = multi_charge
|
self.multi_charge = multi_charge
|
||||||
self.charge_rate = charge_rate
|
self.charge_rate = charge_rate
|
||||||
@@ -73,5 +158,5 @@ class PodRules(Rule):
|
|||||||
empty_positions = state.entities.empty_positions()
|
empty_positions = state.entities.empty_positions()
|
||||||
pods = pod_collection.from_coordinates(empty_positions, entity_kwargs=dict(
|
pods = pod_collection.from_coordinates(empty_positions, entity_kwargs=dict(
|
||||||
multi_charge=self.multi_charge, charge_rate=self.charge_rate)
|
multi_charge=self.multi_charge, charge_rate=self.charge_rate)
|
||||||
)
|
)
|
||||||
pod_collection.add_items(pods)
|
pod_collection.add_items(pods)
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
from .actions import CleanUp
|
from .actions import CleanUp
|
||||||
from .entitites import DirtPile
|
from .entitites import DirtPile
|
||||||
from .groups import DirtPiles
|
from .groups import DirtPiles
|
||||||
from .rules import DirtRespawnRule, DirtSmearOnMove, DirtAllCleanDone
|
from .rules import SpawnDirt, EntitiesSmearDirtOnMove, DoneOnAllDirtCleaned
|
||||||
|
@@ -32,11 +32,9 @@ class DirtPile(Entity):
|
|||||||
# Edit this if you want items to be drawn in the ops differntly
|
# Edit this if you want items to be drawn in the ops differntly
|
||||||
return self._amount
|
return self._amount
|
||||||
|
|
||||||
def __init__(self, *args, max_local_amount=5, initial_amount=2, spawn_variation=0.05, **kwargs):
|
def __init__(self, *args, amount=2, max_local_amount=5, **kwargs):
|
||||||
super(DirtPile, self).__init__(*args, **kwargs)
|
super(DirtPile, self).__init__(*args, **kwargs)
|
||||||
self._amount = abs(initial_amount + (
|
self._amount = amount
|
||||||
random.normal(loc=0, scale=spawn_variation, size=1).item() * initial_amount)
|
|
||||||
)
|
|
||||||
self.max_local_amount = max_local_amount
|
self.max_local_amount = max_local_amount
|
||||||
|
|
||||||
def set_new_amount(self, amount):
|
def set_new_amount(self, amount):
|
||||||
|
@@ -1,4 +1,5 @@
|
|||||||
from marl_factory_grid.environment import constants as c
|
from marl_factory_grid.environment import constants as c
|
||||||
|
from marl_factory_grid.utils.results import Result
|
||||||
from marl_factory_grid.environment.groups.collection import Collection
|
from marl_factory_grid.environment.groups.collection import Collection
|
||||||
from marl_factory_grid.modules.clean_up.entitites import DirtPile
|
from marl_factory_grid.modules.clean_up.entitites import DirtPile
|
||||||
|
|
||||||
@@ -27,42 +28,44 @@ class DirtPiles(Collection):
|
|||||||
return sum([dirt.amount for dirt in self])
|
return sum([dirt.amount for dirt in self])
|
||||||
|
|
||||||
def __init__(self, *args,
|
def __init__(self, *args,
|
||||||
initial_amount=2,
|
|
||||||
initial_dirt_ratio=0.05,
|
|
||||||
dirt_spawn_r_var=0.1,
|
|
||||||
max_local_amount=5,
|
max_local_amount=5,
|
||||||
clean_amount=1,
|
clean_amount=1,
|
||||||
max_global_amount: int = 20, **kwargs):
|
max_global_amount: int = 20, **kwargs):
|
||||||
super(DirtPiles, self).__init__(*args, **kwargs)
|
super(DirtPiles, self).__init__(*args, **kwargs)
|
||||||
self.clean_amount = clean_amount
|
self.clean_amount = clean_amount
|
||||||
self.initial_amount = initial_amount
|
|
||||||
self.initial_dirt_ratio = initial_dirt_ratio
|
|
||||||
self.dirt_spawn_r_var = dirt_spawn_r_var
|
|
||||||
self.max_global_amount = max_global_amount
|
self.max_global_amount = max_global_amount
|
||||||
self.max_local_amount = max_local_amount
|
self.max_local_amount = max_local_amount
|
||||||
|
|
||||||
def spawn(self, then_dirty_positions, amount) -> bool:
|
def spawn(self, then_dirty_positions, amount_s) -> Result:
|
||||||
for pos in then_dirty_positions:
|
spawn_counter = 0
|
||||||
|
for idx, pos in enumerate(then_dirty_positions):
|
||||||
if not self.amount > self.max_global_amount:
|
if not self.amount > self.max_global_amount:
|
||||||
|
amount = amount_s[idx] if isinstance(amount_s, list) else amount_s
|
||||||
if dirt := self.by_pos(pos):
|
if dirt := self.by_pos(pos):
|
||||||
new_value = dirt.amount + amount
|
new_value = dirt.amount + amount
|
||||||
dirt.set_new_amount(new_value)
|
dirt.set_new_amount(new_value)
|
||||||
else:
|
else:
|
||||||
dirt = DirtPile(pos, initial_amount=amount, spawn_variation=self.dirt_spawn_r_var)
|
dirt = DirtPile(pos, amount=amount)
|
||||||
self.add_item(dirt)
|
self.add_item(dirt)
|
||||||
|
spawn_counter += 1
|
||||||
else:
|
else:
|
||||||
return c.NOT_VALID
|
return Result(identifier=f'{self.name}_spawn', validity=c.NOT_VALID, reward=0,
|
||||||
return c.VALID
|
value=spawn_counter)
|
||||||
|
return Result(identifier=f'{self.name}_spawn', validity=c.VALID, reward=0, value=spawn_counter)
|
||||||
|
|
||||||
def trigger_dirt_spawn(self, state, initial_spawn=False) -> bool:
|
def trigger_dirt_spawn(self, n, amount, state, n_var=0.2, amount_var=0.2) -> Result:
|
||||||
free_for_dirt = [x for x in state.entities.floorlist if len(state.entities.pos_dict[x]) == 1 or (
|
free_for_dirt = [x for x in state.entities.floorlist if len(state.entities.pos_dict[x]) == 1 or (
|
||||||
len(state.entities.pos_dict[x]) == 2 and isinstance(next(y for y in x), DirtPile))]
|
len(state.entities.pos_dict[x]) == 2 and isinstance(next(y for y in x), DirtPile))]
|
||||||
|
# free_for_dirt = [x for x in state[c.FLOOR]
|
||||||
|
# if len(x.guests) == 0 or (
|
||||||
|
# len(x.guests) == 1 and
|
||||||
|
# isinstance(next(y for y in x.guests), DirtPile))]
|
||||||
state.rng.shuffle(free_for_dirt)
|
state.rng.shuffle(free_for_dirt)
|
||||||
|
|
||||||
var = self.dirt_spawn_r_var
|
new_spawn = int(abs(n + (state.rng.uniform(-n_var, n_var))))
|
||||||
new_spawn = abs(self.initial_dirt_ratio + (state.rng.uniform(-var, var) if initial_spawn else 0))
|
new_amount_s = [abs(amount + (amount*state.rng.uniform(-amount_var, amount_var))) for _ in range(new_spawn)]
|
||||||
n_dirty_positions = max(0, int(new_spawn * len(free_for_dirt)))
|
n_dirty_positions = free_for_dirt[:new_spawn]
|
||||||
return self.spawn(free_for_dirt[:n_dirty_positions], self.initial_amount)
|
return self.spawn(n_dirty_positions, new_amount_s)
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
s = super(DirtPiles, self).__repr__()
|
s = super(DirtPiles, self).__repr__()
|
||||||
|
@@ -7,53 +7,98 @@ from marl_factory_grid.utils.results import TickResult
|
|||||||
from marl_factory_grid.utils.results import DoneResult
|
from marl_factory_grid.utils.results import DoneResult
|
||||||
|
|
||||||
|
|
||||||
class DirtAllCleanDone(Rule):
|
class DoneOnAllDirtCleaned(Rule):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, reward: float = r.CLEAN_UP_ALL):
|
||||||
|
"""
|
||||||
|
Defines a 'Done'-condition which triggers, when there is no more 'Dirt' in the environment.
|
||||||
|
|
||||||
|
:type reward: float
|
||||||
|
:parameter reward: Given reward when condition triggers.
|
||||||
|
"""
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
self.reward = reward
|
||||||
|
|
||||||
def on_check_done(self, state) -> [DoneResult]:
|
def on_check_done(self, state) -> [DoneResult]:
|
||||||
if len(state[d.DIRT]) == 0 and state.curr_step:
|
if len(state[d.DIRT]) == 0 and state.curr_step:
|
||||||
return [DoneResult(validity=c.VALID, identifier=self.name, reward=r.CLEAN_UP_ALL)]
|
return [DoneResult(validity=c.VALID, identifier=self.name, reward=self.reward)]
|
||||||
return [DoneResult(validity=c.NOT_VALID, identifier=self.name, reward=0)]
|
return [DoneResult(validity=c.NOT_VALID, identifier=self.name, reward=0)]
|
||||||
|
|
||||||
|
|
||||||
class DirtRespawnRule(Rule):
|
class SpawnDirt(Rule):
|
||||||
|
|
||||||
def __init__(self, spawn_freq=15):
|
def __init__(self, initial_n: int, initial_amount: float, respawn_n: int, respawn_amount: float,
|
||||||
|
n_var: float = 0.2, amount_var: float = 0.2, spawn_freq: int = 15):
|
||||||
|
"""
|
||||||
|
Defines the spawn pattern of initial and additional 'Dirt'-entities.
|
||||||
|
First chooses positions, then tries to spawn dirt until 'respawn_n' or the maximal global amount is reached.
|
||||||
|
If there is already some, it is topped up to min(max_local_amount, amount).
|
||||||
|
|
||||||
|
:type spawn_freq: int
|
||||||
|
:parameter spawn_freq: In which frequency should this Rule try to spawn new 'Dirt'?
|
||||||
|
:type respawn_n: int
|
||||||
|
:parameter respawn_n: How many respawn positions are considered.
|
||||||
|
:type initial_n: int
|
||||||
|
:parameter initial_n: How many initial positions are considered.
|
||||||
|
:type amount_var: float
|
||||||
|
:parameter amount_var: Variance of amount to spawn.
|
||||||
|
:type n_var: float
|
||||||
|
:parameter n_var: Variance of n to spawn.
|
||||||
|
:type respawn_amount: float
|
||||||
|
:parameter respawn_amount: Defines how much dirt 'amount' is placed every 'spawn_freq' ticks.
|
||||||
|
:type initial_amount: float
|
||||||
|
:parameter initial_amount: Defines how much dirt 'amount' is initially placed.
|
||||||
|
|
||||||
|
"""
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
self.amount_var = amount_var
|
||||||
|
self.n_var = n_var
|
||||||
|
self.respawn_amount = respawn_amount
|
||||||
|
self.respawn_n = respawn_n
|
||||||
|
self.initial_amount = initial_amount
|
||||||
|
self.initial_n = initial_n
|
||||||
self.spawn_freq = spawn_freq
|
self.spawn_freq = spawn_freq
|
||||||
self._next_dirt_spawn = spawn_freq
|
self._next_dirt_spawn = spawn_freq
|
||||||
|
|
||||||
def on_init(self, state, lvl_map) -> str:
|
def on_init(self, state, lvl_map) -> str:
|
||||||
state[d.DIRT].trigger_dirt_spawn(state, initial_spawn=True)
|
result = state[d.DIRT].trigger_dirt_spawn(self.initial_n, self.initial_amount, state,
|
||||||
return f'Initial Dirt was spawned on: {[x.pos for x in state[d.DIRT]]}'
|
n_var=self.n_var, amount_var=self.amount_var)
|
||||||
|
state.print(f'Initial Dirt was spawned on: {[x.pos for x in state[d.DIRT]]}')
|
||||||
|
return result
|
||||||
|
|
||||||
def tick_step(self, state):
|
def tick_step(self, state):
|
||||||
if self._next_dirt_spawn < 0:
|
if self._next_dirt_spawn < 0:
|
||||||
pass # No DirtPile Spawn
|
pass # No DirtPile Spawn
|
||||||
elif not self._next_dirt_spawn:
|
elif not self._next_dirt_spawn:
|
||||||
validity = state[d.DIRT].trigger_dirt_spawn(state)
|
result = [state[d.DIRT].trigger_dirt_spawn(self.respawn_n, self.respawn_amount, state,
|
||||||
|
n_var=self.n_var, amount_var=self.amount_var)]
|
||||||
return [TickResult(entity=None, validity=validity, identifier=self.name, reward=0)]
|
|
||||||
self._next_dirt_spawn = self.spawn_freq
|
self._next_dirt_spawn = self.spawn_freq
|
||||||
else:
|
else:
|
||||||
self._next_dirt_spawn -= 1
|
self._next_dirt_spawn -= 1
|
||||||
return []
|
result = []
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
class DirtSmearOnMove(Rule):
|
class EntitiesSmearDirtOnMove(Rule):
|
||||||
|
|
||||||
def __init__(self, smear_amount: float = 0.2):
|
def __init__(self, smear_ratio: float = 0.2):
|
||||||
|
"""
|
||||||
|
Enables 'smearing'. Entities that move through dirt, will leave a trail behind them.
|
||||||
|
They take dirt * smear_ratio of it with them to their next position.
|
||||||
|
|
||||||
|
:type smear_ratio: float
|
||||||
|
:parameter smear_ratio: How much percent dirt is smeared by entities to their next position.
|
||||||
|
"""
|
||||||
|
assert smear_ratio < 1, "'Smear Amount' must be smaller than 1"
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.smear_amount = smear_amount
|
self.smear_ratio = smear_ratio
|
||||||
|
|
||||||
def tick_post_step(self, state):
|
def tick_post_step(self, state):
|
||||||
results = list()
|
results = list()
|
||||||
for entity in state.moving_entites:
|
for entity in state.moving_entites:
|
||||||
if is_move(entity.state.identifier) and entity.state.validity == c.VALID:
|
if is_move(entity.state.identifier) and entity.state.validity == c.VALID:
|
||||||
if old_pos_dirt := state[d.DIRT].by_pos(entity.last_pos):
|
if old_pos_dirt := state[d.DIRT].by_pos(entity.last_pos):
|
||||||
if smeared_dirt := round(old_pos_dirt.amount * self.smear_amount, 2):
|
if smeared_dirt := round(old_pos_dirt.amount * self.smear_ratio, 2):
|
||||||
if state[d.DIRT].spawn(entity.pos, amount=smeared_dirt):
|
if state[d.DIRT].spawn(entity.pos, amount=smeared_dirt):
|
||||||
results.append(TickResult(identifier=self.name, entity=entity,
|
results.append(TickResult(identifier=self.name, entity=entity,
|
||||||
reward=0, validity=c.VALID))
|
reward=0, validity=c.VALID))
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
from .actions import DestAction
|
from .actions import DestAction
|
||||||
from .entitites import Destination
|
from .entitites import Destination
|
||||||
from .groups import Destinations
|
from .groups import Destinations
|
||||||
from .rules import DestinationReachAll, DestinationSpawn
|
from .rules import DoneAtDestinationReachAll, SpawnDestinations
|
||||||
|
@@ -1,9 +1,10 @@
|
|||||||
from typing import Union
|
from typing import Union
|
||||||
|
|
||||||
|
import marl_factory_grid.modules.destinations.constants
|
||||||
from marl_factory_grid.environment.actions import Action
|
from marl_factory_grid.environment.actions import Action
|
||||||
from marl_factory_grid.utils.results import ActionResult
|
from marl_factory_grid.utils.results import ActionResult
|
||||||
|
|
||||||
from marl_factory_grid.modules.destinations import constants as d, rewards as r
|
from marl_factory_grid.modules.destinations import constants as d
|
||||||
from marl_factory_grid.environment import constants as c
|
from marl_factory_grid.environment import constants as c
|
||||||
|
|
||||||
|
|
||||||
@@ -20,4 +21,4 @@ class DestAction(Action):
|
|||||||
valid = c.NOT_VALID
|
valid = c.NOT_VALID
|
||||||
state.print(f'{entity.name} just tried to do_wait_action do_wait_action at {entity.pos} but failed')
|
state.print(f'{entity.name} just tried to do_wait_action do_wait_action at {entity.pos} but failed')
|
||||||
return ActionResult(entity=entity, identifier=self._identifier, validity=valid,
|
return ActionResult(entity=entity, identifier=self._identifier, validity=valid,
|
||||||
reward=r.WAIT_VALID if valid else r.WAIT_FAIL)
|
reward=marl_factory_grid.modules.destinations.constants.REWARD_WAIT_VALID if valid else marl_factory_grid.modules.destinations.constants.REWARD_WAIT_FAIL)
|
||||||
|
@@ -3,10 +3,11 @@
|
|||||||
DESTINATION = 'Destinations'
|
DESTINATION = 'Destinations'
|
||||||
DEST_SYMBOL = 1
|
DEST_SYMBOL = 1
|
||||||
|
|
||||||
WAIT_ON_DEST = 'WAIT'
|
|
||||||
|
|
||||||
MODE_SINGLE = 'SINGLE'
|
MODE_SINGLE = 'SINGLE'
|
||||||
MODE_GROUPED = 'GROUPED'
|
MODE_GROUPED = 'GROUPED'
|
||||||
|
SPAWN_MODES = [MODE_SINGLE, MODE_GROUPED]
|
||||||
|
|
||||||
DONE_ALL = 'DONE_ALL'
|
REWARD_WAIT_VALID: float = 0.1
|
||||||
DONE_SINGLE = 'DONE_SINGLE'
|
REWARD_WAIT_FAIL: float = -0.1
|
||||||
|
REWARD_DEST_REACHED: float = 1.0
|
||||||
|
REWARD_DEST_DONE: float = 5.0
|
||||||
|
@@ -1,3 +0,0 @@
|
|||||||
WAIT_VALID: float = 0.1
|
|
||||||
WAIT_FAIL: float = -0.1
|
|
||||||
DEST_REACHED: float = 5.0
|
|
@@ -1,18 +1,29 @@
|
|||||||
import ast
|
import ast
|
||||||
from random import shuffle
|
from random import shuffle
|
||||||
from typing import List, Dict, Tuple
|
from typing import List, Dict, Tuple
|
||||||
|
|
||||||
|
import marl_factory_grid.modules.destinations.constants
|
||||||
from marl_factory_grid.environment.rules import Rule
|
from marl_factory_grid.environment.rules import Rule
|
||||||
from marl_factory_grid.utils.results import TickResult, DoneResult
|
from marl_factory_grid.utils.results import TickResult, DoneResult
|
||||||
from marl_factory_grid.environment import constants as c
|
from marl_factory_grid.environment import constants as c
|
||||||
|
|
||||||
from marl_factory_grid.modules.destinations import constants as d, rewards as r
|
from marl_factory_grid.modules.destinations import constants as d
|
||||||
from marl_factory_grid.modules.destinations.entitites import Destination
|
from marl_factory_grid.modules.destinations.entitites import Destination
|
||||||
|
|
||||||
|
|
||||||
class DestinationReachAll(Rule):
|
class DestinationReachReward(Rule):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, dest_reach_reward=marl_factory_grid.modules.destinations.constants.REWARD_DEST_REACHED):
|
||||||
super(DestinationReachAll, self).__init__()
|
"""
|
||||||
|
This rule introduces the basic functionality, so that targets (Destinations) can be reached and marked as such.
|
||||||
|
Additionally, rewards are reported.
|
||||||
|
|
||||||
|
:type dest_reach_reward: float
|
||||||
|
:param dest_reach_reward: Specifies the reward, agents get at destination reach.
|
||||||
|
|
||||||
|
"""
|
||||||
|
super(DestinationReachReward, self).__init__()
|
||||||
|
self.reward = dest_reach_reward
|
||||||
|
|
||||||
def tick_step(self, state) -> List[TickResult]:
|
def tick_step(self, state) -> List[TickResult]:
|
||||||
results = []
|
results = []
|
||||||
@@ -33,31 +44,69 @@ class DestinationReachAll(Rule):
|
|||||||
if reached:
|
if reached:
|
||||||
state.print(f'{dest.name} is reached now, mark as reached...')
|
state.print(f'{dest.name} is reached now, mark as reached...')
|
||||||
dest.mark_as_reached()
|
dest.mark_as_reached()
|
||||||
results.append(TickResult(self.name, validity=c.VALID, reward=r.DEST_REACHED, entity=agent))
|
results.append(TickResult(self.name, validity=c.VALID, reward=self.reward, entity=agent))
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
class DoneAtDestinationReachAll(DestinationReachReward):
|
||||||
|
|
||||||
|
def __init__(self, reward_at_done=marl_factory_grid.modules.destinations.constants.REWARD_DEST_DONE, **kwargs):
|
||||||
|
"""
|
||||||
|
This rule triggers and sets the done flag if ALL Destinations have been reached.
|
||||||
|
|
||||||
|
:type reward_at_done: object
|
||||||
|
:param reward_at_done: Specifies the reward agents get when all destinations are reached.
|
||||||
|
:type dest_reach_reward: float
|
||||||
|
:param dest_reach_reward: Specify the reward, agents get when reaching a single destination.
|
||||||
|
"""
|
||||||
|
super(DoneAtDestinationReachAll, self).__init__(**kwargs)
|
||||||
|
self.reward = reward_at_done
|
||||||
|
|
||||||
def on_check_done(self, state) -> List[DoneResult]:
|
def on_check_done(self, state) -> List[DoneResult]:
|
||||||
if all(x.was_reached() for x in state[d.DESTINATION]):
|
if all(x.was_reached() for x in state[d.DESTINATION]):
|
||||||
return [DoneResult(self.name, validity=c.VALID, reward=r.DEST_REACHED)]
|
return [DoneResult(self.name, validity=c.VALID, reward=self.reward)]
|
||||||
return [DoneResult(self.name, validity=c.NOT_VALID, reward=0)]
|
return [DoneResult(self.name, validity=c.NOT_VALID, reward=0)]
|
||||||
|
|
||||||
|
|
||||||
class DestinationReachAny(DestinationReachAll):
|
class DoneAtDestinationReachAny(DestinationReachReward):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, reward_at_done=d.REWARD_DEST_DONE, **kwargs):
|
||||||
super(DestinationReachAny, self).__init__()
|
f"""
|
||||||
|
This rule triggers and sets the done flag if ANY Destination has been reached.
|
||||||
|
!!! IMPORTANT: 'reward_at_done' is shared between the agents; 'dest_reach_reward' is bound to a specific one.
|
||||||
|
|
||||||
|
:type reward_at_done: object
|
||||||
|
:param reward_at_done: Specifies the reward all agents get when any destination has been reached.
|
||||||
|
Default {d.REWARD_DEST_DONE}
|
||||||
|
:type dest_reach_reward: float
|
||||||
|
:param dest_reach_reward: Specifies a single agent's reward for reaching a single destination.
|
||||||
|
Default {d.REWARD_DEST_REACHED}
|
||||||
|
"""
|
||||||
|
super(DoneAtDestinationReachAny, self).__init__(**kwargs)
|
||||||
|
self.reward = reward_at_done
|
||||||
|
|
||||||
def on_check_done(self, state) -> List[DoneResult]:
|
def on_check_done(self, state) -> List[DoneResult]:
|
||||||
if any(x.was_reached() for x in state[d.DESTINATION]):
|
if any(x.was_reached() for x in state[d.DESTINATION]):
|
||||||
return [DoneResult(self.name, validity=c.VALID, reward=r.DEST_REACHED)]
|
return [DoneResult(self.name, validity=c.VALID, reward=marl_factory_grid.modules.destinations.constants.REWARD_DEST_REACHED)]
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
class DestinationSpawn(Rule):
|
class SpawnDestinations(Rule):
|
||||||
|
|
||||||
def __init__(self, n_dests: int = 1, spawn_frequency: int = 5,
|
def __init__(self, n_dests: int = 1, spawn_mode: str = d.MODE_GROUPED):
|
||||||
spawn_mode: str = d.MODE_GROUPED):
|
f"""
|
||||||
super(DestinationSpawn, self).__init__()
|
Defines how destinations are initially spawned and respawned in addition.
|
||||||
|
!!! This rule introduces no kind of reward or Env.-Done condition!
|
||||||
|
|
||||||
|
:type n_dests: int
|
||||||
|
:param n_dests: How many destinations should be maintained (and initially spawned) on the map?
|
||||||
|
:type spawn_mode: str
|
||||||
|
:param spawn_mode: One of {d.SPAWN_MODES}. {d.MODE_GROUPED}: Always wait for all Destinations to be gone,
|
||||||
|
then respawn after the given time. {d.MODE_SINGLE}: Just spawn every destination,
|
||||||
|
that has been reached, after the given time
|
||||||
|
|
||||||
|
"""
|
||||||
|
super(SpawnDestinations, self).__init__()
|
||||||
self.n_dests = n_dests
|
self.n_dests = n_dests
|
||||||
self.spawn_mode = spawn_mode
|
self.spawn_mode = spawn_mode
|
||||||
|
|
||||||
@@ -81,8 +130,18 @@ class DestinationSpawn(Rule):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class FixedDestinationSpawn(Rule):
|
class SpawnDestinationsPerAgent(Rule):
|
||||||
def __init__(self, per_agent_positions: Dict[str, List[Tuple[int, int]]]):
|
def __init__(self, per_agent_positions: Dict[str, List[Tuple[int, int]]]):
|
||||||
|
"""
|
||||||
|
Special rule that spawns destinations, which are bound to a single agent, at a fixed set of positions.
|
||||||
|
Useful for introducing specialists, etc.
|
||||||
|
|
||||||
|
!!! This rule does not introduce any reward or done condition.
|
||||||
|
|
||||||
|
:type per_agent_positions: Dict[str, List[Tuple[int, int]]]
|
||||||
|
:param per_agent_positions: Please provide a dictionary with agent names as keys; and a list of possible
|
||||||
|
destination coords as value. Example: {Wolfgang: [(0, 0), (1, 1), ...]}
|
||||||
|
"""
|
||||||
super(Rule, self).__init__()
|
super(Rule, self).__init__()
|
||||||
self.per_agent_positions = {key: [ast.literal_eval(x) for x in val] for key, val in per_agent_positions.items()}
|
self.per_agent_positions = {key: [ast.literal_eval(x) for x in val] for key, val in per_agent_positions.items()}
|
||||||
|
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
from .actions import DoorUse
|
from .actions import DoorUse
|
||||||
from .entitites import Door, DoorIndicator
|
from .entitites import Door, DoorIndicator
|
||||||
from .groups import Doors
|
from .groups import Doors
|
||||||
from .rules import DoorAutoClose, DoorIndicateArea
|
from .rules import DoDoorAutoClose, IndicateDoorAreaInObservation
|
||||||
|
@@ -1,13 +1,19 @@
|
|||||||
from marl_factory_grid.environment.rules import Rule
|
|
||||||
from marl_factory_grid.environment import constants as c
|
from marl_factory_grid.environment import constants as c
|
||||||
|
from marl_factory_grid.environment.rules import Rule
|
||||||
from marl_factory_grid.utils.results import TickResult
|
from marl_factory_grid.utils.results import TickResult
|
||||||
from . import constants as d
|
from . import constants as d
|
||||||
from .entitites import DoorIndicator
|
from .entitites import DoorIndicator
|
||||||
|
|
||||||
|
|
||||||
class DoorAutoClose(Rule):
|
class DoDoorAutoClose(Rule):
|
||||||
|
|
||||||
def __init__(self, close_frequency: int = 10):
|
def __init__(self, close_frequency: int = 10):
|
||||||
|
"""
|
||||||
|
This rule automatically closes doors that have been opened, when no entity is blocking the position.
|
||||||
|
|
||||||
|
:type close_frequency: int
|
||||||
|
:param close_frequency: How many ticks after opening, should the door close?
|
||||||
|
"""
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.close_frequency = close_frequency
|
self.close_frequency = close_frequency
|
||||||
|
|
||||||
@@ -22,9 +28,16 @@ class DoorAutoClose(Rule):
|
|||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
class DoorIndicateArea(Rule):
|
class IndicateDoorAreaInObservation(Rule):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
"""
|
||||||
|
Whether to add an indicator aka place a value around doors (within the door tile), for agents to see.
|
||||||
|
This could improve learning.
|
||||||
|
|
||||||
|
"""
|
||||||
|
# Todo: Discuss if this should rather be a single entity with a standalone observation.
|
||||||
|
# Could then be combined with the "Combine"-approach.
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
def on_init(self, state, lvl_map):
|
def on_init(self, state, lvl_map):
|
||||||
|
@@ -27,9 +27,3 @@ class MachineRule(Rule):
|
|||||||
|
|
||||||
def on_check_done(self, state) -> List[DoneResult]:
|
def on_check_done(self, state) -> List[DoneResult]:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class DoneOnBreakRule(Rule):
|
|
||||||
|
|
||||||
def on_check_done(self, state) -> List[DoneResult]:
|
|
||||||
pass
|
|
@@ -22,7 +22,7 @@ This file is used for:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
LEVELS_DIR = 'modules/levels' # for use in studies and experiments
|
LEVELS_DIR = 'levels' # for use in studies and experiments
|
||||||
STEPS_START = 1 # Define where to the stepcount; which is the first step
|
STEPS_START = 1 # Define where to the stepcount; which is the first step
|
||||||
|
|
||||||
IGNORED_DF_COLUMNS = ['Episode', 'Run', # For plotting, which values are ignored when loading monitor files
|
IGNORED_DF_COLUMNS = ['Episode', 'Run', # For plotting, which values are ignored when loading monitor files
|
||||||
|
@@ -88,11 +88,15 @@ class Gamestate(object):
|
|||||||
results.extend(self.rules.tick_pre_step_all(self))
|
results.extend(self.rules.tick_pre_step_all(self))
|
||||||
|
|
||||||
for idx, action_int in enumerate(actions):
|
for idx, action_int in enumerate(actions):
|
||||||
agent = self[c.AGENT][idx].clear_temp_state()
|
if not agent.var_is_paralyzed:
|
||||||
action = agent.actions[action_int]
|
agent = self[c.AGENT][idx].clear_temp_state()
|
||||||
action_result = action.do(agent, self)
|
action = agent.actions[action_int]
|
||||||
results.append(action_result)
|
action_result = action.do(agent, self)
|
||||||
agent.set_state(action_result)
|
results.append(action_result)
|
||||||
|
agent.set_state(action_result)
|
||||||
|
else:
|
||||||
|
self.print(f"{agent.name} is paralied because of: {agent.paralyze_reasons}")
|
||||||
|
continue
|
||||||
|
|
||||||
results.extend(self.rules.tick_step_all(self))
|
results.extend(self.rules.tick_step_all(self))
|
||||||
results.extend(self.rules.tick_post_step_all(self))
|
results.extend(self.rules.tick_post_step_all(self))
|
||||||
|
Reference in New Issue
Block a user