Merge branch 'main' into refactor_rename

# Conflicts:
#	marl_factory_grid/modules/clean_up/groups.py
#	marl_factory_grid/modules/clean_up/rules.py
#	marl_factory_grid/modules/destinations/rules.py
This commit is contained in:
Chanumask
2023-10-27 13:12:54 +02:00
31 changed files with 332 additions and 105 deletions

View File

@@ -60,7 +60,7 @@ Just define what your environment needs in a *yaml*-configfile like:
done_at_collisions: !!bool True done_at_collisions: !!bool True
ItemRespawn: ItemRespawn:
spawn_freq: 5 spawn_freq: 5
DoorAutoClose: {} DoDoorAutoClose: {}
Assets: Assets:
- Defaults - Defaults

View File

@@ -20,6 +20,14 @@ class Agent(Entity):
def var_can_move(self): def var_can_move(self):
return True return True
@property
def var_is_paralyzed(self):
return len(self._paralyzed)
@property
def paralyze_reasons(self):
return [x for x in self._paralyzed]
@property @property
def var_is_blocking_pos(self): def var_is_blocking_pos(self):
return False return False
@@ -57,6 +65,7 @@ class Agent(Entity):
def __init__(self, actions: List[Action], observations: List[str], *args, **kwargs): def __init__(self, actions: List[Action], observations: List[str], *args, **kwargs):
super(Agent, self).__init__(*args, **kwargs) super(Agent, self).__init__(*args, **kwargs)
self._paralyzed = set()
self.step_result = dict() self.step_result = dict()
self._actions = actions self._actions = actions
self._observations = observations self._observations = observations
@@ -75,6 +84,17 @@ class Agent(Entity):
def set_state(self, action_result): def set_state(self, action_result):
self._state = action_result self._state = action_result
def paralyze(self, reason):
self._paralyzed.add(reason)
return c.VALID
def de_paralyze(self, reason):
try:
self._paralyzed.remove(reason)
return c.VALID
except KeyError:
return c.NOT_VALID
def render(self): def render(self):
i = next(idx for idx, x in enumerate(self._collection) if x.name == self.name) i = next(idx for idx, x in enumerate(self._collection) if x.name == self.name)
curr_state = self.state curr_state = self.state

View File

@@ -99,7 +99,7 @@ class Entity(_Object, abc.ABC):
try: try:
self.bind_to(bind_to) self.bind_to(bind_to)
except AttributeError: except AttributeError:
print(f'Objects of {self.__class__.__name__} can not be bound to other entities.') print(f'Objects of class "{self.__class__.__name__}" can not be bound to other entities.')
exit() exit()
def summarize_state(self) -> dict: def summarize_state(self) -> dict:

View File

@@ -37,7 +37,7 @@ class Entities(_Objects):
return[x for val in self.pos_dict[pos] for x in val if x.var_can_collide] return[x for val in self.pos_dict[pos] for x in val if x.var_can_collide]
def empty_positions(self): def empty_positions(self):
empty_positions= [key for key in self.floorlist if self.pos_dict[key]] empty_positions = [key for key in self.floorlist if not self.pos_dict[key]]
shuffle(empty_positions) shuffle(empty_positions)
return empty_positions return empty_positions

View File

@@ -1,4 +1,4 @@
from .actions import BtryCharge from .actions import BtryCharge
from .entitites import Pod, Battery from .entitites import Pod, Battery
from .groups import ChargePods, Batteries from .groups import ChargePods, Batteries
from .rules import BtryDoneAtDischarge, Btry from .rules import BtryDoneAtDischarge, BatteryDecharge

View File

@@ -1,16 +1,17 @@
from typing import Union from typing import Union
import marl_factory_grid.modules.batteries.constants
from marl_factory_grid.environment.actions import Action from marl_factory_grid.environment.actions import Action
from marl_factory_grid.utils.results import ActionResult from marl_factory_grid.utils.results import ActionResult
from marl_factory_grid.modules.batteries import constants as b, rewards as r from marl_factory_grid.modules.batteries import constants as b
from marl_factory_grid.environment import constants as c from marl_factory_grid.environment import constants as c
class BtryCharge(Action): class BtryCharge(Action):
def __init__(self): def __init__(self):
super().__init__(b.CHARGE) super().__init__(b.ACTION_CHARGE)
def do(self, entity, state) -> Union[None, ActionResult]: def do(self, entity, state) -> Union[None, ActionResult]:
if charge_pod := state[b.CHARGE_PODS].by_pos(entity.pos): if charge_pod := state[b.CHARGE_PODS].by_pos(entity.pos):
@@ -23,4 +24,4 @@ class BtryCharge(Action):
valid = c.NOT_VALID valid = c.NOT_VALID
state.print(f'{entity.name} failed to charged batteries at {entity.pos}.') state.print(f'{entity.name} failed to charged batteries at {entity.pos}.')
return ActionResult(entity=entity, identifier=self._identifier, validity=valid, return ActionResult(entity=entity, identifier=self._identifier, validity=valid,
reward=r.CHARGE_VALID if valid else r.CHARGE_FAIL) reward=marl_factory_grid.modules.batteries.constants.REWARD_CHARGE_VALID if valid else marl_factory_grid.modules.batteries.constants.Reward_CHARGE_FAIL)

View File

@@ -4,5 +4,14 @@ BATTERIES = 'Batteries'
BATTERY_DISCHARGED = 'DISCHARGED' BATTERY_DISCHARGED = 'DISCHARGED'
CHARGE_POD_SYMBOL = 1 CHARGE_POD_SYMBOL = 1
ACTION_CHARGE = 'do_charge_action'
CHARGE = 'do_charge_action' REWARD_CHARGE_VALID: float = 0.1
Reward_CHARGE_FAIL: float = -0.1
REWARD_BATTERY_DISCHARGED: float = -1.0
REWARD_DISCHARGE_DONE: float = -1.0
# Done-modes understood by `BtryDoneAtDischarge`.
# Each constant's value matches its name; the two string values used to be swapped.
GROUPED = "grouped"
SINGLE = "single"
MODES = [GROUPED, SINGLE]

View File

@@ -1,3 +0,0 @@
CHARGE_VALID: float = 0.1
CHARGE_FAIL: float = -0.1
BATTERY_DISCHARGED: float = -1.0

View File

@@ -1,24 +1,58 @@
from typing import List, Union from typing import List, Union
import marl_factory_grid.modules.batteries.constants
from marl_factory_grid.environment.rules import Rule from marl_factory_grid.environment.rules import Rule
from marl_factory_grid.utils.results import TickResult, DoneResult from marl_factory_grid.utils.results import TickResult, DoneResult
from marl_factory_grid.environment import constants as c from marl_factory_grid.environment import constants as c
from marl_factory_grid.modules.batteries import constants as b, rewards as r from marl_factory_grid.modules.batteries import constants as b
class Btry(Rule): class BatteryDecharge(Rule):
def __init__(self, initial_charge: float = 0.8, per_action_costs: Union[dict, float] = 0.02): def __init__(self, initial_charge: float = 0.8, per_action_costs: Union[dict, float] = 0.02,
battery_charge_reward: float = b.REWARD_CHARGE_VALID,
battery_failed_reward: float = b.Reward_CHARGE_FAIL,
battery_discharge_reward: float = b.REWARD_BATTERY_DISCHARGED,
paralyze_agents_on_discharge: bool = False):
f"""
Enables the Battery Charge/Discharge functionality.
:type paralyze_agents_on_discharge: bool
:param paralyze_agents_on_discharge: Whether agents are paralyzed (unable to perform actions) while their battery is discharged.
:type per_action_costs: Union[dict, float] = 0.02
:param per_action_costs: 1. dict: with an action name as key, provide a value for each
(maybe walking is less tedious as opening a door? Just saying...).
2. float: each action "costs" the same.
----
!!! Does not introduce any Env.-Done condition.
!!! Batteries can only be charged if the agent possesses the "BtryCharge" action.
!!! Batteries can only be charged if there are "ChargePods" and they are spawned!
----
:type initial_charge: float
:param initial_charge: How much juice they have.
:type battery_discharge_reward: float
:param battery_discharge_reward: Negative reward when agents let their batteries discharge.
Default: {b.REWARD_BATTERY_DISCHARGED}
:type battery_failed_reward: float
:param battery_failed_reward: Negative reward when an agent tries to charge but cannot (overcharge, not on a station).
Default: {b.Reward_CHARGE_FAIL}
:type battery_charge_reward: float
:param battery_charge_reward: Positive reward, when agent actually charge their battery.
Default: {b.REWARD_CHARGE_VALID}
"""
super().__init__() super().__init__()
self.paralyze_agents_on_discharge = paralyze_agents_on_discharge
self.battery_discharge_reward = battery_discharge_reward
self.battery_failed_reward = battery_failed_reward
self.battery_charge_reward = battery_charge_reward
self.per_action_costs = per_action_costs self.per_action_costs = per_action_costs
self.initial_charge = initial_charge self.initial_charge = initial_charge
def on_init(self, state, lvl_map): def on_init(self, state, lvl_map):
assert len(state[c.AGENT]), "There are no agents, did you already spawn them?"
state[b.BATTERIES].spawn(state[c.AGENT], self.initial_charge) state[b.BATTERIES].spawn(state[c.AGENT], self.initial_charge)
def tick_pre_step(self, state) -> List[TickResult]:
pass
def tick_step(self, state) -> List[TickResult]: def tick_step(self, state) -> List[TickResult]:
# Decharge # Decharge
batteries = state[b.BATTERIES] batteries = state[b.BATTERIES]
@@ -42,27 +76,78 @@ class Btry(Rule):
if btry.is_discharged: if btry.is_discharged:
state.print(f'Battery of {btry.bound_entity.name} is discharged!') state.print(f'Battery of {btry.bound_entity.name} is discharged!')
results.append( results.append(
TickResult(self.name, entity=btry.bound_entity, reward=r.BATTERY_DISCHARGED, validity=c.VALID)) TickResult(self.name, entity=btry.bound_entity, reward=self.battery_discharge_reward,
else: validity=c.VALID)
pass )
if self.paralyze_agents_on_discharge:
btry.bound_entity.paralyze(self.name)
results.append(
TickResult("Paralyzed", entity=btry.bound_entity, reward=0, validity=c.VALID)
)
state.print(f'{btry.bound_entity.name} has just been paralyzed!')
if btry.bound_entity.var_is_paralyzed and not btry.is_discharged:
btry.bound_entity.de_paralyze(self.name)
results.append(
TickResult("De-Paralyzed", entity=btry.bound_entity, reward=0, validity=c.VALID)
)
state.print(f'{btry.bound_entity.name} has just been de-paralyzed!')
return results return results
class BtryDoneAtDischarge(BatteryDecharge):

    def __init__(self, reward_discharge_done=b.REWARD_DISCHARGE_DONE, mode: str = b.SINGLE, **kwargs):
        """
        Enables the Battery Charge/Discharge functionality and additionally introduces a Done-condition
        that triggers on discharged batteries.

        :type mode: str
        :param mode: `b.SINGLE`: done as soon as ANY battery is discharged.
                     `b.GROUPED`: done only once ALL batteries are discharged.
                     Any other value never triggers the Done-condition.
        :type reward_discharge_done: float
        :param reward_discharge_done: Reward reported with the DoneResult when the condition triggers.
                                      Default: b.REWARD_DISCHARGE_DONE
        :param kwargs: Forwarded unchanged to `BatteryDecharge.__init__`, i.e. `initial_charge`,
                       `per_action_costs`, `battery_charge_reward`, `battery_failed_reward`,
                       `battery_discharge_reward` and `paralyze_agents_on_discharge`.
        """
        # NOTE: a plain string literal is used on purpose; an f-string is never stored as `__doc__`.
        super().__init__(**kwargs)
        self.mode = mode
        self.reward_discharge_done = reward_discharge_done

    def on_check_done(self, state) -> List[DoneResult]:
        """
        Report whether the episode is done because of discharged batteries.

        :param state: Game state; `state[b.BATTERIES]` is iterated for `is_discharged`.
        :return: A single-element list holding a VALID DoneResult (with `reward_discharge_done`)
                 when the condition of the configured mode is met, otherwise a NOT_VALID one.
        """
        batteries = state[b.BATTERIES]
        if self.mode == b.SINGLE:
            is_done = any(battery.is_discharged for battery in batteries)
        elif self.mode == b.GROUPED:
            # Previously this branch compared against b.SINGLE as well, so it could never fire.
            is_done = all(battery.is_discharged for battery in batteries)
        else:
            is_done = False

        if is_done:
            return [DoneResult(self.name, validity=c.VALID, reward=self.reward_discharge_done)]
        return [DoneResult(self.name, validity=c.NOT_VALID, reward=0)]
class PodRules(Rule): class SpawnChargePods(Rule):
def __init__(self, n_pods: int, charge_rate: float = 0.4, multi_charge: bool = False): def __init__(self, n_pods: int, charge_rate: float = 0.4, multi_charge: bool = False):
"""
Spawn Chargepods in accordance to the given parameters.
:type n_pods: int
:param n_pods: How many charge pods are there?
:type charge_rate: float
:param charge_rate: How much juice does each use of the charge action top up?
:type multi_charge: bool
:param multi_charge: Whether multiple agents are able to charge at the same time.
"""
super().__init__() super().__init__()
self.multi_charge = multi_charge self.multi_charge = multi_charge
self.charge_rate = charge_rate self.charge_rate = charge_rate
@@ -73,5 +158,5 @@ class PodRules(Rule):
empty_positions = state.entities.empty_positions() empty_positions = state.entities.empty_positions()
pods = pod_collection.from_coordinates(empty_positions, entity_kwargs=dict( pods = pod_collection.from_coordinates(empty_positions, entity_kwargs=dict(
multi_charge=self.multi_charge, charge_rate=self.charge_rate) multi_charge=self.multi_charge, charge_rate=self.charge_rate)
) )
pod_collection.add_items(pods) pod_collection.add_items(pods)

View File

@@ -1,4 +1,4 @@
from .actions import CleanUp from .actions import CleanUp
from .entitites import DirtPile from .entitites import DirtPile
from .groups import DirtPiles from .groups import DirtPiles
from .rules import DirtRespawnRule, DirtSmearOnMove, DirtAllCleanDone from .rules import SpawnDirt, EntitiesSmearDirtOnMove, DoneOnAllDirtCleaned

View File

@@ -32,11 +32,9 @@ class DirtPile(Entity):
# Edit this if you want items to be drawn in the ops differntly # Edit this if you want items to be drawn in the ops differntly
return self._amount return self._amount
def __init__(self, *args, max_local_amount=5, initial_amount=2, spawn_variation=0.05, **kwargs): def __init__(self, *args, amount=2, max_local_amount=5, **kwargs):
super(DirtPile, self).__init__(*args, **kwargs) super(DirtPile, self).__init__(*args, **kwargs)
self._amount = abs(initial_amount + ( self._amount = amount
random.normal(loc=0, scale=spawn_variation, size=1).item() * initial_amount)
)
self.max_local_amount = max_local_amount self.max_local_amount = max_local_amount
def set_new_amount(self, amount): def set_new_amount(self, amount):

View File

@@ -1,4 +1,5 @@
from marl_factory_grid.environment import constants as c from marl_factory_grid.environment import constants as c
from marl_factory_grid.utils.results import Result
from marl_factory_grid.environment.groups.collection import Collection from marl_factory_grid.environment.groups.collection import Collection
from marl_factory_grid.modules.clean_up.entitites import DirtPile from marl_factory_grid.modules.clean_up.entitites import DirtPile
@@ -27,42 +28,44 @@ class DirtPiles(Collection):
return sum([dirt.amount for dirt in self]) return sum([dirt.amount for dirt in self])
def __init__(self, *args, def __init__(self, *args,
initial_amount=2,
initial_dirt_ratio=0.05,
dirt_spawn_r_var=0.1,
max_local_amount=5, max_local_amount=5,
clean_amount=1, clean_amount=1,
max_global_amount: int = 20, **kwargs): max_global_amount: int = 20, **kwargs):
super(DirtPiles, self).__init__(*args, **kwargs) super(DirtPiles, self).__init__(*args, **kwargs)
self.clean_amount = clean_amount self.clean_amount = clean_amount
self.initial_amount = initial_amount
self.initial_dirt_ratio = initial_dirt_ratio
self.dirt_spawn_r_var = dirt_spawn_r_var
self.max_global_amount = max_global_amount self.max_global_amount = max_global_amount
self.max_local_amount = max_local_amount self.max_local_amount = max_local_amount
def spawn(self, then_dirty_positions, amount) -> bool: def spawn(self, then_dirty_positions, amount_s) -> Result:
for pos in then_dirty_positions: spawn_counter = 0
for idx, pos in enumerate(then_dirty_positions):
if not self.amount > self.max_global_amount: if not self.amount > self.max_global_amount:
amount = amount_s[idx] if isinstance(amount_s, list) else amount_s
if dirt := self.by_pos(pos): if dirt := self.by_pos(pos):
new_value = dirt.amount + amount new_value = dirt.amount + amount
dirt.set_new_amount(new_value) dirt.set_new_amount(new_value)
else: else:
dirt = DirtPile(pos, initial_amount=amount, spawn_variation=self.dirt_spawn_r_var) dirt = DirtPile(pos, amount=amount)
self.add_item(dirt) self.add_item(dirt)
spawn_counter += 1
else: else:
return c.NOT_VALID return Result(identifier=f'{self.name}_spawn', validity=c.NOT_VALID, reward=0,
return c.VALID value=spawn_counter)
return Result(identifier=f'{self.name}_spawn', validity=c.VALID, reward=0, value=spawn_counter)
def trigger_dirt_spawn(self, state, initial_spawn=False) -> bool: def trigger_dirt_spawn(self, n, amount, state, n_var=0.2, amount_var=0.2) -> Result:
free_for_dirt = [x for x in state.entities.floorlist if len(state.entities.pos_dict[x]) == 1 or ( free_for_dirt = [x for x in state.entities.floorlist if len(state.entities.pos_dict[x]) == 1 or (
len(state.entities.pos_dict[x]) == 2 and isinstance(next(y for y in x), DirtPile))] len(state.entities.pos_dict[x]) == 2 and isinstance(next(y for y in x), DirtPile))]
# free_for_dirt = [x for x in state[c.FLOOR]
# if len(x.guests) == 0 or (
# len(x.guests) == 1 and
# isinstance(next(y for y in x.guests), DirtPile))]
state.rng.shuffle(free_for_dirt) state.rng.shuffle(free_for_dirt)
var = self.dirt_spawn_r_var new_spawn = int(abs(n + (state.rng.uniform(-n_var, n_var))))
new_spawn = abs(self.initial_dirt_ratio + (state.rng.uniform(-var, var) if initial_spawn else 0)) new_amount_s = [abs(amount + (amount*state.rng.uniform(-amount_var, amount_var))) for _ in range(new_spawn)]
n_dirty_positions = max(0, int(new_spawn * len(free_for_dirt))) n_dirty_positions = free_for_dirt[:new_spawn]
return self.spawn(free_for_dirt[:n_dirty_positions], self.initial_amount) return self.spawn(n_dirty_positions, new_amount_s)
def __repr__(self): def __repr__(self):
s = super(DirtPiles, self).__repr__() s = super(DirtPiles, self).__repr__()

View File

@@ -7,53 +7,98 @@ from marl_factory_grid.utils.results import TickResult
from marl_factory_grid.utils.results import DoneResult from marl_factory_grid.utils.results import DoneResult
class DirtAllCleanDone(Rule): class DoneOnAllDirtCleaned(Rule):
def __init__(self): def __init__(self, reward: float = r.CLEAN_UP_ALL):
"""
Defines a 'Done'-condition which triggers when there is no more 'Dirt' in the environment.
:type reward: float
:parameter reward: Given reward when condition triggers.
"""
super().__init__() super().__init__()
self.reward = reward
def on_check_done(self, state) -> [DoneResult]: def on_check_done(self, state) -> [DoneResult]:
if len(state[d.DIRT]) == 0 and state.curr_step: if len(state[d.DIRT]) == 0 and state.curr_step:
return [DoneResult(validity=c.VALID, identifier=self.name, reward=r.CLEAN_UP_ALL)] return [DoneResult(validity=c.VALID, identifier=self.name, reward=self.reward)]
return [DoneResult(validity=c.NOT_VALID, identifier=self.name, reward=0)] return [DoneResult(validity=c.NOT_VALID, identifier=self.name, reward=0)]
class DirtRespawnRule(Rule): class SpawnDirt(Rule):
def __init__(self, spawn_freq=15): def __init__(self, initial_n: int, initial_amount: float, respawn_n: int, respawn_amount: float,
n_var: float = 0.2, amount_var: float = 0.2, spawn_freq: int = 15):
"""
Defines the spawn pattern of initial and additional 'Dirt'-entities.
First chooses positions, then tries to spawn dirt until 'respawn_n' or the maximal global amount is reached.
If there is already some, it is topped up to min(max_local_amount, amount).
:type spawn_freq: int
:parameter spawn_freq: In which frequency should this Rule try to spawn new 'Dirt'?
:type respawn_n: int
:parameter respawn_n: How many respawn positions are considered.
:type initial_n: int
:parameter initial_n: How many initial positions are considered.
:type amount_var: float
:parameter amount_var: Variance of amount to spawn.
:type n_var: float
:parameter n_var: Variance of n to spawn.
:type respawn_amount: float
:parameter respawn_amount: Defines how much dirt 'amount' is placed every 'spawn_freq' ticks.
:type initial_amount: float
:parameter initial_amount: Defines how much dirt 'amount' is initially placed.
"""
super().__init__() super().__init__()
self.amount_var = amount_var
self.n_var = n_var
self.respawn_amount = respawn_amount
self.respawn_n = respawn_n
self.initial_amount = initial_amount
self.initial_n = initial_n
self.spawn_freq = spawn_freq self.spawn_freq = spawn_freq
self._next_dirt_spawn = spawn_freq self._next_dirt_spawn = spawn_freq
def on_init(self, state, lvl_map) -> str: def on_init(self, state, lvl_map) -> str:
state[d.DIRT].trigger_dirt_spawn(state, initial_spawn=True) result = state[d.DIRT].trigger_dirt_spawn(self.initial_n, self.initial_amount, state,
return f'Initial Dirt was spawned on: {[x.pos for x in state[d.DIRT]]}' n_var=self.n_var, amount_var=self.amount_var)
state.print(f'Initial Dirt was spawned on: {[x.pos for x in state[d.DIRT]]}')
return result
def tick_step(self, state): def tick_step(self, state):
if self._next_dirt_spawn < 0: if self._next_dirt_spawn < 0:
pass # No DirtPile Spawn pass # No DirtPile Spawn
elif not self._next_dirt_spawn: elif not self._next_dirt_spawn:
validity = state[d.DIRT].trigger_dirt_spawn(state) result = [state[d.DIRT].trigger_dirt_spawn(self.respawn_n, self.respawn_amount, state,
n_var=self.n_var, amount_var=self.amount_var)]
return [TickResult(entity=None, validity=validity, identifier=self.name, reward=0)]
self._next_dirt_spawn = self.spawn_freq self._next_dirt_spawn = self.spawn_freq
else: else:
self._next_dirt_spawn -= 1 self._next_dirt_spawn -= 1
return [] result = []
return result
class DirtSmearOnMove(Rule): class EntitiesSmearDirtOnMove(Rule):
def __init__(self, smear_amount: float = 0.2): def __init__(self, smear_ratio: float = 0.2):
"""
Enables 'smearing'. Entities that move through dirt, will leave a trail behind them.
They take dirt * smear_ratio of it with them to their next position.
:type smear_ratio: float
:parameter smear_ratio: How much percent dirt is smeared by entities to their next position.
"""
assert smear_ratio < 1, "'Smear Amount' must be smaller than 1"
super().__init__() super().__init__()
self.smear_amount = smear_amount self.smear_ratio = smear_ratio
def tick_post_step(self, state): def tick_post_step(self, state):
results = list() results = list()
for entity in state.moving_entites: for entity in state.moving_entites:
if is_move(entity.state.identifier) and entity.state.validity == c.VALID: if is_move(entity.state.identifier) and entity.state.validity == c.VALID:
if old_pos_dirt := state[d.DIRT].by_pos(entity.last_pos): if old_pos_dirt := state[d.DIRT].by_pos(entity.last_pos):
if smeared_dirt := round(old_pos_dirt.amount * self.smear_amount, 2): if smeared_dirt := round(old_pos_dirt.amount * self.smear_ratio, 2):
if state[d.DIRT].spawn(entity.pos, amount=smeared_dirt): if state[d.DIRT].spawn(entity.pos, amount=smeared_dirt):
results.append(TickResult(identifier=self.name, entity=entity, results.append(TickResult(identifier=self.name, entity=entity,
reward=0, validity=c.VALID)) reward=0, validity=c.VALID))

View File

@@ -1,4 +1,4 @@
from .actions import DestAction from .actions import DestAction
from .entitites import Destination from .entitites import Destination
from .groups import Destinations from .groups import Destinations
from .rules import DestinationReachAll, DestinationSpawn from .rules import DoneAtDestinationReachAll, SpawnDestinations

View File

@@ -1,9 +1,10 @@
from typing import Union from typing import Union
import marl_factory_grid.modules.destinations.constants
from marl_factory_grid.environment.actions import Action from marl_factory_grid.environment.actions import Action
from marl_factory_grid.utils.results import ActionResult from marl_factory_grid.utils.results import ActionResult
from marl_factory_grid.modules.destinations import constants as d, rewards as r from marl_factory_grid.modules.destinations import constants as d
from marl_factory_grid.environment import constants as c from marl_factory_grid.environment import constants as c
@@ -20,4 +21,4 @@ class DestAction(Action):
valid = c.NOT_VALID valid = c.NOT_VALID
state.print(f'{entity.name} just tried to do_wait_action do_wait_action at {entity.pos} but failed') state.print(f'{entity.name} just tried to do_wait_action do_wait_action at {entity.pos} but failed')
return ActionResult(entity=entity, identifier=self._identifier, validity=valid, return ActionResult(entity=entity, identifier=self._identifier, validity=valid,
reward=r.WAIT_VALID if valid else r.WAIT_FAIL) reward=marl_factory_grid.modules.destinations.constants.REWARD_WAIT_VALID if valid else marl_factory_grid.modules.destinations.constants.REWARD_WAIT_FAIL)

View File

@@ -3,10 +3,11 @@
DESTINATION = 'Destinations' DESTINATION = 'Destinations'
DEST_SYMBOL = 1 DEST_SYMBOL = 1
WAIT_ON_DEST = 'WAIT'
MODE_SINGLE = 'SINGLE' MODE_SINGLE = 'SINGLE'
MODE_GROUPED = 'GROUPED' MODE_GROUPED = 'GROUPED'
SPAWN_MODES = [MODE_SINGLE, MODE_GROUPED]
DONE_ALL = 'DONE_ALL' REWARD_WAIT_VALID: float = 0.1
DONE_SINGLE = 'DONE_SINGLE' REWARD_WAIT_FAIL: float = -0.1
REWARD_DEST_REACHED: float = 1.0
REWARD_DEST_DONE: float = 5.0

View File

@@ -1,3 +0,0 @@
WAIT_VALID: float = 0.1
WAIT_FAIL: float = -0.1
DEST_REACHED: float = 5.0

View File

@@ -1,18 +1,29 @@
import ast import ast
from random import shuffle from random import shuffle
from typing import List, Dict, Tuple from typing import List, Dict, Tuple
import marl_factory_grid.modules.destinations.constants
from marl_factory_grid.environment.rules import Rule from marl_factory_grid.environment.rules import Rule
from marl_factory_grid.utils.results import TickResult, DoneResult from marl_factory_grid.utils.results import TickResult, DoneResult
from marl_factory_grid.environment import constants as c from marl_factory_grid.environment import constants as c
from marl_factory_grid.modules.destinations import constants as d, rewards as r from marl_factory_grid.modules.destinations import constants as d
from marl_factory_grid.modules.destinations.entitites import Destination from marl_factory_grid.modules.destinations.entitites import Destination
class DestinationReachAll(Rule): class DestinationReachReward(Rule):
def __init__(self): def __init__(self, dest_reach_reward=marl_factory_grid.modules.destinations.constants.REWARD_DEST_REACHED):
super(DestinationReachAll, self).__init__() """
This rule introduces the basic functionality, so that targets (Destinations) can be reached and marked as such.
Additionally, rewards are reported.
:type dest_reach_reward: float
:param dest_reach_reward: Specifies the reward, agents get at destination reach.
"""
super(DestinationReachReward, self).__init__()
self.reward = dest_reach_reward
def tick_step(self, state) -> List[TickResult]: def tick_step(self, state) -> List[TickResult]:
results = [] results = []
@@ -33,31 +44,69 @@ class DestinationReachAll(Rule):
if reached: if reached:
state.print(f'{dest.name} is reached now, mark as reached...') state.print(f'{dest.name} is reached now, mark as reached...')
dest.mark_as_reached() dest.mark_as_reached()
results.append(TickResult(self.name, validity=c.VALID, reward=r.DEST_REACHED, entity=agent)) results.append(TickResult(self.name, validity=c.VALID, reward=self.reward, entity=agent))
return results return results
class DoneAtDestinationReachAll(DestinationReachReward):

    def __init__(self, reward_at_done=d.REWARD_DEST_DONE, **kwargs):
        """
        This rule triggers and sets the done flag if ALL Destinations have been reached.

        :type reward_at_done: float
        :param reward_at_done: Specifies the reward reported with the DoneResult when all destinations
                               are reached. Default: d.REWARD_DEST_DONE
        :type dest_reach_reward: float
        :param dest_reach_reward: Specifies the reward agents get when reaching a single destination
                                  (passed through `kwargs` to `DestinationReachReward`).
        """
        super(DoneAtDestinationReachAll, self).__init__(**kwargs)
        # Stored separately: the inherited `tick_step` reads `self.reward` for the per-destination
        # reward, so overwriting it here would silently discard `dest_reach_reward`.
        self.reward_at_done = reward_at_done

    def on_check_done(self, state) -> List[DoneResult]:
        """
        :param state: Game state; `state[d.DESTINATION]` is iterated for `was_reached()`.
        :return: A single-element list with a VALID DoneResult (carrying `reward_at_done`) once every
                 destination was reached, otherwise a NOT_VALID DoneResult with reward 0.
        """
        if all(x.was_reached() for x in state[d.DESTINATION]):
            return [DoneResult(self.name, validity=c.VALID, reward=self.reward_at_done)]
        return [DoneResult(self.name, validity=c.NOT_VALID, reward=0)]
class DoneAtDestinationReachAny(DestinationReachReward):

    def __init__(self, reward_at_done=d.REWARD_DEST_DONE, **kwargs):
        """
        This rule triggers and sets the done flag if ANY Destination has been reached.

        :type reward_at_done: float
        :param reward_at_done: Specifies the reward reported with the DoneResult when any destination
                               has been reached. Default: d.REWARD_DEST_DONE
        :type dest_reach_reward: float
        :param dest_reach_reward: Specifies a single agent's reward for reaching a single destination
                                  (passed through `kwargs` to `DestinationReachReward`).
                                  Default: d.REWARD_DEST_REACHED
        """
        # NOTE: a plain string literal is used on purpose; an f-string is never stored as `__doc__`.
        super(DoneAtDestinationReachAny, self).__init__(**kwargs)
        # Stored separately: the inherited `tick_step` reads `self.reward` for the per-destination
        # reward, so overwriting it here would silently discard `dest_reach_reward`.
        self.reward_at_done = reward_at_done

    def on_check_done(self, state) -> List[DoneResult]:
        """
        :param state: Game state; `state[d.DESTINATION]` is iterated for `was_reached()`.
        :return: A single-element list with a VALID DoneResult (carrying `reward_at_done`) as soon as
                 any destination was reached, otherwise an empty list.
        """
        if any(x.was_reached() for x in state[d.DESTINATION]):
            # Use the configured done-reward instead of the hard-coded per-destination constant.
            return [DoneResult(self.name, validity=c.VALID, reward=self.reward_at_done)]
        return []
class DestinationSpawn(Rule): class SpawnDestinations(Rule):
def __init__(self, n_dests: int = 1, spawn_frequency: int = 5, def __init__(self, n_dests: int = 1, spawn_mode: str = d.MODE_GROUPED):
spawn_mode: str = d.MODE_GROUPED): f"""
super(DestinationSpawn, self).__init__() Defines how destinations are initially spawned and respawned in addition.
!!! This rule introduces no kind of reward or Env.-Done condition!
:type n_dests: int
:param n_dests: How many destinations should be maintained (and initially spawned) on the map?
:type spawn_mode: str
:param spawn_mode: One of {d.SPAWN_MODES}. {d.MODE_GROUPED}: Always wait for all Destinations to be gone,
then respawn after the given time. {d.MODE_SINGLE}: Just spawn every destination,
that has been reached, after the given time
"""
super(SpawnDestinations, self).__init__()
self.n_dests = n_dests self.n_dests = n_dests
self.spawn_mode = spawn_mode self.spawn_mode = spawn_mode
@@ -81,8 +130,18 @@ class DestinationSpawn(Rule):
pass pass
class FixedDestinationSpawn(Rule): class SpawnDestinationsPerAgent(Rule):
def __init__(self, per_agent_positions: Dict[str, List[Tuple[int, int]]]): def __init__(self, per_agent_positions: Dict[str, List[Tuple[int, int]]]):
"""
Special rule that spawns destinations which are bound to a single agent, at a fixed set of positions.
Useful for introducing specialists, etc.
!!! This rule does not introduce any reward or done condition.
:type per_agent_positions: Dict[str, List[Tuple[int, int]]
:param per_agent_positions: Please provide a dictionary with agent names as keys; and a list of possible
destination coords as value. Example: {Wolfgang: [(0, 0), (1, 1), ...]}
"""
super(Rule, self).__init__() super(Rule, self).__init__()
self.per_agent_positions = {key: [ast.literal_eval(x) for x in val] for key, val in per_agent_positions.items()} self.per_agent_positions = {key: [ast.literal_eval(x) for x in val] for key, val in per_agent_positions.items()}

View File

@@ -1,4 +1,4 @@
from .actions import DoorUse from .actions import DoorUse
from .entitites import Door, DoorIndicator from .entitites import Door, DoorIndicator
from .groups import Doors from .groups import Doors
from .rules import DoorAutoClose, DoorIndicateArea from .rules import DoDoorAutoClose, IndicateDoorAreaInObservation

View File

@@ -1,13 +1,19 @@
from marl_factory_grid.environment.rules import Rule
from marl_factory_grid.environment import constants as c from marl_factory_grid.environment import constants as c
from marl_factory_grid.environment.rules import Rule
from marl_factory_grid.utils.results import TickResult from marl_factory_grid.utils.results import TickResult
from . import constants as d from . import constants as d
from .entitites import DoorIndicator from .entitites import DoorIndicator
class DoorAutoClose(Rule): class DoDoorAutoClose(Rule):
def __init__(self, close_frequency: int = 10): def __init__(self, close_frequency: int = 10):
"""
This rule closes doors, that have been opened automatically, when no entity is blocking the position.
:type close_frequency: int
:param close_frequency: How many ticks after opening, should the door close?
"""
super().__init__() super().__init__()
self.close_frequency = close_frequency self.close_frequency = close_frequency
@@ -22,9 +28,16 @@ class DoorAutoClose(Rule):
return [] return []
class DoorIndicateArea(Rule): class IndicateDoorAreaInObservation(Rule):
def __init__(self): def __init__(self):
"""
Whether to add an indicator aka place a value around doors (within the door tile), for agents to see.
This could improve learning.
"""
# Todo: Discuss if this rather be a single entity with a standalone observation.
# Could then be combined with the "Combine"-approach.
super().__init__() super().__init__()
def on_init(self, state, lvl_map): def on_init(self, state, lvl_map):

View File

@@ -27,9 +27,3 @@ class MachineRule(Rule):
def on_check_done(self, state) -> List[DoneResult]: def on_check_done(self, state) -> List[DoneResult]:
pass pass
class DoneOnBreakRule(Rule):
def on_check_done(self, state) -> List[DoneResult]:
pass

View File

@@ -22,7 +22,7 @@ This file is used for:
""" """
LEVELS_DIR = 'modules/levels' # for use in studies and experiments LEVELS_DIR = 'levels' # for use in studies and experiments
STEPS_START = 1 # Define where to the stepcount; which is the first step STEPS_START = 1 # Define where to the stepcount; which is the first step
IGNORED_DF_COLUMNS = ['Episode', 'Run', # For plotting, which values are ignored when loading monitor files IGNORED_DF_COLUMNS = ['Episode', 'Run', # For plotting, which values are ignored when loading monitor files

View File

@@ -88,11 +88,15 @@ class Gamestate(object):
results.extend(self.rules.tick_pre_step_all(self)) results.extend(self.rules.tick_pre_step_all(self))
for idx, action_int in enumerate(actions): for idx, action_int in enumerate(actions):
agent = self[c.AGENT][idx].clear_temp_state() if not agent.var_is_paralyzed:
action = agent.actions[action_int] agent = self[c.AGENT][idx].clear_temp_state()
action_result = action.do(agent, self) action = agent.actions[action_int]
results.append(action_result) action_result = action.do(agent, self)
agent.set_state(action_result) results.append(action_result)
agent.set_state(action_result)
else:
self.print(f"{agent.name} is paralied because of: {agent.paralyze_reasons}")
continue
results.extend(self.rules.tick_step_all(self)) results.extend(self.rules.tick_step_all(self))
results.extend(self.rules.tick_post_step_all(self)) results.extend(self.rules.tick_post_step_all(self))