Added explanation for narrow_corridor.yaml

2025-12-20 05:56:07 +01:00 · 2023-11-10 06:54:38 +01:00
parent 06a5130b25
commit a9462a8b6f
3 changed files with 40 additions and 6 deletions
--- a/marl_factory_grid/configs/narrow_corridor.yaml
+++ b/marl_factory_grid/configs/narrow_corridor.yaml
@@ -1,23 +1,41 @@
-General:
+eneral:
  # Your Seed
  env_seed: 69
  # Individual or global rewards?
  individual_rewards: true
  # The level.txt file to load
  level_name: narrow_corridor
  # View Radius; 0 = full observability
  pomdp_r: 0
  # print all messages and events
  verbose: true
 Agents:
  # Agents are identified by their name 
  Wolfgang:
    # The available actions for this particular agent
    Actions:
    # Able to do nothing
    - Noop
    # Able to move in all 8 directions
    - Move8
    # Stuff the agent can observe (per 2d slice)
    #   use "Combined" if you want to merge multiple slices into one
    Observations:
    # He sees walls
    - Walls
    # He sees other agents; "Karl-Heinz" in this setting would be fine, too
    - Other
    # He can see Destinations, that are assigned to him (hence the singular) 
    - Destination
    # Available spawn positions as list
    Positions:
      - (2, 1)
      - (2, 5)
    # It is okay to collide with other agents, so that 
    #   they end up on the same position
    is_blocking_pos: true
  # See Above....
  Karl-Heinz:
    Actions:
      - Noop
@@ -31,12 +49,19 @@ Agents:
      - (2, 5)
    is_blocking_pos: true
 # Other noteworthy Entities
 Entities:
  # The destinations or positional targets to reach
  Destinations:
    # Let them spawn on closed doors and agent positions
    ignore_blocking: true
    # We need a special spawn rule...
    spawnrule:
      # ...which assigns the destinations per agent
      SpawnDestinationsPerAgent:
        # we use this parameter
        coords_or_quantity:
          # to enable and assign special positions per agent
          Wolfgang:
              - (2, 1)
              - (2, 5)
@@ -47,12 +72,18 @@ Entities:
    # GlobalPositions:
    #   normalized: false
 # Define the env. dynamics
 Rules:
  # Utilities
  #  This rule checks for collisions and also assigns the (negative) reward
  WatchCollisions:
    reward: -0.1
    reward_at_done: -1
    done_at_collisions: false
  # Done Conditions
  #   Load any of the rules, to check for done conditions. 
  # DoneAtDestinationReachAny:
  DoneAtDestinationReachAll:
  #  reward_at_done: 1
  DoneAtMaxStepsReached:
    max_steps: 500
--- a/marl_factory_grid/environment/rewards.py
+++ b/marl_factory_grid/environment/rewards.py
@@ -2,3 +2,4 @@ MOVEMENTS_VALID: float = -0.001
 MOVEMENTS_FAIL: float  = -0.05
 NOOP: float            = -0.01
 COLLISION: float       = -0.5
 COLLISION_DONE: float  = -1
--- a/marl_factory_grid/environment/rules.py
+++ b/marl_factory_grid/environment/rules.py
@@ -126,8 +126,10 @@ class AssignGlobalPositions(Rule):
 class WatchCollisions(Rule):
-    def __init__(self, done_at_collisions: bool = False):
+    def __init__(self, reward=r.COLLISION, done_at_collisions: bool = False, reward_at_done=r.COLLISION_DONE):
        super().__init__()
        self.reward_at_done = reward_at_done
        self.reward = reward
        self.done_at_collisions = done_at_collisions
        self.curr_done = False
@@ -140,12 +142,12 @@ class WatchCollisions(Rule):
            if len(guests) >= 2:
                for i, guest in enumerate(guests):
                    try:
-                        guest.set_state(TickResult(identifier=c.COLLISION, reward=r.COLLISION,
+                        guest.set_state(TickResult(identifier=c.COLLISION, reward=self.reward,
                                                   validity=c.NOT_VALID, entity=self))
                    except AttributeError:
                        pass
                    results.append(TickResult(entity=guest, identifier=c.COLLISION,
-                                              reward=r.COLLISION, validity=c.VALID))
+                                              reward=self.reward, validity=c.VALID))
                self.curr_done = True if self.done_at_collisions else False
        return results
@@ -154,5 +156,5 @@ class WatchCollisions(Rule):
            inter_entity_collision_detected = self.curr_done
            move_failed = any(h.is_move(x.state.identifier) and not x.state.validity for x in state[c.AGENT])
            if inter_entity_collision_detected or move_failed:
-                return [DoneResult(validity=c.VALID, identifier=c.COLLISION, reward=r.COLLISION)]
+                return [DoneResult(validity=c.VALID, identifier=c.COLLISION, reward=self.reward_at_done)]
-        return [DoneResult(validity=c.NOT_VALID, identifier=self.name)]
+        return []