Added explanation for narrow_corridor.yaml

This commit is contained in:
Steffen Illium 2023-11-10 06:54:38 +01:00
parent 06a5130b25
commit a9462a8b6f
3 changed files with 40 additions and 6 deletions

View File

@ -1,23 +1,41 @@
General:
eneral:
# Your Seed
env_seed: 69
# Individual or global rewards?
individual_rewards: true
# The level.txt file to load
level_name: narrow_corridor
# View Radius; 0 = full observability
pomdp_r: 0
# print all messages and events
verbose: true
Agents:
# Agents are identified by their name
Wolfgang:
# The available actions for this particular agent
Actions:
# Able to do nothing
- Noop
# Able to move in all 8 directions
- Move8
# Stuff the agent can observe (per 2d slice)
# use "Combined" if you want to merge multiple slices into one
Observations:
# He sees walls
- Walls
# He sees the other agent; "Karl-Heinz" in this setting would be fine, too
- Other
# He can see Destinations, that are assigned to him (hence the singular)
- Destination
# Available Spawn Positions as list
Positions:
- (2, 1)
- (2, 5)
# It is okay to collide with other agents, so that
# they end up on the same position
is_blocking_pos: true
# See Above....
Karl-Heinz:
Actions:
- Noop
@ -31,12 +49,19 @@ Agents:
- (2, 5)
is_blocking_pos: true
# Other noteworthy Entities
Entities:
# The destinations or positional targets to reach
Destinations:
# Let them spawn on closed doors and agent positions
ignore_blocking: true
# We need a special spawn rule...
spawnrule:
# ...which assigns the destinations per agent
SpawnDestinationsPerAgent:
# we use this parameter
coords_or_quantity:
# to enable and assign special positions per agent
Wolfgang:
- (2, 1)
- (2, 5)
@ -47,12 +72,18 @@ Entities:
# GlobalPositions:
# normalized: false
# Define the env. dynamics
Rules:
# Utilities
# This rule Checks for Collision, also it assigns the (negative) reward
WatchCollisions:
reward: -0.1
reward_at_done: -1
done_at_collisions: false
# Done Conditions
# Load any of the rules, to check for done conditions.
# DoneAtDestinationReachAny:
DoneAtDestinationReachAll:
# reward_at_done: 1
DoneAtMaxStepsReached:
max_steps: 500

View File

@ -2,3 +2,4 @@ MOVEMENTS_VALID: float = -0.001
MOVEMENTS_FAIL: float = -0.05
NOOP: float = -0.01
COLLISION: float = -0.5
COLLISION_DONE: float = -1

View File

@ -126,8 +126,10 @@ class AssignGlobalPositions(Rule):
class WatchCollisions(Rule):
def __init__(self, done_at_collisions: bool = False):
def __init__(self, reward=r.COLLISION, done_at_collisions: bool = False, reward_at_done=r.COLLISION_DONE):
super().__init__()
self.reward_at_done = reward_at_done
self.reward = reward
self.done_at_collisions = done_at_collisions
self.curr_done = False
@ -140,12 +142,12 @@ class WatchCollisions(Rule):
if len(guests) >= 2:
for i, guest in enumerate(guests):
try:
guest.set_state(TickResult(identifier=c.COLLISION, reward=r.COLLISION,
guest.set_state(TickResult(identifier=c.COLLISION, reward=self.reward,
validity=c.NOT_VALID, entity=self))
except AttributeError:
pass
results.append(TickResult(entity=guest, identifier=c.COLLISION,
reward=r.COLLISION, validity=c.VALID))
reward=self.reward, validity=c.VALID))
self.curr_done = True if self.done_at_collisions else False
return results
@ -154,5 +156,5 @@ class WatchCollisions(Rule):
inter_entity_collision_detected = self.curr_done
move_failed = any(h.is_move(x.state.identifier) and not x.state.validity for x in state[c.AGENT])
if inter_entity_collision_detected or move_failed:
return [DoneResult(validity=c.VALID, identifier=c.COLLISION, reward=r.COLLISION)]
return [DoneResult(validity=c.NOT_VALID, identifier=self.name)]
return [DoneResult(validity=c.VALID, identifier=c.COLLISION, reward=self.reward_at_done)]
return []