Merge branch 'main' into documentation

# Conflicts:
#	marl_factory_grid/configs/default_config.yaml
#	marl_factory_grid/configs/eight_puzzle.yaml
#	random_testrun.py
This commit is contained in:
Chanumask
2023-11-28 12:06:26 +01:00
20 changed files with 269 additions and 179 deletions

View File

@ -1,69 +1,89 @@
# General env. settings.
General:
# Just the best seed.
env_seed: 69
# Individual vs global rewards
# Each agent receives an individual Reward.
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
# level file to load from .\levels\.
level_name: eight_puzzle
# View Radius; 0 = full observability
# Partial Observability. 0 = Full Observation.
pomdp_r: 0
# Print all messages and events
verbose: True
# Run tests
# Please do not spam me.
verbose: false
# Do not touch, WIP
tests: false
# In the "eight puzzle" there are 8 agents standing on a 3x3 map, each with a specific destination to reach.
# RL Surrogates
Agents:
# This defines the name of the agent. UTF-8
Wolfgang:
# Section which defines the available Actions per Agent
Actions:
Noop:
fail_reward: -0
valid_reward: 0
# Move4 adds 4 actions [`North`, `East`, `South`, `West`]
Move4:
fail_reward: -0.1
valid_reward: -.01
# Reward specification which differs from the default.
# Agent does a valid move in the environment. He actually moves.
valid_reward: -0.1
# Agent wants to move, but fails.
fail_reward: 0
# NOOP aka agent does not do a thing.
Noop:
# The Agent decides to not do anything. Which is always valid.
valid_reward: 0
# Does not do anything, just using the same interface.
fail_reward: 0
# What the agent wants to see.
Observations:
# The agent...
# sees other agents, but not himself.
- Other
# wants to see walls
- Walls
# sees his associated Destination (singular). Use the Plural for `see all destinations`.
- Destination
Clones:
- Juergen
- Soeren
- Walter
- Siggi
- Dennis
- Karl-Heinz
- Kevin
# multiple agents can not stand on the same location
# You want to have 7 clones, also possible to name them by giving names as list.
Clones: 7
# Agents are blocking their grid position from being entered by others.
is_blocking_pos: true
# Apart from agents, which additional entities do you want to load?
Entities:
# Observable destinations, which can be reached by stepping on the same position. Has additional parameters...
Destinations:
# Let them spawn on closed doors and agent positions
ignore_blocking: true
# We need a special spawn rule...
# For 8-Puzzle, we need a special spawn rule...
spawnrule:
# ...which assigns the destinations per agent
SpawnDestinationsPerAgent:
# we use this parameter
coords_or_quantity:
# to enable and assign special positions per agent
Wolfgang: 1
Karl-Heinz: 1
Kevin: 1
Juergen: 1
Soeren: 1
Walter: 1
Siggi: 1
Dennis: 1
# ...which spawn a single position just underneath an associated agent.
SpawnDestinationOnAgent: {} # There are no parameters, so we state empty kwargs.
# This section defines which operations are performed beside agent action.
# Without this section nothing happens, not even Done-condition checks.
# Also, situation-based rewards are specified this way.
Rules:
# Utilities
## Utilities
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
# Can be omitted/ignored if you do not want to take care of collisions at all.
# This does not mean that agents cannot collide; it's just ignored.
WatchCollisions:
reward: 0
done_at_collisions: false
# Done Conditions
# Done when all agents are standing on the correct destination at the same time
DoneAtDestinationReach:
condition: simultaneous
# In 8 Puzzle, do not randomize the start positions, rather move a random agent onto the single free position n-times.
DoRandomInitialSteps:
# How many times?
random_steps: 2
## Done Conditions
# Maximum steps per episode. There is no reward for failing.
DoneAtMaxStepsReached:
max_steps: 500
# After how many steps should the episode end?
max_steps: 200
# For 8 Puzzle we need a done condition that checks whether destinations have been reached, so...
DoneAtDestinationReach:
# On every step, should there be a reward for agents that reach their associated destination? No!
dest_reach_reward: 0 # Do not touch. This is useful in other settings!
# Reward should only be given when all destinations are reached in parallel!
condition: "simultanious"
# Reward if this is the case. Granted to each agent when all agents are at their target position simultaneously.
reward_at_done: 1