Merge branch 'main' into 'unit_testing'

# Conflicts: # marl_factory_grid/algorithms/static/TSP_dirt_agent.py # marl_factory_grid/utils/config_parser.py
2025-07-05 17:11:35 +02:00 · 2024-03-18 16:23:44 +01:00
parent 18a30ed17a fa1f8bec21
commit b3acb57b4e
98 changed files with 2608 additions and 554 deletions
--- a/marl_factory_grid/configs/clean_and_bring.yaml
+++ b/marl_factory_grid/configs/clean_and_bring.yaml
@ -1,17 +1,35 @@
+General:
+  # RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
+  env_seed: 69
+  # Individual vs global rewards
+  individual_rewards: true
+  # The level.txt file to load from marl_factory_grid/levels
+  level_name: rooms
+  # View radius of the partially observable Markov decision process (POMDP)
+  pomdp_r: 3
+  # Print all messages and events
+  verbose: true
+  # Run tests
+  tests: false
+
+# In the "clean and bring" scenario, one agent aims to pick up all items and drop them at drop-off locations while all
+# other agents aim to clean dirt piles.
 Agents:
+  # The clean agents
  Wolfgang:
    Actions:
-    - Move8
-    - DoorUse
-    - Clean
-    - Noop
+      - Move8
+      - DoorUse
+      - Clean
+      - Noop
    Observations:
-    - Walls
-    - Doors
-    - Other
-    - DirtPiles
+      - Walls
+      - Doors
+      - Other
+      - DirtPiles
    Clones: 8

+  # The item agent
  Juergen:
    Actions:
      - Move8
@ -38,37 +56,37 @@ Entities:
  DropOffLocations:
    coords_or_quantity: 1
    max_dropoff_storage_size: 0
-  Inventories: {}
+  Inventories: { }
  Items:
    coords_or_quantity: 5

-
-General:
-  env_seed: 69
-  individual_rewards: true
-  level_name: rooms
-  pomdp_r: 3
-  verbose: True
-  tests: false
-
+# Rules section specifies the rules governing the dynamics of the environment.
 Rules:
  # Environment Dynamics
+  # When stepping over a dirt pile, entities carry a ratio of the dirt to their next position
  EntitiesSmearDirtOnMove:
    smear_ratio: 0.2
+  # Doors automatically close after a certain number of time steps
  DoorAutoClose:
    close_frequency: 7

  # Respawn Stuff
+  # Define how dirt should respawn after the initial spawn
  RespawnDirt:
    respawn_freq: 30
+  # Define how items should respawn after the initial spawn
  RespawnItems:
    respawn_freq: 50

  # Utilities
+  # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
+  # Can be omitted/ignored if you do not want to take care of collisions at all.
  WatchCollisions:
    done_at_collisions: false

  # Done Conditions
+  # Define the conditions for the environment to stop: either success or failure conditions.
+  # The environment stops when all dirt is cleaned
  DoneOnAllDirtCleaned:
  DoneAtMaxStepsReached:
    max_steps: 500
--- a/marl_factory_grid/configs/default_config.yaml
+++ b/marl_factory_grid/configs/default_config.yaml
@ -1,37 +1,74 @@
+# Default Configuration File
+
+General:
+  # RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
+  env_seed: 69
+  # Individual vs global rewards
+  individual_rewards: true
+  # The level.txt file to load from marl_factory_grid/levels
+  level_name: large
+  # View Radius; 0 = full observability
+  pomdp_r: 3
+  # Print all messages and events
+  verbose: false
+  # Run tests
+  tests: false
+
+# Agents section defines the characteristics of different agents in the environment.
+
+# An Agent requires a list of actions and observations.
+# Possible actions: Noop, Charge, Clean, DestAction, DoorUse, ItemAction, MachineAction, Move8, Move4, North, NorthEast, ...
+# Possible observations: All, Combined, GlobalPosition, Battery, ChargePods, DirtPiles, Destinations, Doors, Items, Inventory, DropOffLocations, Maintainers, ...
+# You can use 'clone' as the agent name to have multiple instances with either a list of names or an int specifying the number of clones.
 Agents:
  Wolfgang:
    Actions:
-    - Noop
-    - Charge
-    - Clean
-    - DestAction
-    - DoorUse
-    - ItemAction
-    - Move8
+      - Noop
+      - Charge
+      - Clean
+      - DestAction
+      - DoorUse
+      - ItemAction
+      - Move8
    Observations:
-    - Combined:
-      - Other
-      - Walls
-    - GlobalPosition
-    - Battery
-    - ChargePods
-    - DirtPiles
-    - Destinations
-    - Doors
-    - Items
-    - Inventory
-    - DropOffLocations
-    - Maintainers
-Entities:
+      - Combined:
+          - Other
+          - Walls
+      - GlobalPosition
+      - Battery
+      - ChargePods
+      - DirtPiles
+      - Destinations
+      - Doors
+      - Items
+      - Inventory
+      - DropOffLocations
+      - Maintainers

+# Entities section defines the initial parameters and behaviors of different entities in the environment.
+# Entities all spawn using coords_or_quantity, a number of entities or coordinates to place them.
+Entities:
+  # Batteries: Entities representing power sources for agents.
  Batteries:
    initial_charge: 0.8
    per_action_costs: 0.02
+
+  # ChargePods: Entities representing charging stations for Batteries.
  ChargePods:
    coords_or_quantity: 2
+
+  # Destinations: Entities representing target locations for agents.
+  # - spawn_mode: GROUPED or SINGLE. Determines how destinations are spawned.
  Destinations:
    coords_or_quantity: 1
    spawn_mode: GROUPED
+
+  # DirtPiles: Entities representing piles of dirt.
+  # - initial_amount: Initial amount of dirt in each pile.
+  # - clean_amount: Amount of dirt cleaned in each cleaning action.
+  # - dirt_spawn_r_var: Random variation in dirt spawn amounts.
+  # - max_global_amount: Maximum total amount of dirt allowed in the environment.
+  # - max_local_amount: Maximum amount of dirt allowed in one position.
  DirtPiles:
    coords_or_quantity: 10
    initial_amount: 2
@ -39,50 +76,71 @@ Entities:
    dirt_spawn_r_var: 0.1
    max_global_amount: 20
    max_local_amount: 5
+
+  # Doors are spawned using the level map.
  Doors:
+
+  # DropOffLocations: Entities representing locations where agents can drop off items.
+  # - max_dropoff_storage_size: Maximum storage capacity at each drop-off location.
  DropOffLocations:
    coords_or_quantity: 1
    max_dropoff_storage_size: 0
-  GlobalPositions: {}
-  Inventories: {}
+
+  # GlobalPositions.
+  GlobalPositions: { }
+
+  # Inventories: Entities representing inventories for agents.
+  Inventories: { }
+
+  # Items: Entities representing items in the environment.
  Items:
    coords_or_quantity: 5
+
+  # Machines: Entities representing machines in the environment.
  Machines:
    coords_or_quantity: 2
+
+  # Maintainers: Entities representing maintainers that aim to maintain machines.
  Maintainers:
    coords_or_quantity: 1
-  Zones: {}

-General:
-  env_seed: 69
-  individual_rewards: true
-  level_name: large
-  pomdp_r: 3
-  verbose: False
-  tests: false

+# Rules section specifies the rules governing the dynamics of the environment.
 Rules:
  # Environment Dynamics
+  # When stepping over a dirt pile, entities carry a ratio of the dirt to their next position
  EntitiesSmearDirtOnMove:
    smear_ratio: 0.2
+  # Doors automatically close after a certain number of time steps
  DoorAutoClose:
    close_frequency: 10
+  # Maintainers move at every time step
  MoveMaintainers:

  # Respawn Stuff
+  # Define how dirt should respawn after the initial spawn
  RespawnDirt:
    respawn_freq: 15
+  # Define how items should respawn after the initial spawn
  RespawnItems:
    respawn_freq: 15

  # Utilities
+  # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
+  # Can be omitted/ignored if you do not want to take care of collisions at all.
  WatchCollisions:
    done_at_collisions: false

  # Done Conditions
+  # Define the conditions for the environment to stop: either success or failure conditions.
+  # The environment stops when an agent reaches a destination
  DoneAtDestinationReach:
+  # The environment stops when all dirt is cleaned
  DoneOnAllDirtCleaned:
+  # The environment stops when a battery is discharged
  DoneAtBatteryDischarge:
+  # The environment stops when a maintainer reports a collision
  DoneAtMaintainerCollision:
+  # The environment stops after max steps
  DoneAtMaxStepsReached:
    max_steps: 500
--- a/marl_factory_grid/configs/eight_puzzle.yaml
+++ b/marl_factory_grid/configs/eight_puzzle.yaml
@ -84,6 +84,6 @@ Rules:
    # On every step, should there be a reward for agets that reach their associated destination? No!
    dest_reach_reward: 0  # Do not touch. This is usefull in other settings!
    # Reward should only be given when all destiantions are reached in parallel!
-    condition: "simultanious"
+    condition: "simultaneous"
    # Reward if this is the case. Granted to each agent when all agents are at their target position simultaniously.
    reward_at_done: 1
--- a/marl_factory_grid/configs/narrow_corridor.yaml
+++ b/marl_factory_grid/configs/narrow_corridor.yaml
@ -1,14 +1,16 @@
 General:
  # Your Seed
  env_seed: 69
-  # Individual or global rewards?
+  # Individual vs global rewards
  individual_rewards: true
-  # The level.txt file to load
+  # The level.txt file to load from marl_factory_grid/levels
  level_name: narrow_corridor
  # View Radius; 0 = full observatbility
  pomdp_r: 0
  # print all messages and events
  verbose: true
+  # Run tests
+  tests: false

 Agents:
  # Agents are identified by their name 
--- a/marl_factory_grid/configs/two_rooms_one_door.yaml
+++ b/marl_factory_grid/configs/two_rooms_one_door.yaml
@ -1,51 +1,61 @@
 General:
  env_seed: 69
+  # Individual vs global rewards
  individual_rewards: true
+  # The level.txt file to load from marl_factory_grid/levels
  level_name: two_rooms
+  # View Radius; 0 = full observability
  pomdp_r: 3
+  # Print all messages and events
  verbose: false
+  # Run tests
+  tests: false

-Entities:
-  Destinations: {}
-  Doors: {}
-  GlobalPositions: {}
-  Zones: {}
-
-Rules:
-  # Init:
-  AssignGlobalPositions: {}
-  ZoneInit: {}
-  AgentSingleZonePlacement: {}
-  IndividualDestinationZonePlacement: {}
-  # Env Rules
-  MaxStepsReached:
-    max_steps: 10
-  Collision:
-    done_at_collisions: false
-  DoorAutoClose:
-    close_frequency: 10
-
+# In the "two rooms one door" scenario, two agents spawn in two different rooms that are connected by a single door.
+# Their aim is to reach the destination in the room they did not spawn in, leading to a conflict at the door.
 Agents:
  Wolfgang:
    Actions:
-    - Move8
-    - Noop
-    - DestAction
-    - DoorUse
+      - Move8
+      - Noop
+      - DestAction
+      - DoorUse
    Observations:
-    - Walls
-    - Other
-    - Doors
-    - Destination
+      - Walls
+      - Other
+      - Doors
+      - Destination
  Sigmund:
    Actions:
-    - Move8
-    - Noop
-    - DestAction
-    - DoorUse
+      - Move8
+      - Noop
+      - DestAction
+      - DoorUse
    Observations:
-    - Combined:
-      - Other
-      - Walls
-      - Destination
-      - Doors
+      - Combined:
+          - Other
+          - Walls
+          - Destination
+          - Doors
+
+Entities:
+  Destinations: { }
+  Doors: { }
+  GlobalPositions: { }
+
+Rules:
+  # Environment Dynamics
+  DoorAutoClose:
+    close_frequency: 10
+
+  # Utilities
+  # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
+  WatchCollisions:
+    done_at_collisions: false
+
+  # Init
+  AssignGlobalPositions: { }
+
+  # Done Conditions
+  MaxStepsReached:
+    max_steps: 10