Mirror of https://github.com/illiumst/marl-factory-grid.git, synced 2025-07-08 02:21:36 +02:00
Added commentary to configs
@@ -1,11 +1,11 @@
 env:
   classname: marl_factory_grid.environment.configs.marl_eval
   env_name: "marl_eval/dirt_quadrant_eval_config"
-  n_agents: 2
-  eval_render: True
-  save_and_log: False
+  n_agents: 2 # Number of agents in the environment
+  eval_render: True # If inference should be graphically visualized
+  save_and_log: False # If configurations and potential logging files should be saved
 algorithm:
-  pile-order: "smart" # Use "dynamic" to see emergent phenomenon and "smart" to prevent it
-  pile-observability: "single" # Options: "single", "all"
-  pile_all_done: "shared" # Options: "single", "all" ("single" for training, "all" for eval), "shared"
+  pile-order: "smart" # Triggers our emergence prevention mechanism: agents also consider the distance to the other agent
+  pile-observability: "single" # Agents can only perceive one dirt pile at any given time step
+  pile_all_done: "shared" # Indicates that agents don't have to collect the same dirt piles
+  auxiliary_piles: False # Dirt quadrant does not use this option
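A minimal Python sketch of what the two pile-order strategies described above (and contrasted in the next config) might look like; the helper names, coordinate tuples and tie-breaking rule are illustrative assumptions, not the actual marl_factory_grid implementation.

# Illustrative sketch only: names and data structures are assumed for this note.
from typing import List, Tuple

Coord = Tuple[int, int]

def manhattan(a: Coord, b: Coord) -> int:
    return abs(a[0] - b[0]) + abs(a[1] - b[1])

def next_pile_dynamic(agent_pos: Coord, piles: List[Coord]) -> Coord:
    # "dynamic": each agent greedily targets its own nearest pile,
    # which can make both agents chase the same pile.
    return min(piles, key=lambda p: manhattan(agent_pos, p))

def next_pile_smart(agent_pos: Coord, other_pos: Coord, piles: List[Coord]) -> Coord:
    # "smart": also consider the other agent's distance and prefer piles
    # that this agent can reach sooner than its teammate.
    def score(p: Coord) -> Tuple[int, int]:
        own, other = manhattan(agent_pos, p), manhattan(other_pos, p)
        return (0 if own <= other else 1, own)  # piles we are closer to come first
    return min(piles, key=score)

if __name__ == "__main__":
    piles = [(1, 1), (2, 4), (9, 1)]
    print(next_pile_dynamic((2, 2), piles))        # (1, 1): nearest pile, regardless of the teammate
    print(next_pile_smart((2, 2), (1, 2), piles))  # (2, 4): defers (1, 1) to the closer agent

Under these assumptions, "dynamic" can send both agents to the same nearest pile (the emergent blocking the configs demonstrate), while "smart" defers a pile to whichever agent is closer.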
@@ -1,11 +1,12 @@
+# Configuration that shows emergent behavior in our dirt-quadrant environment
 env:
   classname: marl_factory_grid.environment.configs.marl_eval
   env_name: "marl_eval/dirt_quadrant_eval_config"
-  n_agents: 2
-  eval_render: True
-  save_and_log: False
+  n_agents: 2 # Number of agents in the environment
+  eval_render: True # If inference should be graphically visualized
+  save_and_log: False # If configurations and potential logging files should be saved
 algorithm:
-  pile-order: "dynamic" # Use "dynamic" to see emergent phenomenon and "smart" to prevent it
-  pile-observability: "single" # Options: "single", "all"
-  pile_all_done: "shared" # Options: "single", "all" ("single" for training, "all" for eval), "shared"
+  pile-order: "dynamic" # Agents decide on the next target pile based only on the distance to the respective piles
+  pile-observability: "single" # Agents can only perceive one dirt pile at any given time step
+  pile_all_done: "shared" # Indicates that agents don't have to collect the same dirt piles
+  auxiliary_piles: False # Dirt quadrant does not use this option
@@ -1,13 +1,15 @@
 env:
   classname: marl_factory_grid.environment.configs.marl_eval
   env_name: "marl_eval/two_rooms_eval_config"
-  n_agents: 2
-  eval_render: True
-  save_and_log: False
+  n_agents: 2 # Number of agents in the environment
+  eval_render: True # If inference should be graphically visualized
+  save_and_log: False # If configurations and potential logging files should be saved
 algorithm:
-  pile-order: "agents" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
-  pile-observability: "single" # Options: "single", "all"
-  pile_all_done: "distributed" # Options: "single", "all" ("single" for training, "all" and "distributed" for eval)
-  auxiliary_piles: True # Use True to see emergent phenomenon and False to prevent it
+  # Piles (=encoded flags) are evenly distributed among the two agents and have to be collected in the order defined
+  # by the environment config (cf. coords_or_quantity)
+  pile-order: "agents"
+  pile-observability: "single" # Agents can only perceive one dirt pile at any given time step
+  pile_all_done: "distributed" # Indicates that agents must clean their specifically assigned dirt piles
+  auxiliary_piles: True # Allows agents to go to an auxiliary pile
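A rough sketch of the "distributed" termination mode described in the comments above, under an assumed data layout; the function and dictionary structure are hypothetical, not the repository's code.

# Sketch: with pile_all_done = "distributed", each agent is assigned its own
# subset of piles and the episode ends once every agent has cleaned its own subset.
from typing import Dict, List, Set, Tuple

Coord = Tuple[int, int]

def episode_done_distributed(assigned: Dict[str, List[Coord]],
                             cleaned: Dict[str, Set[Coord]]) -> bool:
    # Done when every agent has cleaned all piles assigned to it;
    # agents do not have to visit piles assigned to the other agent.
    return all(set(piles) <= cleaned.get(agent, set())
               for agent, piles in assigned.items())

assigned = {"agent_1": [(3, 1), (2, 1)], "agent_2": [(3, 13), (2, 13)]}
cleaned = {"agent_1": {(3, 1), (2, 1)}, "agent_2": {(3, 13)}}
print(episode_done_distributed(assigned, cleaned))  # False: agent_2 still has (2, 13) left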
@@ -1,13 +1,16 @@
+# Configuration that shows emergent behavior in our two-rooms environment
 env:
   classname: marl_factory_grid.environment.configs.marl_eval
   env_name: "marl_eval/two_rooms_eval_config_emergent"
-  n_agents: 2
-  eval_render: True
-  save_and_log: False
+  n_agents: 2 # Number of agents in the environment
+  eval_render: True # If inference should be graphically visualized
+  save_and_log: False # If configurations and potential logging files should be saved
 algorithm:
-  pile-order: "agents" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
-  pile-observability: "single" # Options: "single", "all"
-  pile_all_done: "distributed" # Options: "single", "all" ("single" for training, "all" and "distributed" for eval)
-  auxiliary_piles: False # Use True to see emergent phenomenon and False to prevent it
+  # Piles (=encoded flags) are evenly distributed among the two agents and have to be collected in the order defined
+  # by the environment config (cf. coords_or_quantity)
+  pile-order: "agents"
+  pile-observability: "single" # Agents can only perceive one dirt pile at any given time step
+  pile_all_done: "distributed" # Indicates that agents must clean their specifically assigned dirt piles
+  auxiliary_piles: False # Shows emergent behavior
@@ -1,12 +1,12 @@
 env:
   classname: marl_factory_grid.environment.configs.rl
   env_name: "rl/dirt_quadrant_agent1_eval_config"
-  n_agents: 1
-  eval_render: True
-  save_and_log: False
+  n_agents: 1 # Number of agents in the environment
+  eval_render: True # If inference should be graphically visualized
+  save_and_log: False # If configurations and potential logging files should be saved
 algorithm:
-  pile-order: "fixed" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
-  pile-observability: "single" # Options: "single", "all"
-  pile_all_done: "all" #
+  pile-order: "fixed" # Clean dirt piles in a fixed order specified by the environment config (cf. coords_or_quantity)
+  pile-observability: "single" # Agent can only perceive one dirt pile at any given time step
+  pile_all_done: "all" # During inference the episode ends only when all dirt piles are cleaned
+  auxiliary_piles: False # Dirt quadrant does not use this option
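A small sketch of how the "single" (training) and "all" (inference) settings of pile_all_done could differ in the episode-termination check; the data layout and function name are assumptions for illustration, not the repository's code.

# Sketch only: "single" ends the episode once the current target pile is cleaned,
# "all" keeps it running until every pile in the environment is cleaned.
from typing import List, Set, Tuple

Coord = Tuple[int, int]

def episode_done(mode: str, target: Coord, cleaned: Set[Coord], piles: List[Coord]) -> bool:
    if mode == "single":   # used during training
        return target in cleaned
    if mode == "all":      # used during inference/eval
        return set(piles) <= cleaned
    raise ValueError(f"unknown pile_all_done mode: {mode}")

piles = [(9, 1), (1, 1), (2, 4)]
print(episode_done("single", (9, 1), {(9, 1)}, piles))  # True: current target is cleaned
print(episode_done("all", (9, 1), {(9, 1)}, piles))     # False: two piles remain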
@@ -1,17 +1,17 @@
 env:
   classname: marl_factory_grid.environment.configs.rl
   env_name: "rl/dirt_quadrant_agent1_train_config"
-  n_agents: 1
-  train_render: False
-  save_and_log: True
+  n_agents: 1 # Number of agents in the environment
+  train_render: False # If training should be graphically visualized
+  save_and_log: True # If configurations and potential logging files should be saved
 algorithm:
-  gamma: 0.99
-  n_steps: 0 # How much experience should be sampled at most (n-TD) until the next value and policy update is performed. Default 0: MC
-  chunk-episode: 20000 # Chunk size. (0 = update networks with full episode at once)
-  max_steps: 140000
-  advantage: "Advantage-AC" # Options: "Advantage-AC", "TD-Advantage-AC", "Reinforce"
-  pile-order: "fixed" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
-  pile-observability: "single" # Options: "single", "all"
-  pile_all_done: "single" # Options: "single", "all" ("single" for training, "all" for eval)
+  gamma: 0.99 # The gamma value that is used as discounting factor
+  n_steps: 0 # How much experience should be sampled at most until the next value- and policy-net updates are performed. (0 = Monte Carlo)
+  chunk-episode: 20000 # For the update, splits very large episodes into batches of approximately equal size. (0 = update networks with the full episode at once)
+  max_steps: 140000 # Number of training steps used for agent1 (= agent2)
+  advantage: "Advantage-AC" # Defines the used actor-critic model
+  pile-order: "fixed" # Clean dirt piles in a fixed order specified by the environment config (cf. coords_or_quantity)
+  pile-observability: "single" # Agent can only perceive one dirt pile at any given time step
+  pile_all_done: "single" # Episode ends when the current target pile is cleaned
+  auxiliary_piles: False # Dirt quadrant does not use this option
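The chunk-episode comment describes splitting very long episodes into batches of roughly equal size before the network update; a minimal sketch of such a split (assumed logic, not the repository's actual code):

# Sketch: split an episode of transitions into batches of approximately equal size,
# each no longer than chunk_size; chunk_size = 0 means "use the full episode at once".
import math
from typing import List, Sequence

def chunk_episode(episode: Sequence, chunk_size: int) -> List[Sequence]:
    if chunk_size <= 0 or len(episode) <= chunk_size:
        return [episode]
    n_chunks = math.ceil(len(episode) / chunk_size)
    per_chunk = math.ceil(len(episode) / n_chunks)  # balances chunk lengths
    return [episode[i:i + per_chunk] for i in range(0, len(episode), per_chunk)]

print([len(c) for c in chunk_episode(list(range(50000)), 20000)])  # [16667, 16667, 16666]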
@@ -1,13 +1,13 @@
 env:
   classname: marl_factory_grid.environment.configs.rl
   env_name: "rl/two_rooms_eval_config"
-  n_agents: 1
-  eval_render: True
-  save_and_log: False
+  n_agents: 1 # Number of agents in the environment
+  eval_render: True # If inference should be graphically visualized
+  save_and_log: False # If configurations and potential logging files should be saved
 algorithm:
-  pile-order: "fixed" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
-  pile-observability: "single" # Options: "single", "all"
-  pile_all_done: "all" # Options: "single", "all" ("single" for training, "all" for eval)
-  auxiliary_piles: False # Auxiliary piles are only used during marl eval
+  pile-order: "fixed" # Clean dirt piles (=encoded flags) in a fixed order specified by the environment config (cf. coords_or_quantity)
+  pile-observability: "single" # Agent can only perceive one dirt pile at any given time step
+  pile_all_done: "all" # During inference the episode ends only when all dirt piles are cleaned
+  auxiliary_piles: False # Auxiliary piles are only differentiated from regular target piles during marl eval
@@ -1,17 +1,17 @@
 env:
   classname: marl_factory_grid.environment.configs.rl
-  n_agents: 1
-  train_render: False
-  save_and_log: True
+  n_agents: 1 # Number of agents in the environment
+  train_render: False # If training should be graphically visualized
+  save_and_log: True # If configurations and potential logging files should be saved
 algorithm:
-  gamma: 0.99
-  n_steps: 0 # How much experience should be sampled at most (n-TD) until the next value and policy update is performed. Default 0: MC
-  chunk-episode: 20000 # Chunk size. (0 = update networks with full episode at once)
-  max_steps: 260000
-  advantage: "Advantage-AC" # Options: "Advantage-AC", "TD-Advantage-AC", "Reinforce"
-  pile-order: "fixed" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
-  pile-observability: "single" # Options: "single", "all"
-  pile_all_done: "single" # Options: "single", "all" ("single" for training, "all" for eval)
-  auxiliary_piles: False # Auxiliary piles are only used during marl eval
+  gamma: 0.99 # The gamma value that is used as discounting factor
+  n_steps: 0 # How much experience should be sampled at most until the next value- and policy-net updates are performed. (0 = Monte Carlo)
+  chunk-episode: 20000 # For the update, splits very large episodes into batches of approximately equal size. (0 = update networks with the full episode at once)
+  max_steps: 260000 # Number of training steps used to train the agent. Here, only a placeholder value
+  advantage: "Advantage-AC" # Defines the used actor-critic model
+  pile-order: "fixed" # Clean dirt piles (=encoded flags) in a fixed order specified by the environment config (cf. coords_or_quantity)
+  pile-observability: "single" # Agent can only perceive one dirt pile at any given time step
+  pile_all_done: "single" # Episode ends when the current target pile is cleaned
+  auxiliary_piles: False # Auxiliary piles are only differentiated from regular target piles during marl eval
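Since n_steps: 0 means full Monte Carlo targets, the discounted-return computation implied by gamma: 0.99 looks roughly like the following sketch (illustrative only, not the repository's training loop):

# Sketch: with n_steps = 0 the value/policy targets are full Monte Carlo returns
# G_t = r_t + gamma * r_{t+1} + gamma^2 * r_{t+2} + ... computed backwards over the episode.
from typing import List

def monte_carlo_returns(rewards: List[float], gamma: float = 0.99) -> List[float]:
    returns, g = [], 0.0
    for r in reversed(rewards):
        g = r + gamma * g
        returns.append(g)
    return list(reversed(returns))

print(monte_carlo_returns([0.0, 0.0, 1.0]))  # approximately [0.9801, 0.99, 1.0]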
@@ -21,7 +21,7 @@ Agents:
     Observations:
       - DirtPiles
       - Self
-    Positions:
+    Positions: # Each spawnpoint is mapped to one dirt pile looping over coords_or_quantity (see below)
       - (9,1)
       - (1,1)
       - (2,4)
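The Positions comment states that each spawnpoint is mapped to one dirt pile by looping over coords_or_quantity; one plausible reading of that mapping, with the pairing rule and example coordinates assumed for illustration:

# Sketch: pair the i-th spawnpoint with the i-th entry of coords_or_quantity,
# wrapping around (looping) when there are more spawnpoints than piles.
from typing import List, Tuple

Coord = Tuple[int, int]

def map_spawn_to_pile(spawnpoints: List[Coord],
                      coords_or_quantity: List[Coord]) -> List[Tuple[Coord, Coord]]:
    return [(spawn, coords_or_quantity[i % len(coords_or_quantity)])
            for i, spawn in enumerate(spawnpoints)]

spawnpoints = [(9, 1), (1, 1), (2, 4)]
piles = [(1, 1), (2, 4)]  # hypothetical coords_or_quantity entries
print(map_spawn_to_pile(spawnpoints, piles))
# [((9, 1), (1, 1)), ((1, 1), (2, 4)), ((2, 4), (1, 1))]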
@@ -21,9 +21,9 @@ Agents:
     Observations:
       - DirtPiles
       - Self
-    Positions:
+    Positions: # Each spawnpoint is mapped to one dirt pile looping over coords_or_quantity (see below)
       - (3,1)
-      - (2,1)
+      - (2,1) # spawnpoint only required if agent1 should go to its auxiliary pile
 
 Entities:
   DirtPiles:
@@ -20,7 +20,7 @@ Agents:
     Observations:
      - DirtPiles
      - Self
-    Positions:
+    Positions: # Each spawnpoint is mapped to one dirt pile looping over coords_or_quantity (see below)
      - (3,1)
      - (1,1)
      - (3,1)
@@ -21,9 +21,9 @@ Agents:
     Observations:
       - DirtPiles
       - Self
-    Positions:
+    Positions: # Each spawnpoint is mapped to one dirt pile looping over coords_or_quantity (see below)
       - (3,13)
-      - (2,13)
+      - (2,13) # spawnpoint only required if agent2 should go to its auxiliary pile
 
 Entities:
   DirtPiles:
@@ -20,7 +20,7 @@ Agents:
     Observations:
      - DirtPiles
      - Self
-    Positions:
+    Positions: # Each spawnpoint is mapped to one dirt pile looping over coords_or_quantity (see below)
      - (3,13)
      - (2,13)
      - (1,13)