Mirror of https://github.com/illiumst/marl-factory-grid.git, synced 2025-07-08 02:21:36 +02:00
Reworked configurations
@@ -4,7 +4,9 @@ env:
n_agents: 2 # Number of agents in the environment
eval_render: True # If inference should be graphically visualized
save_and_log: False # If configurations and potential logging files should be saved
wandb_log: False # If metrics for evaluation steps should be logged with weights&biases
algorithm:
seed: 42 # Picks seed to make random parts of algorithm reproducible. -1 for random seed
pile-order: "smart" # Triggers implementation of our emergence prevention mechanism. Agents consider distance to other agent
pile-observability: "single" # Agents can only perceive one dirt pile at any given time step
pile_all_done: "shared" # Indicates that agents don't have to collect the same dirt piles
@@ -5,7 +5,9 @@ env:
n_agents: 2 # Number of agents in the environment
eval_render: True # If inference should be graphically visualized
save_and_log: False # If configurations and potential logging files should be saved
wandb_log: False # If metrics for evaluation steps should be logged with weights&biases
algorithm:
seed: 42 # Picks seed to make random parts of algorithm reproducible. -1 for random seed
pile-order: "dynamic" # Agents only decide on next target pile based on the distance to the respective piles
pile-observability: "single" # Agents can only perceive one dirt pile at any given time step
pile_all_done: "shared" # Indicates that agents don't have to collect the same dirt piles
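The two evaluation configs above differ only in pile-order: "dynamic" picks the next target pile purely by the agent's own distance, while "smart" additionally takes the other agent's distance into account (the emergence prevention mechanism). The sketch below is a minimal illustration of that distinction; the helper names and the exact scoring are assumptions, not the code of marl-factory-grid.

```python
from typing import List, Tuple

Coord = Tuple[int, int]

def manhattan(a: Coord, b: Coord) -> int:
    # Grid distance as a stand-in for the real path cost.
    return abs(a[0] - b[0]) + abs(a[1] - b[1])

def select_target_pile(own_pos: Coord, other_pos: Coord,
                       piles: List[Coord], order: str) -> Coord:
    # Illustrative sketch only -- not the marl-factory-grid implementation.
    if order == "dynamic":
        # Pick the pile closest to this agent only.
        return min(piles, key=lambda p: manhattan(own_pos, p))
    if order == "smart":
        # Additionally prefer piles the other agent is NOT closer to,
        # so both agents do not converge on the same pile.
        return min(piles, key=lambda p: manhattan(own_pos, p) - manhattan(other_pos, p))
    raise ValueError(f"unknown pile-order: {order}")

# Example: with piles at (1, 1) and (5, 8), "smart" steers this agent away
# from the pile its teammate at (5, 7) is about to reach.
print(select_target_pile((2, 2), (5, 7), [(1, 1), (5, 8)], "smart"))  # (1, 1)
```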
@@ -3,8 +3,10 @@ env:
env_name: "marl_eval/two_rooms_eval_config"
n_agents: 2 # Number of agents in the environment
eval_render: True # If inference should be graphically visualized
-save_and_log: False # If configurations and potential logging files should be saved
+save_and_log: True # If configurations and potential logging files should be saved
wandb_log: False # If metrics for evaluation steps should be logged with weights&biases
algorithm:
seed: 42 # Picks seed to make random parts of algorithm reproducible. -1 for random seed
# Piles (=encoded flags) are evenly distributed among the two agents and have to be collected in the order defined
# by the environment config (cf. coords_or_quantity)
pile-order: "agents"
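With pile-order: "agents", the piles (encoded flags) are distributed evenly among the two agents and collected in the order defined in the environment config. One plausible way to realise such an even, order-preserving split is sketched below with hypothetical names; it is not the package's actual loader.

```python
from typing import Dict, List, Tuple

Coord = Tuple[int, int]

def assign_piles_to_agents(piles: List[Coord], n_agents: int) -> Dict[int, List[Coord]]:
    """Distribute piles evenly over agents, keeping the order from coords_or_quantity."""
    # Illustrative sketch only -- not the marl-factory-grid implementation.
    assignment: Dict[int, List[Coord]] = {i: [] for i in range(n_agents)}
    for idx, pile in enumerate(piles):
        assignment[idx % n_agents].append(pile)
    return assignment

# Piles taken in config order; agent 0 gets every even-indexed pile, agent 1 the odd ones.
piles = [(9, 9), (4, 7), (2, 4), (1, 3)]
print(assign_piles_to_agents(piles, n_agents=2))
# {0: [(9, 9), (2, 4)], 1: [(4, 7), (1, 3)]}
```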
@@ -5,7 +5,9 @@ env:
n_agents: 2 # Number of agents in the environment
eval_render: True # If inference should be graphically visualized
save_and_log: False # If configurations and potential logging files should be saved
wandb_log: False # If metrics for evaluation steps should be logged with weights&biases
algorithm:
seed: 42 # Picks seed to make random parts of algorithm reproducible. -1 for random seed
# Piles (=encoded flags) are evenly distributed among the two agents and have to be collected in the order defined
# by the environment config (cf. coords_or_quantity)
pile-order: "agents"
@@ -4,7 +4,9 @@ env:
n_agents: 1 # Number of agents in the environment
eval_render: True # If inference should be graphically visualized
save_and_log: False # If configurations and potential logging files should be saved
wandb_log: False # If metrics for evaluation steps should be logged with weights&biases
algorithm:
seed: 42 # Picks seed to make random parts of algorithm reproducible. -1 for random seed
pile-order: "fixed" # Clean dirt piles in a fixed order specified by the environment config (cf. coords_or_quantity)
pile-observability: "single" # Agent can only perceive one dirt pile at any given time step
pile_all_done: "all" # During inference the episode ends only when all dirt piles are cleaned
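pile-order: "fixed" together with pile_all_done: "all" means the single agent has to work through the dirt piles in exactly the order listed under coords_or_quantity, and the evaluation episode only ends once every pile is cleaned. The following is a minimal, purely illustrative sketch of that bookkeeping (class and method names are assumptions).

```python
from typing import List, Optional, Tuple

Coord = Tuple[int, int]

class FixedPileOrder:
    """Advance through the piles in the order given by coords_or_quantity."""
    # Illustrative sketch only -- not the marl-factory-grid implementation.

    def __init__(self, piles: List[Coord]):
        self.piles = piles
        self.next_idx = 0

    def current_target(self) -> Optional[Coord]:
        # The pile the agent should navigate to next; None once everything is cleaned,
        # which with pile_all_done: "all" is also the episode-termination condition.
        if self.next_idx < len(self.piles):
            return self.piles[self.next_idx]
        return None

    def notify_cleaned(self, pos: Coord) -> None:
        # Only cleaning the *current* pile advances the fixed order.
        if pos == self.current_target():
            self.next_idx += 1

order = FixedPileOrder([(9, 9), (4, 7), (2, 4), (1, 3)])
order.notify_cleaned((9, 9))
print(order.current_target())  # (4, 7)
```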
@@ -4,11 +4,16 @@ env:
n_agents: 1 # Number of agents in the environment
train_render: False # If training should be graphically visualized
save_and_log: True # If configurations and potential logging files should be saved
wandb_log: True # If metrics for training steps should be logged with weights&biases
algorithm:
seed: 9 # Picks seed to make random parts of algorithm reproducible. -1 for random seed
gamma: 0.99 # The gamma value that is used as discounting factor
n_steps: 0 # How much experience should be sampled at most until the next value- and policy-net updates are performed. (0 = Monte Carlo)
chunk-episode: 20000 # For update, splits very large episodes in batches of approximately equal size. (0 = update networks with full episode at once)
-max_steps: 140000 # Number of training steps used for agent1 (=agent2)
+max_steps: 400000 # Number of training steps used for agent1 (=agent2)
early_stopping: True # If the early stopping functionality should be used
last_n_episodes: 100 # To determine if low change phase has begun, the last n episodes are checked if the mean target change is reached
mean_target_change: 2.0 # What should be the accepted fluctuation for determining if a low change phase has begun
advantage: "Advantage-AC" # Defines the used actor critic model
pile-order: "fixed" # Clean dirt piles in a fixed order specified by the environment config (cf. coords_or_quantity)
pile-observability: "single" # Agent can only perceive one dirt pile at any given time step
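In the training config above, n_steps: 0 selects Monte Carlo updates (full-episode returns), and chunk-episode: 20000 caps the batch size by splitting very long episodes into batches of approximately equal size before the value- and policy-net updates. Below is a small sketch of such a chunking helper, assuming an episode is simply a sequence of transitions; the function name is hypothetical.

```python
from typing import List, Sequence, TypeVar

T = TypeVar("T")

def chunk_episode(episode: Sequence[T], chunk_size: int) -> List[Sequence[T]]:
    """Split an episode into batches of approximately equal size (each <= chunk_size)."""
    # Illustrative sketch only -- not the marl-factory-grid implementation.
    if chunk_size <= 0 or len(episode) <= chunk_size:
        return [episode]                       # 0 -> update networks with the full episode at once
    n_chunks = -(-len(episode) // chunk_size)  # ceiling division
    per_chunk = -(-len(episode) // n_chunks)   # balance chunks instead of leaving one tiny remainder
    return [episode[i:i + per_chunk] for i in range(0, len(episode), per_chunk)]

# A 50,000-step episode with chunk-episode: 20000 becomes three roughly equal batches.
print([len(c) for c in chunk_episode(list(range(50_000)), 20_000)])  # [16667, 16667, 16666]
```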
@@ -1,10 +1,12 @@
env:
classname: marl_factory_grid.environment.configs.rl
-env_name: "rl/two_rooms_eval_config"
+env_name: "rl/two_rooms_agent2_eval_config"
n_agents: 1 # Number of agents in the environment
eval_render: True # If inference should be graphically visualized
save_and_log: False # If configurations and potential logging files should be saved
wandb_log: False # If metrics for evaluation steps should be logged with weights&biases
algorithm:
seed: 42 # Picks seed to make random parts of algorithm reproducible. -1 for random seed
pile-order: "fixed" # Clean dirt piles (=encoded flags) in a fixed order specified by the environment config (cf. coords_or_quantity)
pile-observability: "single" # Agent can only perceive one dirt pile at any given time step
pile_all_done: "all" # During inference the episode ends only when all dirt piles are cleaned
@@ -3,11 +3,16 @@ env:
n_agents: 1 # Number of agents in the environment
train_render: False # If training should be graphically visualized
save_and_log: True # If configurations and potential logging files should be saved
wandb_log: True # If metrics for training steps should be logged with weights&biases
algorithm:
seed: 9 # Picks seed to make random parts of algorithm reproducible. -1 for random seed
gamma: 0.99 # The gamma value that is used as discounting factor
n_steps: 0 # How much experience should be sampled at most until the next value- and policy-net updates are performed. (0 = Monte Carlo)
chunk-episode: 20000 # For update, splits very large episodes in batches of approximately equal size. (0 = update networks with full episode at once)
-max_steps: 260000 # Number of training steps used to train the agent. Here, only a placeholder value
+max_steps: 300000 # Number of training steps used to train the agent. Here, only a placeholder value
early_stopping: True # If the early stopping functionality should be used
last_n_episodes: 100 # To determine if low change phase has begun, the last n episodes are checked if the mean target change is reached
mean_target_change: 2.0 # What should be the accepted fluctuation for determining if a low change phase has begun
advantage: "Advantage-AC" # Defines the used actor critic model
pile-order: "fixed" # Clean dirt piles (=encoded flags) in a fixed order specified by the environment config (cf. coords_or_quantity)
pile-observability: "single" # Agent can only perceive one dirt pile at any given time step
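Both training configs enable early_stopping with last_n_episodes: 100 and mean_target_change: 2.0: training may stop early once a "low change" phase begins, i.e. the mean change of the monitored quantity over the last 100 episodes stays within the accepted fluctuation of 2.0. A hedged sketch of such a check follows; the monitored quantity (a per-episode return here) and the function name are assumptions.

```python
from typing import List

def low_change_phase(history: List[float], last_n_episodes: int = 100,
                     mean_target_change: float = 2.0) -> bool:
    """True once the mean absolute change over the last n episodes is within the tolerance."""
    # Illustrative sketch only -- not the marl-factory-grid implementation.
    if len(history) < last_n_episodes + 1:
        return False  # not enough episodes yet to judge
    window = history[-(last_n_episodes + 1):]
    changes = [abs(b - a) for a, b in zip(window, window[1:])]
    return sum(changes) / len(changes) <= mean_target_change

# Example: an essentially flat return curve would trigger early stopping.
flat = [100.0 + 0.5 * (i % 2) for i in range(200)]
print(low_change_phase(flat))  # True
```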
@@ -36,12 +36,13 @@ Agents:

Entities:
DirtPiles:
-coords_or_quantity: (9,9), (7,9), (4,7), (2,4), (1, 1)
+coords_or_quantity: (9, 9), (4, 7), (2, 4), (1, 3)
initial_amount: 0.5
clean_amount: 1
dirt_spawn_r_var: 0
max_global_amount: 12
max_local_amount: 1
randomize: False

# Rules section specifies the rules governing the dynamics of the environment.
Rules:
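In this DirtPiles entry, coords_or_quantity is a list of explicit coordinates rather than a count, and each pile starts with initial_amount: 0.5 bounded by max_local_amount: 1. The sketch below shows one way such a value could be parsed and turned into piles; it is purely illustrative and not the package's actual config loader.

```python
import re
from typing import Dict, List, Tuple

Coord = Tuple[int, int]

def parse_coords(value: str) -> List[Coord]:
    """Turn '(9, 9), (4, 7), (2, 4), (1, 3)' into a list of coordinate tuples."""
    # Illustrative sketch only -- not the marl-factory-grid implementation.
    return [(int(x), int(y)) for x, y in re.findall(r"\((\s*\d+)\s*,\s*(\d+)\s*\)", value)]

def spawn_piles(value: str, initial_amount: float = 0.5,
                max_local_amount: float = 1.0) -> Dict[Coord, float]:
    # Each listed coordinate gets one pile, clipped to the per-cell maximum.
    return {c: min(initial_amount, max_local_amount) for c in parse_coords(value)}

print(spawn_piles("(9, 9), (4, 7), (2, 4), (1, 3)"))
# {(9, 9): 0.5, (4, 7): 0.5, (2, 4): 0.5, (1, 3): 0.5}
```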
@@ -21,18 +21,13 @@ Agents:
- DirtPiles
- Self
Positions: # Each spawnpoint is mapped to one dirt pile looping over coords_or_quantity (see below)
- (3,1)
- (1,1)
- (3,1)
- (5,1)
- (3,1)
- (1,8)
- (3,1)
- (5,8)
- (2,1)
- (1,1)

Entities:
DirtPiles:
-coords_or_quantity: (2,1), (3,12) # Locations of dirt piles
+coords_or_quantity: (3,12) # Locations of dirt piles
initial_amount: 0.5
clean_amount: 1
dirt_spawn_r_var: 0
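The Positions comment states that each spawnpoint is mapped to one dirt pile by looping over coords_or_quantity. With more spawnpoints than piles, the most natural reading is that the loop simply wraps around; the following minimal sketch assumes that modulo interpretation and is not the package's code.

```python
from typing import List, Tuple

Coord = Tuple[int, int]

def map_spawnpoints_to_piles(spawnpoints: List[Coord],
                             piles: List[Coord]) -> List[Tuple[Coord, Coord]]:
    """Pair every spawnpoint with a pile, cycling through coords_or_quantity."""
    # Illustrative sketch only -- not the marl-factory-grid implementation.
    return [(sp, piles[i % len(piles)]) for i, sp in enumerate(spawnpoints)]

# With a single remaining pile at (3,12), every spawnpoint maps to it.
spawnpoints = [(3, 1), (1, 1), (3, 1), (5, 1)]
print(map_spawnpoints_to_piles(spawnpoints, [(3, 12)]))
# [((3, 1), (3, 12)), ((1, 1), (3, 12)), ((3, 1), (3, 12)), ((5, 1), (3, 12))]
```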
@@ -23,11 +23,10 @@ Agents:
- Self
Positions: # Each spawnpoint is mapped to one dirt pile looping over coords_or_quantity (see below)
- (3,13)
- (2,13) # spawnpoint only required if agent2 should go to its auxiliary pile

Entities:
DirtPiles:
-coords_or_quantity: (2,13), (3,2) # Locations of dirt piles
+coords_or_quantity: (3,2) # Locations of dirt piles
initial_amount: 0.5
clean_amount: 1
dirt_spawn_r_var: 0
@@ -22,17 +22,10 @@ Agents:
- Self
Positions: # Each spawnpoint is mapped to one dirt pile looping over coords_or_quantity (see below)
- (3,13)
- (2,13)
- (1,13)
- (3,13)
- (1,8)
- (2,6)
- (3,10)
- (4,6)

Entities:
DirtPiles:
-coords_or_quantity: (2,13), (3,2) # Locations of dirt piles
+coords_or_quantity: (3,2) # Locations of dirt piles
initial_amount: 0.5
clean_amount: 1
dirt_spawn_r_var: 0