Add various RL adapted configs

2025-07-05 17:11:35 +02:00 · 2024-05-02 11:00:35 +02:00
parent 48d708bbcd
commit c7c2c4e5a3
9 changed files with 334 additions and 9 deletions
--- a/marl_factory_grid/algorithms/marl/configs/dirt_quadrant_config.yaml
+++ b/marl_factory_grid/algorithms/marl/configs/dirt_quadrant_config.yaml
@ -0,0 +1,28 @@
 agent:
  classname:           marl_factory_grid.algorithms.marl.networks.RecurrentAC
  n_agents:            1 # Same value as env.n_agents below
  obs_emb_size:        96
  action_emb_size:     16
  hidden_size_actor:   64
  hidden_size_critic:  64
  use_agent_embedding: False
 env:
  classname:          marl_factory_grid.configs.custom
  env_name:           "custom/dirt_quadrant_random_pos" # presumably resolves to the environment YAML of the same name under marl_factory_grid/configs - TODO confirm
  n_agents:           1
  max_steps:          250 # presumably the per-episode step limit - TODO confirm
  pomdp_r:            2 # NOTE(review): the dirt_quadrant_random_pos environment YAML sets pomdp_r: 0 - confirm which value takes effect
  stack_n_frames:     0
  individual_rewards: True
  train_render:       False
  eval_render:        True
 method:               marl_factory_grid.algorithms.marl.LoopSEAC
 algorithm:
  gamma:              0.99
  entropy_coef:       0.01
  vf_coef:            0.05
  n_steps:            0 # Maximum amount of experience sampled (n-step TD) before the next value and policy update is performed. Default 0: MC (Monte Carlo)
  max_steps:          80000 # presumably the total training-step budget, distinct from env.max_steps - TODO confirm
  advantage:          "Advantage-AC" # Options: "Advantage-AC", "TD-Advantage-AC", "Reinforce"
  pile-order:         "fixed" # Options: "fixed", "random", "none", "agents"
--- a/marl_factory_grid/algorithms/marl/configs/environment_changes
+++ b/marl_factory_grid/algorithms/marl/configs/environment_changes
@ -0,0 +1,3 @@
 marl_factory_grid>environment>rules.py#SpawnEntity.on_reset()
 marl_factory_grid>environment>rewards.py
 marl_factory_grid>modules>clean_up>groups.py#DirtPiles.trigger_spawn()
--- a/marl_factory_grid/algorithms/marl/configs/two_rooms_one_door_modified_config.yaml
+++ b/marl_factory_grid/algorithms/marl/configs/two_rooms_one_door_modified_config.yaml
@ -7,20 +7,22 @@ agent:
  hidden_size_critic:  64
  use_agent_embedding: False
 env:
-  classname:          marl_factory_grid.configs
+  classname:          marl_factory_grid.configs.custom
-  env_name:           "simple_crossing"
+  env_name:           "custom/two_rooms_one_door_modified_random_pos"
  n_agents:           2
  max_steps:          250
  pomdp_r:            2
  stack_n_frames:     0
  individual_rewards: True
-  train_render:       True
+  train_render:       False
  eval_render:        True
 method:               marl_factory_grid.algorithms.marl.LoopSEAC
 algorithm:
  gamma:              0.99
  entropy_coef:       0.01
-  vf_coef:            0.5
+  vf_coef:            0.05
-  n_steps:            5
+  n_steps:            0 # How much experience should be sampled at most (n-TD) until the next value and policy update is performed. Default 0: MC
-  max_steps:          10000
+  max_steps:          100000
  advantage:          "TD-Advantage-AC" # Options: "Advantage-AC", "TD-Advantage-AC", "Reinforce"
  pile-order:         "agents" # Options: "fixed", "random", "none", "agents"
--- a/marl_factory_grid/configs/custom/dirt_quadrant.yaml
+++ b/marl_factory_grid/configs/custom/dirt_quadrant.yaml
@ -0,0 +1,67 @@
 General:
  # RNG seed used to sample the same "random" numbers every time, so that different runs are comparable.
  env_seed: 69
  # Individual vs global rewards
  individual_rewards: true
  # The level.txt file to load from marl_factory_grid/levels
  level_name: quadrant
  # View radius for partial observability (POMDP)
  pomdp_r: 0 # default 3
  # Print all messages and events
  verbose: false
  # Run tests
  tests: false
 # Single-agent dirt scenario: only Wolfgang is active (Reiner is commented out) and one dirt pile is placed at (1,1).
 # Wolfgang can only move (Move4); the Clean and Noop actions are commented out.
 Agents:
  # Agent definitions
  Wolfgang:
    Actions:
      - Move4
      #- Clean
      #- Noop
    Observations:
      # - Walls
      # - Other
      - DirtPiles
      - Self
    Positions:
      - (9,1)
  #Reiner:
    #Actions:
      #- Move4
      #- Clean
      #- Noop
    #Observations:
      # - Walls
      # - Other
      #- DirtPiles
      #- Self
    #Positions:
      #- (9,8) # (9, 4)
 Entities:
  DirtPiles:
    coords_or_quantity: (1,1) # (4,7), (2,4), (1, 1) # (1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field can remove the dirt in one action
    clean_amount: 1
    dirt_spawn_r_var: 0
    max_global_amount: 12
    max_local_amount: 1
 # The Rules section specifies the rules governing the dynamics of the environment.
 Rules:
  # Utilities
  # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
  # Can be omitted/ignored if you do not want to take care of collisions at all.
  WatchCollisions:
    done_at_collisions: false
  # Done Conditions
  # Define the conditions for the environment to stop: either success or failure conditions.
  # The environment stops when all dirt is cleaned
  DoneOnAllDirtCleaned:
  #DoneAtMaxStepsReached:
    #max_steps: 200
--- a/marl_factory_grid/configs/custom/dirt_quadrant_random_pos.yaml
+++ b/marl_factory_grid/configs/custom/dirt_quadrant_random_pos.yaml
@ -0,0 +1,75 @@
 General:
  # RNG seed used to sample the same "random" numbers every time, so that different runs are comparable.
  env_seed: 69
  # Individual vs global rewards
  individual_rewards: true
  # The level.txt file to load from marl_factory_grid/levels
  level_name: quadrant
  # View radius for partial observability (POMDP)
  pomdp_r: 0 # default 3
  # Print all messages and events
  verbose: false
  # Run tests
  tests: false
 # Single-agent dirt scenario: only Wolfgang is active (Reiner is commented out) and one dirt pile is placed at (1,1).
 # Wolfgang's Positions are commented out; presumably the agent then spawns at a random position - TODO confirm.
 Agents:
  # Agent definitions
  Wolfgang:
    Actions:
      - Move4
      #- Clean
      #- Noop
    Observations:
      # - Walls
      # - Other
      - DirtPiles
      - Self
    #Positions:
      #- (9,1)
      #- (9,2)
      #- (9,3)
      #- (9,4)
      #- (9,5)
      #- (9,6)
      #- (9,7)
      #- (9,8)
      #- (9,9)
  #Reiner:
    #Actions:
      #- Move4
      #- Clean
      #- Noop
    #Observations:
      # - Walls
      # - Other
      #- DirtPiles
      #- Self
    #Positions:
      #- (9,8) # (9, 4)
 Entities:
  DirtPiles:
    coords_or_quantity: (1,1) # (4,7), (2,4), (1, 1) #(1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field can remove the dirt in one action
    clean_amount: 1
    dirt_spawn_r_var: 0
    max_global_amount: 12
    max_local_amount: 1
 # The Rules section specifies the rules governing the dynamics of the environment.
 Rules:
  # Utilities
  # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
  # Can be omitted/ignored if you do not want to take care of collisions at all.
  WatchCollisions:
    done_at_collisions: false
  # Done Conditions
  # Define the conditions for the environment to stop: either success or failure conditions.
  # The environment stops when all dirt is cleaned
  DoneOnAllDirtCleaned:
  #DoneAtMaxStepsReached: # An episode should last for at most max_steps steps
    #max_steps: 1000
--- a/marl_factory_grid/configs/custom/two_rooms_one_door_modified_random_pos.yaml
+++ b/marl_factory_grid/configs/custom/two_rooms_one_door_modified_random_pos.yaml
@ -0,0 +1,72 @@
 General:
  env_seed: 69
  # Individual vs global rewards
  individual_rewards: true
  # The level.txt file to load from marl_factory_grid/levels
  level_name: two_rooms_modified
  # View radius; 0 = full observability
  pomdp_r: 0
  # Print all messages and events
  verbose: false
  # Run tests
  tests: false
 # In the "two rooms one door" scenario, two agents spawn in two different rooms that are connected by a single door.
 # Each agent aims to reach its target in the room it did not spawn in, which leads to a conflict at the door.
 Agents:
  Wolfgang:
    Actions:
      - Move8
      - DoorUse
      - Noop
    Observations:
      - DirtPiles
      - Self
    #Positions:
      #- (1,1)
      #- (2,1)
      #- (3,1)
      #- (4,1)
      #- (5,1)
      #- (6,1)
  Sigmund:
    Actions:
      - Move8
      - DoorUse
      - Noop
    Observations:
      - DirtPiles
      - Self
    #Positions:
      #- (1,13)
      #- (2,13)
      #- (3,13)
      #- (4,13)
      #- (5,13)
      #- (6,13)
 Entities:
  DirtPiles:
    coords_or_quantity: (3,12), (3,2) # This order is required because agent 0 needs to reach (3, 12) and agent 1 needs to reach (3, 2)
    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field can remove the dirt in one action
    clean_amount: 1
    dirt_spawn_r_var: 0
    max_global_amount: 12
    max_local_amount: 1
  Doors: { }
 Rules:
  # Environment Dynamics
  DoorAutoClose:
    close_frequency: 10
  # Utilities
  # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
  WatchCollisions:
    done_at_collisions: false
  # Done Conditions (none active in this file: both rules below are commented out)
  #DoneOnAllDirtCleaned:
  #DoneAtMaxStepsReached: # Maybe required, since door blocking will result in an infinite loop
    #max_steps: 1000
--- a/marl_factory_grid/configs/dirt_quadrant.yaml
+++ b/marl_factory_grid/configs/dirt_quadrant.yaml
@ -0,0 +1,67 @@
 General:
  # RNG seed used to sample the same "random" numbers every time, so that different runs are comparable.
  env_seed: 69
  # Individual vs global rewards
  individual_rewards: true
  # The level.txt file to load from marl_factory_grid/levels
  level_name: quadrant
  # View radius for partial observability (POMDP)
  pomdp_r: 0 # default 3
  # Print all messages and events
  verbose: false
  # Run tests
  tests: false
 # Two agents (Wolfgang and Reiner), each with Move4, Clean and Noop actions, on the quadrant level with twelve
 # dirt piles at fixed coordinates.
 Agents:
  # Agent definitions
  Wolfgang:
    Actions:
      - Move4
      - Clean
      - Noop
    Observations:
      - Walls
      - Other
      - DirtPiles
      - Self
    Positions:
      - (9,1)
  Reiner:
    Actions:
      - Move4
      - Clean
      - Noop
    Observations:
      - Walls
      - Other
      - DirtPiles
      - Self
    Positions:
      - (9,8) # (9, 4)
 Entities:
  DirtPiles:
    coords_or_quantity:  (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field can remove the dirt in one action
    clean_amount: 1
    dirt_spawn_r_var: 0
    max_global_amount: 12
    max_local_amount: 1
 # The Rules section specifies the rules governing the dynamics of the environment.
 Rules:
  # Utilities
  # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
  # Can be omitted/ignored if you do not want to take care of collisions at all.
  WatchCollisions:
    done_at_collisions: false
  # Done Conditions
  # Define the conditions for the environment to stop: either success or failure conditions.
  # The environment stops when all dirt is cleaned
  DoneOnAllDirtCleaned:
  DoneAtMaxStepsReached: # Second done condition, configured with max_steps below
    max_steps: 200
--- a/marl_factory_grid/configs/two_rooms_one_door_modified.yaml
+++ b/marl_factory_grid/configs/two_rooms_one_door_modified.yaml
@ -5,7 +5,7 @@ General:
  # The level.txt file to load from marl_factory_grid/levels
  level_name: two_rooms_modified
  # View Radius; 0 = full observability
-  pomdp_r: 3
+  pomdp_r: 0
  # Print all messages and events
  verbose: false
  # Run tests
@ -26,7 +26,7 @@ Agents:
      - Doors
      - Destination
    Positions:
-      - (3,1)
+      - (3,1) # Agent spawnpoint
  Sigmund:
    Actions:
      - Move8
@ -47,7 +47,7 @@ Entities:
      SpawnDestinationsPerAgent:
        coords_or_quantity:
          Wolfgang:
-            - (3,12)
+            - (3,12) # Target coordinates
          Sigmund:
            - (3,2)
--- a/marl_factory_grid/levels/quadrant.txt
+++ b/marl_factory_grid/levels/quadrant.txt
@ -0,0 +1,11 @@
 ###########
 #---#######
 #-----#####
 #------####
 #-------###
 #--------##
 #--------##
 #---------#
 #---------#
 #---------#
 ###########