From c7c2c4e5a3445e747472bc3a6a715783b12e097d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Julian=20Sch=C3=B6nberger?= <schoenbergerj@cip.ifi.lmu.de>
Date: Thu, 2 May 2024 11:00:35 +0200
Subject: [PATCH] Add various RL adapted configs

---
 .../marl/configs/dirt_quadrant_config.yaml    | 28 +++++++
 .../marl/configs/environment_changes          |  3 +
 .../two_rooms_one_door_modified_config.yaml}  | 14 ++--
 .../configs/custom/dirt_quadrant.yaml         | 67 +++++++++++++++++
 .../custom/dirt_quadrant_random_pos.yaml      | 75 +++++++++++++++++++
 ...wo_rooms_one_door_modified_random_pos.yaml | 72 ++++++++++++++++++
 marl_factory_grid/configs/dirt_quadrant.yaml  | 67 +++++++++++++++++
 .../configs/two_rooms_one_door_modified.yaml  |  6 +-
 marl_factory_grid/levels/quadrant.txt         | 11 +++
 9 files changed, 334 insertions(+), 9 deletions(-)
 create mode 100644 marl_factory_grid/algorithms/marl/configs/dirt_quadrant_config.yaml
 create mode 100644 marl_factory_grid/algorithms/marl/configs/environment_changes
 rename marl_factory_grid/algorithms/marl/{example_config.yaml => configs/two_rooms_one_door_modified_config.yaml} (50%)
 create mode 100644 marl_factory_grid/configs/custom/dirt_quadrant.yaml
 create mode 100644 marl_factory_grid/configs/custom/dirt_quadrant_random_pos.yaml
 create mode 100644 marl_factory_grid/configs/custom/two_rooms_one_door_modified_random_pos.yaml
 create mode 100644 marl_factory_grid/configs/dirt_quadrant.yaml
 create mode 100644 marl_factory_grid/levels/quadrant.txt

diff --git a/marl_factory_grid/algorithms/marl/configs/dirt_quadrant_config.yaml b/marl_factory_grid/algorithms/marl/configs/dirt_quadrant_config.yaml
new file mode 100644
index 0000000..6668f55
--- /dev/null
+++ b/marl_factory_grid/algorithms/marl/configs/dirt_quadrant_config.yaml
@@ -0,0 +1,28 @@
+agent:
+  classname:           marl_factory_grid.algorithms.marl.networks.RecurrentAC
+  n_agents:            1
+  obs_emb_size:        96
+  action_emb_size:     16
+  hidden_size_actor:   64
+  hidden_size_critic:  64
+  use_agent_embedding: False
+env:
+  classname:          marl_factory_grid.configs.custom
+  env_name:           "custom/dirt_quadrant_random_pos"
+  n_agents:           1
+  max_steps:          250
+  pomdp_r:            2
+  stack_n_frames:     0
+  individual_rewards: True
+  train_render:       False
+  eval_render:        True
+method:               marl_factory_grid.algorithms.marl.LoopSEAC
+algorithm:
+  gamma:              0.99
+  entropy_coef:       0.01
+  vf_coef:            0.05
+  n_steps:            0 # How much experience should be sampled at most (n-TD) until the next value and policy update is performed. Default 0: MC
+  max_steps:          80000
+  advantage:          "Advantage-AC" # Options: "Advantage-AC", "TD-Advantage-AC", "Reinforce"
+  pile-order:         "fixed" # Options: "fixed", "random", "none", "agents"
+
diff --git a/marl_factory_grid/algorithms/marl/configs/environment_changes b/marl_factory_grid/algorithms/marl/configs/environment_changes
new file mode 100644
index 0000000..4859ec5
--- /dev/null
+++ b/marl_factory_grid/algorithms/marl/configs/environment_changes
@@ -0,0 +1,3 @@
+marl_factory_grid>environment>rules.py#SpawnEntity.on_reset()
+marl_factory_grid>environment>rewards.py
+marl_factory_grid>modules>clean_up>groups.py#DirtPiles.trigger_spawn()
diff --git a/marl_factory_grid/algorithms/marl/example_config.yaml b/marl_factory_grid/algorithms/marl/configs/two_rooms_one_door_modified_config.yaml
similarity index 50%
rename from marl_factory_grid/algorithms/marl/example_config.yaml
rename to marl_factory_grid/algorithms/marl/configs/two_rooms_one_door_modified_config.yaml
index 62782b3..ea115f6 100644
--- a/marl_factory_grid/algorithms/marl/example_config.yaml
+++ b/marl_factory_grid/algorithms/marl/configs/two_rooms_one_door_modified_config.yaml
@@ -7,20 +7,22 @@ agent:
   hidden_size_critic:  64
   use_agent_embedding: False
 env:
-  classname:          marl_factory_grid.configs
-  env_name:           "simple_crossing"
+  classname:          marl_factory_grid.configs.custom
+  env_name:           "custom/two_rooms_one_door_modified_random_pos"
   n_agents:           2
   max_steps:          250
   pomdp_r:            2
   stack_n_frames:     0
   individual_rewards: True
-  train_render:       True
+  train_render:       False
   eval_render:        True
 method:               marl_factory_grid.algorithms.marl.LoopSEAC
 algorithm:
   gamma:              0.99
   entropy_coef:       0.01
-  vf_coef:            0.5
-  n_steps:            5
-  max_steps:          10000
+  vf_coef:            0.05
+  n_steps:            0 # How much experience should be sampled at most (n-TD) until the next value and policy update is performed. Default 0: MC
+  max_steps:          100000
+  advantage:          "TD-Advantage-AC" # Options: "Advantage-AC", "TD-Advantage-AC", "Reinforce"
+  pile-order:         "agents" # Options: "fixed", "random", "none", "agents"
 
diff --git a/marl_factory_grid/configs/custom/dirt_quadrant.yaml b/marl_factory_grid/configs/custom/dirt_quadrant.yaml
new file mode 100644
index 0000000..49b27ed
--- /dev/null
+++ b/marl_factory_grid/configs/custom/dirt_quadrant.yaml
@@ -0,0 +1,67 @@
+General:
+  # RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
+  env_seed: 69
+  # Individual vs global rewards
+  individual_rewards: true
+  # The level.txt file to load from marl_factory_grid/levels
+  level_name: quadrant
+  # View radius of the agents' partial observation (POMDP); 0 = full observability
+  pomdp_r: 0 # default 3
+  # Print all messages and events
+  verbose: false
+  # Run tests
+  tests: false
+
+# Dirt-cleaning variant on the "quadrant" level: unlike the "clean and bring" scenario, no items or drop-off locations
+# are configured here; the active agent only moves around and observes the dirt piles and itself.
+Agents:
+  # The clean agents
+  Wolfgang:
+    Actions:
+      - Move4
+      #- Clean
+      #- Noop
+    Observations:
+      # - Walls
+      # - Other
+      - DirtPiles
+      - Self
+    Positions:
+      - (9,1)
+  #Reiner:
+    #Actions:
+      #- Move4
+      #- Clean
+      #- Noop
+    #Observations:
+      # - Walls
+      # - Other
+      #- DirtPiles
+      #- Self
+    #Positions:
+      #- (9,8) # (9, 4)
+
+Entities:
+  DirtPiles:
+    coords_or_quantity: (1,1) # (4,7), (2,4), (1, 1) # (1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
+    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
+    clean_amount: 1
+    dirt_spawn_r_var: 0
+    max_global_amount: 12
+    max_local_amount: 1
+
+# Rules section specifies the rules governing the dynamics of the environment.
+Rules:
+
+  # Utilities
+  # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
+  # Can be omitted/ignored if you do not want to take care of collisions at all.
+  WatchCollisions:
+    done_at_collisions: false
+
+  # Done Conditions
+  # Define the conditions for the environment to stop. These can be success or failure conditions.
+  # The environment stops when all dirt is cleaned
+  DoneOnAllDirtCleaned:
+  #DoneAtMaxStepsReached:
+    #max_steps: 200
diff --git a/marl_factory_grid/configs/custom/dirt_quadrant_random_pos.yaml b/marl_factory_grid/configs/custom/dirt_quadrant_random_pos.yaml
new file mode 100644
index 0000000..b619a7b
--- /dev/null
+++ b/marl_factory_grid/configs/custom/dirt_quadrant_random_pos.yaml
@@ -0,0 +1,75 @@
+General:
+  # RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
+  env_seed: 69
+  # Individual vs global rewards
+  individual_rewards: true
+  # The level.txt file to load from marl_factory_grid/levels
+  level_name: quadrant
+  # View radius of the agents' partial observation (POMDP); 0 = full observability
+  pomdp_r: 0 # default 3
+  # Print all messages and events
+  verbose: false
+  # Run tests
+  tests: false
+
+# Dirt-cleaning variant on the "quadrant" level: unlike the "clean and bring" scenario, no items or drop-off locations
+# are configured here; the active agent only moves around and observes the dirt piles and itself.
+Agents:
+  # The clean agents
+  Wolfgang:
+    Actions:
+      - Move4
+      #- Clean
+      #- Noop
+    Observations:
+      # - Walls
+      # - Other
+      - DirtPiles
+      - Self
+    #Positions:
+      #- (9,1)
+      #- (9,2)
+      #- (9,3)
+      #- (9,4)
+      #- (9,5)
+      #- (9,6)
+      #- (9,7)
+      #- (9,8)
+      #- (9,9)
+  #Reiner:
+    #Actions:
+      #- Move4
+      #- Clean
+      #- Noop
+    #Observations:
+      # - Walls
+      # - Other
+      #- DirtPiles
+      #- Self
+    #Positions:
+      #- (9,8) # (9, 4)
+
+Entities:
+  DirtPiles:
+    coords_or_quantity: (1,1) # (4,7), (2,4), (1, 1) #(1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
+    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
+    clean_amount: 1
+    dirt_spawn_r_var: 0
+    max_global_amount: 12
+    max_local_amount: 1
+
+# Rules section specifies the rules governing the dynamics of the environment.
+Rules:
+
+  # Utilities
+  # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
+  # Can be omitted/ignored if you do not want to take care of collisions at all.
+  WatchCollisions:
+    done_at_collisions: false
+
+  # Done Conditions
+  # Define the conditions for the environment to stop. These can be success or failure conditions.
+  # The environment stops when all dirt is cleaned
+  DoneOnAllDirtCleaned:
+  #DoneAtMaxStepsReached: # An episode should last for at most max_steps steps
+    #max_steps: 1000
diff --git a/marl_factory_grid/configs/custom/two_rooms_one_door_modified_random_pos.yaml b/marl_factory_grid/configs/custom/two_rooms_one_door_modified_random_pos.yaml
new file mode 100644
index 0000000..6202302
--- /dev/null
+++ b/marl_factory_grid/configs/custom/two_rooms_one_door_modified_random_pos.yaml
@@ -0,0 +1,72 @@
+General:
+  env_seed: 69
+  # Individual vs global rewards
+  individual_rewards: true
+  # The level.txt file to load from marl_factory_grid/levels
+  level_name: two_rooms_modified
+  # View Radius; 0 = full observability
+  pomdp_r: 0
+  # Print all messages and events
+  verbose: false
+  # Run tests
+  tests: false
+
+# In the "two rooms one door" scenario, 2 agents spawn in 2 different rooms that are connected by a single door. Their
+# aim is to reach the destination in the room they did not spawn in, which leads to a conflict at the door.
+Agents:
+  Wolfgang:
+    Actions:
+      - Move8
+      - DoorUse
+      - Noop
+    Observations:
+      - DirtPiles
+      - Self
+    #Positions:
+      #- (1,1)
+      #- (2,1)
+      #- (3,1)
+      #- (4,1)
+      #- (5,1)
+      #- (6,1)
+  Sigmund:
+    Actions:
+      - Move8
+      - DoorUse
+      - Noop
+    Observations:
+      - DirtPiles
+      - Self
+    #Positions:
+      #- (1,13)
+      #- (2,13)
+      #- (3,13)
+      #- (4,13)
+      #- (5,13)
+      #- (6,13)
+
+Entities:
+  DirtPiles:
+    coords_or_quantity: (3,12), (3,2) # This order is required, because agent 0 needs to reach (3, 12) and agent 1 (3, 2)
+    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
+    clean_amount: 1
+    dirt_spawn_r_var: 0
+    max_global_amount: 12
+    max_local_amount: 1
+
+  Doors: { }
+
+Rules:
+  # Environment Dynamics
+  DoorAutoClose:
+    close_frequency: 10
+
+  # Utilities
+  # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
+  WatchCollisions:
+    done_at_collisions: false
+
+  # Done Conditions
+  #DoneOnAllDirtCleaned:
+  #DoneAtMaxStepsReached: # Maybe required, since door blocking will result in an infinite loop
+    #max_steps: 1000
diff --git a/marl_factory_grid/configs/dirt_quadrant.yaml b/marl_factory_grid/configs/dirt_quadrant.yaml
new file mode 100644
index 0000000..bab83ca
--- /dev/null
+++ b/marl_factory_grid/configs/dirt_quadrant.yaml
@@ -0,0 +1,67 @@
+General:
+  # RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
+  env_seed: 69
+  # Individual vs global rewards
+  individual_rewards: true
+  # The level.txt file to load from marl_factory_grid/levels
+  level_name: quadrant
+  # View radius of the agents' partial observation (POMDP); 0 = full observability
+  pomdp_r: 0 # default 3
+  # Print all messages and events
+  verbose: false
+  # Run tests
+  tests: false
+
+# Dirt-cleaning scenario on the "quadrant" level: unlike the "clean and bring" scenario, no items or drop-off locations
+# are configured here; both agents aim to clean dirt piles.
+Agents:
+  # The clean agents
+  Wolfgang:
+    Actions:
+      - Move4
+      - Clean
+      - Noop
+    Observations:
+      - Walls
+      - Other
+      - DirtPiles
+      - Self
+    Positions:
+      - (9,1)
+  Reiner:
+    Actions:
+      - Move4
+      - Clean
+      - Noop
+    Observations:
+      - Walls
+      - Other
+      - DirtPiles
+      - Self
+    Positions:
+      - (9,8) # (9, 4)
+
+Entities:
+  DirtPiles:
+    coords_or_quantity:  (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
+    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
+    clean_amount: 1
+    dirt_spawn_r_var: 0
+    max_global_amount: 12
+    max_local_amount: 1
+
+# Rules section specifies the rules governing the dynamics of the environment.
+Rules:
+
+  # Utilities
+  # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
+  # Can be omitted/ignored if you do not want to take care of collisions at all.
+  WatchCollisions:
+    done_at_collisions: false
+
+  # Done Conditions
+  # Define the conditions for the environment to stop. These can be success or failure conditions.
+  # The environment stops when all dirt is cleaned
+  DoneOnAllDirtCleaned:
+  DoneAtMaxStepsReached:
+    max_steps: 200
diff --git a/marl_factory_grid/configs/two_rooms_one_door_modified.yaml b/marl_factory_grid/configs/two_rooms_one_door_modified.yaml
index c6132d6..215e250 100644
--- a/marl_factory_grid/configs/two_rooms_one_door_modified.yaml
+++ b/marl_factory_grid/configs/two_rooms_one_door_modified.yaml
@@ -5,7 +5,7 @@ General:
   # The level.txt file to load from marl_factory_grid/levels
   level_name: two_rooms_modified
   # View Radius; 0 = full observatbility
-  pomdp_r: 3
+  pomdp_r: 0
   # Print all messages and events
   verbose: false
   # Run tests
@@ -26,7 +26,7 @@ Agents:
       - Doors
       - Destination
     Positions:
-      - (3,1)
+      - (3,1) # Agent spawnpoint
   Sigmund:
     Actions:
       - Move8
@@ -47,7 +47,7 @@ Entities:
       SpawnDestinationsPerAgent:
         coords_or_quantity:
           Wolfgang:
-            - (3,12)
+            - (3,12) # Target coordinates
           Sigmund:
             - (3,2)
 
diff --git a/marl_factory_grid/levels/quadrant.txt b/marl_factory_grid/levels/quadrant.txt
new file mode 100644
index 0000000..4f09ee5
--- /dev/null
+++ b/marl_factory_grid/levels/quadrant.txt
@@ -0,0 +1,11 @@
+###########
+#---#######
+#-----#####
+#------####
+#-------###
+#--------##
+#--------##
+#---------#
+#---------#
+#---------#
+###########
\ No newline at end of file