Merge remote-tracking branch 'origin/marl_refactor' into marl_refactor

2025-11-16 23:33:51 +01:00 · 2024-05-21 11:40:22 +02:00
parent 13ea9d25c9 4571dc1cd1
commit c7f2bbfbac
9 changed files with 64 additions and 35 deletions
--- a/marl_factory_grid/algorithms/marl/a2c_dirt.py
+++ b/marl_factory_grid/algorithms/marl/a2c_dirt.py
@@ -384,6 +384,14 @@ class A2C:
            obs[0][1][x][y] = 1
            print("Missing agent position")
    def get_all_cleaned_dirt_piles(self, dirt_piles_positions, cleaned_dirt_piles):
        """Merge the per-agent cleaned flags into one position -> bool mapping.

        Every position in ``dirt_piles_positions`` starts out as ``False``.
        For each agent index below ``self.n_agents``, every position whose
        flag in ``cleaned_dirt_piles[agent_idx]`` is truthy is set to ``True``.
        A truthy position that is not listed in ``dirt_piles_positions`` is
        added to the result as well.

        :param dirt_piles_positions: iterable of hashable pile positions.
        :param cleaned_dirt_piles: indexable by agent index; each entry is a
            mapping from position to a cleaned flag.
        :return: dict mapping position to ``True`` if any considered agent
            flagged it as cleaned, else ``False``.
        """
        merged_status = dict.fromkeys(dirt_piles_positions, False)
        for idx in range(self.n_agents):
            per_agent = cleaned_dirt_piles[idx]
            # Only truthy flags are propagated; falsy ones never reset an entry.
            merged_status.update(
                (pile, True) for pile, is_clean in per_agent.items() if is_clean
            )
        return merged_status
    def handle_dirt(self, env, cleaned_dirt_piles, ordered_dirt_piles, target_pile, indices, reward, done):
        # Check if agent moved on field with dirt. If that is the case collect dirt automatically
        agent_positions = [env.state.moving_entites[agent_idx].pos for agent_idx in range(self.n_agents)]
@@ -428,12 +436,7 @@ class A2C:
                    done = True
            elif self.cfg[nms.ALGORITHM]["pile_all_done"] == "shared":
                # End episode if both agents together have cleaned all dirt piles
-                meta_cleaned_dirt_piles = {pos: False for pos in dirt_piles_positions}
+                if all(self.get_all_cleaned_dirt_piles(dirt_piles_positions, cleaned_dirt_piles).values()):
-                for agent_idx in range(self.n_agents):
-                    for (pos, cleaned) in cleaned_dirt_piles[agent_idx].items():
-                        if cleaned:
-                            meta_cleaned_dirt_piles[pos] = True
-                if all(meta_cleaned_dirt_piles.values()):
                    done = True
        return reward, done
--- a/marl_factory_grid/algorithms/marl/configs/dirt_quadrant_config.yaml
+++ b/marl_factory_grid/algorithms/marl/configs/dirt_quadrant_config.yaml
@@ -24,7 +24,7 @@ algorithm:
  entropy_coef:       0.01
  vf_coef:            0.05
  n_steps:            0 # How much experience should be sampled at most (n-TD) until the next value and policy update is performed. Default 0: MC
-  max_steps:          270000
+  max_steps:          240000
  advantage:          "Advantage-AC" # Options: "Advantage-AC", "TD-Advantage-AC", "Reinforce"
  pile-order:         "fixed" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the others for multi agent inference)
  pile-observability: "single" # Options: "single", "all"
--- a/marl_factory_grid/configs/custom/MultiAgentConfigs/dirt_quadrant_eval_config.yaml
+++ b/marl_factory_grid/configs/custom/MultiAgentConfigs/dirt_quadrant_eval_config.yaml
@@ -37,7 +37,7 @@ Agents:
 Entities:
  DirtPiles:
-    coords_or_quantity: (9,9), (4,5), (1,1) # (4,7), (2,4), (1, 1) # (1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
+    coords_or_quantity: (9,9), (7,9), (4,7), (2,4), (1, 1) # (4,7), (2,4), (1, 1) # (1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field can remove the dirt in one action
    clean_amount: 1
    dirt_spawn_r_var: 0
--- a/marl_factory_grid/configs/custom/MultiAgentConfigs/two_rooms_one_door_modified_eval_config_emergent.yaml
+++ b/marl_factory_grid/configs/custom/MultiAgentConfigs/two_rooms_one_door_modified_eval_config_emergent.yaml
@@ -59,4 +59,4 @@ Rules:
  # Done Conditions
  #DoneOnAllDirtCleaned:
  DoneAtMaxStepsReached:
-    max_steps: 50
+    max_steps: 30
--- a/marl_factory_grid/configs/custom/dirt_quadrant_eval_config.yaml
+++ b/marl_factory_grid/configs/custom/dirt_quadrant_eval_config.yaml
@@ -25,8 +25,15 @@ Agents:
      #- Self
    #Positions:
      #- (9,1)
      #- (1,1)
      #- (2,4)
      #- (4,7)
      #- (7,9)
      #- (2,4)
      #- (4,7)
      #- (7,9)
      #- (9,9)
-      #- (4,5)
+      #- (9,1)
  Wolfgang:
    Actions:
      - Move4
@@ -35,12 +42,19 @@ Agents:
      - Self
    Positions:
      - (9,5)
-      - (9,9)
+      #- (1,1)
-      - (4,5)
+      #- (2,4)
      #- (4,7)
      #- (7,9)
      #- (2,4)
      #- (4,7)
      #- (7,9)
      #- (9,9)
      #- (9,5)
 Entities:
  DirtPiles:
-    coords_or_quantity: (9,9), (4,5), (1,1) # (4,7), (2,4), (1, 1) # (1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
+    coords_or_quantity: (1, 1), (2,4), (4,7), (7,9), (9,9) #(9,9), (7,9), (4,7), (2,4), (1, 1) #(1, 1), (2,4), (4,7), (7,9), (9,9) # (4,7), (2,4), (1, 1) # (1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field can remove the dirt in one action
    clean_amount: 1
    dirt_spawn_r_var: 0
--- a/marl_factory_grid/configs/custom/dirt_quadrant_train_config.yaml
+++ b/marl_factory_grid/configs/custom/dirt_quadrant_train_config.yaml
@@ -24,11 +24,17 @@ Agents:
      #- Self
    #Positions:
      #- (9,1)
      #- (4,5)
      #- (1,1)
-      #- (4,5)
+      #- (2,4)
-      #- (9,1)
+      #- (4,7)
      #- (6,8)
      #- (7,9)
      #- (2,4)
      #- (4,7)
      #- (6,8)
      #- (7,9)
      #- (9,9)
      #- (9,1)
  Wolfgang:
    Actions:
      - Move4
@@ -37,16 +43,22 @@ Agents:
      - Self
    Positions:
      - (9,5)
      - (4,5)
      - (1,1)
-      - (4,5)
+      - (2,4)
-      - (9,5)
+      - (4,7)
      - (6,8)
      - (7,9)
      - (2,4)
      - (4,7)
      - (6,8)
      - (7,9)
      - (9,9)
      - (9,5)
 Entities:
  DirtPiles:
-    coords_or_quantity: (9,9), (1,1), (4,5)  # (4,7), (2,4), (1, 1) #(1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
+    coords_or_quantity: (1, 1), (2,4), (4,7), (6,8), (7,9), (9,9)  # (4,7), (2,4), (1, 1) #(1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field can remove the dirt in one action
    clean_amount: 1
    dirt_spawn_r_var: 0
--- a/marl_factory_grid/configs/dirt_quadrant.yaml
+++ b/marl_factory_grid/configs/dirt_quadrant.yaml
@@ -27,7 +27,7 @@ Agents:
      - DirtPiles
      - Self
    Positions:
-      - (9,2)
+      - (9,1)
  Reiner:
    Actions:
      - Move4
@@ -43,7 +43,7 @@ Agents:
 Entities:
  DirtPiles:
-    coords_or_quantity: (1, 1), (4,5), (9,9)
+    coords_or_quantity: (1, 1), (2,4), (4,7), (7,9), (9,9)
    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field can remove the dirt in one action
    clean_amount: 1
    dirt_spawn_r_var: 0
--- a/marl_factory_grid/levels/quadrant.txt
+++ b/marl_factory_grid/levels/quadrant.txt
@@ -1,10 +1,10 @@
 ###########
-#---#######
+#---------#
-#-----#####
+#---------#
-#------####
+#---------#
-#-------###
+#---------#
-#--------##
+#---------#
-#--------##
+#---------#
 #---------#
 #---------#
 #---------#
--- a/studies/marl_adapted.py
+++ b/studies/marl_adapted.py
@@ -30,7 +30,7 @@ def single_agent_eval(config_name, run):
    agent = A2C(train_cfg, eval_cfg)
    print("Evaluation phase")
    agent.load_agents(run)
-    agent.eval_loop(10)
+    agent.eval_loop(1)
 def multi_agent_eval(config_name, runs, emergent_phenomenon=False):
@@ -55,7 +55,7 @@ def multi_agent_eval(config_name, runs, emergent_phenomenon=False):
    agent = A2C(train_cfg, eval_cfg)
    print("Evaluation phase")
    agent.load_agents(runs)
-    agent.eval_loop(10)
+    agent.eval_loop(1)
 def dirt_quadrant_single_agent_training():
@@ -70,7 +70,7 @@ def dirt_quadrant_single_agent_eval(agent_name):
    if agent_name == "Sigmund":
        run = "run0"
    elif agent_name == "Wolfgang":
-        run = "run4"
+        run = "run1"
    single_agent_eval("dirt_quadrant", [run])
@@ -82,15 +82,15 @@ def two_rooms_one_door_modified_single_agent_eval(agent_name):
    single_agent_eval("two_rooms_one_door_modified", [run])
-def dirt_quadrant_multi_agent_eval(emergent_phenomenon):
+def dirt_quadrant_5_multi_agent_eval(emergent_phenomenon):
-    multi_agent_eval("dirt_quadrant", ["run0", "run1"], emergent_phenomenon)
+    multi_agent_eval("dirt_quadrant", ["run4", "run5"], emergent_phenomenon)
 def dirt_quadrant_5_multi_agent_ctde_eval(emergent_phenomenon): # run7 == run4
    """Call multi_agent_eval for "dirt_quadrant" with the run4/run7 pair.

    ``emergent_phenomenon`` is forwarded unchanged as the third argument.
    """
    selected_runs = ["run4", "run7"]
    multi_agent_eval("dirt_quadrant", selected_runs, emergent_phenomenon)
 def two_rooms_one_door_modified_multi_agent_eval(emergent_phenomenon):
    """Call multi_agent_eval for "two_rooms_one_door_modified" with run2/run3.

    ``emergent_phenomenon`` is forwarded unchanged as the third argument.
    """
    selected_runs = ["run2", "run3"]
    multi_agent_eval("two_rooms_one_door_modified", selected_runs, emergent_phenomenon)
 if __name__ == '__main__':
    dirt_quadrant_single_agent_training()