Added code for tsp_runs and updated the eval configs so that every episode ends after a fixed maximum number of steps

2025-07-08 02:21:36 +02:00 · 2024-05-10 17:38:34 +02:00
parent d0d31b964c
commit 5e9e59c843
7 changed files with 125 additions and 13 deletions
--- a/marl_factory_grid/configs/custom/MultiAgentConfigs/two_rooms_one_door_modified_eval_config.yaml
+++ b/marl_factory_grid/configs/custom/MultiAgentConfigs/two_rooms_one_door_modified_eval_config.yaml
@ -58,5 +58,5 @@ Rules:

  # Done Conditions
  #DoneOnAllDirtCleaned:
-  #DoneAtMaxStepsReached: # Mayne Required since door blocking will result in infinite loop
-    #max_steps: 1000
+  DoneAtMaxStepsReached:
+    max_steps: 50
--- a/marl_factory_grid/configs/custom/MultiAgentConfigs/two_rooms_one_door_modified_eval_config_emergent.yaml
+++ b/marl_factory_grid/configs/custom/MultiAgentConfigs/two_rooms_one_door_modified_eval_config_emergent.yaml
@ -58,5 +58,5 @@ Rules:

  # Done Conditions
  #DoneOnAllDirtCleaned:
-  #DoneAtMaxStepsReached: # Mayne Required since door blocking will result in infinite loop
-    #max_steps: 1000
+  DoneAtMaxStepsReached:
+    max_steps: 50
--- a/marl_factory_grid/configs/custom/two_rooms_one_door_modified_eval_config.yaml
+++ b/marl_factory_grid/configs/custom/two_rooms_one_door_modified_eval_config.yaml
@ -58,5 +58,5 @@ Rules:

  # Done Conditions
  #DoneOnAllDirtCleaned:
-  #DoneAtMaxStepsReached: # Mayne Required since door blocking will result in infinite loop
-    #max_steps: 1000
+  DoneAtMaxStepsReached:
+    max_steps: 50
--- a/marl_factory_grid/configs/dirt_quadrant.yaml
+++ b/marl_factory_grid/configs/dirt_quadrant.yaml
@ -27,7 +27,7 @@ Agents:
      - DirtPiles
      - Self
    Positions:
-      - (9,1)
+      - (9,2)
  Reiner:
    Actions:
      - Move4
@ -39,11 +39,11 @@ Agents:
      - DirtPiles
      - Self
    Positions:
-      - (9,8) # (9, 4)
+      - (9,5)

 Entities:
  DirtPiles:
-    coords_or_quantity:  (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
+    coords_or_quantity: (1, 1), (4,5), (9,9)
    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
    clean_amount: 1
    dirt_spawn_r_var: 0
--- a/marl_factory_grid/configs/two_rooms_one_door_modified.yaml
+++ b/marl_factory_grid/configs/two_rooms_one_door_modified.yaml
@ -16,7 +16,7 @@ General:
 Agents:
  Wolfgang:
    Actions:
-      - Move8
+      - Move4
      - Noop
      - DestAction
      - DoorUse
@ -29,7 +29,7 @@ Agents:
      - (3,1) # Agent spawnpoint
  Sigmund:
    Actions:
-      - Move8
+      - Move4
      - Noop
      - DestAction
      - DoorUse
@ -67,6 +67,11 @@ Rules:
  # Init
  AssignGlobalPositions: { }

+  DoneAtDestinationReach:
+    reward_at_done: 1
+    # We want to give rewards only when all targets have been reached.
+    condition: "all"
+
  # Done Conditions
  DoneAtMaxStepsReached:
-    max_steps: 100
+    max_steps: 50
--- a/marl_factory_grid/modules/destinations/actions.py
+++ b/marl_factory_grid/modules/destinations/actions.py
@ -16,7 +16,7 @@ class DestAction(Action):

    def do(self, entity, state) -> Union[None, ActionResult]:
        if destination := state[d.DESTINATION].by_pos(entity.pos):
-            valid = destination.do_wait_action(entity)
+            valid = destination[0].do_wait_action(entity)
            state.print(f'{entity.name} just waited at {entity.pos}')
        else:
            valid = c.NOT_VALID
--- a/studies/tsp_runs.py
+++ b/studies/tsp_runs.py
@ -0,0 +1,107 @@
+import os
+import time
+from pathlib import Path
+
+import imageio
+from tqdm import trange
+
+from marl_factory_grid.algorithms.static.TSP_dirt_agent import TSPDirtAgent
+from marl_factory_grid.algorithms.static.TSP_item_agent import TSPItemAgent
+from marl_factory_grid.algorithms.static.TSP_target_agent import TSPTargetAgent
+from marl_factory_grid.environment.factory import Factory
+
+def get_dirt_quadrant_tsp_agents(emergent_phenomenon, factory):
+    """Create the two TSP dirt agents for the ``dirt_quadrant`` setting.
+
+    When ``emergent_phenomenon`` is falsy, the weight of every edge in each
+    agent's position graph is overwritten with ``0.55 + (i - 1) * 0.05``,
+    where ``i`` is the first coordinate of the edge's lower-indexed endpoint.
+    When it is truthy, the graphs are left untouched.
+
+    :param emergent_phenomenon: If truthy, keep the agents' existing edge weights.
+    :param factory: Environment instance handed to each ``TSPDirtAgent``.
+    :return: List with the two agents (agent indices 0 and 1).
+    :raises KeyError: If a position graph contains an edge that is not a unit
+        step between two cells of the grid spanning (1, 1) to (9, 9).
+    """
+    agents = [TSPDirtAgent(factory, 0), TSPDirtAgent(factory, 1)]
+    if emergent_phenomenon:
+        return agents
+
+    # Costs are keyed by (source, target) node tuples instead of formatted
+    # strings, so the lookup does not depend on how a node is rendered as text.
+    edge_costs = {}
+
+    # Edges between (i, j) and (i, j + 1), both traversal directions
+    for i in range(1, 10):
+        cost = 0.55 + (i - 1) * 0.05
+        for j in range(1, 9):
+            edge_costs[(i, j), (i, j + 1)] = cost
+            edge_costs[(i, j + 1), (i, j)] = cost
+
+    # Edges between (i, j) and (i + 1, j), both traversal directions
+    for i in range(1, 9):
+        cost = 0.55 + (i - 1) * 0.05
+        for j in range(1, 10):
+            edge_costs[(i, j), (i + 1, j)] = cost
+            edge_costs[(i + 1, j), (i, j)] = cost
+
+    for agent in agents:
+        graph = agent._position_graph
+        # Only existing attribute values are replaced; no edges are added or removed.
+        for u, v, _ in graph.edges(data='weight'):
+            graph[u][v]['weight'] = edge_costs[u, v]
+
+    return agents
+
+
+def get_two_rooms_one_door_modified_tsp_agents(emergent_phenomenon, factory):
+    """Create the two TSP target agents for ``two_rooms_one_door_modified``.
+
+    When ``emergent_phenomenon`` is falsy, the weight of the edge between the
+    positions (3, 1) and (3, 2) is set to 4 in each agent's position graph.
+    When it is truthy, the graphs are left untouched.
+
+    :param emergent_phenomenon: If truthy, keep the agents' existing edge weights.
+    :param factory: Environment instance handed to each ``TSPTargetAgent``.
+    :return: List with the two agents (agent indices 0 and 1).
+    """
+    agents = [TSPTargetAgent(factory, 0), TSPTargetAgent(factory, 1)]
+    if not emergent_phenomenon:
+        for agent in agents:
+            # NOTE(review): presumably raises the cost of this step so the
+            # planner prefers another route - confirm against the level layout.
+            agent._position_graph[(3, 1)][(3, 2)]['weight'] = 4
+    return agents
+
+def run_tsp_setting(config_name, emergent_phenomenon):
+    """Run one rendered episode with TSP agents and record it to disk.
+
+    Creates ``../study_out/tsp_run<N>``, where ``N`` is one more than the
+    highest existing run number (or 0 if there is none). The environment
+    config is written to ``env_config.txt`` in that folder and the rendered
+    frames are recorded to ``pygame_recording.mp4``. All paths are relative
+    to the current working directory.
+
+    :param config_name: Name of the config file (without ``.yaml``) below
+        ``../marl_factory_grid/configs/``. Supported values are
+        ``"dirt_quadrant"`` and ``"two_rooms_one_door_modified"``; for any
+        other value a message is printed and nothing is created.
+    :param emergent_phenomenon: Forwarded to the agent builder of the setting.
+    :return: None
+    """
+    agent_builders = {
+        "dirt_quadrant": get_dirt_quadrant_tsp_agents,
+        "two_rooms_one_door_modified": get_two_rooms_one_door_modified_tsp_agents,
+    }
+    build_agents = agent_builders.get(config_name)
+    if build_agents is None:
+        # Reject unknown settings before any folder, environment or recorder exists.
+        print("Config name does not exist. Abort...")
+        return
+
+    # Render at each step?
+    render = True
+
+    # Path to config File
+    path = Path(f'../marl_factory_grid/configs/{config_name}.yaml')
+
+    # Create results folder (and its parent, if this is the very first run)
+    out_root = "../study_out"
+    os.makedirs(out_root, exist_ok=True)
+    # Entries such as "tsp_run_backup" carry no run number and are ignored.
+    run_numbers = [
+        int(run[7:]) for run in os.listdir(out_root)
+        if run.startswith("tsp_run") and run[7:].isdecimal()
+    ]
+    next_run_number = max(run_numbers) + 1 if run_numbers else 0
+    results_path = f"{out_root}/tsp_run{next_run_number}"
+    os.mkdir(results_path)
+
+    # Env Init
+    factory = Factory(path)
+
+    with open(f"{results_path}/env_config.txt", "w") as txt_file:
+        txt_file.write(str(factory.conf))
+
+    recorder = imageio.get_writer(f'{results_path}/pygame_recording.mp4', fps=5)
+    try:
+        for episode in trange(1):
+            _ = factory.reset()
+            if render:
+                factory.set_recorder(recorder)
+                factory.render()
+                factory._renderer.fps = 5
+            # Agents are built after the reset, from the freshly reset environment.
+            agents = build_agents(emergent_phenomenon, factory)
+            done = False
+            while not done:
+                actions = [agent.predict() for agent in agents]
+                _, _, _, done, _ = factory.step(actions)
+                if render:
+                    factory.render()
+            print(f'Episode {episode} done...')
+    finally:
+        # Close the video writer even if the episode raised.
+        recorder.close()
+
+
+def dirt_quadrant_multi_agent_tsp(emergent_phenomenon):
+    """Run the multi-agent TSP study on the ``dirt_quadrant`` config.
+
+    :param emergent_phenomenon: Forwarded unchanged to ``run_tsp_setting``.
+    """
+    run_tsp_setting(config_name="dirt_quadrant",
+                    emergent_phenomenon=emergent_phenomenon)
+
+
+def two_rooms_one_door_modified_multi_agent_tsp(emergent_phenomenon):
+    """Run the multi-agent TSP study on the ``two_rooms_one_door_modified`` config.
+
+    :param emergent_phenomenon: Forwarded unchanged to ``run_tsp_setting``.
+    """
+    run_tsp_setting(config_name="two_rooms_one_door_modified",
+                    emergent_phenomenon=emergent_phenomenon)
+
+
+if __name__ == "__main__":
+    # Script entry point: run the dirt_quadrant setting with
+    # emergent_phenomenon switched off.
+    dirt_quadrant_multi_agent_tsp(emergent_phenomenon=False)