Added code for tsp_runs and updated eval configs so that every episode is capped at a maximum number of steps

2026-01-15 23:41:39 +01:00 · 2024-05-10 17:38:34 +02:00
parent d0d31b964c
commit 5e9e59c843
7 changed files with 125 additions and 13 deletions
--- a/marl_factory_grid/configs/custom/MultiAgentConfigs/two_rooms_one_door_modified_eval_config.yaml
+++ b/marl_factory_grid/configs/custom/MultiAgentConfigs/two_rooms_one_door_modified_eval_config.yaml
@@ -58,5 +58,5 @@ Rules:

  # Done Conditions
  #DoneOnAllDirtCleaned:
-  #DoneAtMaxStepsReached: # Mayne Required since door blocking will result in infinite loop
-    #max_steps: 1000
+  DoneAtMaxStepsReached:
+    max_steps: 50
--- a/marl_factory_grid/configs/custom/MultiAgentConfigs/two_rooms_one_door_modified_eval_config_emergent.yaml
+++ b/marl_factory_grid/configs/custom/MultiAgentConfigs/two_rooms_one_door_modified_eval_config_emergent.yaml
@@ -58,5 +58,5 @@ Rules:

  # Done Conditions
  #DoneOnAllDirtCleaned:
-  #DoneAtMaxStepsReached: # Mayne Required since door blocking will result in infinite loop
-    #max_steps: 1000
+  DoneAtMaxStepsReached:
+    max_steps: 50
--- a/marl_factory_grid/configs/custom/two_rooms_one_door_modified_eval_config.yaml
+++ b/marl_factory_grid/configs/custom/two_rooms_one_door_modified_eval_config.yaml
@@ -58,5 +58,5 @@ Rules:

  # Done Conditions
  #DoneOnAllDirtCleaned:
-  #DoneAtMaxStepsReached: # Mayne Required since door blocking will result in infinite loop
-    #max_steps: 1000
+  DoneAtMaxStepsReached:
+    max_steps: 50
--- a/marl_factory_grid/configs/dirt_quadrant.yaml
+++ b/marl_factory_grid/configs/dirt_quadrant.yaml
@@ -27,7 +27,7 @@ Agents:
      - DirtPiles
      - Self
    Positions:
-      - (9,1)
+      - (9,2)
  Reiner:
    Actions:
      - Move4
@@ -39,11 +39,11 @@ Agents:
      - DirtPiles
      - Self
    Positions:
-      - (9,8) # (9, 4)
+      - (9,5)

 Entities:
  DirtPiles:
-    coords_or_quantity:  (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
+    coords_or_quantity: (1, 1), (4,5), (9,9)
    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
    clean_amount: 1
    dirt_spawn_r_var: 0
--- a/marl_factory_grid/configs/two_rooms_one_door_modified.yaml
+++ b/marl_factory_grid/configs/two_rooms_one_door_modified.yaml
@@ -16,7 +16,7 @@ General:
 Agents:
  Wolfgang:
    Actions:
-      - Move8
+      - Move4
      - Noop
      - DestAction
      - DoorUse
@@ -29,7 +29,7 @@ Agents:
      - (3,1) # Agent spawnpoint
  Sigmund:
    Actions:
-      - Move8
+      - Move4
      - Noop
      - DestAction
      - DoorUse
@@ -67,6 +67,11 @@ Rules:
  # Init
  AssignGlobalPositions: { }

+  DoneAtDestinationReach:
+    reward_at_done: 1
+    # Give the reward only once all targets have been reached.
+    condition: "all"
+
  # Done Conditions
  DoneAtMaxStepsReached:
-    max_steps: 100
+    max_steps: 50
--- a/marl_factory_grid/modules/destinations/actions.py
+++ b/marl_factory_grid/modules/destinations/actions.py
@@ -16,7 +16,7 @@ class DestAction(Action):

    def do(self, entity, state) -> Union[None, ActionResult]:
        if destination := state[d.DESTINATION].by_pos(entity.pos):
-            valid = destination.do_wait_action(entity)
+            valid = destination[0].do_wait_action(entity)
            state.print(f'{entity.name} just waited at {entity.pos}')
        else:
            valid = c.NOT_VALID