updated remaining configs from dirt to coins

2025-12-06 15:40:37 +01:00 · 2024-09-25 18:22:21 +02:00
parent 5476f617c6
commit 2eee415015
12 changed files with 96 additions and 83 deletions
--- a/README_submission.md
+++ b/README_submission.md
@@ -58,14 +58,12 @@ Existing modules include a variety of functionalities within the environment:

 - [Agents](marl_factory_grid/algorithms) implement either static strategies or learning algorithms based on the specific
  configuration.
- Their action set includes opening [door entities](marl_factory_grid/modules/doors/entitites.py), collecting [coins](marl_factory_grid/modules/coins/coin  cleaning
+- Their action set includes opening [door entities](marl_factory_grid/modules/doors/entitites.py), collecting [coins](marl_factory_grid/modules/coins/entitites.py), cleaning
  [dirt](marl_factory_grid/modules/clean_up/entitites.py), picking
  up [items](marl_factory_grid/modules/items/entitites.py) and
  delivering them to designated drop-off locations.
 - Agents are equipped with a [battery](marl_factory_grid/modules/batteries/entitites.py) that gradually depletes over
  time if not charged at a chargepod.
- The [maintainer](marl_factory_grid/modules/maintenance/entities.py) aims to
-  repair [machines](marl_factory_grid/modules/machines/entitites.py) that lose health over time.


 ## Limitations
--- a/marl_factory_grid/configs/custom/MultiAgentConfigs/dirt_quadrant_eval_config.yaml
+++ b/marl_factory_grid/configs/custom/MultiAgentConfigs/dirt_quadrant_eval_config.yaml
@@ -15,13 +15,13 @@ General:
 # In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
 # other agents aim to clean dirt piles.
 Agents:
-  # The clean agents
+  # The coin-collecting agents
  Sigmund:
    Actions:
      - Move4
      - Noop
    Observations:
-      - DirtPiles
+      - CoinPiles
      - Self
    Positions:
      - (9,1)
@@ -30,17 +30,17 @@ Agents:
      - Move4
      - Noop
    Observations:
-      - DirtPiles
+      - CoinPiles
      - Self
    Positions:
      - (9,5)

 Entities:
-  DirtPiles:
+  CoinPiles:
    coords_or_quantity: (9,9), (7,9), (4,7), (2,4), (1, 1) # (4,7), (2,4), (1, 1) # (1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
-    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
-    clean_amount: 1
-    dirt_spawn_r_var: 0
+    initial_amount: 0.5 # <1 to ensure that the first robot that attempts to collect on this field can collect the coin in one action
+    collect_amount: 1
+    coin_spawn_r_var: 0
    max_global_amount: 12
    max_local_amount: 1

@@ -55,7 +55,7 @@ Rules:

  # Done Conditions
  # Define the conditions for the environment to stop. Either success or a fail conditions.
-  # The environment stops when all dirt is cleaned
-  DoneOnAllDirtCleaned:
+  # The environment stops when all coins are collected
+  DoneOnAllCoinsCollected:
  #DoneAtMaxStepsReached:
    #max_steps: 200
--- a/marl_factory_grid/configs/custom/MultiAgentConfigs/dirt_quadrant_train_config.yaml
+++ b/marl_factory_grid/configs/custom/MultiAgentConfigs/dirt_quadrant_train_config.yaml
@@ -12,17 +12,17 @@ General:
  # Run tests
  tests: false

-# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
-# other agents aim to clean dirt piles.
+# In the "collect and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
+# other agents aim to collect coin piles.
 Agents:
-  # The clean agents
+  # The coin-collecting agents
  Sigmund:
    Actions:
      - Move4
-      #- Clean
+      #- Collect
      #- Noop
    Observations:
-      - DirtPiles
+      - CoinPiles
      - Self
    Positions:
      - (9,1)
@@ -34,10 +34,10 @@ Agents:
  Wolfgang:
    Actions:
      - Move4
-      #- Clean
+      #- Collect
      #- Noop
    Observations:
-      - DirtPiles
+      - CoinPiles
      - Self
    Positions:
      - (9,5)
@@ -48,11 +48,11 @@ Agents:
      - (9,9)

 Entities:
-  DirtPiles:
+  CoinPiles:
    coords_or_quantity: (9,9), (1,1), (4,5)  # (4,7), (2,4), (1, 1) #(1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
-    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
-    clean_amount: 1
-    dirt_spawn_r_var: 0
+    initial_amount: 0.5 # <1 to ensure that the first robot that attempts to collect on this field can collect the coin in one action
+    collect_amount: 1
+    coin_spawn_r_var: 0
    max_global_amount: 12
    max_local_amount: 1

@@ -67,7 +67,7 @@ Rules:

  # Done Conditions
  # Define the conditions for the environment to stop. Either success or a fail conditions.
-  # The environment stops when all dirt is cleaned
-  DoneOnAllDirtCleaned:
+  # The environment stops when all coins are collected
+  DoneOnAllCoinsCollected:
  #DoneAtMaxStepsReached: # An episode should last for at most max_steps steps
    #max_steps: 100
--- a/marl_factory_grid/configs/custom/MultiAgentConfigs/two_rooms_one_door_modified_eval_config.yaml
+++ b/marl_factory_grid/configs/custom/MultiAgentConfigs/two_rooms_one_door_modified_eval_config.yaml
@@ -20,7 +20,7 @@ Agents:
      - DoorUse
      - Noop
    Observations:
-      - DirtPiles
+      - CoinPiles
      - Self
    Positions:
      - (3,1)
@@ -30,17 +30,17 @@ Agents:
      - DoorUse
      - Noop
    Observations:
-      - DirtPiles
+      - CoinPiles
      - Self
    Positions:
      - (3,13)

 Entities:
-  DirtPiles:
+  CoinPiles:
    coords_or_quantity: (2,1), (3,12), (2,13), (3,2) # Static form: auxiliary pile, primary pile, auxiliary pile, ...
-    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
-    clean_amount: 1
-    dirt_spawn_r_var: 0
+    initial_amount: 0.5 # <1 to ensure that the first robot that attempts to collect on this field can collect the coin in one action
+    collect_amount: 1
+    coin_spawn_r_var: 0
    max_global_amount: 12
    max_local_amount: 1

--- a/marl_factory_grid/configs/custom/MultiAgentConfigs/two_rooms_one_door_modified_eval_config_emergent.yaml
+++ b/marl_factory_grid/configs/custom/MultiAgentConfigs/two_rooms_one_door_modified_eval_config_emergent.yaml
@@ -20,7 +20,7 @@ Agents:
      - DoorUse
      - Noop
    Observations:
-      - DirtPiles
+      - CoinPiles
      - Self
    Positions:
      - (3,1)
@@ -30,17 +30,17 @@ Agents:
      - DoorUse
      - Noop
    Observations:
-      - DirtPiles
+      - CoinPiles
      - Self
    Positions:
      - (3,13)

 Entities:
-  DirtPiles:
+  CoinPiles:
    coords_or_quantity: (3,12), (3,2) # Static form: auxiliary pile, primary pile, auxiliary pile, ...
-    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
-    clean_amount: 1
-    dirt_spawn_r_var: 0
+    initial_amount: 0.5 # <1 to ensure that the first robot that attempts to collect on this field can collect the coin in one action
+    collect_amount: 1
+    coin_spawn_r_var: 0
    max_global_amount: 12
    max_local_amount: 1

--- a/marl_factory_grid/configs/custom/dirt_quadrant_eval_config.yaml
+++ b/marl_factory_grid/configs/custom/dirt_quadrant_eval_config.yaml
@@ -13,15 +13,15 @@ General:
  tests: false

 # In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
-# other agents aim to clean dirt piles.
+# other agents aim to collect coin piles.
 Agents:
-  # The clean agents
+  # The coin-collecting agents
  #Sigmund:
    #Actions:
      #- Move4
      #- Noop
    #Observations:
-      #- DirtPiles
+      #- CoinPiles
      #- Self
    #Positions:
      #- (9,1)
@@ -38,7 +38,7 @@ Agents:
    Actions:
      - Move4
    Observations:
-      - DirtPiles
+      - CoinPiles
      - Self
    Positions:
      - (9,5)
@@ -53,11 +53,11 @@ Agents:
      #- (9,5)

 Entities:
-  DirtPiles:
+  CoinPiles:
    coords_or_quantity: (1, 1), (2,4), (4,7), (7,9), (9,9) #(9,9), (7,9), (4,7), (2,4), (1, 1) #(1, 1), (2,4), (4,7), (7,9), (9,9) # (4,7), (2,4), (1, 1) # (1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
-    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
-    clean_amount: 1
-    dirt_spawn_r_var: 0
+    initial_amount: 0.5 # <1 to ensure that the first robot that attempts to collect on this field can collect the coin in one action
+    collect_amount: 1
+    coin_spawn_r_var: 0
    max_global_amount: 12
    max_local_amount: 1

@@ -72,7 +72,7 @@ Rules:

  # Done Conditions
  # Define the conditions for the environment to stop. Either success or a fail conditions.
-  # The environment stops when all dirt is cleaned
-  DoneOnAllDirtCleaned:
+  # The environment stops when all coins are collected
+  DoneOnAllCoinsCollected:
  #DoneAtMaxStepsReached:
    #max_steps: 200
--- a/marl_factory_grid/configs/custom/dirt_quadrant_train_config.yaml
+++ b/marl_factory_grid/configs/custom/dirt_quadrant_train_config.yaml
@@ -12,15 +12,15 @@ General:
  # Run tests
  tests: false

-# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
-# other agents aim to clean dirt piles.
+# In the "collect and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
+# other agents aim to collect coin piles.
 Agents:
  # The clean agents
  #Sigmund:
    #Actions:
      #- Move4
    #Observations:
-      #- DirtPiles
+      #- CoinPiles
      #- Self
    #Positions:
      #- (9,1)
@@ -39,7 +39,7 @@ Agents:
    Actions:
      - Move4
    Observations:
-      - DirtPiles
+      - CoinPiles
      - Self
    Positions:
      - (9,5)
@@ -57,11 +57,11 @@ Agents:


 Entities:
-  DirtPiles:
+  CoinPiles:
    coords_or_quantity: (1, 1), (2,4), (4,7), (6,8), (7,9), (9,9)  # (4,7), (2,4), (1, 1) #(1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
-    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
-    clean_amount: 1
-    dirt_spawn_r_var: 0
+    initial_amount: 0.5 # <1 to ensure that the first robot that attempts to collect on this field can collect the coin in one action
+    collect_amount: 1
+    coin_spawn_r_var: 0
    max_global_amount: 12
    max_local_amount: 1

@@ -76,8 +76,8 @@ Rules:

  # Done Conditions
  # Define the conditions for the environment to stop. Either success or a fail conditions.
-  # The environment stops when all dirt is cleaned
-  DoneOnAllDirtCleaned:
+  # The environment stops when all coins are collected
+  DoneOnAllCoinsCollected:
  #DoneAtMaxStepsReached: # An episode should last for at most max_steps steps
    #max_steps: 1000

--- a/marl_factory_grid/configs/custom/two_rooms_one_door_modified_eval_config.yaml
+++ b/marl_factory_grid/configs/custom/two_rooms_one_door_modified_eval_config.yaml
@@ -19,7 +19,7 @@ Agents:
      #- Move4
      #- DoorUse
    #Observations:
-      #- DirtPiles
+      #- CoinPiles
      #- Self
    #Positions:
      #- (3,1)
@@ -29,18 +29,18 @@ Agents:
      - Move4
      - DoorUse
    Observations:
-      - DirtPiles
+      - CoinPiles
      - Self
    Positions:
      - (3,13)
      - (2,13)

 Entities:
-  DirtPiles:
+  CoinPiles:
    coords_or_quantity: (2,13), (3,2) # (2,1), (3,12)
-    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
-    clean_amount: 1
-    dirt_spawn_r_var: 0
+    initial_amount: 0.5 # <1 to ensure that the first robot that attempts to collect on this field can collect the coin in one action
+    collect_amount: 1
+    coin_spawn_r_var: 0
    max_global_amount: 12
    max_local_amount: 1

--- a/marl_factory_grid/configs/custom/two_rooms_one_door_modified_train_config.yaml
+++ b/marl_factory_grid/configs/custom/two_rooms_one_door_modified_train_config.yaml
@@ -18,7 +18,7 @@ Agents:
    #Actions:
      #- Move4
    #Observations:
-      #- DirtPiles
+      #- CoinPiles
      #- Self
    #Positions:
      #- (3,1)
@@ -33,7 +33,7 @@ Agents:
    Actions:
      - Move4
    Observations:
-      - DirtPiles
+      - CoinPiles
      - Self
    Positions:
      - (3,13)
@@ -46,11 +46,11 @@ Agents:
      - (4,6)

 Entities:
-  DirtPiles:
+  CoinPiles:
    coords_or_quantity: (2,13), (3,2) # (2,1), (3,12)
-    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
-    clean_amount: 1
-    dirt_spawn_r_var: 0
+    initial_amount: 0.5 # <1 to ensure that the first robot that attempts to collect on this field can collect the coin in one action
+    collect_amount: 1
+    coin_spawn_r_var: 0
    max_global_amount: 12
    max_local_amount: 1

@@ -67,7 +67,7 @@ Rules:
    done_at_collisions: false

  # Done Conditions
-  DoneOnAllDirtCleaned:
+  DoneOnAllCoinsCollected:
  #DoneAtMaxStepsReached:
    #max_steps: 100

--- a/marl_factory_grid/configs/default_config.yaml
+++ b/marl_factory_grid/configs/default_config.yaml
@@ -17,8 +17,8 @@ General:
 # Agents section defines the characteristics of different agents in the environment.

 # An Agent requires a list of actions and observations.
-# Possible actions: Noop, Charge, Clean, DestAction, DoorUse, ItemAction, MachineAction, Move8, Move4, North, NorthEast, ...
-# Possible observations: All, Combined, GlobalPosition, Battery, ChargePods, DirtPiles, Destinations, Doors, Items, Inventory, DropOffLocations, Maintainers, ...
+# Possible actions: Noop, Charge, Clean, Collect, DestAction, DoorUse, ItemAction, MachineAction, Move8, Move4, North, NorthEast, ...
+# Possible observations: All, Combined, GlobalPosition, Battery, ChargePods, CoinPiles, DirtPiles, Destinations, Doors, Items, Inventory, DropOffLocations, Maintainers, ...
 # You can use 'clone' as the agent name to have multiple instances with either a list of names or an int specifying the number of clones.
 Agents:
  Wolfgang:
@@ -26,6 +26,7 @@ Agents:
      - Noop
      - Charge
      - Clean
+      - Collect
      - DestAction
      - DoorUse
      - ItemAction
@@ -37,6 +38,7 @@ Agents:
      - GlobalPosition
      - Battery
      - ChargePods
+      - CoinPiles
      - DirtPiles
      - Destinations
      - Doors
@@ -57,6 +59,15 @@ Entities:
  ChargePods:
    coords_or_quantity: 2

+  # CoinPiles: Entities that can be collected by an agent.
+  CoinPiles:
+    coords_or_quantity: 10
+    initial_amount: 2
+    collect_amount: 1
+    coin_spawn_r_var: 0.1
+    max_global_amount: 20
+    max_local_amount: 5
+
  # Destinations: Entities representing target locations for agents.
  # - spawn_mode: GROUPED or SINGLE. Determines how destinations are spawned.
  Destinations:
@@ -137,6 +148,8 @@ Rules:
  DoneAtDestinationReach:
  # The environment stops when all dirt is cleaned
  DoneOnAllDirtCleaned:
+  # The environment stops when all coins are collected
+  DoneOnAllCoinsCollected:
  # The environment stops when a battery is discharged
  DoneAtBatteryDischarge:
  # The environment stops when a maintainer reports a collision
--- a/marl_factory_grid/configs/dirt_quadrant.yaml
+++ b/marl_factory_grid/configs/dirt_quadrant.yaml
@@ -13,40 +13,40 @@ General:
  tests: false

 # In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
-# other agents aim to clean dirt piles.
+# other agents aim to collect coin piles.
 Agents:
-  # The clean agents
+  # The coin-collecting agents
  Wolfgang:
    Actions:
      - Move4
-      - Clean
+      - Collect
      - Noop
    Observations:
      - Walls
      - Other
-      - DirtPiles
+      - CoinPiles
      - Self
    Positions:
      - (9,1)
  Reiner:
    Actions:
      - Move4
-      - Clean
+      - Collect
      - Noop
    Observations:
      - Walls
      - Other
-      - DirtPiles
+      - CoinPiles
      - Self
    Positions:
      - (9,5)

 Entities:
-  DirtPiles:
+  CoinPiles:
    coords_or_quantity: (1, 1), (2,4), (4,7), (7,9), (9,9)
    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
-    clean_amount: 1
-    dirt_spawn_r_var: 0
+    collect_amount: 1
+    coin_spawn_r_var: 0
    max_global_amount: 12
    max_local_amount: 1

@@ -61,7 +61,7 @@ Rules:

  # Done Conditions
  # Define the conditions for the environment to stop. Either success or a fail conditions.
-  # The environment stops when all dirt is cleaned
-  DoneOnAllDirtCleaned:
+  # The environment stops when all coins are collected
+  DoneOnAllCoinsCollected:
  DoneAtMaxStepsReached:
    max_steps: 200
--- a/marl_factory_grid/environment/factory.py
+++ b/marl_factory_grid/environment/factory.py
@@ -298,6 +298,8 @@ class Factory(gym.Env):
        """ Generalized method to mask entities based on dynamic conditions. """
        for entity in entities:
            if entity.name == 'CoinPiles':
+                # entity.name = 'Destinations'
+                # entity.value = 1
                entity.mask = 'Destinations'
                entity.mask_value = 1
        return entities