diff --git a/README_submission.md b/README_submission.md index 12e6513..3e246ab 100644 --- a/README_submission.md +++ b/README_submission.md @@ -58,14 +58,12 @@ Existing modules include a variety of functionalities within the environment: - [Agents](marl_factory_grid/algorithms) implement either static strategies or learning algorithms based on the specific configuration. -- Their action set includes opening [door entities](marl_factory_grid/modules/doors/entitites.py), collecting [coins](marl_factory_grid/modules/coins/coin cleaning +- Their action set includes opening [door entities](marl_factory_grid/modules/doors/entitites.py), collecting [coins](marl_factory_grid/modules/coins/entitites.py), cleaning [dirt](marl_factory_grid/modules/clean_up/entitites.py), picking up [items](marl_factory_grid/modules/items/entitites.py) and delivering them to designated drop-off locations. - Agents are equipped with a [battery](marl_factory_grid/modules/batteries/entitites.py) that gradually depletes over time if not charged at a chargepod. -- The [maintainer](marl_factory_grid/modules/maintenance/entities.py) aims to - repair [machines](marl_factory_grid/modules/machines/entitites.py) that lose health over time. ## Limitations diff --git a/marl_factory_grid/configs/custom/MultiAgentConfigs/dirt_quadrant_eval_config.yaml b/marl_factory_grid/configs/custom/MultiAgentConfigs/dirt_quadrant_eval_config.yaml index 9e4b25d..f8e42d3 100644 --- a/marl_factory_grid/configs/custom/MultiAgentConfigs/dirt_quadrant_eval_config.yaml +++ b/marl_factory_grid/configs/custom/MultiAgentConfigs/dirt_quadrant_eval_config.yaml @@ -15,13 +15,13 @@ General: # In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all # other agents aim to clean dirt piles. 
Agents: - # The clean agents + # The coin-collecting agents Sigmund: Actions: - Move4 - Noop Observations: - - DirtPiles + - CoinPiles - Self Positions: - (9,1) @@ -30,17 +30,17 @@ Agents: - Move4 - Noop Observations: - - DirtPiles + - CoinPiles - Self Positions: - (9,5) Entities: - DirtPiles: + CoinPiles: coords_or_quantity: (9,9), (7,9), (4,7), (2,4), (1, 1) # (4,7), (2,4), (1, 1) # (1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9) - initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action - clean_amount: 1 - dirt_spawn_r_var: 0 + initial_amount: 0.5 # <1 to ensure that the robot which first attempts to collect this field, can collect the coin in one action + collect_amount: 1 + coin_spawn_r_var: 0 max_global_amount: 12 max_local_amount: 1 @@ -55,7 +55,7 @@ Rules: # Done Conditions # Define the conditions for the environment to stop. Either success or a fail conditions. - # The environment stops when all dirt is cleaned - DoneOnAllDirtCleaned: + # The environment stops when all coins are collected + DoneOnAllCoinsCollected: #DoneAtMaxStepsReached: #max_steps: 200 diff --git a/marl_factory_grid/configs/custom/MultiAgentConfigs/dirt_quadrant_train_config.yaml b/marl_factory_grid/configs/custom/MultiAgentConfigs/dirt_quadrant_train_config.yaml index 7c5d4dc..7c3e85e 100644 --- a/marl_factory_grid/configs/custom/MultiAgentConfigs/dirt_quadrant_train_config.yaml +++ b/marl_factory_grid/configs/custom/MultiAgentConfigs/dirt_quadrant_train_config.yaml @@ -12,17 +12,17 @@ General: # Run tests tests: false -# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all -# other agents aim to clean dirt piles. +# In the "collect and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all +# other agents aim to collect coin piles. 
Agents: - # The clean agents + # The collect coin agents Sigmund: Actions: - Move4 - #- Clean + #- Collect #- Noop Observations: - - DirtPiles + - CoinPiles - Self Positions: - (9,1) @@ -34,10 +34,10 @@ Agents: Wolfgang: Actions: - Move4 - #- Clean + #- Collect #- Noop Observations: - - DirtPiles + - CoinPiles - Self Positions: - (9,5) @@ -48,11 +48,11 @@ Agents: - (9,9) Entities: - DirtPiles: + CoinPiles: coords_or_quantity: (9,9), (1,1), (4,5) # (4,7), (2,4), (1, 1) #(1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9) - initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action - clean_amount: 1 - dirt_spawn_r_var: 0 + initial_amount: 0.5 # <1 to ensure that the robot which first attempts to collect this field, can collect the coin in one action + collect_amount: 1 + coin_spawn_r_var: 0 max_global_amount: 12 max_local_amount: 1 @@ -67,7 +67,7 @@ Rules: # Done Conditions # Define the conditions for the environment to stop. Either success or a fail conditions. 
- # The environment stops when all dirt is cleaned - DoneOnAllDirtCleaned: + # The environment stops when all coins are collected + DoneOnAllCoinsCollected: #DoneAtMaxStepsReached: # An episode should last for at most max_steps steps #max_steps: 100 diff --git a/marl_factory_grid/configs/custom/MultiAgentConfigs/two_rooms_one_door_modified_eval_config.yaml b/marl_factory_grid/configs/custom/MultiAgentConfigs/two_rooms_one_door_modified_eval_config.yaml index bff7c60..cd8992b 100644 --- a/marl_factory_grid/configs/custom/MultiAgentConfigs/two_rooms_one_door_modified_eval_config.yaml +++ b/marl_factory_grid/configs/custom/MultiAgentConfigs/two_rooms_one_door_modified_eval_config.yaml @@ -20,7 +20,7 @@ Agents: - DoorUse - Noop Observations: - - DirtPiles + - CoinPiles - Self Positions: - (3,1) @@ -30,17 +30,17 @@ Agents: - DoorUse - Noop Observations: - - DirtPiles + - CoinPiles - Self Positions: - (3,13) Entities: - DirtPiles: + CoinPiles: coords_or_quantity: (2,1), (3,12), (2,13), (3,2) # Static form: auxiliary pile, primary pile, auxiliary pile, ... 
- initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action - clean_amount: 1 - dirt_spawn_r_var: 0 + initial_amount: 0.5 # <1 to ensure that the robot which first attempts to collect this field, can collect the coin in one action + collect_amount: 1 + coin_spawn_r_var: 0 max_global_amount: 12 max_local_amount: 1 diff --git a/marl_factory_grid/configs/custom/MultiAgentConfigs/two_rooms_one_door_modified_eval_config_emergent.yaml b/marl_factory_grid/configs/custom/MultiAgentConfigs/two_rooms_one_door_modified_eval_config_emergent.yaml index bf5a060..65e0df8 100644 --- a/marl_factory_grid/configs/custom/MultiAgentConfigs/two_rooms_one_door_modified_eval_config_emergent.yaml +++ b/marl_factory_grid/configs/custom/MultiAgentConfigs/two_rooms_one_door_modified_eval_config_emergent.yaml @@ -20,7 +20,7 @@ Agents: - DoorUse - Noop Observations: - - DirtPiles + - CoinPiles - Self Positions: - (3,1) @@ -30,17 +30,17 @@ Agents: - DoorUse - Noop Observations: - - DirtPiles + - CoinPiles - Self Positions: - (3,13) Entities: - DirtPiles: + CoinPiles: coords_or_quantity: (3,12), (3,2) # Static form: auxiliary pile, primary pile, auxiliary pile, ... 
- initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action - clean_amount: 1 - dirt_spawn_r_var: 0 + initial_amount: 0.5 # <1 to ensure that the robot which first attempts to collect this field, can collect the coin in one action + collect_amount: 1 + coin_spawn_r_var: 0 max_global_amount: 12 max_local_amount: 1 diff --git a/marl_factory_grid/configs/custom/dirt_quadrant_eval_config.yaml b/marl_factory_grid/configs/custom/dirt_quadrant_eval_config.yaml index 2ef0874..b652619 100644 --- a/marl_factory_grid/configs/custom/dirt_quadrant_eval_config.yaml +++ b/marl_factory_grid/configs/custom/dirt_quadrant_eval_config.yaml @@ -13,15 +13,15 @@ General: tests: false # In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all -# other agents aim to clean dirt piles. +# other agents aim to collect coin piles. Agents: - # The clean agents + # The collect coin agents #Sigmund: #Actions: #- Move4 #- Noop #Observations: - #- DirtPiles + #- CoinPiles #- Self #Positions: #- (9,1) @@ -38,7 +38,7 @@ Agents: Actions: - Move4 Observations: - - DirtPiles + - CoinPiles - Self Positions: - (9,5) @@ -53,11 +53,11 @@ Agents: #- (9,5) Entities: - DirtPiles: + CoinPiles: coords_or_quantity: (1, 1), (2,4), (4,7), (7,9), (9,9) #(9,9), (7,9), (4,7), (2,4), (1, 1) #(1, 1), (2,4), (4,7), (7,9), (9,9) # (4,7), (2,4), (1, 1) # (1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9) - initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action - clean_amount: 1 - dirt_spawn_r_var: 0 + initial_amount: 0.5 # <1 to ensure that the robot which first attempts to collect this field, can collect the coin in one action + collect_amount: 1 + coin_spawn_r_var: 0 max_global_amount: 12 max_local_amount: 1 @@ -72,7 +72,7 @@ Rules: # Done Conditions # Define 
the conditions for the environment to stop. Either success or a fail conditions. - # The environment stops when all dirt is cleaned - DoneOnAllDirtCleaned: + # The environment stops when all coins are collected + DoneOnAllCoinsCollected: #DoneAtMaxStepsReached: #max_steps: 200 diff --git a/marl_factory_grid/configs/custom/dirt_quadrant_train_config.yaml b/marl_factory_grid/configs/custom/dirt_quadrant_train_config.yaml index 7fcdbdf..154b520 100644 --- a/marl_factory_grid/configs/custom/dirt_quadrant_train_config.yaml +++ b/marl_factory_grid/configs/custom/dirt_quadrant_train_config.yaml @@ -12,15 +12,15 @@ General: # Run tests tests: false -# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all -# other agents aim to clean dirt piles. +# In the "collect and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all +# other agents aim to collect coin piles. Agents: # The clean agents #Sigmund: #Actions: #- Move4 #Observations: - #- DirtPiles + #- CoinPiles #- Self #Positions: #- (9,1) @@ -39,7 +39,7 @@ Agents: Actions: - Move4 Observations: - - DirtPiles + - CoinPiles - Self Positions: - (9,5) @@ -57,11 +57,11 @@ Agents: Entities: - DirtPiles: + CoinPiles: coords_or_quantity: (1, 1), (2,4), (4,7), (6,8), (7,9), (9,9) # (4,7), (2,4), (1, 1) #(1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9) - initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action - clean_amount: 1 - dirt_spawn_r_var: 0 + initial_amount: 0.5 # <1 to ensure that the robot which first attempts to collect this field, can collect the coin in one action + collect_amount: 1 + coin_spawn_r_var: 0 max_global_amount: 12 max_local_amount: 1 @@ -76,8 +76,8 @@ Rules: # Done Conditions # Define the conditions for the environment to stop. 
Either success or a fail conditions. - # The environment stops when all dirt is cleaned - DoneOnAllDirtCleaned: + # The environment stops when all coins are collected + DoneOnAllCoinsCollected: #DoneAtMaxStepsReached: # An episode should last for at most max_steps steps #max_steps: 1000 diff --git a/marl_factory_grid/configs/custom/two_rooms_one_door_modified_eval_config.yaml b/marl_factory_grid/configs/custom/two_rooms_one_door_modified_eval_config.yaml index 9417ea4..383ac8b 100644 --- a/marl_factory_grid/configs/custom/two_rooms_one_door_modified_eval_config.yaml +++ b/marl_factory_grid/configs/custom/two_rooms_one_door_modified_eval_config.yaml @@ -19,7 +19,7 @@ Agents: #- Move4 #- DoorUse #Observations: - #- DirtPiles + #- CoinPiles #- Self #Positions: #- (3,1) @@ -29,18 +29,18 @@ Agents: - Move4 - DoorUse Observations: - - DirtPiles + - CoinPiles - Self Positions: - (3,13) - (2,13) Entities: - DirtPiles: + CoinPiles: coords_or_quantity: (2,13), (3,2) # (2,1), (3,12) - initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action - clean_amount: 1 - dirt_spawn_r_var: 0 + initial_amount: 0.5 # <1 to ensure that the robot which first attempts to collect this field, can collect the coin in one action + collect_amount: 1 + coin_spawn_r_var: 0 max_global_amount: 12 max_local_amount: 1 diff --git a/marl_factory_grid/configs/custom/two_rooms_one_door_modified_train_config.yaml b/marl_factory_grid/configs/custom/two_rooms_one_door_modified_train_config.yaml index d64aec5..88517fb 100644 --- a/marl_factory_grid/configs/custom/two_rooms_one_door_modified_train_config.yaml +++ b/marl_factory_grid/configs/custom/two_rooms_one_door_modified_train_config.yaml @@ -18,7 +18,7 @@ Agents: #Actions: #- Move4 #Observations: - #- DirtPiles + #- CoinPiles #- Self #Positions: #- (3,1) @@ -33,7 +33,7 @@ Agents: Actions: - Move4 Observations: - - DirtPiles + - CoinPiles - Self Positions: - (3,13) @@ -46,11 +46,11 @@ 
Agents: - (4,6) Entities: - DirtPiles: + CoinPiles: coords_or_quantity: (2,13), (3,2) # (2,1), (3,12) - initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action - clean_amount: 1 - dirt_spawn_r_var: 0 + initial_amount: 0.5 # <1 to ensure that the robot which first attempts to collect this field, can collect the coin in one action + collect_amount: 1 + coin_spawn_r_var: 0 max_global_amount: 12 max_local_amount: 1 @@ -67,7 +67,7 @@ Rules: done_at_collisions: false # Done Conditions - DoneOnAllDirtCleaned: + DoneOnAllCoinsCollected: #DoneAtMaxStepsReached: #max_steps: 100 diff --git a/marl_factory_grid/configs/default_config.yaml b/marl_factory_grid/configs/default_config.yaml index a6a53d9..71295cd 100644 --- a/marl_factory_grid/configs/default_config.yaml +++ b/marl_factory_grid/configs/default_config.yaml @@ -17,8 +17,8 @@ General: # Agents section defines the characteristics of different agents in the environment. # An Agent requires a list of actions and observations. -# Possible actions: Noop, Charge, Clean, DestAction, DoorUse, ItemAction, MachineAction, Move8, Move4, North, NorthEast, ... -# Possible observations: All, Combined, GlobalPosition, Battery, ChargePods, DirtPiles, Destinations, Doors, Items, Inventory, DropOffLocations, Maintainers, ... +# Possible actions: Noop, Charge, Clean, Collect, DestAction, DoorUse, ItemAction, MachineAction, Move8, Move4, North, NorthEast, ... +# Possible observations: All, Combined, GlobalPosition, Battery, ChargePods, CoinPiles, DirtPiles, Destinations, Doors, Items, Inventory, DropOffLocations, Maintainers, ... # You can use 'clone' as the agent name to have multiple instances with either a list of names or an int specifying the number of clones. 
Agents: Wolfgang: @@ -26,6 +26,7 @@ Agents: - Noop - Charge - Clean + - Collect - DestAction - DoorUse - ItemAction @@ -37,6 +38,7 @@ Agents: - GlobalPosition - Battery - ChargePods + - CoinPiles - DirtPiles - Destinations - Doors @@ -57,6 +59,15 @@ Entities: ChargePods: coords_or_quantity: 2 + # CoinPiles: Entities that can be collected by an agent. + CoinPiles: + coords_or_quantity: 10 + initial_amount: 2 + collect_amount: 1 + coin_spawn_r_var: 0.1 + max_global_amount: 20 + max_local_amount: 5 + # Destinations: Entities representing target locations for agents. # - spawn_mode: GROUPED or SINGLE. Determines how destinations are spawned. Destinations: @@ -137,6 +148,8 @@ Rules: DoneAtDestinationReach: # The environment stops when all dirt is cleaned DoneOnAllDirtCleaned: + # The environment stops when all coins are collected + DoneOnAllCoinsCollected: # The environment stops when a battery is discharged DoneAtBatteryDischarge: # The environment stops when a maintainer reports a collision diff --git a/marl_factory_grid/configs/dirt_quadrant.yaml b/marl_factory_grid/configs/dirt_quadrant.yaml index fa29bbd..1887f29 100644 --- a/marl_factory_grid/configs/dirt_quadrant.yaml +++ b/marl_factory_grid/configs/dirt_quadrant.yaml @@ -13,40 +13,40 @@ General: tests: false # In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all -# other agents aim to clean dirt piles. +# other agents aim to collect coin piles. 
Agents: - # The clean agents + # The collect coin agents Wolfgang: Actions: - Move4 - - Clean + - Collect - Noop Observations: - Walls - Other - - DirtPiles + - CoinPiles - Self Positions: - (9,1) Reiner: Actions: - Move4 - - Clean + - Collect - Noop Observations: - Walls - Other - - DirtPiles + - CoinPiles - Self Positions: - (9,5) Entities: - DirtPiles: + CoinPiles: coords_or_quantity: (1, 1), (2,4), (4,7), (7,9), (9,9) initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action - clean_amount: 1 - dirt_spawn_r_var: 0 + collect_amount: 1 + coin_spawn_r_var: 0 max_global_amount: 12 max_local_amount: 1 @@ -61,7 +61,7 @@ Rules: # Done Conditions # Define the conditions for the environment to stop. Either success or a fail conditions. - # The environment stops when all dirt is cleaned - DoneOnAllDirtCleaned: + # The environment stops when all coins are collected + DoneOnAllCoinsCollected: DoneAtMaxStepsReached: max_steps: 200 diff --git a/marl_factory_grid/environment/factory.py b/marl_factory_grid/environment/factory.py index 0b239b4..c7f1af1 100644 --- a/marl_factory_grid/environment/factory.py +++ b/marl_factory_grid/environment/factory.py @@ -298,6 +298,8 @@ class Factory(gym.Env): """ Generalized method to mask entities based on dynamic conditions. """ for entity in entities: if entity.name == 'CoinPiles': + # entity.name = 'Destinations' + # entity.value = 1 entity.mask = 'Destinations' entity.mask_value = 1 return entities