From c7c2c4e5a3445e747472bc3a6a715783b12e097d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julian=20Sch=C3=B6nberger?= Date: Thu, 2 May 2024 11:00:35 +0200 Subject: [PATCH] Add various RL adapted configs --- .../marl/configs/dirt_quadrant_config.yaml | 28 +++++++ .../marl/configs/environment_changes | 3 + .../two_rooms_one_door_modified_config.yaml} | 14 ++-- .../configs/custom/dirt_quadrant.yaml | 67 +++++++++++++++++ .../custom/dirt_quadrant_random_pos.yaml | 75 +++++++++++++++++++ ...wo_rooms_one_door_modified_random_pos.yaml | 72 ++++++++++++++++++ marl_factory_grid/configs/dirt_quadrant.yaml | 67 +++++++++++++++++ .../configs/two_rooms_one_door_modified.yaml | 6 +- marl_factory_grid/levels/quadrant.txt | 11 +++ 9 files changed, 334 insertions(+), 9 deletions(-) create mode 100644 marl_factory_grid/algorithms/marl/configs/dirt_quadrant_config.yaml create mode 100644 marl_factory_grid/algorithms/marl/configs/environment_changes rename marl_factory_grid/algorithms/marl/{example_config.yaml => configs/two_rooms_one_door_modified_config.yaml} (50%) create mode 100644 marl_factory_grid/configs/custom/dirt_quadrant.yaml create mode 100644 marl_factory_grid/configs/custom/dirt_quadrant_random_pos.yaml create mode 100644 marl_factory_grid/configs/custom/two_rooms_one_door_modified_random_pos.yaml create mode 100644 marl_factory_grid/configs/dirt_quadrant.yaml create mode 100644 marl_factory_grid/levels/quadrant.txt diff --git a/marl_factory_grid/algorithms/marl/configs/dirt_quadrant_config.yaml b/marl_factory_grid/algorithms/marl/configs/dirt_quadrant_config.yaml new file mode 100644 index 0000000..6668f55 --- /dev/null +++ b/marl_factory_grid/algorithms/marl/configs/dirt_quadrant_config.yaml @@ -0,0 +1,28 @@ +agent: + classname: marl_factory_grid.algorithms.marl.networks.RecurrentAC + n_agents: 1 + obs_emb_size: 96 + action_emb_size: 16 + hidden_size_actor: 64 + hidden_size_critic: 64 + use_agent_embedding: False +env: + classname: marl_factory_grid.configs.custom + 
env_name: "custom/dirt_quadrant_random_pos" + n_agents: 1 + max_steps: 250 + pomdp_r: 2 + stack_n_frames: 0 + individual_rewards: True + train_render: False + eval_render: True +method: marl_factory_grid.algorithms.marl.LoopSEAC +algorithm: + gamma: 0.99 + entropy_coef: 0.01 + vf_coef: 0.05 + n_steps: 0 # How much experience should be sampled at most (n-TD) until the next value and policy update is performed. Default 0: MC + max_steps: 80000 + advantage: "Advantage-AC" # Options: "Advantage-AC", "TD-Advantage-AC", "Reinforce" + pile-order: "fixed" # Options: "fixed", "random", "none", "agents" + diff --git a/marl_factory_grid/algorithms/marl/configs/environment_changes b/marl_factory_grid/algorithms/marl/configs/environment_changes new file mode 100644 index 0000000..4859ec5 --- /dev/null +++ b/marl_factory_grid/algorithms/marl/configs/environment_changes @@ -0,0 +1,3 @@ +marl_factory_grid>environment>rules.py#SpawnEntity.on_reset() +marl_factory_grid>environment>rewards.py +marl_factory_grid>modules>clean_up>groups.py#DirtPiles.trigger_spawn() diff --git a/marl_factory_grid/algorithms/marl/example_config.yaml b/marl_factory_grid/algorithms/marl/configs/two_rooms_one_door_modified_config.yaml similarity index 50% rename from marl_factory_grid/algorithms/marl/example_config.yaml rename to marl_factory_grid/algorithms/marl/configs/two_rooms_one_door_modified_config.yaml index 62782b3..ea115f6 100644 --- a/marl_factory_grid/algorithms/marl/example_config.yaml +++ b/marl_factory_grid/algorithms/marl/configs/two_rooms_one_door_modified_config.yaml @@ -7,20 +7,22 @@ agent: hidden_size_critic: 64 use_agent_embedding: False env: - classname: marl_factory_grid.configs - env_name: "simple_crossing" + classname: marl_factory_grid.configs.custom + env_name: "custom/two_rooms_one_door_modified_random_pos" n_agents: 2 max_steps: 250 pomdp_r: 2 stack_n_frames: 0 individual_rewards: True - train_render: True + train_render: False eval_render: True method: 
marl_factory_grid.algorithms.marl.LoopSEAC algorithm: gamma: 0.99 entropy_coef: 0.01 - vf_coef: 0.5 - n_steps: 5 - max_steps: 10000 + vf_coef: 0.05 + n_steps: 0 # How much experience should be sampled at most (n-TD) until the next value and policy update is performed. Default 0: MC + max_steps: 100000 + advantage: "TD-Advantage-AC" # Options: "Advantage-AC", "TD-Advantage-AC", "Reinforce" + pile-order: "agents" # Options: "fixed", "random", "none", "agents" diff --git a/marl_factory_grid/configs/custom/dirt_quadrant.yaml b/marl_factory_grid/configs/custom/dirt_quadrant.yaml new file mode 100644 index 0000000..49b27ed --- /dev/null +++ b/marl_factory_grid/configs/custom/dirt_quadrant.yaml @@ -0,0 +1,67 @@ +General: + # RNG-seed to sample the same "random" numbers every time, to make the different runs comparable. + env_seed: 69 + # Individual vs global rewards + individual_rewards: true + # The level.txt file to load from marl_factory_grid/levels + level_name: quadrant + # Radius of Partially observable Markov decision process + pomdp_r: 0 # default 3 + # Print all messages and events + verbose: false + # Run tests + tests: false + +# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all +# other agents aim to clean dirt piles. 
+Agents: + # The clean agents + Wolfgang: + Actions: + - Move4 + #- Clean + #- Noop + Observations: + # - Walls + # - Other + - DirtPiles + - Self + Positions: + - (9,1) + #Reiner: + #Actions: + #- Move4 + #- Clean + #- Noop + #Observations: + # - Walls + # - Other + #- DirtPiles + #- Self + #Positions: + #- (9,8) # (9, 4) + +Entities: + DirtPiles: + coords_or_quantity: (1,1) # (4,7), (2,4), (1, 1) # (1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9) + initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action + clean_amount: 1 + dirt_spawn_r_var: 0 + max_global_amount: 12 + max_local_amount: 1 + +# Rules section specifies the rules governing the dynamics of the environment. +Rules: + + # Utilities + # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards. + # Can be omitted/ignored if you do not want to take care of collisions at all. + WatchCollisions: + done_at_collisions: false + + # Done Conditions + # Define the conditions for the environment to stop. Either success or a fail conditions. + # The environment stops when all dirt is cleaned + DoneOnAllDirtCleaned: + #DoneAtMaxStepsReached: + #max_steps: 200 diff --git a/marl_factory_grid/configs/custom/dirt_quadrant_random_pos.yaml b/marl_factory_grid/configs/custom/dirt_quadrant_random_pos.yaml new file mode 100644 index 0000000..b619a7b --- /dev/null +++ b/marl_factory_grid/configs/custom/dirt_quadrant_random_pos.yaml @@ -0,0 +1,75 @@ +General: + # RNG-seed to sample the same "random" numbers every time, to make the different runs comparable. 
+ env_seed: 69 + # Individual vs global rewards + individual_rewards: true + # The level.txt file to load from marl_factory_grid/levels + level_name: quadrant + # Radius of Partially observable Markov decision process + pomdp_r: 0 # default 3 + # Print all messages and events + verbose: false + # Run tests + tests: false + +# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all +# other agents aim to clean dirt piles. +Agents: + # The clean agents + Wolfgang: + Actions: + - Move4 + #- Clean + #- Noop + Observations: + # - Walls + # - Other + - DirtPiles + - Self + #Positions: + #- (9,1) + #- (9,2) + #- (9,3) + #- (9,4) + #- (9,5) + #- (9,6) + #- (9,7) + #- (9,8) + #- (9,9) + #Reiner: + #Actions: + #- Move4 + #- Clean + #- Noop + #Observations: + # - Walls + # - Other + #- DirtPiles + #- Self + #Positions: + #- (9,8) # (9, 4) + +Entities: + DirtPiles: + coords_or_quantity: (1,1) # (4,7), (2,4), (1, 1) #(1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9) + initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action + clean_amount: 1 + dirt_spawn_r_var: 0 + max_global_amount: 12 + max_local_amount: 1 + +# Rules section specifies the rules governing the dynamics of the environment. +Rules: + + # Utilities + # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards. + # Can be omitted/ignored if you do not want to take care of collisions at all. + WatchCollisions: + done_at_collisions: false + + # Done Conditions + # Define the conditions for the environment to stop. Either success or a fail conditions. 
+ # The environment stops when all dirt is cleaned + DoneOnAllDirtCleaned: + #DoneAtMaxStepsReached: # An episode should last for at most max_steps steps + #max_steps: 1000 diff --git a/marl_factory_grid/configs/custom/two_rooms_one_door_modified_random_pos.yaml b/marl_factory_grid/configs/custom/two_rooms_one_door_modified_random_pos.yaml new file mode 100644 index 0000000..6202302 --- /dev/null +++ b/marl_factory_grid/configs/custom/two_rooms_one_door_modified_random_pos.yaml @@ -0,0 +1,72 @@ +General: + env_seed: 69 + # Individual vs global rewards + individual_rewards: true + # The level.txt file to load from marl_factory_grid/levels + level_name: two_rooms_modified + # View Radius; 0 = full observability + pomdp_r: 0 + # Print all messages and events + verbose: false + # Run tests + tests: false + +# In "two rooms one door" scenario 2 agents spawn in 2 different rooms that are connected by a single door. Their aim +# is to reach the destination in the room they didn't spawn in leading to a conflict at the door. +Agents: + Wolfgang: + Actions: + - Move8 + - DoorUse + - Noop + Observations: + - DirtPiles + - Self + #Positions: + #- (1,1) + #- (2,1) + #- (3,1) + #- (4,1) + #- (5,1) + #- (6,1) + Sigmund: + Actions: + - Move8 + - DoorUse + - Noop + Observations: + - DirtPiles + - Self + #Positions: + #- (1,13) + #- (2,13) + #- (3,13) + #- (4,13) + #- (5,13) + #- (6,13) + +Entities: + DirtPiles: + coords_or_quantity: (3,12), (3,2) # This order is required, because agent 0 needs to reach (3, 12) and agent 1 (3, 2) + initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action + clean_amount: 1 + dirt_spawn_r_var: 0 + max_global_amount: 12 + max_local_amount: 1 + + Doors: { } + +Rules: + # Environment Dynamics + DoorAutoClose: + close_frequency: 10 + + # Utilities + # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards. 
+ WatchCollisions: + done_at_collisions: false + + # Done Conditions + #DoneOnAllDirtCleaned: + #DoneAtMaxStepsReached: # Maybe required since door blocking will result in an infinite loop + #max_steps: 1000 diff --git a/marl_factory_grid/configs/dirt_quadrant.yaml b/marl_factory_grid/configs/dirt_quadrant.yaml new file mode 100644 index 0000000..bab83ca --- /dev/null +++ b/marl_factory_grid/configs/dirt_quadrant.yaml @@ -0,0 +1,67 @@ +General: + # RNG-seed to sample the same "random" numbers every time, to make the different runs comparable. + env_seed: 69 + # Individual vs global rewards + individual_rewards: true + # The level.txt file to load from marl_factory_grid/levels + level_name: quadrant + # Radius of Partially observable Markov decision process + pomdp_r: 0 # default 3 + # Print all messages and events + verbose: false + # Run tests + tests: false + +# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all +# other agents aim to clean dirt piles. +Agents: + # The clean agents + Wolfgang: + Actions: + - Move4 + - Clean + - Noop + Observations: + - Walls + - Other + - DirtPiles + - Self + Positions: + - (9,1) + Reiner: + Actions: + - Move4 + - Clean + - Noop + Observations: + - Walls + - Other + - DirtPiles + - Self + Positions: + - (9,8) # (9, 4) + +Entities: + DirtPiles: + coords_or_quantity: (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9) + initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action + clean_amount: 1 + dirt_spawn_r_var: 0 + max_global_amount: 12 + max_local_amount: 1 + +# Rules section specifies the rules governing the dynamics of the environment. +Rules: + + # Utilities + # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards. + # Can be omitted/ignored if you do not want to take care of collisions at all. 
+ WatchCollisions: + done_at_collisions: false + + # Done Conditions + # Define the conditions for the environment to stop. Either success or a fail conditions. + # The environment stops when all dirt is cleaned + DoneOnAllDirtCleaned: + DoneAtMaxStepsReached: + max_steps: 200 diff --git a/marl_factory_grid/configs/two_rooms_one_door_modified.yaml b/marl_factory_grid/configs/two_rooms_one_door_modified.yaml index c6132d6..215e250 100644 --- a/marl_factory_grid/configs/two_rooms_one_door_modified.yaml +++ b/marl_factory_grid/configs/two_rooms_one_door_modified.yaml @@ -5,7 +5,7 @@ General: # The level.txt file to load from marl_factory_grid/levels level_name: two_rooms_modified # View Radius; 0 = full observatbility - pomdp_r: 3 + pomdp_r: 0 # Print all messages and events verbose: false # Run tests @@ -26,7 +26,7 @@ Agents: - Doors - Destination Positions: - - (3,1) + - (3,1) # Agent spawnpoint Sigmund: Actions: - Move8 @@ -47,7 +47,7 @@ Entities: SpawnDestinationsPerAgent: coords_or_quantity: Wolfgang: - - (3,12) + - (3,12) # Target coordinates Sigmund: - (3,2) diff --git a/marl_factory_grid/levels/quadrant.txt b/marl_factory_grid/levels/quadrant.txt new file mode 100644 index 0000000..4f09ee5 --- /dev/null +++ b/marl_factory_grid/levels/quadrant.txt @@ -0,0 +1,11 @@ +########### +#---####### +#-----##### +#------#### +#-------### +#--------## +#--------## +#---------# +#---------# +#---------# +########### \ No newline at end of file