Add various RL adapted configs

This commit is contained in:
Julian Schönberger
2024-05-02 11:00:35 +02:00
parent 48d708bbcd
commit c7c2c4e5a3
9 changed files with 334 additions and 9 deletions

View File

@ -0,0 +1,67 @@
General:
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
env_seed: 69
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: quadrant
# View radius of the partially observable Markov decision process (POMDP); 0 = full observability
pomdp_r: 0 # default 3
# Print all messages and events
verbose: false
# Run tests
tests: false
# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
# other agents aim to clean dirt piles.
Agents:
# The clean agents
Wolfgang:
Actions:
- Move4
#- Clean
#- Noop
Observations:
# - Walls
# - Other
- DirtPiles
- Self
Positions:
- (9,1)
#Reiner:
#Actions:
#- Move4
#- Clean
#- Noop
#Observations:
# - Walls
# - Other
#- DirtPiles
#- Self
#Positions:
#- (9,8) # (9, 4)
Entities:
DirtPiles:
coords_or_quantity: (1,1) # (4,7), (2,4), (1, 1) # (1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
clean_amount: 1
dirt_spawn_r_var: 0
max_global_amount: 12
max_local_amount: 1
# Rules section specifies the rules governing the dynamics of the environment.
Rules:
# Utilities
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
# Can be omitted/ignored if you do not want to take care of collisions at all.
WatchCollisions:
done_at_collisions: false
# Done Conditions
# Define the conditions for the environment to stop: either success or failure conditions.
# The environment stops when all dirt is cleaned
DoneOnAllDirtCleaned:
#DoneAtMaxStepsReached:
#max_steps: 200

View File

@ -0,0 +1,75 @@
General:
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
env_seed: 69
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: quadrant
# View radius of the partially observable Markov decision process (POMDP); 0 = full observability
pomdp_r: 0 # default 3
# Print all messages and events
verbose: false
# Run tests
tests: false
# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
# other agents aim to clean dirt piles.
Agents:
# The clean agents
Wolfgang:
Actions:
- Move4
#- Clean
#- Noop
Observations:
# - Walls
# - Other
- DirtPiles
- Self
#Positions:
#- (9,1)
#- (9,2)
#- (9,3)
#- (9,4)
#- (9,5)
#- (9,6)
#- (9,7)
#- (9,8)
#- (9,9)
#Reiner:
#Actions:
#- Move4
#- Clean
#- Noop
#Observations:
# - Walls
# - Other
#- DirtPiles
#- Self
#Positions:
#- (9,8) # (9, 4)
Entities:
DirtPiles:
coords_or_quantity: (1,1) # (4,7), (2,4), (1, 1) #(1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
clean_amount: 1
dirt_spawn_r_var: 0
max_global_amount: 12
max_local_amount: 1
# Rules section specifies the rules governing the dynamics of the environment.
Rules:
# Utilities
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
# Can be omitted/ignored if you do not want to take care of collisions at all.
WatchCollisions:
done_at_collisions: false
# Done Conditions
# Define the conditions for the environment to stop: either success or failure conditions.
# The environment stops when all dirt is cleaned
DoneOnAllDirtCleaned:
#DoneAtMaxStepsReached: # An episode should last for at most max_steps steps
#max_steps: 1000

View File

@ -0,0 +1,72 @@
General:
env_seed: 69
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: two_rooms_modified
# View Radius; 0 = full observability
pomdp_r: 0
# Print all messages and events
verbose: false
# Run tests
tests: false
# In "two rooms one door" scenario 2 agents spawn in 2 different rooms that are connected by a single door. Their aim
# is to reach the destination in the room they didn't spawn in leading to a conflict at the door.
Agents:
Wolfgang:
Actions:
- Move8
- DoorUse
- Noop
Observations:
- DirtPiles
- Self
#Positions:
#- (1,1)
#- (2,1)
#- (3,1)
#- (4,1)
#- (5,1)
#- (6,1)
Sigmund:
Actions:
- Move8
- DoorUse
- Noop
Observations:
- DirtPiles
- Self
#Positions:
#- (1,13)
#- (2,13)
#- (3,13)
#- (4,13)
#- (5,13)
#- (6,13)
Entities:
DirtPiles:
coords_or_quantity: (3,12), (3,2) # This order is required, because agent 0 needs to reach (3, 12) and agent 1 (3, 2)
initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
clean_amount: 1
dirt_spawn_r_var: 0
max_global_amount: 12
max_local_amount: 1
Doors: { }
Rules:
# Environment Dynamics
DoorAutoClose:
close_frequency: 10
# Utilities
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
WatchCollisions:
done_at_collisions: false
# Done Conditions
#DoneOnAllDirtCleaned:
#DoneAtMaxStepsReached: # Maybe required, since door blocking will result in an infinite loop
#max_steps: 1000

View File

@ -0,0 +1,67 @@
General:
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
env_seed: 69
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: quadrant
# View radius of the partially observable Markov decision process (POMDP); 0 = full observability
pomdp_r: 0 # default 3
# Print all messages and events
verbose: false
# Run tests
tests: false
# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
# other agents aim to clean dirt piles.
Agents:
# The clean agents
Wolfgang:
Actions:
- Move4
- Clean
- Noop
Observations:
- Walls
- Other
- DirtPiles
- Self
Positions:
- (9,1)
Reiner:
Actions:
- Move4
- Clean
- Noop
Observations:
- Walls
- Other
- DirtPiles
- Self
Positions:
- (9,8) # (9, 4)
Entities:
DirtPiles:
coords_or_quantity: (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
clean_amount: 1
dirt_spawn_r_var: 0
max_global_amount: 12
max_local_amount: 1
# Rules section specifies the rules governing the dynamics of the environment.
Rules:
# Utilities
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
# Can be omitted/ignored if you do not want to take care of collisions at all.
WatchCollisions:
done_at_collisions: false
# Done Conditions
# Define the conditions for the environment to stop: either success or failure conditions.
# The environment stops when all dirt is cleaned
DoneOnAllDirtCleaned:
DoneAtMaxStepsReached:
max_steps: 200

View File

@ -5,7 +5,7 @@ General:
# The level.txt file to load from marl_factory_grid/levels
level_name: two_rooms_modified
# View Radius; 0 = full observability
pomdp_r: 3
pomdp_r: 0
# Print all messages and events
verbose: false
# Run tests
@ -26,7 +26,7 @@ Agents:
- Doors
- Destination
Positions:
- (3,1)
- (3,1) # Agent spawnpoint
Sigmund:
Actions:
- Move8
@ -47,7 +47,7 @@ Entities:
SpawnDestinationsPerAgent:
coords_or_quantity:
Wolfgang:
- (3,12)
- (3,12) # Target coordinates
Sigmund:
- (3,2)