mirror of
https://github.com/illiumst/marl-factory-grid.git
synced 2025-07-05 17:11:35 +02:00
Add various RL adapted configs
This commit is contained in:
@ -0,0 +1,28 @@
|
|||||||
|
agent:
|
||||||
|
classname: marl_factory_grid.algorithms.marl.networks.RecurrentAC
|
||||||
|
n_agents: 1
|
||||||
|
obs_emb_size: 96
|
||||||
|
action_emb_size: 16
|
||||||
|
hidden_size_actor: 64
|
||||||
|
hidden_size_critic: 64
|
||||||
|
use_agent_embedding: False
|
||||||
|
env:
|
||||||
|
classname: marl_factory_grid.configs.custom
|
||||||
|
env_name: "custom/dirt_quadrant_random_pos"
|
||||||
|
n_agents: 1
|
||||||
|
max_steps: 250
|
||||||
|
pomdp_r: 2
|
||||||
|
stack_n_frames: 0
|
||||||
|
individual_rewards: True
|
||||||
|
train_render: False
|
||||||
|
eval_render: True
|
||||||
|
method: marl_factory_grid.algorithms.marl.LoopSEAC
|
||||||
|
algorithm:
|
||||||
|
gamma: 0.99
|
||||||
|
entropy_coef: 0.01
|
||||||
|
vf_coef: 0.05
|
||||||
|
n_steps: 0 # How much experience should be sampled at most (n-TD) until the next value and policy update is performed. Default 0: MC
|
||||||
|
max_steps: 80000
|
||||||
|
advantage: "Advantage-AC" # Options: "Advantage-AC", "TD-Advantage-AC", "Reinforce"
|
||||||
|
pile-order: "fixed" # Options: "fixed", "random", "none", "agents"
|
||||||
|
|
@ -0,0 +1,3 @@
|
|||||||
|
marl_factory_grid>environment>rules.py#SpawnEntity.on_reset()
|
||||||
|
marl_factory_grid>environment>rewards.py
|
||||||
|
marl_factory_grid>modules>clean_up>groups.py#DirtPiles.trigger_spawn()
|
@ -7,20 +7,22 @@ agent:
|
|||||||
hidden_size_critic: 64
|
hidden_size_critic: 64
|
||||||
use_agent_embedding: False
|
use_agent_embedding: False
|
||||||
env:
|
env:
|
||||||
classname: marl_factory_grid.configs
|
classname: marl_factory_grid.configs.custom
|
||||||
env_name: "simple_crossing"
|
env_name: "custom/two_rooms_one_door_modified_random_pos"
|
||||||
n_agents: 2
|
n_agents: 2
|
||||||
max_steps: 250
|
max_steps: 250
|
||||||
pomdp_r: 2
|
pomdp_r: 2
|
||||||
stack_n_frames: 0
|
stack_n_frames: 0
|
||||||
individual_rewards: True
|
individual_rewards: True
|
||||||
train_render: True
|
train_render: False
|
||||||
eval_render: True
|
eval_render: True
|
||||||
method: marl_factory_grid.algorithms.marl.LoopSEAC
|
method: marl_factory_grid.algorithms.marl.LoopSEAC
|
||||||
algorithm:
|
algorithm:
|
||||||
gamma: 0.99
|
gamma: 0.99
|
||||||
entropy_coef: 0.01
|
entropy_coef: 0.01
|
||||||
vf_coef: 0.5
|
vf_coef: 0.05
|
||||||
n_steps: 5
|
n_steps: 0 # How much experience should be sampled at most (n-TD) until the next value and policy update is performed. Default 0: MC
|
||||||
max_steps: 10000
|
max_steps: 100000
|
||||||
|
advantage: "TD-Advantage-AC" # Options: "Advantage-AC", "TD-Advantage-AC", "Reinforce"
|
||||||
|
pile-order: "agents" # Options: "fixed", "random", "none", "agents"
|
||||||
|
|
67
marl_factory_grid/configs/custom/dirt_quadrant.yaml
Normal file
67
marl_factory_grid/configs/custom/dirt_quadrant.yaml
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
General:
|
||||||
|
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
|
||||||
|
env_seed: 69
|
||||||
|
# Individual vs global rewards
|
||||||
|
individual_rewards: true
|
||||||
|
# The level.txt file to load from marl_factory_grid/levels
|
||||||
|
level_name: quadrant
|
||||||
|
# Radius of Partially observable Markov decision process
|
||||||
|
pomdp_r: 0 # default 3
|
||||||
|
# Print all messages and events
|
||||||
|
verbose: false
|
||||||
|
# Run tests
|
||||||
|
tests: false
|
||||||
|
|
||||||
|
# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
|
||||||
|
# other agents aim to clean dirt piles.
|
||||||
|
Agents:
|
||||||
|
# The clean agents
|
||||||
|
Wolfgang:
|
||||||
|
Actions:
|
||||||
|
- Move4
|
||||||
|
#- Clean
|
||||||
|
#- Noop
|
||||||
|
Observations:
|
||||||
|
# - Walls
|
||||||
|
# - Other
|
||||||
|
- DirtPiles
|
||||||
|
- Self
|
||||||
|
Positions:
|
||||||
|
- (9,1)
|
||||||
|
#Reiner:
|
||||||
|
#Actions:
|
||||||
|
#- Move4
|
||||||
|
#- Clean
|
||||||
|
#- Noop
|
||||||
|
#Observations:
|
||||||
|
# - Walls
|
||||||
|
# - Other
|
||||||
|
#- DirtPiles
|
||||||
|
#- Self
|
||||||
|
#Positions:
|
||||||
|
#- (9,8) # (9, 4)
|
||||||
|
|
||||||
|
Entities:
|
||||||
|
DirtPiles:
|
||||||
|
coords_or_quantity: (1,1) # (4,7), (2,4), (1, 1) # (1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
|
||||||
|
initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
|
||||||
|
clean_amount: 1
|
||||||
|
dirt_spawn_r_var: 0
|
||||||
|
max_global_amount: 12
|
||||||
|
max_local_amount: 1
|
||||||
|
|
||||||
|
# Rules section specifies the rules governing the dynamics of the environment.
|
||||||
|
Rules:
|
||||||
|
|
||||||
|
# Utilities
|
||||||
|
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
|
||||||
|
# Can be omitted/ignored if you do not want to take care of collisions at all.
|
||||||
|
WatchCollisions:
|
||||||
|
done_at_collisions: false
|
||||||
|
|
||||||
|
# Done Conditions
|
||||||
|
# Define the conditions for the environment to stop: either success or failure conditions.
|
||||||
|
# The environment stops when all dirt is cleaned
|
||||||
|
DoneOnAllDirtCleaned:
|
||||||
|
#DoneAtMaxStepsReached:
|
||||||
|
#max_steps: 200
|
@ -0,0 +1,75 @@
|
|||||||
|
General:
|
||||||
|
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
|
||||||
|
env_seed: 69
|
||||||
|
# Individual vs global rewards
|
||||||
|
individual_rewards: true
|
||||||
|
# The level.txt file to load from marl_factory_grid/levels
|
||||||
|
level_name: quadrant
|
||||||
|
# Radius of Partially observable Markov decision process
|
||||||
|
pomdp_r: 0 # default 3
|
||||||
|
# Print all messages and events
|
||||||
|
verbose: false
|
||||||
|
# Run tests
|
||||||
|
tests: false
|
||||||
|
|
||||||
|
# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
|
||||||
|
# other agents aim to clean dirt piles.
|
||||||
|
Agents:
|
||||||
|
# The clean agents
|
||||||
|
Wolfgang:
|
||||||
|
Actions:
|
||||||
|
- Move4
|
||||||
|
#- Clean
|
||||||
|
#- Noop
|
||||||
|
Observations:
|
||||||
|
# - Walls
|
||||||
|
# - Other
|
||||||
|
- DirtPiles
|
||||||
|
- Self
|
||||||
|
#Positions:
|
||||||
|
#- (9,1)
|
||||||
|
#- (9,2)
|
||||||
|
#- (9,3)
|
||||||
|
#- (9,4)
|
||||||
|
#- (9,5)
|
||||||
|
#- (9,6)
|
||||||
|
#- (9,7)
|
||||||
|
#- (9,8)
|
||||||
|
#- (9,9)
|
||||||
|
#Reiner:
|
||||||
|
#Actions:
|
||||||
|
#- Move4
|
||||||
|
#- Clean
|
||||||
|
#- Noop
|
||||||
|
#Observations:
|
||||||
|
# - Walls
|
||||||
|
# - Other
|
||||||
|
#- DirtPiles
|
||||||
|
#- Self
|
||||||
|
#Positions:
|
||||||
|
#- (9,8) # (9, 4)
|
||||||
|
|
||||||
|
Entities:
|
||||||
|
DirtPiles:
|
||||||
|
coords_or_quantity: (1,1) # (4,7), (2,4), (1, 1) #(1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
|
||||||
|
initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
|
||||||
|
clean_amount: 1
|
||||||
|
dirt_spawn_r_var: 0
|
||||||
|
max_global_amount: 12
|
||||||
|
max_local_amount: 1
|
||||||
|
|
||||||
|
# Rules section specifies the rules governing the dynamics of the environment.
|
||||||
|
Rules:
|
||||||
|
|
||||||
|
# Utilities
|
||||||
|
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
|
||||||
|
# Can be omitted/ignored if you do not want to take care of collisions at all.
|
||||||
|
WatchCollisions:
|
||||||
|
done_at_collisions: false
|
||||||
|
|
||||||
|
# Done Conditions
|
||||||
|
# Define the conditions for the environment to stop: either success or failure conditions.
|
||||||
|
# The environment stops when all dirt is cleaned
|
||||||
|
DoneOnAllDirtCleaned:
|
||||||
|
#DoneAtMaxStepsReached: # An episode should last for at most max_steps steps
|
||||||
|
#max_steps: 1000
|
@ -0,0 +1,72 @@
|
|||||||
|
General:
|
||||||
|
env_seed: 69
|
||||||
|
# Individual vs global rewards
|
||||||
|
individual_rewards: true
|
||||||
|
# The level.txt file to load from marl_factory_grid/levels
|
||||||
|
level_name: two_rooms_modified
|
||||||
|
# View Radius; 0 = full observability
|
||||||
|
pomdp_r: 0
|
||||||
|
# Print all messages and events
|
||||||
|
verbose: false
|
||||||
|
# Run tests
|
||||||
|
tests: false
|
||||||
|
|
||||||
|
# In "two rooms one door" scenario 2 agents spawn in 2 different rooms that are connected by a single door. Their aim
|
||||||
|
# is to reach the destination in the room they didn't spawn in leading to a conflict at the door.
|
||||||
|
Agents:
|
||||||
|
Wolfgang:
|
||||||
|
Actions:
|
||||||
|
- Move8
|
||||||
|
- DoorUse
|
||||||
|
- Noop
|
||||||
|
Observations:
|
||||||
|
- DirtPiles
|
||||||
|
- Self
|
||||||
|
#Positions:
|
||||||
|
#- (1,1)
|
||||||
|
#- (2,1)
|
||||||
|
#- (3,1)
|
||||||
|
#- (4,1)
|
||||||
|
#- (5,1)
|
||||||
|
#- (6,1)
|
||||||
|
Sigmund:
|
||||||
|
Actions:
|
||||||
|
- Move8
|
||||||
|
- DoorUse
|
||||||
|
- Noop
|
||||||
|
Observations:
|
||||||
|
- DirtPiles
|
||||||
|
- Self
|
||||||
|
#Positions:
|
||||||
|
#- (1,13)
|
||||||
|
#- (2,13)
|
||||||
|
#- (3,13)
|
||||||
|
#- (4,13)
|
||||||
|
#- (5,13)
|
||||||
|
#- (6,13)
|
||||||
|
|
||||||
|
Entities:
|
||||||
|
DirtPiles:
|
||||||
|
coords_or_quantity: (3,12), (3,2) # This order is required, because agent 0 needs to reach (3, 12) and agent 1 (3, 2)
|
||||||
|
initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
|
||||||
|
clean_amount: 1
|
||||||
|
dirt_spawn_r_var: 0
|
||||||
|
max_global_amount: 12
|
||||||
|
max_local_amount: 1
|
||||||
|
|
||||||
|
Doors: { }
|
||||||
|
|
||||||
|
Rules:
|
||||||
|
# Environment Dynamics
|
||||||
|
DoorAutoClose:
|
||||||
|
close_frequency: 10
|
||||||
|
|
||||||
|
# Utilities
|
||||||
|
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
|
||||||
|
WatchCollisions:
|
||||||
|
done_at_collisions: false
|
||||||
|
|
||||||
|
# Done Conditions
|
||||||
|
#DoneOnAllDirtCleaned:
|
||||||
|
#DoneAtMaxStepsReached: # Maybe required since door blocking will result in an infinite loop
|
||||||
|
#max_steps: 1000
|
67
marl_factory_grid/configs/dirt_quadrant.yaml
Normal file
67
marl_factory_grid/configs/dirt_quadrant.yaml
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
General:
|
||||||
|
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
|
||||||
|
env_seed: 69
|
||||||
|
# Individual vs global rewards
|
||||||
|
individual_rewards: true
|
||||||
|
# The level.txt file to load from marl_factory_grid/levels
|
||||||
|
level_name: quadrant
|
||||||
|
# Radius of Partially observable Markov decision process
|
||||||
|
pomdp_r: 0 # default 3
|
||||||
|
# Print all messages and events
|
||||||
|
verbose: false
|
||||||
|
# Run tests
|
||||||
|
tests: false
|
||||||
|
|
||||||
|
# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
|
||||||
|
# other agents aim to clean dirt piles.
|
||||||
|
Agents:
|
||||||
|
# The clean agents
|
||||||
|
Wolfgang:
|
||||||
|
Actions:
|
||||||
|
- Move4
|
||||||
|
- Clean
|
||||||
|
- Noop
|
||||||
|
Observations:
|
||||||
|
- Walls
|
||||||
|
- Other
|
||||||
|
- DirtPiles
|
||||||
|
- Self
|
||||||
|
Positions:
|
||||||
|
- (9,1)
|
||||||
|
Reiner:
|
||||||
|
Actions:
|
||||||
|
- Move4
|
||||||
|
- Clean
|
||||||
|
- Noop
|
||||||
|
Observations:
|
||||||
|
- Walls
|
||||||
|
- Other
|
||||||
|
- DirtPiles
|
||||||
|
- Self
|
||||||
|
Positions:
|
||||||
|
- (9,8) # (9, 4)
|
||||||
|
|
||||||
|
Entities:
|
||||||
|
DirtPiles:
|
||||||
|
coords_or_quantity: (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
|
||||||
|
initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
|
||||||
|
clean_amount: 1
|
||||||
|
dirt_spawn_r_var: 0
|
||||||
|
max_global_amount: 12
|
||||||
|
max_local_amount: 1
|
||||||
|
|
||||||
|
# Rules section specifies the rules governing the dynamics of the environment.
|
||||||
|
Rules:
|
||||||
|
|
||||||
|
# Utilities
|
||||||
|
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
|
||||||
|
# Can be omitted/ignored if you do not want to take care of collisions at all.
|
||||||
|
WatchCollisions:
|
||||||
|
done_at_collisions: false
|
||||||
|
|
||||||
|
# Done Conditions
|
||||||
|
# Define the conditions for the environment to stop: either success or failure conditions.
|
||||||
|
# The environment stops when all dirt is cleaned
|
||||||
|
DoneOnAllDirtCleaned:
|
||||||
|
DoneAtMaxStepsReached:
|
||||||
|
max_steps: 200
|
@ -5,7 +5,7 @@ General:
|
|||||||
# The level.txt file to load from marl_factory_grid/levels
|
# The level.txt file to load from marl_factory_grid/levels
|
||||||
level_name: two_rooms_modified
|
level_name: two_rooms_modified
|
||||||
# View Radius; 0 = full observability
|
# View Radius; 0 = full observability
|
||||||
pomdp_r: 3
|
pomdp_r: 0
|
||||||
# Print all messages and events
|
# Print all messages and events
|
||||||
verbose: false
|
verbose: false
|
||||||
# Run tests
|
# Run tests
|
||||||
@ -26,7 +26,7 @@ Agents:
|
|||||||
- Doors
|
- Doors
|
||||||
- Destination
|
- Destination
|
||||||
Positions:
|
Positions:
|
||||||
- (3,1)
|
- (3,1) # Agent spawnpoint
|
||||||
Sigmund:
|
Sigmund:
|
||||||
Actions:
|
Actions:
|
||||||
- Move8
|
- Move8
|
||||||
@ -47,7 +47,7 @@ Entities:
|
|||||||
SpawnDestinationsPerAgent:
|
SpawnDestinationsPerAgent:
|
||||||
coords_or_quantity:
|
coords_or_quantity:
|
||||||
Wolfgang:
|
Wolfgang:
|
||||||
- (3,12)
|
- (3,12) # Target coordinates
|
||||||
Sigmund:
|
Sigmund:
|
||||||
- (3,2)
|
- (3,2)
|
||||||
|
|
||||||
|
11
marl_factory_grid/levels/quadrant.txt
Normal file
11
marl_factory_grid/levels/quadrant.txt
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
###########
|
||||||
|
#---#######
|
||||||
|
#-----#####
|
||||||
|
#------####
|
||||||
|
#-------###
|
||||||
|
#--------##
|
||||||
|
#--------##
|
||||||
|
#---------#
|
||||||
|
#---------#
|
||||||
|
#---------#
|
||||||
|
###########
|
Reference in New Issue
Block a user