Mirror of https://github.com/illiumst/marl-factory-grid.git, synced 2025-07-08 02:21:36 +02:00
Added commentary to configs
@@ -1,11 +1,11 @@
 env:
   classname: marl_factory_grid.environment.configs.marl_eval
   env_name: "marl_eval/dirt_quadrant_eval_config"
-  n_agents: 2
-  eval_render: True
-  save_and_log: False
+  n_agents: 2 # Number of agents in the environment
+  eval_render: True # If inference should be graphically visualized
+  save_and_log: False # If configurations and potential logging files should be saved
 algorithm:
-  pile-order: "smart" # Use "dynamic" to see emergent phenomenon and "smart" to prevent it
-  pile-observability: "single" # Options: "single", "all"
-  pile_all_done: "shared" # Options: "single", "all" ("single" for training, "all" for eval), "shared"
+  pile-order: "smart" # Triggers our emergence prevention mechanism: agents also consider the distance to the other agent
+  pile-observability: "single" # Agents can only perceive one dirt pile at any given time step
+  pile_all_done: "shared" # Indicates that agents don't have to collect the same dirt piles
+  auxiliary_piles: False # Dirt quadrant does not use this option
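A minimal Python sketch of what the two pile-order strategies described above (and contrasted in the next config) might look like; the helper names, coordinate tuples and tie-breaking rule are illustrative assumptions, not the actual marl_factory_grid implementation.

# Illustrative sketch only: names and data structures are assumed for this note.
from typing import List, Tuple

Coord = Tuple[int, int]

def manhattan(a: Coord, b: Coord) -> int:
    return abs(a[0] - b[0]) + abs(a[1] - b[1])

def next_pile_dynamic(agent_pos: Coord, piles: List[Coord]) -> Coord:
    # "dynamic": each agent greedily targets its own nearest pile,
    # which can make both agents chase the same pile.
    return min(piles, key=lambda p: manhattan(agent_pos, p))

def next_pile_smart(agent_pos: Coord, other_pos: Coord, piles: List[Coord]) -> Coord:
    # "smart": also consider the other agent's distance and prefer piles
    # that this agent can reach sooner than its teammate.
    def score(p: Coord) -> Tuple[int, int]:
        own, other = manhattan(agent_pos, p), manhattan(other_pos, p)
        return (0 if own <= other else 1, own)  # piles we are closer to come first
    return min(piles, key=score)

if __name__ == "__main__":
    piles = [(1, 1), (2, 4), (9, 1)]
    print(next_pile_dynamic((2, 2), piles))        # (1, 1): nearest pile, regardless of the teammate
    print(next_pile_smart((2, 2), (1, 2), piles))  # (2, 4): defers (1, 1) to the closer agent

Under these assumptions, "dynamic" can send both agents to the same nearest pile (the emergent blocking the configs demonstrate), while "smart" defers a pile to whichever agent is closer.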
@@ -1,11 +1,12 @@
+# Configuration that shows emergent behavior in our dirt-quadrant environment
 env:
   classname: marl_factory_grid.environment.configs.marl_eval
   env_name: "marl_eval/dirt_quadrant_eval_config"
-  n_agents: 2
-  eval_render: True
-  save_and_log: False
+  n_agents: 2 # Number of agents in the environment
+  eval_render: True # If inference should be graphically visualized
+  save_and_log: False # If configurations and potential logging files should be saved
 algorithm:
-  pile-order: "dynamic" # Use "dynamic" to see emergent phenomenon and "smart" to prevent it
-  pile-observability: "single" # Options: "single", "all"
-  pile_all_done: "shared" # Options: "single", "all" ("single" for training, "all" for eval), "shared"
+  pile-order: "dynamic" # Agents decide on the next target pile based only on the distance to the respective piles
+  pile-observability: "single" # Agents can only perceive one dirt pile at any given time step
+  pile_all_done: "shared" # Indicates that agents don't have to collect the same dirt piles
+  auxiliary_piles: False # Dirt quadrant does not use this option
@@ -1,13 +1,15 @@
 env:
   classname: marl_factory_grid.environment.configs.marl_eval
   env_name: "marl_eval/two_rooms_eval_config"
-  n_agents: 2
-  eval_render: True
-  save_and_log: False
+  n_agents: 2 # Number of agents in the environment
+  eval_render: True # If inference should be graphically visualized
+  save_and_log: False # If configurations and potential logging files should be saved
 algorithm:
-  pile-order: "agents" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
-  pile-observability: "single" # Options: "single", "all"
-  pile_all_done: "distributed" # Options: "single", "all" ("single" for training, "all" and "distributed" for eval)
-  auxiliary_piles: True # Use True to see emergent phenomenon and False to prevent it
+  # Piles (=encoded flags) are evenly distributed among the two agents and have to be collected in the order defined
+  # by the environment config (cf. coords_or_quantity)
+  pile-order: "agents"
+  pile-observability: "single" # Agents can only perceive one dirt pile at any given time step
+  pile_all_done: "distributed" # Indicates that agents must clean their specifically assigned dirt piles
+  auxiliary_piles: True # Allows agents to go to an auxiliary pile
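A rough sketch of the "distributed" termination mode described in the comments above, under an assumed data layout; the function and dictionary structure are hypothetical, not the repository's code.

# Sketch: with pile_all_done = "distributed", each agent is assigned its own
# subset of piles and the episode ends once every agent has cleaned its own subset.
from typing import Dict, List, Set, Tuple

Coord = Tuple[int, int]

def episode_done_distributed(assigned: Dict[str, List[Coord]],
                             cleaned: Dict[str, Set[Coord]]) -> bool:
    # Done when every agent has cleaned all piles assigned to it;
    # agents do not have to visit piles assigned to the other agent.
    return all(set(piles) <= cleaned.get(agent, set())
               for agent, piles in assigned.items())

assigned = {"agent_1": [(3, 1), (2, 1)], "agent_2": [(3, 13), (2, 13)]}
cleaned = {"agent_1": {(3, 1), (2, 1)}, "agent_2": {(3, 13)}}
print(episode_done_distributed(assigned, cleaned))  # False: agent_2 still has (2, 13) left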
@@ -1,13 +1,16 @@
+# Configuration that shows emergent behavior in our two-rooms environment
 env:
   classname: marl_factory_grid.environment.configs.marl_eval
   env_name: "marl_eval/two_rooms_eval_config_emergent"
-  n_agents: 2
-  eval_render: True
-  save_and_log: False
+  n_agents: 2 # Number of agents in the environment
+  eval_render: True # If inference should be graphically visualized
+  save_and_log: False # If configurations and potential logging files should be saved
 algorithm:
-  pile-order: "agents" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
-  pile-observability: "single" # Options: "single", "all"
-  pile_all_done: "distributed" # Options: "single", "all" ("single" for training, "all" and "distributed" for eval)
-  auxiliary_piles: False # Use True to see emergent phenomenon and False to prevent it
+  # Piles (=encoded flags) are evenly distributed among the two agents and have to be collected in the order defined
+  # by the environment config (cf. coords_or_quantity)
+  pile-order: "agents"
+  pile-observability: "single" # Agents can only perceive one dirt pile at any given time step
+  pile_all_done: "distributed" # Indicates that agents must clean their specifically assigned dirt piles
+  auxiliary_piles: False # Shows emergent behavior
@@ -1,12 +1,12 @@
 env:
   classname: marl_factory_grid.environment.configs.rl
   env_name: "rl/dirt_quadrant_agent1_eval_config"
-  n_agents: 1
-  eval_render: True
-  save_and_log: False
+  n_agents: 1 # Number of agents in the environment
+  eval_render: True # If inference should be graphically visualized
+  save_and_log: False # If configurations and potential logging files should be saved
 algorithm:
-  pile-order: "fixed" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
-  pile-observability: "single" # Options: "single", "all"
-  pile_all_done: "all" #
+  pile-order: "fixed" # Clean dirt piles in a fixed order specified by the environment config (cf. coords_or_quantity)
+  pile-observability: "single" # Agent can only perceive one dirt pile at any given time step
+  pile_all_done: "all" # During inference the episode ends only when all dirt piles are cleaned
+  auxiliary_piles: False # Dirt quadrant does not use this option
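A small sketch of how the "single" (training) and "all" (inference) settings of pile_all_done could differ in the episode-termination check; the data layout and function name are assumptions for illustration, not the repository's code.

# Sketch only: "single" ends the episode once the current target pile is cleaned,
# "all" keeps it running until every pile in the environment is cleaned.
from typing import List, Set, Tuple

Coord = Tuple[int, int]

def episode_done(mode: str, target: Coord, cleaned: Set[Coord], piles: List[Coord]) -> bool:
    if mode == "single":   # used during training
        return target in cleaned
    if mode == "all":      # used during inference/eval
        return set(piles) <= cleaned
    raise ValueError(f"unknown pile_all_done mode: {mode}")

piles = [(9, 1), (1, 1), (2, 4)]
print(episode_done("single", (9, 1), {(9, 1)}, piles))  # True: current target is cleaned
print(episode_done("all", (9, 1), {(9, 1)}, piles))     # False: two piles remain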
@@ -1,17 +1,17 @@
 env:
   classname: marl_factory_grid.environment.configs.rl
   env_name: "rl/dirt_quadrant_agent1_train_config"
-  n_agents: 1
-  train_render: False
-  save_and_log: True
+  n_agents: 1 # Number of agents in the environment
+  train_render: False # If training should be graphically visualized
+  save_and_log: True # If configurations and potential logging files should be saved
 algorithm:
-  gamma: 0.99
-  n_steps: 0 # How much experience should be sampled at most (n-TD) until the next value and policy update is performed. Default 0: MC
-  chunk-episode: 20000 # Chunk size. (0 = update networks with full episode at once)
-  max_steps: 140000
-  advantage: "Advantage-AC" # Options: "Advantage-AC", "TD-Advantage-AC", "Reinforce"
-  pile-order: "fixed" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
-  pile-observability: "single" # Options: "single", "all"
-  pile_all_done: "single" # Options: "single", "all" ("single" for training, "all" for eval)
+  gamma: 0.99 # The gamma value that is used as discounting factor
+  n_steps: 0 # How much experience should be sampled at most until the next value- and policy-net updates are performed. (0 = Monte Carlo)
+  chunk-episode: 20000 # For the update, splits very large episodes into batches of approximately equal size. (0 = update networks with the full episode at once)
+  max_steps: 140000 # Number of training steps used for agent1 (= agent2)
+  advantage: "Advantage-AC" # Defines the used actor-critic model
+  pile-order: "fixed" # Clean dirt piles in a fixed order specified by the environment config (cf. coords_or_quantity)
+  pile-observability: "single" # Agent can only perceive one dirt pile at any given time step
+  pile_all_done: "single" # Episode ends when the current target pile is cleaned
+  auxiliary_piles: False # Dirt quadrant does not use this option
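The chunk-episode comment describes splitting very long episodes into batches of roughly equal size before the network update; a minimal sketch of such a split (assumed logic, not the repository's actual code):

# Sketch: split an episode of transitions into batches of approximately equal size,
# each no longer than chunk_size; chunk_size = 0 means "use the full episode at once".
import math
from typing import List, Sequence

def chunk_episode(episode: Sequence, chunk_size: int) -> List[Sequence]:
    if chunk_size <= 0 or len(episode) <= chunk_size:
        return [episode]
    n_chunks = math.ceil(len(episode) / chunk_size)
    per_chunk = math.ceil(len(episode) / n_chunks)  # balances chunk lengths
    return [episode[i:i + per_chunk] for i in range(0, len(episode), per_chunk)]

print([len(c) for c in chunk_episode(list(range(50000)), 20000)])  # [16667, 16667, 16666]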
@@ -1,13 +1,13 @@
 env:
   classname: marl_factory_grid.environment.configs.rl
   env_name: "rl/two_rooms_eval_config"
-  n_agents: 1
-  eval_render: True
-  save_and_log: False
+  n_agents: 1 # Number of agents in the environment
+  eval_render: True # If inference should be graphically visualized
+  save_and_log: False # If configurations and potential logging files should be saved
 algorithm:
-  pile-order: "fixed" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
-  pile-observability: "single" # Options: "single", "all"
-  pile_all_done: "all" # Options: "single", "all" ("single" for training, "all" for eval)
-  auxiliary_piles: False # Auxiliary piles are only used during marl eval
+  pile-order: "fixed" # Clean dirt piles (=encoded flags) in a fixed order specified by the environment config (cf. coords_or_quantity)
+  pile-observability: "single" # Agent can only perceive one dirt pile at any given time step
+  pile_all_done: "all" # During inference the episode ends only when all dirt piles are cleaned
+  auxiliary_piles: False # Auxiliary piles are only differentiated from regular target piles during marl eval
@@ -1,17 +1,17 @@
 env:
   classname: marl_factory_grid.environment.configs.rl
-  n_agents: 1
-  train_render: False
-  save_and_log: True
+  n_agents: 1 # Number of agents in the environment
+  train_render: False # If training should be graphically visualized
+  save_and_log: True # If configurations and potential logging files should be saved
 algorithm:
-  gamma: 0.99
-  n_steps: 0 # How much experience should be sampled at most (n-TD) until the next value and policy update is performed. Default 0: MC
-  chunk-episode: 20000 # Chunk size. (0 = update networks with full episode at once)
-  max_steps: 260000
-  advantage: "Advantage-AC" # Options: "Advantage-AC", "TD-Advantage-AC", "Reinforce"
-  pile-order: "fixed" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
-  pile-observability: "single" # Options: "single", "all"
-  pile_all_done: "single" # Options: "single", "all" ("single" for training, "all" for eval)
-  auxiliary_piles: False # Auxiliary piles are only used during marl eval
+  gamma: 0.99 # The gamma value that is used as discounting factor
+  n_steps: 0 # How much experience should be sampled at most until the next value- and policy-net updates are performed. (0 = Monte Carlo)
+  chunk-episode: 20000 # For the update, splits very large episodes into batches of approximately equal size. (0 = update networks with the full episode at once)
+  max_steps: 260000 # Number of training steps used to train the agent. Here, only a placeholder value
+  advantage: "Advantage-AC" # Defines the used actor-critic model
+  pile-order: "fixed" # Clean dirt piles (=encoded flags) in a fixed order specified by the environment config (cf. coords_or_quantity)
+  pile-observability: "single" # Agent can only perceive one dirt pile at any given time step
+  pile_all_done: "single" # Episode ends when the current target pile is cleaned
+  auxiliary_piles: False # Auxiliary piles are only differentiated from regular target piles during marl eval
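Since n_steps: 0 means full Monte Carlo targets, the discounted-return computation implied by gamma: 0.99 looks roughly like the following sketch (illustrative only, not the repository's training loop):

# Sketch: with n_steps = 0 the value/policy targets are full Monte Carlo returns
# G_t = r_t + gamma * r_{t+1} + gamma^2 * r_{t+2} + ... computed backwards over the episode.
from typing import List

def monte_carlo_returns(rewards: List[float], gamma: float = 0.99) -> List[float]:
    returns, g = [], 0.0
    for r in reversed(rewards):
        g = r + gamma * g
        returns.append(g)
    return list(reversed(returns))

print(monte_carlo_returns([0.0, 0.0, 1.0]))  # approximately [0.9801, 0.99, 1.0]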
@@ -21,7 +21,7 @@ Agents:
     Observations:
       - DirtPiles
       - Self
-    Positions:
+    Positions: # Each spawnpoint is mapped to one dirt pile looping over coords_or_quantity (see below)
       - (9,1)
       - (1,1)
       - (2,4)
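The Positions comment states that each spawnpoint is mapped to one dirt pile by looping over coords_or_quantity; one plausible reading of that mapping, with the pairing rule and example coordinates assumed for illustration:

# Sketch: pair the i-th spawnpoint with the i-th entry of coords_or_quantity,
# wrapping around (looping) when there are more spawnpoints than piles.
from typing import List, Tuple

Coord = Tuple[int, int]

def map_spawn_to_pile(spawnpoints: List[Coord],
                      coords_or_quantity: List[Coord]) -> List[Tuple[Coord, Coord]]:
    return [(spawn, coords_or_quantity[i % len(coords_or_quantity)])
            for i, spawn in enumerate(spawnpoints)]

spawnpoints = [(9, 1), (1, 1), (2, 4)]
piles = [(1, 1), (2, 4)]  # hypothetical coords_or_quantity entries
print(map_spawn_to_pile(spawnpoints, piles))
# [((9, 1), (1, 1)), ((1, 1), (2, 4)), ((2, 4), (1, 1))]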
@@ -21,9 +21,9 @@ Agents:
     Observations:
       - DirtPiles
       - Self
-    Positions:
+    Positions: # Each spawnpoint is mapped to one dirt pile looping over coords_or_quantity (see below)
       - (3,1)
-      - (2,1)
+      - (2,1) # spawnpoint only required if agent1 should go to its auxiliary pile
 
 Entities:
   DirtPiles:
@@ -20,7 +20,7 @@ Agents:
     Observations:
      - DirtPiles
      - Self
-    Positions:
+    Positions: # Each spawnpoint is mapped to one dirt pile looping over coords_or_quantity (see below)
      - (3,1)
      - (1,1)
      - (3,1)
@@ -21,9 +21,9 @@ Agents:
     Observations:
       - DirtPiles
       - Self
-    Positions:
+    Positions: # Each spawnpoint is mapped to one dirt pile looping over coords_or_quantity (see below)
       - (3,13)
-      - (2,13)
+      - (2,13) # spawnpoint only required if agent2 should go to its auxiliary pile
 
 Entities:
   DirtPiles:
@@ -20,7 +20,7 @@ Agents:
     Observations:
      - DirtPiles
      - Self
-    Positions:
+    Positions: # Each spawnpoint is mapped to one dirt pile looping over coords_or_quantity (see below)
      - (3,13)
      - (2,13)
      - (1,13)