Reworked configurations

Julian Schönberger
2024-08-09 16:31:08 +02:00
parent 8e8e925278
commit 4c81e4b865
12 changed files with 33 additions and 23 deletions

View File

@@ -4,7 +4,9 @@ env:
n_agents: 2 # Number of agents in the environment
eval_render: True # If inference should be graphically visualized
save_and_log: False # If configurations and potential logging files should be saved
wandb_log: False # If metrics for evaluation steps should be logged with weights&biases
algorithm:
seed: 42 # Picks seed to make random parts of algorithm reproducible. -1 for random seed
pile-order: "smart" # Triggers implementation of our emergence prevention mechanism. Agents consider distance to other agent
pile-observability: "single" # Agents can only perceive one dirt pile at any given time step
pile_all_done: "shared" # Indicates that agents don't have to collect the same dirt piles
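
The wandb_log flag added to the eval and training configs in this commit sits next to save_and_log under env: and only gates Weights & Biases logging. A minimal sketch of how such a flag could be consumed, assuming a plain YAML load; the file path, project name, and dummy metrics are made up, only the key names come from the config above.

import yaml  # PyYAML

with open("marl_eval/two_rooms_eval_config.yaml") as f:  # hypothetical path
    cfg = yaml.safe_load(f)

use_wandb = cfg["env"].get("wandb_log", False)  # new flag, off by default for eval
if use_wandb:
    import wandb
    wandb.init(project="marl-eval", config=cfg)  # hypothetical project name

metrics = {"eval/step": 0, "eval/return": 0.0}  # placeholder metrics
if use_wandb:
    wandb.log(metrics)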

View File

@@ -5,7 +5,9 @@ env:
n_agents: 2 # Number of agents in the environment
eval_render: True # If inference should be graphically visualized
save_and_log: False # If configurations and potential logging files should be saved
wandb_log: False # If metrics for evaluation steps should be logged with weights&biases
algorithm:
seed: 42 # Picks seed to make random parts of algorithm reproducible. -1 for random seed
pile-order: "dynamic" # Agents only decide on next target pile based on the distance to the respective piles
pile-observability: "single" # Agents can only perceive one dirt pile at any given time step
pile_all_done: "shared" # Indicates that agents don't have to collect the same dirt piles
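
The two pile-order values seen so far differ in what an agent takes into account when choosing its next target pile: "dynamic" looks only at the agent's own distance to the piles, while "smart" (the emergence-prevention mechanism) additionally considers the other agent's distance. A purely illustrative sketch of that distinction using Manhattan distance; the repository's actual selection logic may differ.

from typing import List, Tuple

Pos = Tuple[int, int]

def manhattan(a: Pos, b: Pos) -> int:
    return abs(a[0] - b[0]) + abs(a[1] - b[1])

def next_pile(agent: Pos, other_agent: Pos, piles: List[Pos], pile_order: str) -> Pos:
    if pile_order == "dynamic":
        # pick the pile closest to this agent
        return min(piles, key=lambda p: manhattan(agent, p))
    if pile_order == "smart":
        # prefer piles this agent can reach faster than the other agent, if any
        closer = [p for p in piles if manhattan(agent, p) < manhattan(other_agent, p)]
        return min(closer or piles, key=lambda p: manhattan(agent, p))
    raise ValueError(f"unknown pile-order: {pile_order}")

print(next_pile((1, 1), (5, 8), [(2, 1), (3, 12)], "smart"))  # (2, 1)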

View File

@@ -3,8 +3,10 @@ env:
env_name: "marl_eval/two_rooms_eval_config"
n_agents: 2 # Number of agents in the environment
eval_render: True # If inference should be graphically visualized
save_and_log: False # If configurations and potential logging files should be saved
save_and_log: True # If configurations and potential logging files should be saved
wandb_log: False # If metrics for evaluation steps should be logged with weights&biases
algorithm:
seed: 42 # Picks seed to make random parts of algorithm reproducible. -1 for random seed
# Piles (=encoded flags) are evenly distributed among the two agents and have to be collected in the order defined
# by the environment config (cf. coords_or_quantity)
pile-order: "agents"
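
With pile-order: "agents" the piles (encoded flags) are split evenly between the two agents while keeping the order given by coords_or_quantity. One plausible reading of that split is a round-robin assignment; the pile coordinates below are taken from the eval environment config changed later in this commit, and the repository's actual assignment may differ.

piles = [(9, 9), (4, 7), (2, 4), (1, 3)]  # coords_or_quantity, in config order
n_agents = 2
assignment = {agent: piles[agent::n_agents] for agent in range(n_agents)}
print(assignment)  # {0: [(9, 9), (2, 4)], 1: [(4, 7), (1, 3)]}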

View File

@@ -5,7 +5,9 @@ env:
n_agents: 2 # Number of agents in the environment
eval_render: True # If inference should be graphically visualized
save_and_log: False # If configurations and potential logging files should be saved
wandb_log: False # If metrics for evaluation steps should be logged with weights&biases
algorithm:
seed: 42 # Picks seed to make random parts of algorithm reproducible. -1 for random seed
# Piles (=encoded flags) are evenly distributed among the two agents and have to be collected in the order defined
# by the environment config (cf. coords_or_quantity)
pile-order: "agents"

View File

@@ -4,7 +4,9 @@ env:
n_agents: 1 # Number of agents in the environment
eval_render: True # If inference should be graphically visualized
save_and_log: False # If configurations and potential logging files should be saved
wandb_log: False # If metrics for evaluation steps should be logged with weights&biases
algorithm:
seed: 42 # Picks seed to make random parts of algorithm reproducible. -1 for random seed
pile-order: "fixed" # Clean dirt piles in a fixed order specified by the environment config (cf. coords_or_quantity)
pile-observability: "single" # Agent can only perceive one dirt pile at any given time step
pile_all_done: "all" # During inference the episode ends only when all dirt piles are cleaned

View File

@@ -4,11 +4,16 @@ env:
n_agents: 1 # Number of agents in the environment
train_render: False # If training should be graphically visualized
save_and_log: True # If configurations and potential logging files should be saved
wandb_log: True # If metrics for training steps should be logged with weights&biases
algorithm:
seed: 9 # Picks seed to make random parts of algorithm reproducible. -1 for random seed
gamma: 0.99 # The gamma value that is used as discounting factor
n_steps: 0 # How much experience should be sampled at most until the next value- and policy-net updates are performed. (0 = Monte Carlo)
chunk-episode: 20000 # For update, splits very large episodes in batches of approximately equal size. (0 = update networks with full episode at once)
max_steps: 140000 # Number of training steps used for agent1 (=agent2)
max_steps: 400000 # Number of training steps used for agent1 (=agent2)
early_stopping: True # If the early stopping functionality should be used
last_n_episodes: 100 # To determine if low change phase has begun, the last n episodes are checked if the mean target change is reached
mean_target_change: 2.0 # What should be the accepted fluctuation for determining if a low change phase has begun
advantage: "Advantage-AC" # Defines the used actor critic model
pile-order: "fixed" # Clean dirt piles in a fixed order specified by the environment config (cf. coords_or_quantity)
pile-observability: "single" # Agent can only perceive one dirt pile at any given time step
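
chunk-episode: 20000 caps how much of one episode is pushed through a single value/policy update: an over-long episode is split into batches of approximately equal size, and 0 keeps the full episode in one update. A small sketch of such a splitting rule; the function and the transition list are illustrative, not the repository's code.

import math

def split_episode(transitions, chunk_episode=20000):
    if chunk_episode == 0 or len(transitions) <= chunk_episode:
        return [transitions]  # update with the full episode at once
    n_chunks = math.ceil(len(transitions) / chunk_episode)
    base, extra = divmod(len(transitions), n_chunks)
    chunks, start = [], 0
    for i in range(n_chunks):
        size = base + (1 if i < extra else 0)  # chunk sizes differ by at most one
        chunks.append(transitions[start:start + size])
        start += size
    return chunks

print([len(c) for c in split_episode(list(range(45000)))])  # [15000, 15000, 15000]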

View File

@@ -1,10 +1,12 @@
env:
classname: marl_factory_grid.environment.configs.rl
env_name: "rl/two_rooms_eval_config"
env_name: "rl/two_rooms_agent2_eval_config"
n_agents: 1 # Number of agents in the environment
eval_render: True # If inference should be graphically visualized
save_and_log: False # If configurations and potential logging files should be saved
wandb_log: False # If metrics for evaluation steps should be logged with weights&biases
algorithm:
seed: 42 # Picks seed to make random parts of algorithm reproducible. -1 for random seed
pile-order: "fixed" # Clean dirt piles (=encoded flags) in a fixed order specified by the environment config (cf. coords_or_quantity)
pile-observability: "single" # Agent can only perceive one dirt pile at any given time step
pile_all_done: "all" # During inference the episode ends only when all dirt piles are cleaned

View File

@@ -3,11 +3,16 @@ env:
n_agents: 1 # Number of agents in the environment
train_render: False # If training should be graphically visualized
save_and_log: True # If configurations and potential logging files should be saved
wandb_log: True # If metrics for training steps should be logged with weights&biases
algorithm:
seed: 9 # Picks seed to make random parts of algorithm reproducible. -1 for random seed
gamma: 0.99 # The gamma value that is used as discounting factor
n_steps: 0 # How much experience should be sampled at most until the next value- and policy-net updates are performed. (0 = Monte Carlo)
chunk-episode: 20000 # For update, splits very large episodes in batches of approximately equal size. (0 = update networks with full episode at once)
max_steps: 260000 # Number of training steps used to train the agent. Here, only a placeholder value
max_steps: 300000 # Number of training steps used to train the agent. Here, only a placeholder value
early_stopping: True # If the early stopping functionality should be used
last_n_episodes: 100 # To determine if low change phase has begun, the last n episodes are checked if the mean target change is reached
mean_target_change: 2.0 # What should be the accepted fluctuation for determining if a low change phase has begun
advantage: "Advantage-AC" # Defines the used actor critic model
pile-order: "fixed" # Clean dirt piles (=encoded flags) in a fixed order specified by the environment config (cf. coords_or_quantity)
pile-observability: "single" # Agent can only perceive one dirt pile at any given time step
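
The early-stopping block (early_stopping, last_n_episodes, mean_target_change) describes stopping training once the monitored quantity has entered a "low change" phase: over the last 100 episodes its mean change has to stay within 2.0. A hedged sketch of such a check; treating episode returns as the monitored quantity is an assumption, and the repository's exact rule may differ.

def in_low_change_phase(history, last_n_episodes=100, mean_target_change=2.0):
    if len(history) < last_n_episodes + 1:
        return False  # not enough episodes to judge yet
    recent = history[-(last_n_episodes + 1):]
    mean_abs_change = sum(abs(b - a) for a, b in zip(recent, recent[1:])) / last_n_episodes
    return mean_abs_change <= mean_target_change

print(in_low_change_phase([50.0] * 80 + [100.0] * 120))  # True: returns have flattened out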

View File

@@ -36,12 +36,13 @@ Agents:
Entities:
DirtPiles:
coords_or_quantity: (9,9), (7,9), (4,7), (2,4), (1, 1)
coords_or_quantity: (9, 9), (4, 7), (2, 4), (1, 3)
initial_amount: 0.5
clean_amount: 1
dirt_spawn_r_var: 0
max_global_amount: 12
max_local_amount: 1
randomize: False
# Rules section specifies the rules governing the dynamics of the environment.
Rules:

View File

@@ -21,18 +21,13 @@ Agents:
- DirtPiles
- Self
Positions: # Each spawnpoint is mapped to one dirt pile looping over coords_or_quantity (see below)
- (3,1)
- (1,1)
- (3,1)
- (5,1)
- (3,1)
- (1,8)
- (3,1)
- (5,8)
- (2,1)
- (1,1)
Entities:
DirtPiles:
coords_or_quantity: (2,1), (3,12) # Locations of dirt piles
coords_or_quantity: (3,12) # Locations of dirt piles
initial_amount: 0.5
clean_amount: 1
dirt_spawn_r_var: 0
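
The Positions comment above states that each spawnpoint is mapped to one dirt pile by looping over coords_or_quantity. A tiny sketch of that cycling mapping, using two of the spawnpoints listed above and the remaining pile at (3,12) purely for illustration; it is not the repository's code.

from itertools import cycle

positions = [(2, 1), (1, 1)]  # example spawnpoints from the config above
piles = [(3, 12)]             # coords_or_quantity after this change
mapping = list(zip(positions, cycle(piles)))
print(mapping)  # [((2, 1), (3, 12)), ((1, 1), (3, 12))]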

View File

@@ -23,11 +23,10 @@ Agents:
- Self
Positions: # Each spawnpoint is mapped to one dirt pile looping over coords_or_quantity (see below)
- (3,13)
- (2,13) # spawnpoint only required if agent2 should go to its auxiliary pile
Entities:
DirtPiles:
coords_or_quantity: (2,13), (3,2) # Locations of dirt piles
coords_or_quantity: (3,2) # Locations of dirt piles
initial_amount: 0.5
clean_amount: 1
dirt_spawn_r_var: 0

View File

@@ -22,17 +22,10 @@ Agents:
- Self
Positions: # Each spawnpoint is mapped to one dirt pile looping over coords_or_quantity (see below)
- (3,13)
- (2,13)
- (1,13)
- (3,13)
- (1,8)
- (2,6)
- (3,10)
- (4,6)
Entities:
DirtPiles:
coords_or_quantity: (2,13), (3,2) # Locations of dirt piles
coords_or_quantity: (3,2) # Locations of dirt piles
initial_amount: 0.5
clean_amount: 1
dirt_spawn_r_var: 0