Mirror of https://github.com/illiumst/marl-factory-grid.git, synced 2025-07-08 02:21:36 +02:00
Reworked configurations
@@ -4,7 +4,9 @@ env:
n_agents: 2 # Number of agents in the environment
eval_render: True # If inference should be graphically visualized
save_and_log: False # If configurations and potential logging files should be saved
wandb_log: False # If metrics for evaluation steps should be logged with weights&biases
algorithm:
seed: 42 # Picks seed to make random parts of algorithm reproducible. -1 for random seed
pile-order: "smart" # Triggers implementation of our emergence prevention mechanism. Agents consider distance to other agent
pile-observability: "single" # Agents can only perceive one dirt pile at any given time step
pile_all_done: "shared" # Indicates that agents don't have to collect the same dirt piles
@@ -5,7 +5,9 @@ env:
n_agents: 2 # Number of agents in the environment
eval_render: True # If inference should be graphically visualized
save_and_log: False # If configurations and potential logging files should be saved
wandb_log: False # If metrics for evaluation steps should be logged with weights&biases
algorithm:
seed: 42 # Picks seed to make random parts of algorithm reproducible. -1 for random seed
pile-order: "dynamic" # Agents only decide on next target pile based on the distance to the respective piles
pile-observability: "single" # Agents can only perceive one dirt pile at any given time step
pile_all_done: "shared" # Indicates that agents don't have to collect the same dirt piles
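The two evaluation configs above differ only in pile-order: "dynamic" picks the next target pile purely by the agent's own distance, while "smart" additionally takes the other agent's distance into account (the emergence prevention mechanism). The sketch below is a minimal illustration of that distinction; the helper names and the exact scoring are assumptions, not the code of marl-factory-grid.

```python
from typing import List, Tuple

Coord = Tuple[int, int]

def manhattan(a: Coord, b: Coord) -> int:
    # Grid distance as a stand-in for the real path cost.
    return abs(a[0] - b[0]) + abs(a[1] - b[1])

def select_target_pile(own_pos: Coord, other_pos: Coord,
                       piles: List[Coord], order: str) -> Coord:
    # Illustrative sketch only -- not the marl-factory-grid implementation.
    if order == "dynamic":
        # Pick the pile closest to this agent only.
        return min(piles, key=lambda p: manhattan(own_pos, p))
    if order == "smart":
        # Additionally prefer piles the other agent is NOT closer to,
        # so both agents do not converge on the same pile.
        return min(piles, key=lambda p: manhattan(own_pos, p) - manhattan(other_pos, p))
    raise ValueError(f"unknown pile-order: {order}")

# Example: with piles at (1, 1) and (5, 8), "smart" steers this agent away
# from the pile its teammate at (5, 7) is about to reach.
print(select_target_pile((2, 2), (5, 7), [(1, 1), (5, 8)], "smart"))  # (1, 1)
```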
@@ -3,8 +3,10 @@ env:
env_name: "marl_eval/two_rooms_eval_config"
n_agents: 2 # Number of agents in the environment
eval_render: True # If inference should be graphically visualized
-save_and_log: False # If configurations and potential logging files should be saved
+save_and_log: True # If configurations and potential logging files should be saved
wandb_log: False # If metrics for evaluation steps should be logged with weights&biases
algorithm:
seed: 42 # Picks seed to make random parts of algorithm reproducible. -1 for random seed
# Piles (=encoded flags) are evenly distributed among the two agents and have to be collected in the order defined
# by the environment config (cf. coords_or_quantity)
pile-order: "agents"
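With pile-order: "agents", the piles (encoded flags) are distributed evenly among the two agents and collected in the order defined in the environment config. One plausible way to realise such an even, order-preserving split is sketched below with hypothetical names; it is not the package's actual loader.

```python
from typing import Dict, List, Tuple

Coord = Tuple[int, int]

def assign_piles_to_agents(piles: List[Coord], n_agents: int) -> Dict[int, List[Coord]]:
    """Distribute piles evenly over agents, keeping the order from coords_or_quantity."""
    # Illustrative sketch only -- not the marl-factory-grid implementation.
    assignment: Dict[int, List[Coord]] = {i: [] for i in range(n_agents)}
    for idx, pile in enumerate(piles):
        assignment[idx % n_agents].append(pile)
    return assignment

# Piles taken in config order; agent 0 gets every even-indexed pile, agent 1 the odd ones.
piles = [(9, 9), (4, 7), (2, 4), (1, 3)]
print(assign_piles_to_agents(piles, n_agents=2))
# {0: [(9, 9), (2, 4)], 1: [(4, 7), (1, 3)]}
```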
@@ -5,7 +5,9 @@ env:
n_agents: 2 # Number of agents in the environment
eval_render: True # If inference should be graphically visualized
save_and_log: False # If configurations and potential logging files should be saved
wandb_log: False # If metrics for evaluation steps should be logged with weights&biases
algorithm:
seed: 42 # Picks seed to make random parts of algorithm reproducible. -1 for random seed
# Piles (=encoded flags) are evenly distributed among the two agents and have to be collected in the order defined
# by the environment config (cf. coords_or_quantity)
pile-order: "agents"
@@ -4,7 +4,9 @@ env:
n_agents: 1 # Number of agents in the environment
eval_render: True # If inference should be graphically visualized
save_and_log: False # If configurations and potential logging files should be saved
wandb_log: False # If metrics for evaluation steps should be logged with weights&biases
algorithm:
seed: 42 # Picks seed to make random parts of algorithm reproducible. -1 for random seed
pile-order: "fixed" # Clean dirt piles in a fixed order specified by the environment config (cf. coords_or_quantity)
pile-observability: "single" # Agent can only perceive one dirt pile at any given time step
pile_all_done: "all" # During inference the episode ends only when all dirt piles are cleaned
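pile-order: "fixed" together with pile_all_done: "all" means the single agent has to work through the dirt piles in exactly the order listed under coords_or_quantity, and the evaluation episode only ends once every pile is cleaned. The following is a minimal, purely illustrative sketch of that bookkeeping (class and method names are assumptions).

```python
from typing import List, Optional, Tuple

Coord = Tuple[int, int]

class FixedPileOrder:
    """Advance through the piles in the order given by coords_or_quantity."""
    # Illustrative sketch only -- not the marl-factory-grid implementation.

    def __init__(self, piles: List[Coord]):
        self.piles = piles
        self.next_idx = 0

    def current_target(self) -> Optional[Coord]:
        # The pile the agent should navigate to next; None once everything is cleaned,
        # which with pile_all_done: "all" is also the episode-termination condition.
        if self.next_idx < len(self.piles):
            return self.piles[self.next_idx]
        return None

    def notify_cleaned(self, pos: Coord) -> None:
        # Only cleaning the *current* pile advances the fixed order.
        if pos == self.current_target():
            self.next_idx += 1

order = FixedPileOrder([(9, 9), (4, 7), (2, 4), (1, 3)])
order.notify_cleaned((9, 9))
print(order.current_target())  # (4, 7)
```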
@@ -4,11 +4,16 @@ env:
n_agents: 1 # Number of agents in the environment
train_render: False # If training should be graphically visualized
save_and_log: True # If configurations and potential logging files should be saved
wandb_log: True # If metrics for training steps should be logged with weights&biases
algorithm:
seed: 9 # Picks seed to make random parts of algorithm reproducible. -1 for random seed
gamma: 0.99 # The gamma value that is used as discounting factor
n_steps: 0 # How much experience should be sampled at most until the next value- and policy-net updates are performed. (0 = Monte Carlo)
chunk-episode: 20000 # For update, splits very large episodes in batches of approximately equal size. (0 = update networks with full episode at once)
-max_steps: 140000 # Number of training steps used for agent1 (=agent2)
+max_steps: 400000 # Number of training steps used for agent1 (=agent2)
early_stopping: True # If the early stopping functionality should be used
last_n_episodes: 100 # To determine if low change phase has begun, the last n episodes are checked if the mean target change is reached
mean_target_change: 2.0 # What should be the accepted fluctuation for determining if a low change phase has begun
advantage: "Advantage-AC" # Defines the used actor critic model
pile-order: "fixed" # Clean dirt piles in a fixed order specified by the environment config (cf. coords_or_quantity)
pile-observability: "single" # Agent can only perceive one dirt pile at any given time step
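In the training config above, n_steps: 0 selects Monte Carlo updates (full-episode returns), and chunk-episode: 20000 caps the batch size by splitting very long episodes into batches of approximately equal size before the value- and policy-net updates. Below is a small sketch of such a chunking helper, assuming an episode is simply a sequence of transitions; the function name is hypothetical.

```python
from typing import List, Sequence, TypeVar

T = TypeVar("T")

def chunk_episode(episode: Sequence[T], chunk_size: int) -> List[Sequence[T]]:
    """Split an episode into batches of approximately equal size (each <= chunk_size)."""
    # Illustrative sketch only -- not the marl-factory-grid implementation.
    if chunk_size <= 0 or len(episode) <= chunk_size:
        return [episode]                       # 0 -> update networks with the full episode at once
    n_chunks = -(-len(episode) // chunk_size)  # ceiling division
    per_chunk = -(-len(episode) // n_chunks)   # balance chunks instead of leaving one tiny remainder
    return [episode[i:i + per_chunk] for i in range(0, len(episode), per_chunk)]

# A 50,000-step episode with chunk-episode: 20000 becomes three roughly equal batches.
print([len(c) for c in chunk_episode(list(range(50_000)), 20_000)])  # [16667, 16667, 16666]
```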
@@ -1,10 +1,12 @@
env:
classname: marl_factory_grid.environment.configs.rl
-env_name: "rl/two_rooms_eval_config"
+env_name: "rl/two_rooms_agent2_eval_config"
n_agents: 1 # Number of agents in the environment
eval_render: True # If inference should be graphically visualized
save_and_log: False # If configurations and potential logging files should be saved
wandb_log: False # If metrics for evaluation steps should be logged with weights&biases
algorithm:
seed: 42 # Picks seed to make random parts of algorithm reproducible. -1 for random seed
pile-order: "fixed" # Clean dirt piles (=encoded flags) in a fixed order specified by the environment config (cf. coords_or_quantity)
pile-observability: "single" # Agent can only perceive one dirt pile at any given time step
pile_all_done: "all" # During inference the episode ends only when all dirt piles are cleaned
@@ -3,11 +3,16 @@ env:
n_agents: 1 # Number of agents in the environment
train_render: False # If training should be graphically visualized
save_and_log: True # If configurations and potential logging files should be saved
wandb_log: True # If metrics for training steps should be logged with weights&biases
algorithm:
seed: 9 # Picks seed to make random parts of algorithm reproducible. -1 for random seed
gamma: 0.99 # The gamma value that is used as discounting factor
n_steps: 0 # How much experience should be sampled at most until the next value- and policy-net updates are performed. (0 = Monte Carlo)
chunk-episode: 20000 # For update, splits very large episodes in batches of approximately equal size. (0 = update networks with full episode at once)
-max_steps: 260000 # Number of training steps used to train the agent. Here, only a placeholder value
+max_steps: 300000 # Number of training steps used to train the agent. Here, only a placeholder value
early_stopping: True # If the early stopping functionality should be used
last_n_episodes: 100 # To determine if low change phase has begun, the last n episodes are checked if the mean target change is reached
mean_target_change: 2.0 # What should be the accepted fluctuation for determining if a low change phase has begun
advantage: "Advantage-AC" # Defines the used actor critic model
pile-order: "fixed" # Clean dirt piles (=encoded flags) in a fixed order specified by the environment config (cf. coords_or_quantity)
pile-observability: "single" # Agent can only perceive one dirt pile at any given time step
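Both training configs enable early_stopping with last_n_episodes: 100 and mean_target_change: 2.0: training may stop early once a "low change" phase begins, i.e. the mean change of the monitored quantity over the last 100 episodes stays within the accepted fluctuation of 2.0. A hedged sketch of such a check follows; the monitored quantity (a per-episode return here) and the function name are assumptions.

```python
from typing import List

def low_change_phase(history: List[float], last_n_episodes: int = 100,
                     mean_target_change: float = 2.0) -> bool:
    """True once the mean absolute change over the last n episodes is within the tolerance."""
    # Illustrative sketch only -- not the marl-factory-grid implementation.
    if len(history) < last_n_episodes + 1:
        return False  # not enough episodes yet to judge
    window = history[-(last_n_episodes + 1):]
    changes = [abs(b - a) for a, b in zip(window, window[1:])]
    return sum(changes) / len(changes) <= mean_target_change

# Example: an essentially flat return curve would trigger early stopping.
flat = [100.0 + 0.5 * (i % 2) for i in range(200)]
print(low_change_phase(flat))  # True
```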
@@ -36,12 +36,13 @@ Agents:

Entities:
DirtPiles:
-coords_or_quantity: (9,9), (7,9), (4,7), (2,4), (1, 1)
+coords_or_quantity: (9, 9), (4, 7), (2, 4), (1, 3)
initial_amount: 0.5
clean_amount: 1
dirt_spawn_r_var: 0
max_global_amount: 12
max_local_amount: 1
randomize: False

# Rules section specifies the rules governing the dynamics of the environment.
Rules:
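In this DirtPiles entry, coords_or_quantity is a list of explicit coordinates rather than a count, and each pile starts with initial_amount: 0.5 bounded by max_local_amount: 1. The sketch below shows one way such a value could be parsed and turned into piles; it is purely illustrative and not the package's actual config loader.

```python
import re
from typing import Dict, List, Tuple

Coord = Tuple[int, int]

def parse_coords(value: str) -> List[Coord]:
    """Turn '(9, 9), (4, 7), (2, 4), (1, 3)' into a list of coordinate tuples."""
    # Illustrative sketch only -- not the marl-factory-grid implementation.
    return [(int(x), int(y)) for x, y in re.findall(r"\((\s*\d+)\s*,\s*(\d+)\s*\)", value)]

def spawn_piles(value: str, initial_amount: float = 0.5,
                max_local_amount: float = 1.0) -> Dict[Coord, float]:
    # Each listed coordinate gets one pile, clipped to the per-cell maximum.
    return {c: min(initial_amount, max_local_amount) for c in parse_coords(value)}

print(spawn_piles("(9, 9), (4, 7), (2, 4), (1, 3)"))
# {(9, 9): 0.5, (4, 7): 0.5, (2, 4): 0.5, (1, 3): 0.5}
```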
@@ -21,18 +21,13 @@ Agents:
- DirtPiles
- Self
Positions: # Each spawnpoint is mapped to one dirt pile looping over coords_or_quantity (see below)
- (3,1)
- (1,1)
- (3,1)
- (5,1)
- (3,1)
- (1,8)
- (3,1)
- (5,8)
- (2,1)
- (1,1)

Entities:
DirtPiles:
-coords_or_quantity: (2,1), (3,12) # Locations of dirt piles
+coords_or_quantity: (3,12) # Locations of dirt piles
initial_amount: 0.5
clean_amount: 1
dirt_spawn_r_var: 0
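The Positions comment states that each spawnpoint is mapped to one dirt pile by looping over coords_or_quantity. With more spawnpoints than piles, the most natural reading is that the loop simply wraps around; the following minimal sketch assumes that modulo interpretation and is not the package's code.

```python
from typing import List, Tuple

Coord = Tuple[int, int]

def map_spawnpoints_to_piles(spawnpoints: List[Coord],
                             piles: List[Coord]) -> List[Tuple[Coord, Coord]]:
    """Pair every spawnpoint with a pile, cycling through coords_or_quantity."""
    # Illustrative sketch only -- not the marl-factory-grid implementation.
    return [(sp, piles[i % len(piles)]) for i, sp in enumerate(spawnpoints)]

# With a single remaining pile at (3,12), every spawnpoint maps to it.
spawnpoints = [(3, 1), (1, 1), (3, 1), (5, 1)]
print(map_spawnpoints_to_piles(spawnpoints, [(3, 12)]))
# [((3, 1), (3, 12)), ((1, 1), (3, 12)), ((3, 1), (3, 12)), ((5, 1), (3, 12))]
```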
@@ -23,11 +23,10 @@ Agents:
- Self
Positions: # Each spawnpoint is mapped to one dirt pile looping over coords_or_quantity (see below)
- (3,13)
- (2,13) # spawnpoint only required if agent2 should go to its auxiliary pile

Entities:
DirtPiles:
-coords_or_quantity: (2,13), (3,2) # Locations of dirt piles
+coords_or_quantity: (3,2) # Locations of dirt piles
initial_amount: 0.5
clean_amount: 1
dirt_spawn_r_var: 0
@@ -22,17 +22,10 @@ Agents:
- Self
Positions: # Each spawnpoint is mapped to one dirt pile looping over coords_or_quantity (see below)
- (3,13)
- (2,13)
- (1,13)
- (3,13)
- (1,8)
- (2,6)
- (3,10)
- (4,6)

Entities:
DirtPiles:
-coords_or_quantity: (2,13), (3,2) # Locations of dirt piles
+coords_or_quantity: (3,2) # Locations of dirt piles
initial_amount: 0.5
clean_amount: 1
dirt_spawn_r_var: 0