Mirror of https://github.com/illiumst/marl-factory-grid.git
Reworked configurations
@@ -4,7 +4,9 @@ env:
  n_agents: 2 # Number of agents in the environment
  eval_render: True # If inference should be graphically visualized
  save_and_log: False # If configurations and potential logging files should be saved
  wandb_log: False # If metrics for evaluation steps should be logged with weights&biases
algorithm:
  seed: 42 # Picks seed to make random parts of algorithm reproducible. -1 for random seed
  pile-order: "smart" # Triggers implementation of our emergence prevention mechanism. Agents consider distance to other agent
  pile-observability: "single" # Agents can only perceive one dirt pile at any given time step
  pile_all_done: "shared" # Indicates that agents don't have to collect the same dirt piles
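A minimal sketch of what a distance-aware choice such as pile-order: "smart" could look like, assuming grid positions and a Manhattan metric; the function names and tie-breaking are illustrative assumptions, not the repository's implementation:

def manhattan(a, b):
    # Grid distance between two (row, col) positions
    return abs(a[0] - b[0]) + abs(a[1] - b[1])

def smart_next_pile(own_pos, other_positions, piles):
    # Prefer piles that no other agent is strictly closer to, so two
    # agents do not converge on the same target; fall back to the
    # globally nearest pile if every pile is contested.
    uncontested = [p for p in piles
                   if all(manhattan(own_pos, p) <= manhattan(o, p)
                          for o in other_positions)]
    pool = uncontested or piles
    return min(pool, key=lambda p: manhattan(own_pos, p))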
@@ -5,7 +5,9 @@ env:
  n_agents: 2 # Number of agents in the environment
  eval_render: True # If inference should be graphically visualized
  save_and_log: False # If configurations and potential logging files should be saved
  wandb_log: False # If metrics for evaluation steps should be logged with weights&biases
algorithm:
  seed: 42 # Picks seed to make random parts of algorithm reproducible. -1 for random seed
  pile-order: "dynamic" # Agents only decide on next target pile based on the distance to the respective piles
  pile-observability: "single" # Agents can only perceive one dirt pile at any given time step
  pile_all_done: "shared" # Indicates that agents don't have to collect the same dirt piles
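By contrast, the "dynamic" order as commented above reduces to targeting the currently nearest remaining pile, without regard for the other agents; a one-line sketch under the same illustrative assumptions:

def dynamic_next_pile(own_pos, piles):
    # Greedy choice: nearest remaining pile by Manhattan distance
    return min(piles, key=lambda p: abs(own_pos[0] - p[0]) + abs(own_pos[1] - p[1]))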
@@ -3,8 +3,10 @@ env:
  env_name: "marl_eval/two_rooms_eval_config"
  n_agents: 2 # Number of agents in the environment
  eval_render: True # If inference should be graphically visualized
-  save_and_log: False # If configurations and potential logging files should be saved
+  save_and_log: True # If configurations and potential logging files should be saved
  wandb_log: False # If metrics for evaluation steps should be logged with weights&biases
algorithm:
  seed: 42 # Picks seed to make random parts of algorithm reproducible. -1 for random seed
  # Piles (=encoded flags) are evenly distributed among the two agents and have to be collected in the order defined
  # by the environment config (cf. coords_or_quantity)
  pile-order: "agents"
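A sketch of how the "agents" order could distribute the configured piles, assuming a simple round-robin split that preserves the ordering from coords_or_quantity; the helper name is hypothetical:

def assign_piles(piles, n_agents):
    # Even round-robin split; each agent keeps the configured ordering
    return [piles[i::n_agents] for i in range(n_agents)]

# assign_piles([(1, 1), (4, 2), (7, 3), (2, 8)], 2)
# -> [[(1, 1), (7, 3)], [(4, 2), (2, 8)]]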
@@ -5,7 +5,9 @@ env:
  n_agents: 2 # Number of agents in the environment
  eval_render: True # If inference should be graphically visualized
  save_and_log: False # If configurations and potential logging files should be saved
  wandb_log: False # If metrics for evaluation steps should be logged with weights&biases
algorithm:
  seed: 42 # Picks seed to make random parts of algorithm reproducible. -1 for random seed
  # Piles (=encoded flags) are evenly distributed among the two agents and have to be collected in the order defined
  # by the environment config (cf. coords_or_quantity)
  pile-order: "agents"
@@ -4,7 +4,9 @@ env:
  n_agents: 1 # Number of agents in the environment
  eval_render: True # If inference should be graphically visualized
  save_and_log: False # If configurations and potential logging files should be saved
  wandb_log: False # If metrics for evaluation steps should be logged with weights&biases
algorithm:
  seed: 42 # Picks seed to make random parts of algorithm reproducible. -1 for random seed
  pile-order: "fixed" # Clean dirt piles in a fixed order specified by the environment config (cf. coords_or_quantity)
  pile-observability: "single" # Agent can only perceive one dirt pile at any given time step
  pile_all_done: "all" # During inference the episode ends only when all dirt piles are cleaned
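Taken together, pile-order: "fixed", pile-observability: "single", and pile_all_done: "all" suggest a loop like the following sketch: the agent always sees exactly one target, the first uncleaned pile in the configured order, and the episode may only end once none remain (names are assumptions for illustration):

def current_target(ordered_piles, cleaned):
    # First uncleaned pile in the order given by coords_or_quantity;
    # None signals that all piles are cleaned and the episode may end.
    for pile in ordered_piles:
        if pile not in cleaned:
            return pile
    return None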
@@ -4,11 +4,16 @@ env:
  n_agents: 1 # Number of agents in the environment
  train_render: False # If training should be graphically visualized
  save_and_log: True # If configurations and potential logging files should be saved
  wandb_log: True # If metrics for training steps should be logged with weights&biases
algorithm:
  seed: 9 # Picks seed to make random parts of algorithm reproducible. -1 for random seed
  gamma: 0.99 # The gamma value that is used as discounting factor
  n_steps: 0 # How much experience should be sampled at most until the next value- and policy-net updates are performed. (0 = Monte Carlo)
  chunk-episode: 20000 # For update, splits very large episodes in batches of approximately equal size. (0 = update networks with full episode at once)
-  max_steps: 140000 # Number of training steps used for agent1 (=agent2)
+  max_steps: 400000 # Number of training steps used for agent1 (=agent2)
  early_stopping: True # If the early stopping functionality should be used
  last_n_episodes: 100 # To determine if low change phase has begun, the last n episodes are checked if the mean target change is reached
  mean_target_change: 2.0 # What should be the accepted fluctuation for determining if a low change phase has begun
  advantage: "Advantage-AC" # Defines the used actor critic model
  pile-order: "fixed" # Clean dirt piles in a fixed order specified by the environment config (cf. coords_or_quantity)
  pile-observability: "single" # Agent can only perceive one dirt pile at any given time step
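The three early-stopping options suggest a rule along these lines; a sketch that assumes the tracked quantity is a per-episode scalar and the statistic is the mean absolute change, neither of which is confirmed by the diff:

def should_stop_early(history, last_n_episodes=100, mean_target_change=2.0):
    # Stop once the metric fluctuates less than mean_target_change on
    # average over the last last_n_episodes episodes (low-change phase).
    if len(history) < last_n_episodes + 1:
        return False
    recent = history[-(last_n_episodes + 1):]
    changes = [abs(b - a) for a, b in zip(recent, recent[1:])]
    return sum(changes) / len(changes) <= mean_target_change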
@@ -1,10 +1,12 @@
env:
  classname: marl_factory_grid.environment.configs.rl
-  env_name: "rl/two_rooms_eval_config"
+  env_name: "rl/two_rooms_agent2_eval_config"
  n_agents: 1 # Number of agents in the environment
  eval_render: True # If inference should be graphically visualized
  save_and_log: False # If configurations and potential logging files should be saved
  wandb_log: False # If metrics for evaluation steps should be logged with weights&biases
algorithm:
  seed: 42 # Picks seed to make random parts of algorithm reproducible. -1 for random seed
  pile-order: "fixed" # Clean dirt piles (=encoded flags) in a fixed order specified by the environment config (cf. coords_or_quantity)
  pile-observability: "single" # Agent can only perceive one dirt pile at any given time step
  pile_all_done: "all" # During inference the episode ends only when all dirt piles are cleaned
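For reference, reading such a config is plain YAML; a minimal sketch with PyYAML, where the file name is hypothetical and only the key layout mirrors the hunk above:

import yaml

with open("two_rooms_agent2_eval_config.yaml") as f:
    cfg = yaml.safe_load(f)

print(cfg["env"]["n_agents"])           # 1
print(cfg["algorithm"]["pile-order"])   # "fixed"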
@@ -3,11 +3,16 @@ env:
  n_agents: 1 # Number of agents in the environment
  train_render: False # If training should be graphically visualized
  save_and_log: True # If configurations and potential logging files should be saved
  wandb_log: True # If metrics for training steps should be logged with weights&biases
algorithm:
  seed: 9 # Picks seed to make random parts of algorithm reproducible. -1 for random seed
  gamma: 0.99 # The gamma value that is used as discounting factor
  n_steps: 0 # How much experience should be sampled at most until the next value- and policy-net updates are performed. (0 = Monte Carlo)
  chunk-episode: 20000 # For update, splits very large episodes in batches of approximately equal size. (0 = update networks with full episode at once)
-  max_steps: 260000 # Number of training steps used to train the agent. Here, only a placeholder value
+  max_steps: 300000 # Number of training steps used to train the agent. Here, only a placeholder value
  early_stopping: True # If the early stopping functionality should be used
  last_n_episodes: 100 # To determine if low change phase has begun, the last n episodes are checked if the mean target change is reached
  mean_target_change: 2.0 # What should be the accepted fluctuation for determining if a low change phase has begun
  advantage: "Advantage-AC" # Defines the used actor critic model
  pile-order: "fixed" # Clean dirt piles (=encoded flags) in a fixed order specified by the environment config (cf. coords_or_quantity)
  pile-observability: "single" # Agent can only perceive one dirt pile at any given time step
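Finally, chunk-episode: 20000 implies splitting an overly long episode into batches of roughly equal size before each network update; a sketch of that splitting, with names chosen for illustration:

def chunk_episode(transitions, chunk_size=20000):
    # chunk_size 0 means: update the networks with the full episode at once
    if chunk_size == 0 or not transitions:
        return [transitions]
    n_chunks = -(-len(transitions) // chunk_size)   # ceil division
    per_chunk = -(-len(transitions) // n_chunks)    # approximately equal sizes
    return [transitions[i:i + per_chunk]
            for i in range(0, len(transitions), per_chunk)]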