Add various RL-adapted configs

2026-01-15 23:41:39 +01:00 · 2024-05-02 11:00:35 +02:00
parent 48d708bbcd
commit c7c2c4e5a3
9 changed files with 334 additions and 9 deletions
--- a/marl_factory_grid/algorithms/marl/configs/dirt_quadrant_config.yaml
+++ b/marl_factory_grid/algorithms/marl/configs/dirt_quadrant_config.yaml
@@ -0,0 +1,28 @@
+agent:
+  classname:           marl_factory_grid.algorithms.marl.networks.RecurrentAC
+  n_agents:            1
+  obs_emb_size:        96
+  action_emb_size:     16
+  hidden_size_actor:   64
+  hidden_size_critic:  64
+  use_agent_embedding: False
+env:
+  classname:          marl_factory_grid.configs.custom
+  env_name:           "custom/dirt_quadrant_random_pos"
+  n_agents:           1
+  max_steps:          250
+  pomdp_r:            2
+  stack_n_frames:     0
+  individual_rewards: True
+  train_render:       False
+  eval_render:        True
+method:               marl_factory_grid.algorithms.marl.LoopSEAC
+algorithm:
+  gamma:              0.99
+  entropy_coef:       0.01
+  vf_coef:            0.05
+  n_steps:            0 # Maximum number of experience steps to sample (n-step TD) before the next value and policy update is performed. Default 0: Monte Carlo (MC)
+  max_steps:          80000
+  advantage:          "Advantage-AC" # Options: "Advantage-AC", "TD-Advantage-AC", "Reinforce"
+  pile-order:         "fixed" # Options: "fixed", "random", "none", "agents"
+
--- a/marl_factory_grid/algorithms/marl/configs/environment_changes
+++ b/marl_factory_grid/algorithms/marl/configs/environment_changes
@@ -0,0 +1,3 @@
+marl_factory_grid>environment>rules.py#SpawnEntity.on_reset()
+marl_factory_grid>environment>rewards.py
+marl_factory_grid>modules>clean_up>groups.py#DirtPiles.trigger_spawn()
--- a/marl_factory_grid/algorithms/marl/configs/two_rooms_one_door_modified_config.yaml
+++ b/marl_factory_grid/algorithms/marl/configs/two_rooms_one_door_modified_config.yaml
@@ -7,20 +7,22 @@ agent:
  hidden_size_critic:  64
  use_agent_embedding: False
 env:
-  classname:          marl_factory_grid.configs
-  env_name:           "simple_crossing"
+  classname:          marl_factory_grid.configs.custom
+  env_name:           "custom/two_rooms_one_door_modified_random_pos"
  n_agents:           2
  max_steps:          250
  pomdp_r:            2
  stack_n_frames:     0
  individual_rewards: True
-  train_render:       True
+  train_render:       False
  eval_render:        True
 method:               marl_factory_grid.algorithms.marl.LoopSEAC
 algorithm:
  gamma:              0.99
  entropy_coef:       0.01
-  vf_coef:            0.5
-  n_steps:            5
-  max_steps:          10000
+  vf_coef:            0.05
+  n_steps:            0 # Maximum number of experience steps to sample (n-step TD) before the next value and policy update is performed. Default 0: Monte Carlo (MC)
+  max_steps:          100000
+  advantage:          "TD-Advantage-AC" # Options: "Advantage-AC", "TD-Advantage-AC", "Reinforce"
+  pile-order:         "agents" # Options: "fixed", "random", "none", "agents"