Add all relevant functional code for the A2C Dirt Quadrant setting, with small changes to the environment and separate configs for the single-agent and multi-agent settings

2025-12-06 15:40:37 +01:00 · 2024-05-06 12:33:37 +02:00
parent 55026eda12
commit 3c54d04f9f
13 changed files with 652 additions and 174 deletions
--- a/studies/marl_adapted.py
+++ b/studies/marl_adapted.py
@@ -3,17 +3,36 @@ from pathlib import Path
 from marl_factory_grid.algorithms.marl.a2c_dirt import A2C
 from marl_factory_grid.algorithms.utils import load_yaml_file

-if __name__ == '__main__':
+def dirt_quadrant_single_agent_training():
    cfg_path = Path('../marl_factory_grid/algorithms/marl/configs/dirt_quadrant_config.yaml')

    train_cfg = load_yaml_file(cfg_path)
    # Use environment config with fixed spawnpoints for eval
    eval_cfg = copy.deepcopy(train_cfg)
-    eval_cfg["env"]["env_name"] = "custom/dirt_quadrant" # Options: two_rooms_one_door_modified, dirt_quadrant
+    eval_cfg["env"]["env_name"] = "custom/dirt_quadrant_eval_config"

    print("Training phase")
    agent = A2C(train_cfg, eval_cfg)
    agent.train_loop()
-    agent.plot_reward_development()
    print("Evaluation phase")
-    agent.eval_loop(10)
+    # Use consecutive episodes for eval in the single-agent case
+    train_cfg["algorithm"]["pile_all_done"] = "all"
+    # agent.load_agents(["run0", "run1"])
+    agent.eval_loop(10)
+
+
+def dirt_quadrant_multi_agent_eval():
+    cfg_path = Path('../marl_factory_grid/algorithms/marl/configs/MultiAgentConfigs/dirt_quadrant_config.yaml')
+
+    train_cfg = load_yaml_file(cfg_path)
+    # Use environment config with fixed spawnpoints for eval
+    eval_cfg = copy.deepcopy(train_cfg)
+    eval_cfg["env"]["env_name"] = "custom/MultiAgentConfigs/dirt_quadrant_eval_config"
+    agent = A2C(train_cfg, eval_cfg)
+    print("Evaluation phase")
+    agent.load_agents(["run0", "run1"])
+    agent.eval_loop(10)
+
+
+if __name__ == '__main__':
+    dirt_quadrant_single_agent_training()