Mirror of https://github.com/illiumst/marl-factory-grid.git

Commit: refactoring and init.py
@@ -96,7 +96,7 @@ if __name__ == '__main__':
     max_seed = 0
     # Define this folder
     combinations_path = Path('combinations')
-    # Those are all differently trained combinations of mdoels, env and parameters
+    # Those are all differently trained combinations of mdoels, environment and parameters
     for combination in (x for x in combinations_path.iterdir() if x.is_dir()):
         # These are all the models for this specific combination
         for model_run in (x for x in combination.iterdir() if x.is_dir()):
@@ -108,7 +108,7 @@ if __name__ == '__main__':
             # Those are all available seeds
             for seed_run in (x for x in model_run.iterdir() if x.is_dir()):
                 max_seed = max(int(seed_run.name.split('_')[0]), max_seed)
-                # Read the env configuration from ROM
+                # Read the environment configuration from ROM
                 with next(seed_run.glob('env_params.json')).open('r') as f:
                     env_kwargs = simplejson.load(f)
                 available_runs_kwargs[seed_run.name] = env_kwargs
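
For reference, the two hunks above walk a combinations/<model_run>/<seed_run>/ tree and read each run's env_params.json. A standalone sketch of the same pattern (the helper name and the use of the stdlib json module instead of simplejson are illustrative additions, not part of the commit):

    from pathlib import Path
    import json  # simplejson, as used in the script, is a drop-in replacement

    def collect_env_params(combinations_path: Path) -> dict:
        """Collect the env_params.json of every seed run below combinations/."""
        available_runs_kwargs = {}
        for combination in (p for p in combinations_path.iterdir() if p.is_dir()):
            for model_run in (p for p in combination.iterdir() if p.is_dir()):
                for seed_run in (p for p in model_run.iterdir() if p.is_dir()):
                    params_file = seed_run / 'env_params.json'
                    if params_file.exists():
                        available_runs_kwargs[seed_run.name] = json.loads(params_file.read_text())
        return available_runs_kwargs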

quickstart/init.py (new file, +14 lines)
@@ -0,0 +1,14 @@
+import os
+import shutil
+from pathlib import Path
+
+from mfg_package.utils.tools import ConfigExplainer
+
+if __name__ == '__main__':
+    print('Retrieving available options...')
+    ce = ConfigExplainer()
+    cwd = Path(os.getcwd())
+    ce.save_all(cwd / 'full_config.yaml')
+    template_path = Path(__file__) / 'mfg_package' / 'modules' / '_template'
+    shutil.copytree(template_path, cwd)
+    print()
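
The new quickstart/init.py dumps all available configuration options to full_config.yaml and then copies the module template into the current working directory. Note that Path(__file__) points at init.py itself, so joining further parts onto it addresses a path below that file; a hedged sketch of resolving the template relative to the installed package instead (an assumption about the intended layout, not what the committed script does; dirs_exist_ok needs Python 3.8+):

    import shutil
    from pathlib import Path

    import mfg_package  # locate the installed package rather than this script

    # Assumed layout: the template ships as mfg_package/modules/_template.
    template_path = Path(mfg_package.__file__).parent / 'modules' / '_template'
    shutil.copytree(template_path, Path.cwd(), dirs_exist_ok=True)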

@@ -35,9 +35,9 @@ Welcome to this quick start file. Here we will see how to:
 1. Setup parameters for the environments (dirt-factory).
 2. Setup parameters for the agent training (SB3: PPO) and save metrics.
    Run the training.
-3. Save env and agent for later analysis.
+3. Save environment and agent for later analysis.
 4. Load the agent from drive
-5. Rendering the env with a run of the trained agent.
+5. Rendering the environment with a run of the trained agent.
 6. Plot metrics
 """
 
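
Step 2 of the list above is implemented further down with Stable-Baselines3. As a compact orientation, a minimal sketch of that training call (CartPole is only a stand-in environment here, and the policy string, step budget, and save path are placeholders; the quickstart builds a factory environment instead):

    import gymnasium as gym
    from stable_baselines3 import PPO

    env = gym.make('CartPole-v1')  # stand-in env, not the factory environment
    model = PPO('MlpPolicy', env, verbose=1, device='cpu')
    model.learn(total_timesteps=10_000)
    model.save('model.zip')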
@@ -64,14 +64,14 @@ if __name__ == '__main__':
 
 
     # Define property object parameters.
-    # 'ObservationProperties' are for specifying how the agent sees the env.
+    # 'ObservationProperties' are for specifying how the agent sees the environment.
     obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT,  # Agents won`t be shown in the obs at all
                                       omit_agent_self=True,  # This is default
                                       additional_agent_placeholder=None,  # We will not take care of future agent
                                       frames_to_stack=3,  # To give the agent a notion of time
                                       pomdp_r=2  # the agent view-radius
                                       )
-    # 'MovementProperties' are for specifying how the agent is allowed to move in the env.
+    # 'MovementProperties' are for specifying how the agent is allowed to move in the environment.
     move_props = MovementProperties(allow_diagonal_movement=True,  # Euclidean style (vertices)
                                     allow_square_movement=True,  # Manhattan (edges)
                                     allow_no_op=False)  # Pause movement (do nothing)
@@ -94,7 +94,7 @@ if __name__ == '__main__':
                                     multi_charge = False,
                                     )
 
-    # These are the EnvKwargs for initializing the env class, holding all former parameter-classes
+    # These are the EnvKwargs for initializing the environment class, holding all former parameter-classes
     # TODO: Comments
     factory_kwargs = dict(n_agents=1,
                           max_steps=400,
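
The factory_kwargs dict is the "EnvKwargs" the comment refers to: every property object plus the scenario settings, collected into one dict that is later unpacked into the environment constructor. A toy, self-contained illustration of that pattern (the class and keyword names below are stand-ins, not the package's real API):

    from dataclasses import dataclass

    @dataclass
    class MovementProperties:  # toy stand-in for the package's parameter class
        allow_diagonal_movement: bool = True
        allow_square_movement: bool = True
        allow_no_op: bool = False

    class Factory:  # placeholder for the real environment class
        def __init__(self, n_agents, max_steps, mv_prop):
            self.n_agents, self.max_steps, self.mv_prop = n_agents, max_steps, mv_prop

    factory_kwargs = dict(n_agents=1, max_steps=400, mv_prop=MovementProperties())
    env_factory = Factory(**factory_kwargs)  # all former parameter classes travel in one dict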
@@ -149,8 +149,8 @@ if __name__ == '__main__':
     model.learn(total_timesteps=int(train_steps), callback=[env_monitor_callback, env_recorder_callback])
 
     #########################################################
-    # 3. Save env and agent for later analysis.
-    # Save the trained Model, the monitor (env measures) and the env parameters
+    # 3. Save environment and agent for later analysis.
+    # Save the trained Model, the monitor (environment measures) and the environment parameters
     model.named_observation_space = env_factory.named_observation_space
     model.named_action_space = env_factory.named_action_space
     model.save(model_save_path)
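
The comment above also mentions saving the environment parameters; the "load" hunk further down reads them back from env_params.json with simplejson. A hedged sketch of the writing side (the output directory name and indent level are assumptions):

    from pathlib import Path
    import simplejson  # the stdlib json module would behave identically here

    out_dir = Path('study_out')  # assumed output directory
    out_dir.mkdir(parents=True, exist_ok=True)
    env_kwargs = dict(n_agents=1, max_steps=400)  # stand-in for factory_kwargs
    with (out_dir / 'env_params.json').open('w') as f:
        simplejson.dump(env_kwargs, f, indent=4)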
@@ -176,10 +176,10 @@ if __name__ == '__main__':
     model_cls = next(val for key, val in h.MODEL_MAP.items() if key in policy_path.parent.name)
     # Load the agent agent
     model = model_cls.load(policy_path / 'model.zip', device='cpu')
-    # Load old env kwargs
+    # Load old environment kwargs
     with next(policy_path.glob(env_params_json)).open('r') as f:
         env_kwargs = simplejson.load(f)
-    # Make the env stop ar collisions
+    # Make the environment stop ar collisions
     # (you only want to have a single collision per episode hence the statistics)
     env_kwargs.update(done_at_collision=True)
 
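
With the parameters loaded and done_at_collision switched on, the quickstart moves on to step 5, rendering a run of the trained agent. A sketch of that rollout, assuming the factory environment follows the classic Gym reset/step/render protocol (the concrete env class is not shown in this diff; model.predict is the regular SB3 inference call):

    env = env_factory_cls(**env_kwargs)  # env_factory_cls: whichever class these kwargs belong to
    obs = env.reset()
    done = False
    while not done:
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, done, info = env.step(action)
        env.render()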

@@ -34,9 +34,9 @@ Welcome to this quick start file. Here we will see how to:
 1. Setup parameters for the environments (dest-factory).
 2. Setup parameters for the agent training (SB3: PPO) and save metrics.
    Run the training.
-3. Save env and agent for later analysis.
+3. Save environment and agent for later analysis.
 4. Load the agent from drive
-5. Rendering the env with a run of the trained agent.
+5. Rendering the environment with a run of the trained agent.
 6. Plot metrics
 """
 
@@ -63,14 +63,14 @@ if __name__ == '__main__':
 
 
     # Define property object parameters.
-    # 'ObservationProperties' are for specifying how the agent sees the env.
+    # 'ObservationProperties' are for specifying how the agent sees the environment.
     obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT,  # Agents won`t be shown in the obs at all
                                       omit_agent_self=True,  # This is default
                                       additional_agent_placeholder=None,  # We will not take care of future agent
                                       frames_to_stack=3,  # To give the agent a notion of time
                                       pomdp_r=2  # the agent view-radius
                                       )
-    # 'MovementProperties' are for specifying how the agent is allowed to move in the env.
+    # 'MovementProperties' are for specifying how the agent is allowed to move in the environment.
     move_props = MovementProperties(allow_diagonal_movement=True,  # Euclidean style (vertices)
                                     allow_square_movement=True,  # Manhattan (edges)
                                     allow_no_op=False)  # Pause movement (do nothing)
@@ -85,7 +85,7 @@ if __name__ == '__main__':
                                  spawn_mode = DestModeOptions.DONE,
                                  )
 
-    # These are the EnvKwargs for initializing the env class, holding all former parameter-classes
+    # These are the EnvKwargs for initializing the environment class, holding all former parameter-classes
     # TODO: Comments
     factory_kwargs = dict(n_agents=1,
                           max_steps=400,
@@ -139,8 +139,8 @@ if __name__ == '__main__':
     model.learn(total_timesteps=int(train_steps), callback=[env_monitor_callback, env_recorder_callback])
 
     #########################################################
-    # 3. Save env and agent for later analysis.
-    # Save the trained Model, the monitor (env measures) and the env parameters
+    # 3. Save environment and agent for later analysis.
+    # Save the trained Model, the monitor (environment measures) and the environment parameters
     model.named_observation_space = env_factory.named_observation_space
     model.named_action_space = env_factory.named_action_space
     model.save(model_save_path)
@@ -166,10 +166,10 @@ if __name__ == '__main__':
     model_cls = next(val for key, val in h.MODEL_MAP.items() if key in policy_path.parent.name)
     # Load the agent agent
     model = model_cls.load(policy_path / 'model.zip', device='cpu')
-    # Load old env kwargs
+    # Load old environment kwargs
     with next(policy_path.glob(env_params_json)).open('r') as f:
         env_kwargs = simplejson.load(f)
-    # Make the env stop ar collisions
+    # Make the environment stop ar collisions
     # (you only want to have a single collision per episode hence the statistics)
     env_kwargs.update(done_at_collision=True)
 

@@ -34,9 +34,9 @@ Welcome to this quick start file. Here we will see how to:
 1. Setup parameters for the environments (dirt-factory).
 2. Setup parameters for the agent training (SB3: PPO) and save metrics.
    Run the training.
-3. Save env and agent for later analysis.
+3. Save environment and agent for later analysis.
 4. Load the agent from drive
-5. Rendering the env with a run of the trained agent.
+5. Rendering the environment with a run of the trained agent.
 6. Plot metrics
 """
 
@@ -63,14 +63,14 @@ if __name__ == '__main__':
 
 
     # Define property object parameters.
-    # 'ObservationProperties' are for specifying how the agent sees the env.
+    # 'ObservationProperties' are for specifying how the agent sees the environment.
     obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT,  # Agents won`t be shown in the obs at all
                                       omit_agent_self=True,  # This is default
                                       additional_agent_placeholder=None,  # We will not take care of future agent
                                       frames_to_stack=3,  # To give the agent a notion of time
                                       pomdp_r=2  # the agent' view-radius
                                       )
-    # 'MovementProperties' are for specifying how the agent is allowed to move in the env.
+    # 'MovementProperties' are for specifying how the agent is allowed to move in the environment.
     move_props = MovementProperties(allow_diagonal_movement=True,  # Euclidean style (vertices)
                                     allow_square_movement=True,  # Manhattan (edges)
                                     allow_no_op=False)  # Pause movement (do nothing)
@@ -87,7 +87,7 @@ if __name__ == '__main__':
                                  max_spawn_ratio=0.05,
                                  dirt_smear_amount=0.0)
 
-    # These are the EnvKwargs for initializing the env class, holding all former parameter-classes
+    # These are the EnvKwargs for initializing the environment class, holding all former parameter-classes
     # TODO: Comments
     factory_kwargs = dict(n_agents=1,
                           max_steps=400,
@@ -141,8 +141,8 @@ if __name__ == '__main__':
     model.learn(total_timesteps=int(train_steps), callback=[env_monitor_callback, env_recorder_callback])
 
     #########################################################
-    # 3. Save env and agent for later analysis.
-    # Save the trained Model, the monitor (env measures) and the env parameters
+    # 3. Save environment and agent for later analysis.
+    # Save the trained Model, the monitor (environment measures) and the environment parameters
    model.named_observation_space = env_factory.named_observation_space
     model.named_action_space = env_factory.named_action_space
     model.save(model_save_path)
@@ -168,10 +168,10 @@ if __name__ == '__main__':
     model_cls = next(val for key, val in h.MODEL_MAP.items() if key in policy_path.parent.name)
     # Load the agent
     model = model_cls.load(policy_path / 'model.zip', device='cpu')
-    # Load old env kwargs
+    # Load old environment kwargs
     with next(policy_path.glob(env_params_json)).open('r') as f:
         env_kwargs = simplejson.load(f)
-    # Make the env stop ar collisions
+    # Make the environment stop ar collisions
     # (you only want to have a single collision per episode hence the statistics)
     env_kwargs.update(done_at_collision=True)
 

@@ -34,9 +34,9 @@ Welcome to this quick start file. Here we will see how to:
 1. Setup parameters for the environments (item-factory).
 2. Setup parameters for the agent training (SB3: PPO) and save metrics.
    Run the training.
-3. Save env and agent for later analysis.
+3. Save environment and agent for later analysis.
 4. Load the agent from drive
-5. Rendering the env with a run of the trained agent.
+5. Rendering the environment with a run of the trained agent.
 6. Plot metrics
 """
 
@@ -62,14 +62,14 @@ if __name__ == '__main__':
     # 1. Setup parameters for the environments (item-factory).
     #
     # Define property object parameters.
-    # 'ObservationProperties' are for specifying how the agent sees the env.
+    # 'ObservationProperties' are for specifying how the agent sees the environment.
     obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT,  # Agents won`t be shown in the obs at all
                                       omit_agent_self=True,  # This is default
                                       additional_agent_placeholder=None,  # We will not take care of future agent
                                       frames_to_stack=3,  # To give the agent a notion of time
                                       pomdp_r=2  # the agent view-radius
                                       )
-    # 'MovementProperties' are for specifying how the agent is allowed to move in the env.
+    # 'MovementProperties' are for specifying how the agent is allowed to move in the environment.
     move_props = MovementProperties(allow_diagonal_movement=True,  # Euclidean style (vertices)
                                     allow_square_movement=True,  # Manhattan (edges)
                                     allow_no_op=False)  # Pause movement (do nothing)
@@ -84,7 +84,7 @@ if __name__ == '__main__':
                               max_agent_inventory_capacity = 5,  # How many items are needed until the agent inventory is full)
                               )
 
-    # These are the EnvKwargs for initializing the env class, holding all former parameter-classes
+    # These are the EnvKwargs for initializing the environment class, holding all former parameter-classes
     # TODO: Comments
     factory_kwargs = dict(n_agents=1,
                           max_steps=400,
@@ -137,8 +137,8 @@ if __name__ == '__main__':
     model.learn(total_timesteps=int(train_steps), callback=[env_monitor_callback, env_recorder_callback])
 
     #########################################################
-    # 3. Save env and agent for later analysis.
-    # Save the trained Model, the monitor (env measures) and the env parameters
+    # 3. Save environment and agent for later analysis.
+    # Save the trained Model, the monitor (environment measures) and the environment parameters
     model.named_observation_space = env_factory.named_observation_space
     model.named_action_space = env_factory.named_action_space
     model.save(model_save_path)
@@ -164,10 +164,10 @@ if __name__ == '__main__':
     model_cls = next(val for key, val in h.MODEL_MAP.items() if key in policy_path.parent.name)
     # Load the agent agent
     model = model_cls.load(policy_path / 'model.zip', device='cpu')
-    # Load old env kwargs
+    # Load old environment kwargs
     with next(policy_path.glob(env_params_json)).open('r') as f:
         env_kwargs = simplejson.load(f)
-    # Make the env stop ar collisions
+    # Make the environment stop ar collisions
     # (you only want to have a single collision per episode hence the statistics)
     env_kwargs.update(done_at_collision=True)
 