refactoring and init.py

This commit is contained in:
Steffen Illium
2023-06-20 18:21:43 +02:00
parent 1332cee7e1
commit c7d77acbbe
138 changed files with 328 additions and 320 deletions

View File

@@ -96,7 +96,7 @@ if __name__ == '__main__':
max_seed = 0
# Define this folder
combinations_path = Path('combinations')
# Those are all differently trained combinations of mdoels, env and parameters
# Those are all differently trained combinations of models, environment and parameters
for combination in (x for x in combinations_path.iterdir() if x.is_dir()):
# These are all the models for this specific combination
for model_run in (x for x in combination.iterdir() if x.is_dir()):
@@ -108,7 +108,7 @@ if __name__ == '__main__':
# Those are all available seeds
for seed_run in (x for x in model_run.iterdir() if x.is_dir()):
max_seed = max(int(seed_run.name.split('_')[0]), max_seed)
# Read the env configuration from ROM
# Read the environment configuration from disk
with next(seed_run.glob('env_params.json')).open('r') as f:
env_kwargs = simplejson.load(f)
available_runs_kwargs[seed_run.name] = env_kwargs
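For orientation, here is a minimal sketch of the directory layout the traversal above expects (all names are purely illustrative), followed by a compact, partial equivalent of the nested loops (it skips the max_seed bookkeeping):

# Illustrative layout only:
#   combinations/
#       dirt_ppo/                     # one trained combination of model, environment and parameters
#           run_0/                    # one model run for that combination
#               0_seed/               # one seed of that run
#                   env_params.json   # environment kwargs saved at training time
from pathlib import Path
import simplejson

available_runs_kwargs = {}
for seed_run in Path('combinations').glob('*/*/*'):
    if seed_run.is_dir():
        with (seed_run / 'env_params.json').open('r') as f:
            available_runs_kwargs[seed_run.name] = simplejson.load(f)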

14 quickstart/init.py Normal file
View File

@@ -0,0 +1,14 @@
import os
import shutil
from pathlib import Path

from mfg_package.utils.tools import ConfigExplainer

if __name__ == '__main__':
    print('Retrieving available options...')
    ce = ConfigExplainer()
    cwd = Path(os.getcwd())
    # Dump an annotated overview of all available configuration options.
    ce.save_all(cwd / 'full_config.yaml')
    # Path(__file__) is the script itself; resolve the template relative to its grandparent
    # (assuming quickstart/ sits next to mfg_package/ in the repository).
    template_path = Path(__file__).parent.parent / 'mfg_package' / 'modules' / '_template'
    # Copy the module template into the working directory; tolerate existing files.
    shutil.copytree(template_path, cwd, dirs_exist_ok=True)
    print()
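The script is meant to be run once from the directory in which a new project should start. A quick, hypothetical sanity check after running it (paths follow the sketch above; the exact set of copied template files depends on mfg_package):

from pathlib import Path

cwd = Path.cwd()
# full_config.yaml: annotated dump of every available configuration option
assert (cwd / 'full_config.yaml').is_file()
# ...plus the contents of mfg_package/modules/_template copied next to it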

View File

@@ -35,9 +35,9 @@ Welcome to this quick start file. Here we will see how to:
1. Setup parameters for the environments (dirt-factory).
2. Setup parameters for the agent training (SB3: PPO) and save metrics.
Run the training.
3. Save env and agent for later analysis.
3. Save environment and agent for later analysis.
4. Load the agent from drive
5. Rendering the env with a run of the trained agent.
5. Rendering the environment with a run of the trained agent.
6. Plot metrics
"""
@@ -64,14 +64,14 @@ if __name__ == '__main__':
# Define property object parameters.
# 'ObservationProperties' are for specifying how the agent sees the env.
# 'ObservationProperties' are for specifying how the agent sees the environment.
obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT, # Agents won't be shown in the obs at all
omit_agent_self=True, # This is default
additional_agent_placeholder=None, # We will not take care of future agents
frames_to_stack=3, # To give the agent a notion of time
pomdp_r=2 # the agent's view-radius
)
# 'MovementProperties' are for specifying how the agent is allowed to move in the env.
# 'MovementProperties' are for specifying how the agent is allowed to move in the environment.
move_props = MovementProperties(allow_diagonal_movement=True, # Euclidean style (vertices)
allow_square_movement=True, # Manhattan (edges)
allow_no_op=False) # Pause movement (do nothing)
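The two property objects only declare what the agent may perceive and do. Purely as an illustration (this is not the package's implementation), the movement flags translate to roughly the following set of allowed grid offsets:

# Illustration only: what the MovementProperties flags mean in terms of step offsets.
def allowed_offsets(allow_square_movement, allow_diagonal_movement, allow_no_op):
    offsets = []
    if allow_square_movement:       # Manhattan moves along the grid edges
        offsets += [(-1, 0), (1, 0), (0, -1), (0, 1)]
    if allow_diagonal_movement:     # Euclidean-style moves across the vertices
        offsets += [(-1, -1), (-1, 1), (1, -1), (1, 1)]
    if allow_no_op:                 # pause in place
        offsets += [(0, 0)]
    return offsets

print(allowed_offsets(True, True, False))   # 8 directions, no pause, as configured above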
@@ -94,7 +94,7 @@ if __name__ == '__main__':
multi_charge = False,
)
# These are the EnvKwargs for initializing the env class, holding all former parameter-classes
# These are the EnvKwargs for initializing the environment class, holding all former parameter-classes
# TODO: Comments
factory_kwargs = dict(n_agents=1,
max_steps=400,
@@ -149,8 +149,8 @@ if __name__ == '__main__':
model.learn(total_timesteps=int(train_steps), callback=[env_monitor_callback, env_recorder_callback])
#########################################################
# 3. Save env and agent for later analysis.
# Save the trained Model, the monitor (env measures) and the env parameters
# 3. Save environment and agent for later analysis.
# Save the trained Model, the monitor (environment measures) and the environment parameters
model.named_observation_space = env_factory.named_observation_space
model.named_action_space = env_factory.named_action_space
model.save(model_save_path)
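The model and its named spaces are saved above; the environment parameters that step 4 later reloads from env_params.json have to be written out as well. A minimal, hypothetical sketch (the file name matches what the loading code below globs for, but whether factory_kwargs serializes cleanly and where model_save_path points are assumptions):

import simplejson

# Hypothetical: persist the environment kwargs next to the saved model so that
# policy_path.glob(env_params_json) in step 4 can find and reload them.
with (model_save_path.parent / 'env_params.json').open('w') as f:
    simplejson.dump(factory_kwargs, f, indent=4, default=str)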
@@ -176,10 +176,10 @@ if __name__ == '__main__':
model_cls = next(val for key, val in h.MODEL_MAP.items() if key in policy_path.parent.name)
# Load the agent
model = model_cls.load(policy_path / 'model.zip', device='cpu')
# Load old env kwargs
# Load old environment kwargs
with next(policy_path.glob(env_params_json)).open('r') as f:
env_kwargs = simplejson.load(f)
# Make the env stop ar collisions
# Make the environment stop at collisions
# (you only want a single collision per episode, hence the statistics)
env_kwargs.update(done_at_collision=True)
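Step 5 (rendering the environment with a run of the trained agent) is not shown in the excerpt. A minimal rollout sketch, assuming a Gym-style environment re-created from env_kwargs and the Stable-Baselines3 predict API (the factory class and the exact step signature are assumptions):

# Hypothetical rollout/rendering loop; `env` is the factory rebuilt from env_kwargs.
done = False
obs = env.reset()
while not done:
    action, _ = model.predict(obs, deterministic=True)   # SB3 API
    obs, reward, done, info = env.step(action)
    env.render()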

View File

@@ -34,9 +34,9 @@ Welcome to this quick start file. Here we will see how to:
1. Setup parameters for the environments (dest-factory).
2. Setup parameters for the agent training (SB3: PPO) and save metrics.
Run the training.
3. Save env and agent for later analysis.
3. Save environment and agent for later analysis.
4. Load the agent from drive
5. Rendering the env with a run of the trained agent.
5. Rendering the environment with a run of the trained agent.
6. Plot metrics
"""
@@ -63,14 +63,14 @@ if __name__ == '__main__':
# Define property object parameters.
# 'ObservationProperties' are for specifying how the agent sees the env.
# 'ObservationProperties' are for specifying how the agent sees the environment.
obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT, # Agents won't be shown in the obs at all
omit_agent_self=True, # This is default
additional_agent_placeholder=None, # We will not take care of future agents
frames_to_stack=3, # To give the agent a notion of time
pomdp_r=2 # the agent's view-radius
)
# 'MovementProperties' are for specifying how the agent is allowed to move in the env.
# 'MovementProperties' are for specifying how the agent is allowed to move in the environment.
move_props = MovementProperties(allow_diagonal_movement=True, # Euclidean style (vertices)
allow_square_movement=True, # Manhattan (edges)
allow_no_op=False) # Pause movement (do nothing)
@@ -85,7 +85,7 @@ if __name__ == '__main__':
spawn_mode = DestModeOptions.DONE,
)
# These are the EnvKwargs for initializing the env class, holding all former parameter-classes
# These are the EnvKwargs for initializing the environment class, holding all former parameter-classes
# TODO: Comments
factory_kwargs = dict(n_agents=1,
max_steps=400,
@@ -139,8 +139,8 @@ if __name__ == '__main__':
model.learn(total_timesteps=int(train_steps), callback=[env_monitor_callback, env_recorder_callback])
#########################################################
# 3. Save env and agent for later analysis.
# Save the trained Model, the monitor (env measures) and the env parameters
# 3. Save environment and agent for later analysis.
# Save the trained Model, the monitor (environment measures) and the environment parameters
model.named_observation_space = env_factory.named_observation_space
model.named_action_space = env_factory.named_action_space
model.save(model_save_path)
@@ -166,10 +166,10 @@ if __name__ == '__main__':
model_cls = next(val for key, val in h.MODEL_MAP.items() if key in policy_path.parent.name)
# Load the agent
model = model_cls.load(policy_path / 'model.zip', device='cpu')
# Load old env kwargs
# Load old environment kwargs
with next(policy_path.glob(env_params_json)).open('r') as f:
env_kwargs = simplejson.load(f)
# Make the env stop ar collisions
# Make the environment stop at collisions
# (you only want a single collision per episode, hence the statistics)
env_kwargs.update(done_at_collision=True)
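Step 6 (plotting metrics) is also outside the excerpt. A rough sketch, assuming the monitor callback produced a pandas-readable file of per-episode measures (the file name and column name below are made up for illustration):

import pandas as pd
import matplotlib.pyplot as plt

# Hypothetical: monitor file name and column are assumptions.
df = pd.read_pickle(policy_path / 'monitor.pick')
df['step_reward'].rolling(window=50).mean().plot(title='Smoothed per-step reward')
plt.show()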

View File

@@ -34,9 +34,9 @@ Welcome to this quick start file. Here we will see how to:
1. Setup parameters for the environments (dirt-factory).
2. Setup parameters for the agent training (SB3: PPO) and save metrics.
Run the training.
3. Save env and agent for later analysis.
3. Save environment and agent for later analysis.
4. Load the agent from drive
5. Rendering the env with a run of the trained agent.
5. Rendering the environment with a run of the trained agent.
6. Plot metrics
"""
@@ -63,14 +63,14 @@ if __name__ == '__main__':
# Define property object parameters.
# 'ObservationProperties' are for specifying how the agent sees the env.
# 'ObservationProperties' are for specifying how the agent sees the environment.
obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT, # Agents won't be shown in the obs at all
omit_agent_self=True, # This is default
additional_agent_placeholder=None, # We will not take care of future agents
frames_to_stack=3, # To give the agent a notion of time
pomdp_r=2 # the agent's view-radius
)
# 'MovementProperties' are for specifying how the agent is allowed to move in the env.
# 'MovementProperties' are for specifying how the agent is allowed to move in the environment.
move_props = MovementProperties(allow_diagonal_movement=True, # Euclidean style (vertices)
allow_square_movement=True, # Manhattan (edges)
allow_no_op=False) # Pause movement (do nothing)
@@ -87,7 +87,7 @@ if __name__ == '__main__':
max_spawn_ratio=0.05,
dirt_smear_amount=0.0)
# These are the EnvKwargs for initializing the env class, holding all former parameter-classes
# These are the EnvKwargs for initializing the environment class, holding all former parameter-classes
# TODO: Comments
factory_kwargs = dict(n_agents=1,
max_steps=400,
@@ -141,8 +141,8 @@ if __name__ == '__main__':
model.learn(total_timesteps=int(train_steps), callback=[env_monitor_callback, env_recorder_callback])
#########################################################
# 3. Save env and agent for later analysis.
# Save the trained Model, the monitor (env measures) and the env parameters
# 3. Save environment and agent for later analysis.
# Save the trained Model, the monitor (environment measures) and the environment parameters
model.named_observation_space = env_factory.named_observation_space
model.named_action_space = env_factory.named_action_space
model.save(model_save_path)
@@ -168,10 +168,10 @@ if __name__ == '__main__':
model_cls = next(val for key, val in h.MODEL_MAP.items() if key in policy_path.parent.name)
# Load the agent
model = model_cls.load(policy_path / 'model.zip', device='cpu')
# Load old env kwargs
# Load old environment kwargs
with next(policy_path.glob(env_params_json)).open('r') as f:
env_kwargs = simplejson.load(f)
# Make the env stop ar collisions
# Make the environment stop at collisions
# (you only want a single collision per episode, hence the statistics)
env_kwargs.update(done_at_collision=True)

View File

@@ -34,9 +34,9 @@ Welcome to this quick start file. Here we will see how to:
1. Setup parameters for the environments (item-factory).
2. Setup parameters for the agent training (SB3: PPO) and save metrics.
Run the training.
3. Save env and agent for later analysis.
3. Save environment and agent for later analysis.
4. Load the agent from drive
5. Rendering the env with a run of the trained agent.
5. Rendering the environment with a run of the trained agent.
6. Plot metrics
"""
@@ -62,14 +62,14 @@ if __name__ == '__main__':
# 1. Setup parameters for the environments (item-factory).
#
# Define property object parameters.
# 'ObservationProperties' are for specifying how the agent sees the env.
# 'ObservationProperties' are for specifying how the agent sees the environment.
obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT, # Agents won't be shown in the obs at all
omit_agent_self=True, # This is default
additional_agent_placeholder=None, # We will not take care of future agents
frames_to_stack=3, # To give the agent a notion of time
pomdp_r=2 # the agent's view-radius
)
# 'MovementProperties' are for specifying how the agent is allowed to move in the env.
# 'MovementProperties' are for specifying how the agent is allowed to move in the environment.
move_props = MovementProperties(allow_diagonal_movement=True, # Euclidean style (vertices)
allow_square_movement=True, # Manhattan (edges)
allow_no_op=False) # Pause movement (do nothing)
@@ -84,7 +84,7 @@ if __name__ == '__main__':
max_agent_inventory_capacity = 5, # How many items fit into the agent's inventory before it is full
)
# These are the EnvKwargs for initializing the env class, holding all former parameter-classes
# These are the EnvKwargs for initializing the environment class, holding all former parameter-classes
# TODO: Comments
factory_kwargs = dict(n_agents=1,
max_steps=400,
@@ -137,8 +137,8 @@ if __name__ == '__main__':
model.learn(total_timesteps=int(train_steps), callback=[env_monitor_callback, env_recorder_callback])
#########################################################
# 3. Save env and agent for later analysis.
# Save the trained Model, the monitor (env measures) and the env parameters
# 3. Save environment and agent for later analysis.
# Save the trained Model, the monitor (environment measures) and the environment parameters
model.named_observation_space = env_factory.named_observation_space
model.named_action_space = env_factory.named_action_space
model.save(model_save_path)
@@ -164,10 +164,10 @@ if __name__ == '__main__':
model_cls = next(val for key, val in h.MODEL_MAP.items() if key in policy_path.parent.name)
# Load the agent
model = model_cls.load(policy_path / 'model.zip', device='cpu')
# Load old env kwargs
# Load old environment kwargs
with next(policy_path.glob(env_params_json)).open('r') as f:
env_kwargs = simplejson.load(f)
# Make the env stop ar collisions
# Make the environment stop at collisions
# (you only want a single collision per episode, hence the statistics)
env_kwargs.update(done_at_collision=True)