experiment 1 running

2021-10-14 15:06:07 +02:00
parent 696e520862
commit db4dbc13ae
2 changed files with 193 additions and 67 deletions
--- a/reload_agent.py
+++ b/reload_agent.py
@@ -2,9 +2,9 @@ import warnings
 from pathlib import Path

 import yaml
-from natsort import natsorted
-from environments import helpers as h

+from environments import helpers as h
+from environments.factory.factory_dirt import DirtFactory
 from environments.factory.factory_dirt_item import DirtItemFactory
 from environments.logging.recorder import RecorderCallback

@@ -17,21 +17,23 @@ if __name__ == '__main__':
    model_name = 'PPO_1631187073'
    run_id = 0
    seed = 69
-    out_path = Path(__file__).parent / 'study_out' / 'e_1_1631709932'/ 'no_obs' / 'itemdirt'/'A2C_1631709932' / '0_A2C_1631709932'
+    out_path = Path(__file__).parent / 'study_out' / 'e_1_1631709932' / 'no_obs' / 'dirt' / 'A2C_1631709932' / '0_A2C_1631709932'
    model_path = out_path / model_name

    with (out_path / f'env_params.json').open('r') as f:
        env_kwargs = yaml.load(f, Loader=yaml.FullLoader)
-        env_kwargs.update(verbose=False, env_seed=seed, record_episodes=True)
+        env_kwargs.update(additional_agent_placeholder=None)
+        # env_kwargs.update(verbose=False, env_seed=seed, record_episodes=True, parse_doors=True)

    this_model = out_path / 'model.zip'

    model_cls = next(val for key, val in h.MODEL_MAP.items() if key in model_name)
    model = model_cls.load(this_model)

-    with RecorderCallback(filepath=Path() / 'recorder_out.json') as recorder:
+    with RecorderCallback(filepath=Path() / 'recorder_out_doors.json') as recorder:
        # Init Env
-        with DirtItemFactory(**env_kwargs) as env:
+        with DirtFactory(**env_kwargs) as env:
+            obs_shape = env.observation_space.shape
            # Evaluation Loop for i in range(n Episodes)
            for episode in range(5):
                obs = env.reset()
@@ -41,6 +43,7 @@ if __name__ == '__main__':
                    env_state, step_r, done_bool, info_obj = env.step(action[0])
                    recorder.read_info(0, info_obj)
                    rew += step_r
+                    env.render()
                    if done_bool:
                        recorder.read_done(0, done_bool)
                        break
--- a/studies/e_1.py
+++ b/studies/e_1.py
@@ -1,5 +1,6 @@
 import sys
 from pathlib import Path
+from matplotlib import pyplot as plt

 try:
    # noinspection PyUnboundLocalVariable
@@ -25,11 +26,14 @@ from environments.factory.factory_dirt_item import DirtItemFactory
 from environments.factory.factory_item import ItemProperties, ItemFactory
 from environments.logging.monitor import MonitorCallback
 from environments.utility_classes import MovementProperties
+import pickle
 from plotting.compare_runs import compare_seed_runs, compare_model_runs, compare_all_parameter_runs
+import pandas as pd
+import seaborn as sns

 # Define a global studi save path
-start_time = 1631709932  # int(time.time())
-study_root_path = (Path('..') if not DIR else Path()) / 'study_out' / f'{Path(__file__).stem}_{start_time}'
+start_time = int(time.time())
+study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}'

 """
 In this studie, we want to explore the macro behaviour of multi agents which are trained on the same task, 
@@ -56,6 +60,11 @@ There are further distinctions to be made:
 - This tells the agent to treat other agents as obstacle. 
 - However, the state space is altered since moving obstacles are not part the original agent observation. 
 - We are out of distribution.
+
+4. Obseration (similiar to camera read out) ['in_lvl_0.5', 'in_lvl_n']
+- This tells the agent to treat other agents as obstacle, but "sees" them encoded as a different value. 
+- However, the state space is altered since moving obstacles are not part the original agent observation. 
+- We are out of distribution.
 """


@@ -122,12 +131,12 @@ if __name__ == '__main__':
        #  Further Adjustments are done post-training
        'in_lvl_obs': dict(post_training_kwargs=dict(other_agent_obs='in_lvl')),
        #  No further adjustment needed
-        'no_obs': None
+        'no_obs': {}
    }

    # Train starts here ############################################################
    # Build Major Loop  parameters, parameter versions, Env Classes and models
-    if False:
+    if True:
        for observation_mode in observation_modes.keys():
            for env_name in env_names:
                for model_cls in h.MODEL_MAP.values():
@@ -151,27 +160,28 @@ if __name__ == '__main__':

                        # Env Init & Model kwargs definition
                        if model_cls.__name__ in ["PPO", "A2C"]:
-                            env = env_class(**env_kwargs)
-
-                            # env = SubprocVecEnv([encapsule_env_factory(env_class, env_kwargs) for _ in range(1)],
-                            #                    start_method="spawn")
+                            # env_factory = env_class(**env_kwargs)
+                            env_factory = SubprocVecEnv([encapsule_env_factory(env_class, env_kwargs)
+                                                         for _ in range(1)], start_method="spawn")
                            model_kwargs = policy_model_kwargs()

                        elif model_cls.__name__ in ["RegDQN", "DQN", "QRDQN"]:
-                            env = env_class(**env_kwargs)
-                            model_kwargs = dqn_model_kwargs()
+                            with env_class(**env_kwargs) as env_factory:
+                                model_kwargs = dqn_model_kwargs()

                        else:
                            raise NameError(f'The model "{model_cls.__name__}" has the wrong name.')

                        param_path = seed_path / f'env_params.json'
                        try:
-                            env.env_method('save_params', param_path)
+                            env_factory.env_method('save_params', param_path)
                        except AttributeError:
-                            env.save_params(param_path)
+                            env_factory.save_params(param_path)

                        # Model Init
-                        model = model_cls("MlpPolicy", env, verbose=1, seed=seed, device='cpu', **model_kwargs)
+                        model = model_cls("MlpPolicy", env_factory,
+                                          verbose=1, seed=seed, device='cpu',
+                                          **model_kwargs)

                        # Model train
                        model.learn(total_timesteps=int(train_steps), callback=callbacks)
@@ -179,56 +189,169 @@ if __name__ == '__main__':
                        # Model save
                        save_path = seed_path / f'model.zip'
                        model.save(save_path)
-                        pass
-                    # Compare perfoormance runs, for each seed within a model
+
+                        # Better be save then sorry: Clean up!
+                        del env_factory, model
+                        import gc
+                        gc.collect()
+
+                    # Compare performance runs, for each seed within a model
                    compare_seed_runs(combination_path)
+                    # Better be save then sorry: Clean up!
+                    del model_kwargs, env_kwargs
+                    import gc
+                    gc.collect()
+
                # Compare performance runs, for each model
                # FIXME: Check THIS!!!!
                compare_model_runs(study_root_path / observation_mode / env_name, f'{start_time}', 'step_reward')
-    # Train ends here ############################################################
-
-    # Evaluation starts here #####################################################
-    # Iterate Observation Modes
-
-    for observation_mode in observation_modes:
-        obs_mode_path = next(x for x in study_root_path.iterdir() if x.is_dir() and x.name == observation_mode)
-        # For trained policy in study_root_path / identifier
-        for env_path in [x for x in obs_mode_path.iterdir() if x.is_dir()]:
-            for policy_path in [x for x in env_path.iterdir() if x. is_dir()]:
-                # TODO: Pick random seed or iterate over available seeds
-                # First seed path version
-                # seed_path = next((y for y in policy_path.iterdir() if y.is_dir()))
-                # Iteration
-                for seed_path in (y for y in policy_path.iterdir() if y.is_dir()):
-                    # retrieve model class
-                    for model_cls in (val for key, val in h.MODEL_MAP.items() if key in policy_path.name):
-                        # Load both agents
-                        models = [model_cls.load(seed_path / 'model.zip') for _ in range(2)]
-                        # Load old env kwargs
-                        with next(seed_path.glob('*.json')).open('r') as f:
-                            env_kwargs = simplejson.load(f)
-                            env_kwargs.update(n_agents=2, additional_agent_placeholder=None,
-                                              **observation_modes[observation_mode].get('post_training_env_kwargs', {}))
-
-                        # Monitor Init
-                        with MonitorCallback(filepath=seed_path / f'e_1_monitor.pick') as monitor:
-                            # Init Env
-                            env = env_map[env_path.name][0](**env_kwargs)
-                            # Evaluation Loop for i in range(n Episodes)
-                            for episode in range(50):
-                                obs = env.reset()
-                                rew, done_bool = 0, False
-                                while not done_bool:
-                                    actions = [model.predict(obs[i], deterministic=False)[0]
-                                               for i, model in enumerate(models)]
-                                    env_state, step_r, done_bool, info_obj = env.step(actions)
-                                    monitor.read_info(0, info_obj)
-                                    rew += step_r
-                                    if done_bool:
-                                        monitor.read_done(0, done_bool)
-                                        break
-                                print(f'Factory run {episode} done, reward is:\n    {rew}')
-                            # Eval monitor outputs are automatically stored by the monitor object
-
-                            # TODO: Plotting
+                pass
+            pass
        pass
+    pass
+    # Train ends here ############################################################
+    exit()
+    # Evaluation starts here #####################################################
+    # First Iterate over every model and monitor "as trained"
+    baseline_monitor_file = 'e_1_baseline_monitor.pick'
+    if True:
+        render = True
+        for observation_mode in observation_modes:
+            obs_mode_path = next(x for x in study_root_path.iterdir() if x.is_dir() and x.name == observation_mode)
+            # For trained policy in study_root_path / identifier
+            for env_path in [x for x in obs_mode_path.iterdir() if x.is_dir()]:
+                for policy_path in [x for x in env_path.iterdir() if x. is_dir()]:
+                    # Iteration
+                    for seed_path in (y for y in policy_path.iterdir() if y.is_dir()):
+                        # retrieve model class
+                        for model_cls in (val for key, val in h.MODEL_MAP.items() if key in policy_path.name):
+                            # Load both agents
+                            model = model_cls.load(seed_path / 'model.zip')
+                            # Load old env kwargs
+                            with next(seed_path.glob('*.json')).open('r') as f:
+                                env_kwargs = simplejson.load(f)
+                            # Monitor Init
+                            with MonitorCallback(filepath=seed_path / baseline_monitor_file) as monitor:
+                                # Init Env
+                                env_factory = env_map[env_path.name][0](**env_kwargs)
+                                # Evaluation Loop for i in range(n Episodes)
+                                for episode in range(100):
+                                    obs = env_factory.reset()
+                                    rew, done_bool = 0, False
+                                    while not done_bool:
+                                        action = model.predict(obs, deterministic=True)[0]
+                                        env_state, step_r, done_bool, info_obj = env_factory.step(action)
+                                        monitor.read_info(0, info_obj)
+                                        rew += step_r
+                                        if render:
+                                            env_factory.render()
+                                        if done_bool:
+                                            monitor.read_done(0, done_bool)
+                                            break
+                                    print(f'Factory run {episode} done, reward is:\n    {rew}')
+                                # Eval monitor outputs are automatically stored by the monitor object
+                            del model, env_kwargs, env_factory
+                            import gc
+
+                            gc.collect()
+
+    # Then iterate over every model and monitor "ood behavior" - "is it ood?"
+    ood_monitor_file = 'e_1_monitor.pick'
+    if True:
+        for observation_mode in observation_modes:
+            obs_mode_path = next(x for x in study_root_path.iterdir() if x.is_dir() and x.name == observation_mode)
+            # For trained policy in study_root_path / identifier
+            for env_path in [x for x in obs_mode_path.iterdir() if x.is_dir()]:
+                for policy_path in [x for x in env_path.iterdir() if x. is_dir()]:
+                    # FIXME: Pick random seed or iterate over available seeds
+                    # First seed path version
+                    # seed_path = next((y for y in policy_path.iterdir() if y.is_dir()))
+                    # Iteration
+                    for seed_path in (y for y in policy_path.iterdir() if y.is_dir()):
+                        if (seed_path / f'e_1_monitor.pick').exists():
+                            continue
+                        # retrieve model class
+                        for model_cls in (val for key, val in h.MODEL_MAP.items() if key in policy_path.name):
+                            # Load both agents
+                            models = [model_cls.load(seed_path / 'model.zip') for _ in range(2)]
+                            # Load old env kwargs
+                            with next(seed_path.glob('*.json')).open('r') as f:
+                                env_kwargs = simplejson.load(f)
+                                env_kwargs.update(
+                                    n_agents=2, additional_agent_placeholder=None,
+                                    **observation_modes[observation_mode].get('post_training_env_kwargs', {}))
+
+                            # Monitor Init
+                            with MonitorCallback(filepath=seed_path / ood_monitor_file) as monitor:
+                                # Init Env
+                                with env_map[env_path.name][0](**env_kwargs) as env_factory:
+                                    # Evaluation Loop for i in range(n Episodes)
+                                    for episode in range(50):
+                                        obs = env_factory.reset()
+                                        rew, done_bool = 0, False
+                                        while not done_bool:
+                                            actions = [model.predict(obs[i], deterministic=False)[0]
+                                                       for i, model in enumerate(models)]
+                                            env_state, step_r, done_bool, info_obj = env_factory.step(actions)
+                                            monitor.read_info(0, info_obj)
+                                            rew += step_r
+                                            if done_bool:
+                                                monitor.read_done(0, done_bool)
+                                                break
+                                        print(f'Factory run {episode} done, reward is:\n    {rew}')
+                                    # Eval monitor outputs are automatically stored by the monitor object
+                            del models, env_kwargs, env_factory
+                            import gc
+
+                            gc.collect()
+
+    # Plotting
+    if True:
+        # TODO: Plotting
+        df_list = list()
+        for observation_folder in (x for x in study_root_path.iterdir() if x.is_dir()):
+            for env_folder in (x for x in observation_folder.iterdir() if x.is_dir()):
+                for model_folder in (x for x in env_folder.iterdir() if x.is_dir()):
+                    # Gather per seed results in this list
+
+                    for seed_folder in (x for x in model_folder.iterdir() if x.is_dir()):
+                        for monitor_file in [baseline_monitor_file, ood_monitor_file]:
+
+                            with (seed_folder / monitor_file).open('rb') as f:
+                                monitor_df = pickle.load(f)
+
+                            monitor_df = monitor_df.fillna(0)
+                            monitor_df['seed'] = int(seed_folder.name.split('_')[0])
+                            monitor_df['monitor'] = monitor_file.split('.')[0]
+                            monitor_df['monitor'] = monitor_df['monitor'].astype(str)
+                            monitor_df['env'] = env_folder.name
+
+                            monitor_df['obs_mode'] = observation_folder.name
+                            monitor_df['obs_mode'] = monitor_df['obs_mode'].astype(str)
+                            monitor_df['model'] = model_folder.name.split('_')[0]
+
+
+                            df_list.append(monitor_df)
+
+        id_cols = ['monitor', 'env', 'obs_mode', 'model']
+
+        df = pd.concat(df_list, ignore_index=True)
+        df = df.fillna(0)
+
+        for id_col in id_cols:
+            df[id_col] = df[id_col].astype(str)
+
+        df_grouped = df.groupby(id_cols + ['seed']
+                                ).agg({key: 'sum' if "Agent" in key else 'mean' for key in df.columns
+                                       if key not in (id_cols + ['seed'])})
+        df_melted = df_grouped.reset_index().melt(id_vars=id_cols,
+                                                  value_vars='step_reward', var_name="Measurement",
+                                                  value_name="Score")
+
+        c = sns.catplot(data=df_melted, x='obs_mode', hue='monitor', row='model', col='env', y='Score', sharey=False,
+                        kind="box", height=4, aspect=.7, legend_out=True)
+        c.set_xticklabels(rotation=65, horizontalalignment='right')
+        plt.tight_layout(pad=2)
+        plt.show()
+
+    pass