From 59484f49c96f5b383f7a0322ef69b116f80c696e Mon Sep 17 00:00:00 2001
From: Steffen Illium
Date: Thu, 18 Nov 2021 18:32:42 +0100
Subject: [PATCH] Occupation Map

---
 environments/factory/base/base_factory.py |   9 +-
 environments/factory/factory_dirt.py      |   5 +
 environments/logging/recorder.py          |  41 ++++++++-
 reload_agent.py                           |  20 ++--
 studies/e_1.py                            | 106 ++++++++++++----------
 5 files changed, 119 insertions(+), 62 deletions(-)

diff --git a/environments/factory/base/base_factory.py b/environments/factory/base/base_factory.py
index 907ca61..bb0c121 100644
--- a/environments/factory/base/base_factory.py
+++ b/environments/factory/base/base_factory.py
@@ -50,6 +50,11 @@ class BaseFactory(gym.Env):
     def movement_actions(self):
         return self._actions.movement_actions
 
+    @property
+    def params(self) -> dict:
+        d = {key: val for key, val in self.__dict__.items() if not key.startswith('_') and not key.startswith('__')}
+        return d
+
     def __enter__(self):
         return self if self.obs_prop.frames_to_stack == 0 else \
             FrameStack(self, self.obs_prop.frames_to_stack)
@@ -576,8 +581,7 @@ class BaseFactory(gym.Env):
 
     def save_params(self, filepath: Path):
         # noinspection PyProtectedMember
-        # d = {key: val._asdict() if hasattr(val, '_asdict') else val for key, val in self.__dict__.items()
-        d = {key: val for key, val in self.__dict__.items() if not key.startswith('_') and not key.startswith('__')}
+        d = self.params
         filepath.parent.mkdir(parents=True, exist_ok=True)
         with filepath.open('w') as f:
             simplejson.dump(d, f, indent=4, namedtuple_as_object=True)
@@ -587,6 +591,7 @@ class BaseFactory(gym.Env):
         for entity_group in self._entities:
             summary.update({f'{REC_TAC}{entity_group.name}': entity_group.summarize_states(n_steps=self._steps)})
+        return summary
 
     def print(self, string):
diff --git a/environments/factory/factory_dirt.py b/environments/factory/factory_dirt.py
index 92d15c6..675b39b 100644
--- a/environments/factory/factory_dirt.py
+++ b/environments/factory/factory_dirt.py
@@ -239,7 +239,12 @@ class DirtFactory(BaseFactory):
 
         if agent.temp_action == CLEAN_UP_ACTION:
             if agent.temp_valid:
+                # Reward if pickup succeeds:
+                # 0.5 on every pickup
                 reward += 0.5
+                if self.dirt_prop.done_when_clean and (len(self[c.DIRT]) == 0):
+                    # 0.5 additional reward for the very last pickup
+                    reward += 0.5
                 self.print(f'{agent.name} did just clean up some dirt at {agent.pos}.')
                 info_dict.update(dirt_cleaned=1)
             else:
diff --git a/environments/logging/recorder.py b/environments/logging/recorder.py
index c52f91d..61f8391 100644
--- a/environments/logging/recorder.py
+++ b/environments/logging/recorder.py
@@ -3,6 +3,8 @@ from collections import defaultdict
 from pathlib import Path
 from typing import Union
 
+import numpy as np
+import pandas as pd
 import simplejson
 from stable_baselines3.common.callbacks import BaseCallback
 
@@ -10,21 +12,39 @@ from environments.factory.base.base_factory import REC_TAC
 
 
 # noinspection PyAttributeOutsideInit
+from environments.helpers import Constants as c
+
+
 class RecorderCallback(BaseCallback):
 
-    def __init__(self, filepath: Union[str, Path], occupation_map: bool = False, trajectory_map: bool = False):
+    def __init__(self, filepath: Union[str, Path], occupation_map: bool = False, trajectory_map: bool = False,
+                 entities='all'):
         super(RecorderCallback, self).__init__()
         self.trajectory_map = trajectory_map
         self.occupation_map = occupation_map
         self.filepath = Path(filepath)
         self._recorder_dict = defaultdict(list)
         self._recorder_out_list = list()
+        self._env_params = None
         self.do_record: bool
+        if isinstance(entities, str):
+            if entities.lower() == 'all':
+                self._entities = None
+            else:
+                self._entities = [entities]
+        else:
+            self._entities = entities
         self.started = False
         self.closed = False
 
+    def read_params(self, params):
+        self._env_params = params
+
     def read_info(self, env_idx, info: dict):
         if info_dict := {key.replace(REC_TAC, ''): val for key, val in info.items() if key.startswith(f'{REC_TAC}')}:
+            if self._entities:
+                info_dict = {k: v for k, v in info_dict.items() if k in self._entities}
+
             info_dict.update(episode=(self.num_timesteps + env_idx))
             self._recorder_dict[env_idx].append(info_dict)
         else:
@@ -51,14 +71,27 @@ class RecorderCallback(BaseCallback):
         if self.do_record and self.started:
             # self.out_file.unlink(missing_ok=True)
             with self.filepath.open('w') as f:
-                out_dict = {'episodes': self._recorder_out_list}
+                out_dict = {'episodes': self._recorder_out_list, 'header': self._env_params}
                 try:
                     simplejson.dump(out_dict, f, indent=4)
                 except TypeError:
                     print('Could not serialize the recorder dump to JSON.')
 
             if self.occupation_map:
-                print('Recorder files were dumped to disk, now plotting the occupation map...')
+                a = np.zeros((15, 15))  # visit counts; extent is hard-coded to a 15x15 level
+                for episode in out_dict['episodes']:
+                    df = pd.DataFrame([y for x in episode['steps'] for y in x['Agents']])
+
+                    b = list(df[['x', 'y']].to_records(index=False))
+
+                    np.add.at(a, tuple(zip(*b)), 1)
+
+                # a = np.rot90(a)
+                import seaborn as sns
+                from matplotlib import pyplot as plt
+                hm = sns.heatmap(data=a)
+                hm.set_title('Occupation Heatmap')
+                plt.show()
 
             if self.trajectory_map:
                 print('Recorder files were dumped to disk, now plotting the trajectory map...')
@@ -75,7 +108,7 @@ class RecorderCallback(BaseCallback):
             for env_idx, done in list(
                     enumerate(self.locals.get('dones', []))) + list(
-                    enumerate(self.locals.get('done', []))):
+                        enumerate(self.locals.get('done', []))):
                 self.read_done(env_idx, done)
         else:
             pass
diff --git a/reload_agent.py b/reload_agent.py
index b43f781..f569447 100644
--- a/reload_agent.py
+++ b/reload_agent.py
@@ -19,6 +19,8 @@ if __name__ == '__main__':
     model_name = 'A2C_ItsDirt'
     run_id = 0
     determin = True
+    render = False
+    record = True
     seed = 67
     n_agents = 1
     out_path = Path('study_out/e_1_Now_with_doors/no_obs/dirt/A2C_Now_with_doors/0_A2C_Now_with_doors')
@@ -31,19 +33,21 @@ if __name__ == '__main__':
         env_kwargs['dirt_prop']['max_spawn_amount'] = gain_amount
         del env_kwargs['dirt_prop']['gain_amount']
 
-    env_kwargs.update(record_episodes=False)
+    env_kwargs.update(record_episodes=record)
 
     this_model = out_path / 'model.zip'
 
     model_cls = next(val for key, val in h.MODEL_MAP.items() if key in model_name)
     models = [model_cls.load(this_model) for _ in range(n_agents)]
 
-    with RecorderCallback(filepath=Path() / 'recorder_out_DQN.json') as recorder:
+    with RecorderCallback(filepath=Path() / 'recorder_out_DQN.json', occupation_map=True,
+                          entities=['Agents']) as recorder:
         # Init Env
         with DirtFactory(**env_kwargs) as env:
             obs_shape = env.observation_space.shape
             # Evaluation Loop for i in range(n Episodes)
-            for episode in range(5):
+            recorder.read_params(env.params)
+            for episode in range(200):
                 env_state = env.reset()
                 rew, done_bool = 0, False
                 while not done_bool:
                     if n_agents > 1:
                         actions = [model.predict(
                             np.stack([env_state[i][j] for i in range(env_state.shape[0])]),
                             deterministic=determin)[0] for j, model in enumerate(models)]
                     else:
                         actions = models[0].predict(env_state, deterministic=determin)[0]
-                    if any([agent.pos in [door.pos for door in env.unwrapped[c.DOORS]]
-                            for agent in env.unwrapped[c.AGENT]]):
-                        print('On Door')
+                    if False:  # debug check for agents standing on a door; currently disabled
+                        if any([agent.pos in [door.pos for door in env.unwrapped[c.DOORS]]
+                                for agent in env.unwrapped[c.AGENT]]):
+                            print('On Door')
                     env_state, step_r, done_bool, info_obj = env.step(actions)
                     recorder.read_info(0, info_obj)
                     rew += step_r
-                    env.render()
+                    if render:
+                        env.render()
                     if done_bool:
                         recorder.read_done(0, done_bool)
                         break
diff --git a/studies/e_1.py b/studies/e_1.py
index bf5e1eb..e880f59 100644
--- a/studies/e_1.py
+++ b/studies/e_1.py
@@ -66,8 +66,8 @@ There are further distinctions to be made:
 """
 
 n_agents = 4
-ood_monitor_file = f'e_1_monitor_{n_agents}_agents.pick'
-baseline_monitor_file = 'e_1_baseline_monitor.pick'
+ood_monitor_file = f'e_1_{n_agents}_agents'
+baseline_monitor_file = 'e_1_baseline'
 
 
 def policy_model_kwargs():
@@ -103,7 +103,7 @@ def load_model_run_baseline(seed_path, env_to_run):
         env_kwargs = simplejson.load(f)
         env_kwargs.update(done_at_collision=True)
     # Monitor Init
-    with MonitorCallback(filepath=seed_path / baseline_monitor_file) as monitor:
+    with MonitorCallback(filepath=seed_path / f'{baseline_monitor_file}.pick') as monitor:
         # Init Env
         with env_to_run(**env_kwargs) as env_factory:
             # Evaluation Loop for i in range(n Episodes)
@@ -139,7 +139,7 @@ def load_model_run_study(seed_path, env_to_run, additional_kwargs_dict):
                           done_at_collision=True,
                           **additional_kwargs_dict.get('post_training_kwargs', {}))
     # Monitor Init
-    with MonitorCallback(filepath=seed_path / ood_monitor_file) as monitor:
+    with MonitorCallback(filepath=seed_path / f'{ood_monitor_file}.pick') as monitor:
         # Init Env
         with env_to_run(**env_kwargs) as env_factory:
             # Evaluation Loop for i in range(n Episodes)
@@ -172,7 +172,7 @@ def load_model_run_study(seed_path, env_to_run, additional_kwargs_dict):
 
 
 def start_mp_study_run(envs_map, policies_path):
-    paths = list(y for y in policies_path.iterdir() if y.is_dir() and not (y / ood_monitor_file).exists())
+    paths = list(y for y in policies_path.iterdir() if y.is_dir() and not (y / f'{ood_monitor_file}.pick').exists())
     if paths:
         import multiprocessing as mp
         pool = mp.Pool(mp.cpu_count())
@@ -185,7 +185,8 @@ def start_mp_study_run(envs_map, policies_path):
 
 
 def start_mp_baseline_run(envs_map, policies_path):
-    paths = list(y for y in policies_path.iterdir() if y.is_dir() and not (y / baseline_monitor_file).exists())
+    paths = list(y for y in policies_path.iterdir() if y.is_dir() and
+                 not (y / f'{baseline_monitor_file}.pick').exists())
     if paths:
         import multiprocessing as mp
         pool = mp.Pool(mp.cpu_count())
@@ -197,11 +198,17 @@ def start_mp_baseline_run(envs_map, policies_path):
 
 if __name__ == '__main__':
 
+    # What to do:
+    train = True
+    baseline_run = True
+    ood_run = True
+    plotting = True
+
     train_steps = 5e6
     n_seeds = 3
 
     # Define a global study save path
-    start_time = 'Now_with_doors'  # int(time.time())
+    start_time = 'exploring_obs_stack'  # int(time.time())
     study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}'
 
     # Define Global Env Parameters
@@ -209,7 +216,7 @@ if __name__ == '__main__':
     obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT,
                                       omit_agent_self=True,
                                       additional_agent_placeholder=None,
-                                      frames_to_stack=3,
+                                      frames_to_stack=6,
                                       pomdp_r=2
                                       )
     move_props = MovementProperties(allow_diagonal_movement=True,
@@ -327,7 +334,7 @@ if __name__ == '__main__':
 
     # Train starts here ############################################################
     # Build Major Loop parameters, parameter versions, Env Classes and models
-    if False:
+    if train:
         for obs_mode in observation_modes.keys():
             for env_name in env_names:
                 for model_cls in [h.MODEL_MAP['A2C']]:
@@ -417,7 +424,7 @@ if __name__ == '__main__':
 
     # Evaluation starts here #####################################################
     # First Iterate over every model and monitor "as trained"
-    if True:
+    if baseline_run:
         print('Start Baseline Tracking')
         for obs_mode in observation_modes:
             obs_mode_path = next(x for x in study_root_path.iterdir() if x.is_dir() and x.name == obs_mode)
@@ -432,7 +439,7 @@ if __name__ == '__main__':
         print('Baseline Tracking done')
 
     # Then iterate over every model and monitor "ood behavior" - "is it ood?"
-    if True:
+    if ood_run:
         print('Start OOD Tracking')
         for obs_mode in observation_modes:
             obs_mode_path = next(x for x in study_root_path.iterdir() if x.is_dir() and x.name == obs_mode)
@@ -449,17 +456,18 @@ if __name__ == '__main__':
         print('OOD Tracking Done')
 
     # Plotting
-    if True:
+    if plotting:
         # TODO: Plotting
         print('Start Plotting')
+        df_list = list()
         for observation_folder in (x for x in study_root_path.iterdir() if x.is_dir()):
-            df_list = list()
+
             for env_folder in (x for x in observation_folder.iterdir() if x.is_dir()):
                 for model_folder in (x for x in env_folder.iterdir() if x.is_dir()):
                     # Gather per seed results in this list
 
                     for seed_folder in (x for x in model_folder.iterdir() if x.is_dir()):
-                        for monitor_file in [baseline_monitor_file, ood_monitor_file]:
+                        for monitor_file in [f'{baseline_monitor_file}.pick', f'{ood_monitor_file}.pick']:
 
                             with (seed_folder / monitor_file).open('rb') as f:
                                 monitor_df = pickle.load(f)
@@ -476,47 +484,47 @@ if __name__ == '__main__':
 
                             df_list.append(monitor_df)
 
-        id_cols = ['monitor', 'env', 'obs_mode', 'model']
-
-        df = pd.concat(df_list, ignore_index=True)
-        df = df.fillna(0)
+        id_cols = ['monitor', 'env', 'obs_mode', 'model']
+        df = pd.concat(df_list, ignore_index=True)
+        df = df.fillna(0)
+        for env_name in env_names:
             for id_col in id_cols:
                 df[id_col] = df[id_col].astype(str)
 
-        if True:
-            # df['fail_sum'] = df.loc[:, df.columns.str.contains("failed")].sum(1)
-            df['pick_up'] = df.loc[:, df.columns.str.contains("]_item_pickup")].sum(1)
-            df['drop_off'] = df.loc[:, df.columns.str.contains("]_item_dropoff")].sum(1)
-            df['failed_item_action'] = df.loc[:, df.columns.str.contains("]_failed_item_action")].sum(1)
-            df['failed_cleanup'] = df.loc[:, df.columns.str.contains("]_failed_dirt_cleanup")].sum(1)
-            df['coll_lvl'] = df.loc[:, df.columns.str.contains("]_vs_LEVEL")].sum(1)
-            df['coll_agent'] = df.loc[:, df.columns.str.contains("]_vs_Agent")].sum(1) / 2
-            # df['collisions'] = df['coll_lvl'] + df['coll_agent']
+            if True:
+                # df['fail_sum'] = df.loc[:, df.columns.str.contains("failed")].sum(1)
+                df['pick_up'] = df.loc[:, df.columns.str.contains("]_item_pickup")].sum(1)
+                df['drop_off'] = df.loc[:, df.columns.str.contains("]_item_dropoff")].sum(1)
+                df['failed_item_action'] = df.loc[:, df.columns.str.contains("]_failed_item_action")].sum(1)
+                df['failed_cleanup'] = df.loc[:, df.columns.str.contains("]_failed_dirt_cleanup")].sum(1)
+                df['coll_lvl'] = df.loc[:, df.columns.str.contains("]_vs_LEVEL")].sum(1)
+                df['coll_agent'] = df.loc[:, df.columns.str.contains("]_vs_Agent")].sum(1) / 2
+                # df['collisions'] = df['coll_lvl'] + df['coll_agent']
 
-        value_vars = ['pick_up', 'drop_off', 'failed_item_action', 'failed_cleanup',
-                      'coll_lvl', 'coll_agent', 'dirt_cleaned']
+            value_vars = ['pick_up', 'drop_off', 'failed_item_action', 'failed_cleanup',
+                          'coll_lvl', 'coll_agent', 'dirt_cleaned']
 
-        df_grouped = df.groupby(id_cols + ['seed']
-                                ).agg({key: 'sum' if "Agent" in key else 'mean' for key in df.columns
-                                       if key not in (id_cols + ['seed'])})
-        df_melted = df_grouped.reset_index().melt(id_vars=id_cols,
-                                                  value_vars=value_vars,  # 'step_reward',
-                                                  var_name="Measurement",
-                                                  value_name="Score")
-        # df_melted["Measurements"] = df_melted["Measurement"] + " " + df_melted["monitor"]
+            df_grouped = df.groupby(id_cols + ['seed']
+                                    ).agg({key: 'sum' if "Agent" in key else 'mean' for key in df.columns
+                                           if key not in (id_cols + ['seed'])})
+            df_melted = df_grouped.reset_index().melt(id_vars=id_cols,
+                                                      value_vars=value_vars,  # 'step_reward',
+                                                      var_name="Measurement",
+                                                      value_name="Score")
+            # df_melted["Measurements"] = df_melted["Measurement"] + " " + df_melted["monitor"]
 
-        # Plotting
-        # fig, ax = plt.subplots(figsize=(11.7, 8.27))
+            # Plotting
+            # fig, ax = plt.subplots(figsize=(11.7, 8.27))
 
-        c = sns.catplot(data=df_melted[df_melted['obs_mode'] == observation_folder.name],
-                        x='Measurement', hue='monitor', row='model', col='env', y='Score',
-                        sharey=False, kind="box", height=4, aspect=.7, legend_out=False, legend=False,
-                        showfliers=False)
-        c.set_xticklabels(rotation=65, horizontalalignment='right')
-        # c.fig.subplots_adjust(top=0.9)  # adjust the Figure in rp
-        c.fig.suptitle(f"Cat plot for {observation_folder.name}")
-        # plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
-        plt.tight_layout()
-        plt.savefig(study_root_path / f'results_{n_agents}_agents_{observation_folder.name}.png')
+            c = sns.catplot(data=df_melted[df_melted['env'] == env_name],
+                            x='Measurement', hue='monitor', row='model', col='obs_mode', y='Score',
+                            sharey=True, kind="box", height=4, aspect=.7, legend_out=False, legend=False,
+                            showfliers=False)
+            c.set_xticklabels(rotation=65, horizontalalignment='right')
+            # c.fig.subplots_adjust(top=0.9)  # adjust the Figure in rp
+            c.fig.suptitle(f"Cat plot for {env_name}")
+            # plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
+            plt.tight_layout()
+            plt.savefig(study_root_path / f'results_{n_agents}_agents_{env_name}.png')
     pass
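
Note on the occupation map introduced above: the new occupation_map branch in
environments/logging/recorder.py accumulates every recorded agent position into
a 2D visit histogram via np.add.at (an unbuffered scatter-add) and renders it
as a seaborn heatmap. The minimal, self-contained sketch below isolates that
technique; the helper name plot_occupation_map and its level_shape parameter
are illustrative only, and the episode schema ('steps' entries carrying an
'Agents' list with integer x/y fields) is assumed to match what
RecorderCallback dumps:

    import numpy as np
    import pandas as pd
    import seaborn as sns
    from matplotlib import pyplot as plt


    def plot_occupation_map(episodes, level_shape=(15, 15)):
        # Hypothetical helper; mirrors the occupation_map branch above.
        occupation = np.zeros(level_shape)
        for episode in episodes:
            # Flatten the per-step agent records of one episode into a frame.
            df = pd.DataFrame([agent for step in episode['steps'] for agent in step['Agents']])
            positions = list(df[['x', 'y']].to_records(index=False))
            # np.add.at is an unbuffered scatter-add: a cell visited n times
            # within an episode is incremented n times, which plain fancy
            # indexing (occupation[xs, ys] += 1) would not guarantee.
            np.add.at(occupation, tuple(zip(*positions)), 1)
        hm = sns.heatmap(data=occupation)
        hm.set_title('Occupation Heatmap')
        plt.show()

Fed with the 'episodes' list from the recorder_out_DQN.json dump written by
reload_agent.py, this should reproduce the heatmap the callback shows on close.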