Mirror of https://github.com/illiumst/marl-factory-grid.git

Commit: Monitor and Recorder are Wrappers.
Author: Steffen Illium
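In short: instead of file-managing callbacks driven through start()/stop() and with-blocks, the monitor and the recorder now wrap the environment, observe the traffic passing through step()/reset(), and write their output on an explicit save call. A minimal before/after sketch of the calling pattern, where the hypothetical make_factory() stands in for something like DirtFactory(**env_kwargs):

# before: context-manager callback that owns its output file
# with MonitorCallback(filepath='monitor.pick') as monitor:
#     ...
#     monitor.read_info(0, info)

# after: env wrapper with an explicit save step (per this diff)
from environments.logging.envmonitor import EnvMonitor

env = EnvMonitor(make_factory())
action = env.action_space.sample()          # attribute reached via __getattr__ delegation
obs, reward, done, info = env.step(action)  # recording happens as a side effect of step()
env.save_run('monitor.pick')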
@@ -65,7 +65,7 @@ class BaseFactory(gym.Env):
    def __init__(self, level_name='simple', n_agents=1, max_steps=int(5e2),
                 mv_prop: MovementProperties = MovementProperties(),
                 obs_prop: ObservationProperties = ObservationProperties(),
                 parse_doors=False, record_episodes=False, done_at_collision=False,
                 parse_doors=False, done_at_collision=False,
                 verbose=False, doors_have_area=True, env_seed=time.time_ns(), individual_rewards=False,
                 **kwargs):

@@ -97,7 +97,7 @@ class BaseFactory(gym.Env):
        self._pomdp_r = self.obs_prop.pomdp_r

        self.done_at_collision = done_at_collision
        self.record_episodes = record_episodes
        self._record_episodes = False
        self.parse_doors = parse_doors
        self.doors_have_area = doors_have_area
        self.individual_rewards = individual_rewards
@@ -249,7 +249,7 @@ class BaseFactory(gym.Env):
        if self._steps >= self.max_steps:
            done = True
        info.update(step_reward=reward, step=self._steps)
        if self.record_episodes:
        if self._record_episodes:
            info.update(self._summarize_state())

        # Post step Hook for later use
@@ -280,7 +280,7 @@ class BaseFactory(gym.Env):
        if self.n_agents == 1:
            obs = self._build_per_agent_obs(self[c.AGENT][0], state_array_dict)
        elif self.n_agents >= 2:
            obs = np.stack([self._build_per_agent_obs(agent, state_array_dict) for agent in self[c.AGENT]])
            obs = np.stack(self._build_per_agent_obs(agent, state_array_dict) for agent in self[c.AGENT])
        else:
            raise ValueError('n_agents cannot be smaller than 1!!')
        return obs
@@ -290,9 +290,6 @@ class BaseFactory(gym.Env):
        agent_omit_idx = None

        if self.obs_prop.omit_agent_self and self.n_agents == 1:
            # There is only a single agent and we want to omit the agent obs, so just remove the array.
            # del state_array_dict[c.AGENT]
            # Not Needed any more,
            pass
        elif self.obs_prop.omit_agent_self and self.obs_prop.render_agents in [a_obs.COMBINED, ] and self.n_agents > 1:
            state_array_dict[c.AGENT][0, agent.x, agent.y] -= agent.encoding
@@ -439,7 +436,7 @@ class BaseFactory(gym.Env):
        tiles_with_collisions = list()
        for tile in self[c.FLOOR]:
            if tile.is_occupied():
                guests = [guest for guest in tile.guests if guest.can_collide]
                guests = tile.guests_that_can_collide
                if len(guests) >= 2:
                    tiles_with_collisions.append(tile)
        return tiles_with_collisions
@@ -521,7 +518,7 @@ class BaseFactory(gym.Env):
                per_agent_info_dict[agent.name].update(no_op=1)
                # per_agent_reward -= 0.00

            # Monitor Notes
            # EnvMonitor Notes
            if agent.temp_valid:
                per_agent_info_dict[agent.name].update(valid_action=1)
                per_agent_info_dict[agent.name].update({f'{agent.name}_valid_action': 1})

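One small change above deserves a caution: np.stack is documented to take a sequence of arrays. NumPy 1.16+ emits a FutureWarning for bare generators, and newer releases reject them outright, so the bracket-less form only works on an older NumPy pin; this is a readability change, not a memory win, since np.stack materializes a list internally either way. A quick check of both spellings:

import numpy as np

views = [np.full((2, 2), i) for i in range(3)]
a = np.stack(views)                 # sequence input: always fine
try:
    b = np.stack(v for v in views)  # generator: FutureWarning on NumPy 1.16+,
    assert (a == b).all()           # rejected with a TypeError on recent releases
except TypeError:
    b = np.stack(list(v for v in views))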
@@ -209,7 +209,7 @@ class Tile(Object):
        return not len(self._guests)

    def is_occupied(self):
        return len(self._guests)
        return bool(len(self._guests))

    def enter(self, guest):
        if guest.name not in self._guests:

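The is_occupied change is the classic truthy-int cleanup: returning len(...) behaves in an if-statement, but it leaks an int to any caller that stores, compares, or serializes the result. Wrapping it in bool() (bool(self._guests) would do the same without the len call) pins the contract down. A stripped-down illustration, not the real Tile class:

class Tile:
    def __init__(self):
        self._guests = {}

    def is_occupied(self) -> bool:
        # bool(...) keeps the return type honest instead of leaking a count
        return bool(self._guests)

t = Tile()
assert t.is_occupied() is False
t._guests['agent_1'] = object()
assert t.is_occupied() is True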
@@ -28,7 +28,7 @@ class DirtProperties(NamedTuple):
    max_global_amount: int = 20             # Max dirt amount in the whole environment.
    dirt_smear_amount: float = 0.2          # Agents smear dirt, when not cleaning up in place.
    agent_can_interact: bool = True         # Whether the agents can interact with the dirt in this environment.
    done_when_clean = True
    done_when_clean: bool = True


class Dirt(Entity):
@@ -228,14 +228,14 @@ class DirtFactory(BaseFactory):
        dirt = [dirt.amount for dirt in self[c.DIRT]]
        current_dirt_amount = sum(dirt)
        dirty_tile_count = len(dirt)
        if dirty_tile_count:
            dirt_distribution_score = entropy(softmax(np.asarray(dirt)) / dirty_tile_count)
        else:
            dirt_distribution_score = 0
        # if dirty_tile_count:
        #    dirt_distribution_score = entropy(softmax(np.asarray(dirt)) / dirty_tile_count)
        #else:
        #    dirt_distribution_score = 0

        info_dict.update(dirt_amount=current_dirt_amount)
        info_dict.update(dirty_tile_count=dirty_tile_count)
        info_dict.update(dirt_distribution_score=dirt_distribution_score)
        # info_dict.update(dirt_distribution_score=dirt_distribution_score)

        if agent.temp_action == CLEAN_UP_ACTION:
            if agent.temp_valid:

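The done_when_clean fix above is worth spelling out: in a typing.NamedTuple, only annotated names become fields. A bare assignment is a plain class attribute that is invisible to the constructor, _fields, and _replace, so the old spelling could never be overridden per instance. A minimal demonstration with a hypothetical Props class:

from typing import NamedTuple

class Props(NamedTuple):
    max_global_amount: int = 20
    done_when_clean = True        # no annotation: class attribute, NOT a field

print(Props._fields)              # ('max_global_amount',) -- done_when_clean is missing
# Props(done_when_clean=False)   # would raise TypeError: unexpected keyword argument

class FixedProps(NamedTuple):
    max_global_amount: int = 20
    done_when_clean: bool = True  # annotated: a real field with a default

print(FixedProps._fields)         # ('max_global_amount', 'done_when_clean')
print(FixedProps(done_when_clean=False))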
@@ -1,7 +1,7 @@
import pickle
from collections import defaultdict
from pathlib import Path
from typing import List, Dict
from typing import List, Dict, Union

from stable_baselines3.common.callbacks import BaseCallback

@@ -10,57 +10,50 @@ from environments.helpers import IGNORED_DF_COLUMNS
import pandas as pd


class MonitorCallback(BaseCallback):
class EnvMonitor(BaseCallback):

    ext = 'png'

    def __init__(self, filepath=Path('debug_out/monitor.pick')):
        super(MonitorCallback, self).__init__()
        self.filepath = Path(filepath)
    def __init__(self, env):
        super(EnvMonitor, self).__init__()
        self.unwrapped = env
        self._monitor_df = pd.DataFrame()
        self._monitor_dicts = defaultdict(dict)
        self.started = False
        self.closed = False

    def __enter__(self):
        self.start()
        return self
    def __getattr__(self, item):
        return getattr(self.unwrapped, item)

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.stop()
    def step(self, action):
        obs, reward, done, info = self.unwrapped.step(action)
        self._read_info(0, info)
        self._read_done(0, done)
        return obs, reward, done, info

    def reset(self):
        return self.unwrapped.reset()

    def _on_training_start(self) -> None:
        if self.started:
            pass
        else:
            self.start()
        pass

    def _on_training_end(self) -> None:
        if self.closed:
            pass
        else:
            self.stop()
        pass

    def _on_step(self, alt_infos: List[Dict] = None, alt_dones: List[bool] = None) -> bool:
        if self.started:
            for env_idx, info in enumerate(self.locals.get('infos', [])):
                self.read_info(env_idx, info)
        for env_idx, info in enumerate(self.locals.get('infos', [])):
            self._read_info(env_idx, info)

            for env_idx, done in list(
                    enumerate(self.locals.get('dones', []))) + list(enumerate(self.locals.get('done', []))):
                self.read_done(env_idx, done)
        else:
            pass
        for env_idx, done in list(
                enumerate(self.locals.get('dones', []))) + list(enumerate(self.locals.get('done', []))):
            self._read_done(env_idx, done)
        return True

    def read_info(self, env_idx, info: dict):
    def _read_info(self, env_idx, info: dict):
        self._monitor_dicts[env_idx][len(self._monitor_dicts[env_idx])] = {
            key: val for key, val in info.items() if
            key not in ['terminal_observation', 'episode'] and not key.startswith('rec_')}
        return

    def read_done(self, env_idx, done):
    def _read_done(self, env_idx, done):
        if done:
            env_monitor_df = pd.DataFrame.from_dict(self._monitor_dicts[env_idx], orient='index')
            self._monitor_dicts[env_idx] = dict()
@@ -74,16 +67,8 @@ class MonitorCallback(BaseCallback):
            pass
        return

    def stop(self):
        # self.out_file.unlink(missing_ok=True)
        with self.filepath.open('wb') as f:
    def save_run(self, filepath: Union[Path, str]):
        filepath = Path(filepath)
        filepath.parent.mkdir(exist_ok=True, parents=True)
        with filepath.open('wb') as f:
            pickle.dump(self._monitor_df.reset_index(), f, protocol=pickle.HIGHEST_PROTOCOL)
        self.closed = True

    def start(self):
        if self.started:
            pass
        else:
            self.filepath.parent.mkdir(exist_ok=True, parents=True)
            self.started = True
        pass
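Read as a whole, the new EnvMonitor supports two usage modes suggested by this diff: as a plain env wrapper in a hand-written evaluation loop, or attached as an SB3 callback during training (it still subclasses BaseCallback, and _on_step harvests locals['infos'] and locals['dones']). A minimal sketch, where make_factory() is a hypothetical stand-in for an env constructor with the old gym 4-tuple step API:

from environments.logging.envmonitor import EnvMonitor

# (1) as a wrapper around a hand-rolled evaluation loop
env = EnvMonitor(make_factory())
for episode in range(10):
    obs = env.reset()
    done = False
    while not done:
        action = env.action_space.sample()          # reached via __getattr__ delegation
        obs, reward, done, info = env.step(action)  # step() feeds _read_info/_read_done
env.save_run('debug_out/monitor.pick')              # explicit save replaces start()/stop()

# (2) as an SB3 callback during training:
# model.learn(total_timesteps=10_000, callback=[EnvMonitor(env)])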
@@ -1,4 +1,3 @@
import json
from collections import defaultdict
from pathlib import Path
from typing import Union
@@ -11,22 +10,13 @@ from stable_baselines3.common.callbacks import BaseCallback
from environments.factory.base.base_factory import REC_TAC


# noinspection PyAttributeOutsideInit
from environments.helpers import Constants as c
class EnvRecorder(BaseCallback):


class RecorderCallback(BaseCallback):

    def __init__(self, filepath: Union[str, Path], occupation_map: bool = False, trajectory_map: bool = False,
                 entities='all'):
        super(RecorderCallback, self).__init__()
        self.trajectory_map = trajectory_map
        self.occupation_map = occupation_map
        self.filepath = Path(filepath)
    def __init__(self, env, entities='all'):
        super(EnvRecorder, self).__init__()
        self.unwrapped = env
        self._recorder_dict = defaultdict(list)
        self._recorder_out_list = list()
        self._env_params = None
        self.do_record: bool
        if isinstance(entities, str):
            if entities.lower() == 'all':
                self._entities = None
@@ -37,10 +27,18 @@ class RecorderCallback(BaseCallback):
        self.started = False
        self.closed = False

    def read_params(self, params):
        self._env_params = params
    def __getattr__(self, item):
        return getattr(self.unwrapped, item)

    def read_info(self, env_idx, info: dict):
    def reset(self):
        self.unwrapped._record_episodes = True
        return self.unwrapped.reset()

    def _on_training_start(self) -> None:
        self.unwrapped._record_episodes = True
        pass

    def _read_info(self, env_idx, info: dict):
        if info_dict := {key.replace(REC_TAC, ''): val for key, val in info.items() if key.startswith(f'{REC_TAC}')}:
            if self._entities:
                info_dict = {k: v for k, v in info_dict.items() if k in self._entities}
@@ -51,7 +49,7 @@ class RecorderCallback(BaseCallback):
            pass
        return

    def read_done(self, env_idx, done):
    def _read_done(self, env_idx, done):
        if done:
            self._recorder_out_list.append({'steps': self._recorder_dict[env_idx],
                                            'episode': len(self._recorder_out_list)})
@@ -59,77 +57,46 @@ class RecorderCallback(BaseCallback):
        else:
            pass

    def start(self, force=False):
        if (hasattr(self.training_env, 'record_episodes') and self.training_env.record_episodes) or force:
            self.do_record = True
            self.filepath.parent.mkdir(exist_ok=True, parents=True)
            self.started = True
        else:
            self.do_record = False
    def save_records(self, filepath: Union[Path, str], save_occupation_map=False, save_trajectory_map=False):
        filepath = Path(filepath)
        filepath.parent.mkdir(exist_ok=True, parents=True)
        # self.out_file.unlink(missing_ok=True)
        with filepath.open('w') as f:
            out_dict = {'episodes': self._recorder_out_list, 'header': self.unwrapped.params}
            try:
                simplejson.dump(out_dict, f, indent=4)
            except TypeError:
                print('Shit')

    def stop(self):
        if self.do_record and self.started:
            # self.out_file.unlink(missing_ok=True)
            with self.filepath.open('w') as f:
                out_dict = {'episodes': self._recorder_out_list, 'header': self._env_params}
                try:
                    simplejson.dump(out_dict, f, indent=4)
                except TypeError:
                    print('Shit')
        if save_occupation_map:
            a = np.zeros((15, 15))
            for episode in out_dict['episodes']:
                df = pd.DataFrame([y for x in episode['steps'] for y in x['Agents']])

            if self.occupation_map:
                a = np.zeros((15, 15))
                for episode in  out_dict['episodes']:
                    df = pd.DataFrame([y for x in episode['steps'] for y in x['Agents']])
                b = list(df[['x', 'y']].to_records(index=False))

                    b = list(df[['x', 'y']].to_records(index=False))
                np.add.at(a, tuple(zip(*b)), 1)

                    np.add.at(a, tuple(zip(*b)), 1)
            # a = np.rot90(a)
            import seaborn as sns
            from matplotlib import pyplot as plt
            hm = sns.heatmap(data=a)
            hm.set_title('Very Nice Heatmap')
            plt.show()

                # a = np.rot90(a)
                import seaborn as sns
                from matplotlib import pyplot as plt
                hm = sns.heatmap(data=a)
                hm.set_title('Very Nice Heatmap')
                plt.show()

            if self.trajectory_map:
                print('Recorder files were dumped to disk, now plotting the occupation map...')

            self.closed = True
            self.started = False
        else:
            pass
        if save_trajectory_map:
            raise NotImplementedError('This has not yet been implemented.')

    def _on_step(self) -> bool:
        if self.do_record and self.started:
            for env_idx, info in enumerate(self.locals.get('infos', [])):
                self.read_info(env_idx, info)
        for env_idx, info in enumerate(self.locals.get('infos', [])):
            self._read_info(env_idx, info)

        dones = list(enumerate(self.locals.get('dones', [])))
        dones.extend(list(enumerate(self.locals.get('done', []))))
        for env_idx, done in dones:
            self._read_done(env_idx, done)

            for env_idx, done in list(
                    enumerate(self.locals.get('dones', []))) + list(
                enumerate(self.locals.get('done', []))):
                self.read_done(env_idx, done)
        else:
            pass
        return True

    def __enter__(self):
        self.start(force=True)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.stop()

    def _on_training_start(self) -> None:
        if self.started:
            pass
        else:
            self.start()
        pass

    def _on_training_end(self) -> None:
        if self.closed:
            pass
        else:
            self.stop()
        pass

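Pieced together, the recorder flow this diff implies: wrapping the env and calling reset() (or _on_training_start during training) flips the env's _record_episodes flag, _read_info collects the REC_TAC-prefixed info entries, and save_records dumps JSON plus an optional occupation heatmap. Note the diff adds no step() override, so a hand-rolled loop has to drive the read hooks itself (or attach the recorder as an SB3 callback so _on_step does it). A sketch under those assumptions, with make_factory() again hypothetical:

from environments.logging.recorder import EnvRecorder

env = EnvRecorder(make_factory(), entities=['Agents'])  # record only the agent entities

obs = env.reset()            # sets _record_episodes = True on the wrapped env
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())  # delegated via __getattr__
    env._read_info(0, info)  # no step() override in this diff, so feed the hooks manually
    if done:
        env._read_done(0, done)

env.save_records('recorder_out.json', save_occupation_map=True)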
@@ -1,54 +0,0 @@
from collections import defaultdict
from pathlib import Path

import numpy as np
import pandas as pd
from stable_baselines3.common.callbacks import BaseCallback

from environments.logging.plotting import prepare_plot


class TraningMonitor(BaseCallback):

    def __init__(self, filepath, flush_interval=None):
        super(TraningMonitor, self).__init__()
        self.values = defaultdict(dict)
        self.rewards = defaultdict(lambda: 0)

        self.filepath = Path(filepath)
        self.flush_interval = flush_interval
        self.next_flush: int
        pass

    def _on_training_start(self) -> None:
        self.flush_interval = self.flush_interval or (self.locals['total_timesteps'] * 0.1)
        self.next_flush = self.flush_interval

    def _flush(self):
        df = pd.DataFrame.from_dict(self.values,  orient='index')
        if not self.filepath.exists():
            df.to_csv(self.filepath, mode='wb', header=True)
        else:
            df.to_csv(self.filepath, mode='a', header=False)

    def _on_step(self) -> bool:
        for idx, done in np.ndenumerate(self.locals.get('dones', [])):
            idx = idx[0]
            # self.values[self.num_timesteps].update(**{f'reward_env_{idx}': self.locals['rewards'][idx]})
            self.rewards[idx] += self.locals['rewards'][idx]
            if done:
                self.values[self.num_timesteps].update(**{f'acc_epispde_r_env_{idx}': self.rewards[idx]})
                self.rewards[idx] = 0

        if self.num_timesteps >= self.next_flush and self.values:
            self._flush()
            self.values = defaultdict(dict)

        self.next_flush += self.flush_interval
        return True

    def on_training_end(self) -> None:
        self._flush()
        self.values = defaultdict(dict)
        # prepare_plot()

@@ -1,4 +1,3 @@
from enum import Enum
from typing import NamedTuple, Union


@@ -39,7 +39,9 @@ def prepare_plt(df, hue, style, hue_order):
    plt.close('all')
    sns.set(rc={'text.usetex': False}, style='whitegrid')
    lineplot = sns.lineplot(data=df, x='Episode', y='Score', hue=hue, style=style,
                            ci=95, palette=PALETTE, hue_order=hue_order)
                            ci=95, palette=PALETTE, hue_order=hue_order, )
    plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0)
    plt.tight_layout()
    # lineplot.set_title(f'{sorted(list(df["Measurement"].unique()))}')
    return lineplot

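For reference, the two added lines are the standard matplotlib recipe for parking a legend outside the axes: loc names which corner of the legend box gets pinned, and bbox_to_anchor places that corner in axes coordinates, so (1.02, 1) with 'upper left' hangs the legend just right of the top-right corner. A minimal standalone version:

import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.plot([0, 1], [0, 1], label='run a')
ax.plot([0, 1], [1, 0], label='run b')
# 'upper left' is the legend corner that bbox_to_anchor positions;
# (1.02, 1) in axes coordinates sits just outside the top-right corner.
ax.legend(bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0)
fig.tight_layout()  # mirrors the diff; may still clip very wide outside legends
plt.show()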
@@ -8,7 +8,7 @@ from environments import helpers as h
from environments.helpers import Constants as c
from environments.factory.factory_dirt import DirtFactory
from environments.factory.combined_factories import DirtItemFactory
from environments.logging.recorder import RecorderCallback
from environments.logging.recorder import EnvRecorder

warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning)
@@ -16,14 +16,13 @@ warnings.filterwarnings('ignore', category=UserWarning)

if __name__ == '__main__':

    model_name = 'A2C_ItsDirt'
    run_id = 0
    determin = True
    render=False
    determin = False
    render = True
    record = True
    seed = 67
    n_agents = 1
    out_path = Path('study_out/e_1_Now_with_doors/no_obs/dirt/A2C_Now_with_doors/0_A2C_Now_with_doors')
    n_agents = 2
    out_path = Path('study_out/e_1_obs_stack_3_gae_0.25_n_steps_16/seperate_N/dirt/A2C_obs_stack_3_gae_0.25_n_steps_16/0_A2C_obs_stack_3_gae_0.25_n_steps_16')
    out_path_2 = Path('study_out/e_1_obs_stack_3_gae_0.25_n_steps_16/seperate_N/dirt/A2C_obs_stack_3_gae_0.25_n_steps_16/1_A2C_obs_stack_3_gae_0.25_n_steps_16')
    model_path = out_path

    with (out_path / f'env_params.json').open('r') as f:
@@ -33,42 +32,35 @@ if __name__ == '__main__':
            env_kwargs['dirt_prop']['max_spawn_amount'] = gain_amount
            del env_kwargs['dirt_prop']['gain_amount']

        env_kwargs.update(record_episodes=record)
        env_kwargs.update(record_episodes=record, done_at_collision=True)

    this_model = out_path / 'model.zip'
    other_model = out_path / 'model.zip'

    model_cls = next(val for key, val in h.MODEL_MAP.items() if key in model_name)
    models = [model_cls.load(this_model) for _ in range(n_agents)]
    model_cls = next(val for key, val in h.MODEL_MAP.items() if key in out_path.parent.name)
    models = [model_cls.load(this_model), model_cls.load(other_model)]

    with RecorderCallback(filepath=Path() / 'recorder_out_DQN.json', occupation_map=True,
                          entities=['Agents']) as recorder:
        # Init Env
        with DirtFactory(**env_kwargs) as env:
            obs_shape = env.observation_space.shape
            # Evaluation Loop for i in range(n Episodes)
            recorder.read_params(env.params)
            for episode in range(200):
                env_state = env.reset()
                rew, done_bool = 0, False
                while not done_bool:
                    if n_agents > 1:
                        actions = [model.predict(
                            np.stack([env_state[i][j] for i in range(env_state.shape[0])]),
                            deterministic=determin)[0] for j, model in enumerate(models)]
                    else:
                        actions = models[0].predict(env_state, deterministic=determin)[0]
                    if False:
                        if any([agent.pos in [door.pos for door in env.unwrapped[c.DOORS]]
                                for agent in env.unwrapped[c.AGENT]]):
                            print('On Door')
                    env_state, step_r, done_bool, info_obj = env.step(actions)
    # Init Env
    with DirtFactory(**env_kwargs) as env:
        env = EnvRecorder(env)
        obs_shape = env.observation_space.shape
        # Evaluation Loop for i in range(n Episodes)
        for episode in range(50):
            env_state = env.reset()
            rew, done_bool = 0, False
            while not done_bool:
                if n_agents > 1:
                    actions = [model.predict(
                        np.stack([env_state[i][j] for i in range(env_state.shape[0])]),
                        deterministic=determin)[0] for j, model in enumerate(models)]
                else:
                    actions = models[0].predict(env_state, deterministic=determin)[0]
                env_state, step_r, done_bool, info_obj = env.step(actions)

                    recorder.read_info(0, info_obj)
                    rew += step_r
                    if render:
                        env.render()
                    if done_bool:
                        recorder.read_done(0, done_bool)
                        break
                rew += step_r
                if render:
                    env.render()
                if done_bool:
                    break
                print(f'Factory run {episode} done, reward is:\n    {rew}')
    print('all done')

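The np.stack([env_state[i][j] ...]) idiom in that loop is worth unpacking: it pulls agent j's slice out of every entry along the joint observation's first axis and restacks the slices into a single per-agent input for model j. A toy sketch under an assumed (n_frames, n_agents, H, W) layout; the real layout is set by ObservationProperties and frame stacking, so treat the shape here as illustrative only:

import numpy as np

n_frames, n_agents, h, w = 3, 2, 5, 5
env_state = np.arange(n_frames * n_agents * h * w).reshape(n_frames, n_agents, h, w)

j = 1  # index of the model/agent being queried
obs_j = np.stack([env_state[i][j] for i in range(env_state.shape[0])])
assert obs_j.shape == (n_frames, h, w)  # agent j's frames, restacked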
							
								
								
									
studies/e_1.py (235 lines changed)
@@ -18,7 +18,6 @@ except NameError:

import time

import simplejson
from stable_baselines3.common.vec_env import SubprocVecEnv

@@ -26,13 +25,17 @@ from environments import helpers as h
from environments.factory.factory_dirt import DirtProperties, DirtFactory
from environments.factory.combined_factories import DirtItemFactory
from environments.factory.factory_item import ItemProperties, ItemFactory
from environments.logging.monitor import MonitorCallback
from environments.logging.envmonitor import EnvMonitor
from environments.utility_classes import MovementProperties, ObservationProperties, AgentRenderOptions
import pickle
from plotting.compare_runs import compare_seed_runs, compare_model_runs, compare_all_parameter_runs
import pandas as pd
import seaborn as sns

import multiprocessing as mp

# mp.set_start_method("spawn")

| """ | ||||
| In this studie, we want to explore the macro behaviour of multi agents which are trained on the same task,  | ||||
| but never saw each other in training. | ||||
@@ -69,9 +72,10 @@ n_agents = 4
ood_monitor_file = f'e_1_{n_agents}_agents'
baseline_monitor_file = 'e_1_baseline'

from stable_baselines3 import A2C

def policy_model_kwargs():
    return dict()
    return dict(gae_lambda=0.25, n_steps=16, max_grad_norm=0, use_rms_prop=False)


def dqn_model_kwargs():
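The new policy kwargs map directly onto SB3's A2C constructor: gae_lambda=0.25 shortens the advantage-estimation horizon, n_steps=16 lengthens each rollout, and use_rms_prop=False swaps A2C's default RMSprop optimizer for Adam (max_grad_norm=0 is copied verbatim from the diff without interpretation). A minimal sketch of where they land, with CartPole as a stand-in for the factory envs:

import gym
from stable_baselines3 import A2C

env = gym.make('CartPole-v1')  # stand-in env; the study uses the factory envs
model = A2C("MlpPolicy", env,
            gae_lambda=0.25,     # GAE lambda for advantage estimation
            n_steps=16,          # rollout length per update
            max_grad_norm=0,     # value taken verbatim from the diff
            use_rms_prop=False,  # Adam instead of A2C's default RMSprop
            verbose=0, device='cpu')
model.learn(total_timesteps=10_000)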
@@ -102,27 +106,23 @@ def load_model_run_baseline(seed_path, env_to_run):
    with next(seed_path.glob('*.json')).open('r') as f:
        env_kwargs = simplejson.load(f)
        env_kwargs.update(done_at_collision=True)
    # Monitor Init
    with MonitorCallback(filepath=seed_path / f'{baseline_monitor_file}.pick') as monitor:
        # Init Env
        with env_to_run(**env_kwargs) as env_factory:
            # Evaluation Loop for i in range(n Episodes)
            for episode in range(100):
                env_state = env_factory.reset()
                rew, done_bool = 0, False
                while not done_bool:
                    action = model.predict(env_state, deterministic=True)[0]
                    env_state, step_r, done_bool, info_obj = env_factory.step(action)
                    monitor.read_info(0, info_obj)
                    rew += step_r
                    if done_bool:
                        monitor.read_done(0, done_bool)
                        break
                print(f'Factory run {episode} done, reward is:\n    {rew}')
        # Eval monitor outputs are automatically stored by the monitor object
        # del model, env_kwargs, env_factory
        # import gc
        # gc.collect()
    # Init Env
    with env_to_run(**env_kwargs) as env_factory:
        monitored_env_factory = EnvMonitor(env_factory)

        # Evaluation Loop for i in range(n Episodes)
        for episode in range(100):
            env_state = monitored_env_factory.reset()
            rew, done_bool = 0, False
            while not done_bool:
                action = model.predict(env_state, deterministic=True)[0]
                env_state, step_r, done_bool, info_obj = monitored_env_factory.step(action)
                rew += step_r
                if done_bool:
                    break
            print(f'Factory run {episode} done, reward is:\n    {rew}')
        monitored_env_factory.save_run(filepath=seed_path / f'{ood_monitor_file}.pick')



def load_model_run_study(seed_path, env_to_run, additional_kwargs_dict):
@@ -138,33 +138,31 @@ def load_model_run_study(seed_path, env_to_run, additional_kwargs_dict):
            n_agents=n_agents,
            done_at_collision=True,
            **additional_kwargs_dict.get('post_training_kwargs', {}))
    # Monitor Init
    with MonitorCallback(filepath=seed_path / f'{ood_monitor_file}.pick') as monitor:
        # Init Env
        with env_to_run(**env_kwargs) as env_factory:
            # Evaluation Loop for i in range(n Episodes)
            for episode in range(50):
                env_state = env_factory.reset()
                rew, done_bool = 0, False
                while not done_bool:
                    try:
                        actions = [model.predict(
                            np.stack([env_state[i][j] for i in range(env_state.shape[0])]),
                            deterministic=True)[0] for j, model in enumerate(models)]
                    except ValueError as e:
                        print(e)
                        print('Env_Kwargs are:\n')
                        print(env_kwargs)
                        print('Path is:\n')
                        print(seed_path)
                        exit()
                    env_state, step_r, done_bool, info_obj = env_factory.step(actions)
                    monitor.read_info(0, info_obj)
                    rew += step_r
                    if done_bool:
                        monitor.read_done(0, done_bool)
                        break
                print(f'Factory run {episode} done, reward is:\n    {rew}')
    # Init Env
    with env_to_run(**env_kwargs) as env_factory:
        monitored_factory_env = EnvMonitor(env_factory)
        # Evaluation Loop for i in range(n Episodes)
        for episode in range(50):
            env_state = monitored_factory_env.reset()
            rew, done_bool = 0, False
            while not done_bool:
                try:
                    actions = [model.predict(
                        np.stack([env_state[i][j] for i in range(env_state.shape[0])]),
                        deterministic=True)[0] for j, model in enumerate(models)]
                except ValueError as e:
                    print(e)
                    print('Env_Kwargs are:\n')
                    print(env_kwargs)
                    print('Path is:\n')
                    print(seed_path)
                    exit()
                env_state, step_r, done_bool, info_obj = monitored_factory_env.step(actions)
                rew += step_r
                if done_bool:
                    break
            print(f'Factory run {episode} done, reward is:\n    {rew}')
        monitored_factory_env.save_run(filepath=seed_path / f'{ood_monitor_file}.pick')
    # Eval monitor outputs are automatically stored by the monitor object
    del models, env_kwargs, env_factory
    import gc
@@ -174,27 +172,25 @@ def load_model_run_study(seed_path, env_to_run, additional_kwargs_dict):
def start_mp_study_run(envs_map, policies_path):
    paths = list(y for y in policies_path.iterdir() if y.is_dir() and not (y / f'{ood_monitor_file}.pick').exists())
    if paths:
        import multiprocessing as mp
        pool = mp.Pool(mp.cpu_count())
        print("Starting MP with: ", pool._processes, " Processes")
        _ = pool.starmap(load_model_run_study,
                         it.product(paths,
                                    (envs_map[policies_path.parent.name][0],),
                                    (observation_modes[policies_path.parent.parent.name],))
                         )
        with mp.get_context("spawn").Pool(mp.cpu_count()) as pool:
            print("Starting MP with: ", pool._processes, " Processes")
            _ = pool.starmap(load_model_run_study,
                             it.product(paths,
                                        (envs_map[policies_path.parent.name][0],),
                                        (observation_modes[policies_path.parent.parent.name],))
                             )


def start_mp_baseline_run(envs_map, policies_path):
    paths = list(y for y in policies_path.iterdir() if y.is_dir() and
                 not (y / f'{baseline_monitor_file}.pick').exists())
    if paths:
        import multiprocessing as mp
        pool = mp.Pool(mp.cpu_count())
        print("Starting MP with: ", pool._processes, " Processes")
        _ = pool.starmap(load_model_run_baseline,
                         it.product(paths,
                                    (envs_map[policies_path.parent.name][0],))
                         )
        with mp.get_context("spawn").Pool(mp.cpu_count()) as pool:
            print("Starting MP with: ", pool._processes, " Processes")
            _ = pool.starmap(load_model_run_baseline,
                             it.product(paths,
                                        (envs_map[policies_path.parent.name][0],))
                             )


if __name__ == '__main__':
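The pool refactor above does two things: the with-block guarantees the workers are torn down even on error (Pool's context manager calls terminate() on exit), and the "spawn" start method gives each worker a fresh interpreter, avoiding the fork-related deadlocks that threaded libraries such as torch are prone to. The shape of it in isolation:

import multiprocessing as mp

def work(x, y):
    return x * y

if __name__ == '__main__':
    # "spawn" starts clean child interpreters instead of fork()ing the parent
    with mp.get_context("spawn").Pool(mp.cpu_count()) as pool:
        results = pool.starmap(work, [(i, i + 1) for i in range(8)])
    # leaving the with-block terminates and joins the workers automatically
    print(results)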
@@ -206,9 +202,10 @@ if __name__ == '__main__':

    train_steps = 5e6
    n_seeds = 3
    frames_to_stack = 3

    # Define a global study save path
    start_time = 'exploring_obs_stack'  # int(time.time())
    start_time = 'obs_stack_3_gae_0.25_n_steps_16'  # int(time.time())
    study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}'

    # Define Global Env Parameters
@@ -216,7 +213,7 @@ if __name__ == '__main__':
    obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT,
                                      omit_agent_self=True,
                                      additional_agent_placeholder=None,
                                      frames_to_stack=6,
                                      frames_to_stack=frames_to_stack,
                                      pomdp_r=2
                                      )
    move_props = MovementProperties(allow_diagonal_movement=True,
@@ -234,7 +231,8 @@ if __name__ == '__main__':
                          level_name='rooms', record_episodes=False, doors_have_area=True,
                          verbose=False,
                          mv_prop=move_props,
                          obs_prop=obs_props
                          obs_prop=obs_props,
                          done_at_collision=True
                          )

    # Bundle both environments with global kwargs and parameters
@@ -250,44 +248,45 @@ if __name__ == '__main__':

    # Define parameter versions according to #1,2[1,0,N],3
    observation_modes = {}
    observation_modes.update({
        'seperate_1': dict(
            post_training_kwargs=
            dict(obs_prop=ObservationProperties(
                render_agents=AgentRenderOptions.COMBINED,
                additional_agent_placeholder=None,
                omit_agent_self=True,
                frames_to_stack=3,
                pomdp_r=2)
            ),
            additional_env_kwargs=
            dict(obs_prop=ObservationProperties(
                render_agents=AgentRenderOptions.NOT,
                additional_agent_placeholder=1,
                omit_agent_self=True,
                frames_to_stack=3,
                pomdp_r=2)
            )
        )})
    observation_modes.update({
        'seperate_0': dict(
            post_training_kwargs=
            dict(obs_prop=ObservationProperties(
                render_agents=AgentRenderOptions.COMBINED,
                additional_agent_placeholder=None,
                omit_agent_self=True,
                frames_to_stack=3,
                pomdp_r=2)
            ),
            additional_env_kwargs=
            dict(obs_prop=ObservationProperties(
                render_agents=AgentRenderOptions.NOT,
                additional_agent_placeholder=0,
                omit_agent_self=True,
                frames_to_stack=3,
                pomdp_r=2)
            )
        )})
    if False:
        observation_modes.update({
            'seperate_1': dict(
                post_training_kwargs=
                dict(obs_prop=ObservationProperties(
                    render_agents=AgentRenderOptions.COMBINED,
                    additional_agent_placeholder=None,
                    omit_agent_self=True,
                    frames_to_stack=frames_to_stack,
                    pomdp_r=2)
                ),
                additional_env_kwargs=
                dict(obs_prop=ObservationProperties(
                    render_agents=AgentRenderOptions.NOT,
                    additional_agent_placeholder=1,
                    omit_agent_self=True,
                    frames_to_stack=frames_to_stack,
                    pomdp_r=2)
                )
            )})
        observation_modes.update({
            'seperate_0': dict(
                post_training_kwargs=
                dict(obs_prop=ObservationProperties(
                    render_agents=AgentRenderOptions.COMBINED,
                    additional_agent_placeholder=None,
                    omit_agent_self=True,
                    frames_to_stack=frames_to_stack,
                    pomdp_r=2)
                ),
                additional_env_kwargs=
                dict(obs_prop=ObservationProperties(
                    render_agents=AgentRenderOptions.NOT,
                    additional_agent_placeholder=0,
                    omit_agent_self=True,
                    frames_to_stack=frames_to_stack,
                    pomdp_r=2)
                )
            )})
    observation_modes.update({
        'seperate_N': dict(
            post_training_kwargs=
@@ -295,7 +294,7 @@ if __name__ == '__main__':
                render_agents=AgentRenderOptions.COMBINED,
                additional_agent_placeholder=None,
                omit_agent_self=True,
                frames_to_stack=3,
                frames_to_stack=frames_to_stack,
                pomdp_r=2)
            ),
            additional_env_kwargs=
@@ -303,7 +302,7 @@ if __name__ == '__main__':
                render_agents=AgentRenderOptions.NOT,
                additional_agent_placeholder='N',
                omit_agent_self=True,
                frames_to_stack=3,
                frames_to_stack=frames_to_stack,
                pomdp_r=2)
            )
        )})
@@ -314,7 +313,7 @@ if __name__ == '__main__':
                render_agents=AgentRenderOptions.LEVEL,
                omit_agent_self=True,
                additional_agent_placeholder=None,
                frames_to_stack=3,
                frames_to_stack=frames_to_stack,
                pomdp_r=2)
            )
        )})
@@ -326,7 +325,7 @@ if __name__ == '__main__':
                render_agents=AgentRenderOptions.NOT,
                additional_agent_placeholder=None,
                omit_agent_self=True,
                frames_to_stack=3,
                frames_to_stack=frames_to_stack,
                pomdp_r=2)
            )
        )
@@ -355,9 +354,6 @@ if __name__ == '__main__':
                            continue
                        seed_path.mkdir(parents=True, exist_ok=True)

                        # Monitor Init
                        callbacks = [MonitorCallback(seed_path / 'monitor.pick')]

                        # Env Init & Model kwargs definition
                        if model_cls.__name__ in ["PPO", "A2C"]:
                            # env_factory = env_class(**env_kwargs)
@@ -378,6 +374,9 @@ if __name__ == '__main__':
                        except AttributeError:
                            env_factory.save_params(param_path)

                        # EnvMonitor Init
                        callbacks = [EnvMonitor(env_factory)]

                        # Model Init
                        model = model_cls("MlpPolicy", env_factory,
                                          verbose=1, seed=seed, device='cpu',
@@ -390,6 +389,9 @@ if __name__ == '__main__':
                        save_path = seed_path / f'model.zip'
                        model.save(save_path)

                        # Monitor Save
                        callbacks[0].save_run(seed_path / 'monitor.pick')

                        # Better be safe than sorry: Clean up!
                        del env_factory, model
                        import gc
@@ -500,13 +502,14 @@ if __name__ == '__main__':
                    df['failed_cleanup'] = df.loc[:, df.columns.str.contains("]_failed_dirt_cleanup")].sum(1)
                    df['coll_lvl'] = df.loc[:, df.columns.str.contains("]_vs_LEVEL")].sum(1)
                    df['coll_agent'] = df.loc[:, df.columns.str.contains("]_vs_Agent")].sum(1) / 2
                    # df['collisions'] = df['coll_lvl'] + df['coll_agent']
                    # df['`collis`ions'] = df['coll_lvl'] + df['coll_agent']

                    value_vars = ['pick_up', 'drop_off', 'failed_item_action', 'failed_cleanup',
                                  'coll_lvl', 'coll_agent', 'dirt_cleaned']

                df_grouped = df.groupby(id_cols + ['seed']
                                        ).agg({key: 'sum' if "Agent" in key else 'mean' for key in df.columns
                                        # 'sum' if "agent" in key else 'mean'
                                        ).agg({key: 'sum' for key in df.columns
                                               if key not in (id_cols + ['seed'])})
                df_melted = df_grouped.reset_index().melt(id_vars=id_cols,
                                                          value_vars=value_vars,  # 'step_reward',

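The aggregation tweak at the end is the usual pandas groupby -> agg -> melt pipeline for seaborn plotting: sum each metric over a run, then reshape to long form so each metric becomes a (Measurement, Score) row. A toy version with made-up column values (names modeled on the diff, not taken from real output):

import pandas as pd

df = pd.DataFrame({
    'env': ['dirt', 'dirt', 'dirt', 'dirt'],
    'seed': [0, 0, 1, 1],
    'dirt_cleaned': [3, 2, 4, 1],
    'coll_agent': [1, 0, 2, 2],
})
id_cols, value_vars = ['env'], ['dirt_cleaned', 'coll_agent']

# sum every metric over the steps of each (env, seed) run ...
grouped = df.groupby(id_cols + ['seed']).agg({k: 'sum' for k in value_vars})
# ... then melt to long form, one (Measurement, Score) row per metric
melted = grouped.reset_index().melt(id_vars=id_cols, value_vars=value_vars,
                                    var_name='Measurement', value_name='Score')
print(melted)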