From b5c6105b7b215e1da496d4ab06867a25e9ff4b71 Mon Sep 17 00:00:00 2001 From: Steffen Illium Date: Wed, 27 Oct 2021 18:47:57 +0200 Subject: [PATCH] new dirt paradigm -> clean everything up --- environments/factory/base/base_factory.py | 47 +++++++--- environments/factory/factory_dirt.py | 59 ++++++++----- environments/factory/factory_dirt_item.py | 6 +- environments/factory/factory_item.py | 19 ++-- environments/logging/recorder.py | 3 +- plotting/compare_runs.py | 15 ++-- plotting/plotting.py | 42 ++++++--- reload_agent.py | 31 ++++--- studies/e_1.py | 102 ++++++++++++++-------- 9 files changed, 210 insertions(+), 114 deletions(-) diff --git a/environments/factory/base/base_factory.py b/environments/factory/base/base_factory.py index f0cdf16..ac0be21 100644 --- a/environments/factory/base/base_factory.py +++ b/environments/factory/base/base_factory.py @@ -1,5 +1,6 @@ import abc import time +from collections import defaultdict from enum import Enum from pathlib import Path from typing import List, Union, Iterable, Dict @@ -230,8 +231,9 @@ class BaseFactory(gym.Env): del this_collisions[i] guest.temp_collisions = this_collisions - if self.done_at_collision and tiles_with_collisions: - done = True + done = self.done_at_collision and tiles_with_collisions + + done = done or self.check_additional_done() # Step the door close intervall if self.parse_doors: @@ -440,48 +442,61 @@ class BaseFactory(gym.Env): def calculate_reward(self) -> (int, dict): # Returns: Reward, Info - info_dict = dict() + per_agent_info_dict = defaultdict(dict) reward = 0 for agent in self[c.AGENT]: if self._actions.is_moving_action(agent.temp_action): if agent.temp_valid: # info_dict.update(movement=1) - # info_dict.update({f'{agent.name}_failed_action': 1}) # reward += 0.00 pass else: - # self.print('collision') reward -= 0.01 self.print(f'{agent.name} just hit the wall at {agent.pos}.') - info_dict.update({f'{agent.name}_vs_LEVEL': 1}) + per_agent_info_dict[agent.name].update({f'{agent.name}_vs_LEVEL': 1}) elif h.EnvActions.USE_DOOR == agent.temp_action: if agent.temp_valid: # reward += 0.00 self.print(f'{agent.name} did just use the door at {agent.pos}.') - info_dict.update(door_used=1) + per_agent_info_dict[agent.name].update(door_used=1) else: # reward -= 0.00 self.print(f'{agent.name} just tried to use a door at {agent.pos}, but failed.') - info_dict.update({f'{agent.name}_failed_action': 1}) - info_dict.update({f'{agent.name}_failed_door_open': 1}) + per_agent_info_dict[agent.name].update({f'{agent.name}_failed_door_open': 1}) elif h.EnvActions.NOOP == agent.temp_action: - info_dict.update(no_op=1) + per_agent_info_dict[agent.name].update(no_op=1) # reward -= 0.00 + # Monitor Notes + if agent.temp_valid: + per_agent_info_dict[agent.name].update(valid_action=1) + per_agent_info_dict[agent.name].update({f'{agent.name}_valid_action': 1}) + else: + per_agent_info_dict[agent.name].update(failed_action=1) + per_agent_info_dict[agent.name].update({f'{agent.name}_failed_action': 1}) + additional_reward, additional_info_dict = self.calculate_additional_reward(agent) reward += additional_reward - info_dict.update(additional_info_dict) + per_agent_info_dict[agent.name].update(additional_info_dict) if agent.temp_collisions: self.print(f't = {self._steps}\t{agent.name} has collisions with {agent.temp_collisions}') + per_agent_info_dict[agent.name].update(collisions=1) - for other_agent in agent.temp_collisions: - info_dict.update({f'{agent.name}_vs_{other_agent.name}': 1}) + for other_agent in agent.temp_collisions: + 
per_agent_info_dict[agent.name].update({f'{agent.name}_vs_{other_agent.name}': 1}) + + # Combine the per_agent_info_dict: + combined_info_dict = defaultdict(lambda: 0) + for info_dict in per_agent_info_dict.values(): + for key, value in info_dict.items(): + combined_info_dict[key] += value + combined_info_dict = dict(combined_info_dict) self.print(f"reward is {reward}") - return reward, info_dict + return reward, combined_info_dict def render(self, mode='human'): if not self._renderer: # lazy init @@ -565,6 +580,10 @@ class BaseFactory(gym.Env): def do_additional_actions(self, agent: Agent, action: Action) -> Union[None, c]: return None + @abc.abstractmethod + def check_additional_done(self) -> bool: + return False + @abc.abstractmethod def calculate_additional_reward(self, agent: Agent) -> (int, dict): return 0, {} diff --git a/environments/factory/factory_dirt.py b/environments/factory/factory_dirt.py index fdecb01..15a7f3d 100644 --- a/environments/factory/factory_dirt.py +++ b/environments/factory/factory_dirt.py @@ -20,14 +20,17 @@ CLEAN_UP_ACTION = h.EnvActions.CLEAN_UP class DirtProperties(NamedTuple): - clean_amount: int = 1 # How much does the robot clean with one actions. - max_spawn_ratio: float = 0.2 # On max how much tiles does the dirt spawn in percent. - gain_amount: float = 0.3 # How much dirt does spawn per tile. - spawn_frequency: int = 5 # Spawn Frequency in Steps. - max_local_amount: int = 2 # Max dirt amount per tile. - max_global_amount: int = 20 # Max dirt amount in the whole environment. - dirt_smear_amount: float = 0.2 # Agents smear dirt, when not cleaning up in place. - agent_can_interact: bool = True # Whether the agents can interact with the dirt in this environment. + initial_dirt_ratio: float = 0.3 # On INIT, on max how much tiles does the dirt spawn in percent. + initial_dirt_spawn_r_var: float = 0.05 # How much does the dirt spawn amount vary? + clean_amount: float = 1 # How much does the robot clean with one actions. + max_spawn_ratio: float = 0.20 # On max how much tiles does the dirt spawn in percent. + max_spawn_amount: float = 0.3 # How much dirt does spawn per tile at max. + spawn_frequency: int = 0 # Spawn Frequency in Steps. + max_local_amount: int = 2 # Max dirt amount per tile. + max_global_amount: int = 20 # Max dirt amount in the whole environment. + dirt_smear_amount: float = 0.2 # Agents smear dirt, when not cleaning up in place. + agent_can_interact: bool = True # Whether the agents can interact with the dirt in this environment. 
+ done_when_clean = True class Dirt(Entity): @@ -91,10 +94,10 @@ class DirtRegister(MovingEntityObjectRegister): if not self.amount > self.dirt_properties.max_global_amount: dirt = self.by_pos(tile.pos) if dirt is None: - dirt = Dirt(tile, amount=self.dirt_properties.gain_amount) + dirt = Dirt(tile, amount=self.dirt_properties.max_spawn_amount) self.register_item(dirt) else: - new_value = dirt.amount + self.dirt_properties.gain_amount + new_value = dirt.amount + self.dirt_properties.max_spawn_amount dirt.set_new_amount(min(new_value, self.dirt_properties.max_local_amount)) else: return c.NOT_VALID @@ -160,12 +163,17 @@ class DirtFactory(BaseFactory): else: return c.NOT_VALID - def trigger_dirt_spawn(self): + def trigger_dirt_spawn(self, initial_spawn=False): + dirt_rng = self._dirt_rng free_for_dirt = [x for x in self[c.FLOOR] if len(x.guests) == 0 or (len(x.guests) == 1 and isinstance(next(y for y in x.guests), Dirt)) ] self._dirt_rng.shuffle(free_for_dirt) - new_spawn = self._dirt_rng.uniform(0, self.dirt_properties.max_spawn_ratio) + if initial_spawn: + var = self.dirt_properties.initial_dirt_spawn_r_var + new_spawn = self.dirt_properties.initial_dirt_ratio + dirt_rng.uniform(-var, var) + else: + new_spawn = dirt_rng.uniform(0, self.dirt_properties.max_spawn_ratio) n_dirt_tiles = max(0, int(new_spawn * len(free_for_dirt))) self[c.DIRT].spawn_dirt(free_for_dirt[:n_dirt_tiles]) @@ -184,8 +192,9 @@ class DirtFactory(BaseFactory): if self[c.DIRT].spawn_dirt(agent.tile): new_pos_dirt = self[c.DIRT].by_pos(agent.pos) new_pos_dirt.set_new_amount(max(0, new_pos_dirt.amount + smeared_dirt)) - - if not self._next_dirt_spawn: + if self._next_dirt_spawn < 0: + pass # No Dirt Spawn + elif not self._next_dirt_spawn: self.trigger_dirt_spawn() self._next_dirt_spawn = self.dirt_properties.spawn_frequency else: @@ -208,8 +217,13 @@ class DirtFactory(BaseFactory): def do_additional_reset(self) -> None: super().do_additional_reset() - self.trigger_dirt_spawn() - self._next_dirt_spawn = self.dirt_properties.spawn_frequency + self.trigger_dirt_spawn(initial_spawn=True) + self._next_dirt_spawn = self.dirt_properties.spawn_frequency if self.dirt_properties.spawn_frequency else -1 + + def check_additional_done(self): + super_done = super().check_additional_done() + done = self.dirt_properties.done_when_clean and (len(self[c.DIRT]) == 0) + return super_done or done def calculate_additional_reward(self, agent: Agent) -> (int, dict): reward, info_dict = super().calculate_additional_reward(agent) @@ -233,9 +247,8 @@ class DirtFactory(BaseFactory): else: reward -= 0.01 self.print(f'{agent.name} just tried to clean up some dirt at {agent.pos}, but failed.') - info_dict.update({f'{agent.name}_failed_action': 1}) - info_dict.update({f'{agent.name}_failed_action': 1}) info_dict.update({f'{agent.name}_failed_dirt_cleanup': 1}) + info_dict.update(failed_dirt_clean=1) # Potential based rewards -> # track the last reward , minus the current reward = potential @@ -243,12 +256,12 @@ class DirtFactory(BaseFactory): if __name__ == '__main__': - render = False + render = True dirt_props = DirtProperties(1, 0.05, 0.1, 3, 1, 20, 0.0) move_props = {'allow_square_movement': True, - 'allow_diagonal_movement': False, - 'allow_no_op': False} #MovementProperties(True, True, False) + 'allow_diagonal_movement': False, + 'allow_no_op': False} #MovementProperties(True, True, False) with RecorderCallback(filepath=Path('debug_out') / f'recorder_xxxx.json', occupation_map=False, trajectory_map=False) as recorder: @@ -272,12 +285,12 @@ if 
__name__ == '__main__': r = 0 for agent_i_action in random_actions: env_state, step_r, done_bool, info_obj = factory.step(agent_i_action) - recorder.read_info(0, info_obj) + #recorder.read_info(0, info_obj) r += step_r if render: factory.render() if done_bool: - recorder.read_done(0, done_bool) + # recorder.read_done(0, done_bool) break print(f'Factory run {epoch} done, reward is:\n {r}') pass diff --git a/environments/factory/factory_dirt_item.py b/environments/factory/factory_dirt_item.py index 077f07c..04752ea 100644 --- a/environments/factory/factory_dirt_item.py +++ b/environments/factory/factory_dirt_item.py @@ -25,7 +25,7 @@ if __name__ == '__main__': allow_square_movement=True, allow_no_op=False) - render = False + render = True factory = DirtItemFactory(n_agents=1, done_at_collision=False, frames_to_stack=0, level_name='rooms', max_steps=200, combin_agent_obs=True, @@ -46,12 +46,12 @@ if __name__ == '__main__': r = 0 for agent_i_action in random_actions: env_state, step_r, done_bool, info_obj = factory.step(agent_i_action) - recorder.read_info(0, info_obj) + # recorder.read_info(0, info_obj) r += step_r if render: factory.render() if done_bool: - recorder.read_done(0, done_bool) + # recorder.read_done(0, done_bool) break print(f'Factory run {epoch} done, reward is:\n {r}') pass diff --git a/environments/factory/factory_item.py b/environments/factory/factory_item.py index f280c40..18fd4a5 100644 --- a/environments/factory/factory_item.py +++ b/environments/factory/factory_item.py @@ -318,17 +318,26 @@ class ItemFactory(BaseFactory): if h.EnvActions.ITEM_ACTION == agent.temp_action: if agent.temp_valid: if drop_off := self[c.DROP_OFF].by_pos(agent.pos): - info_dict.update({f'{agent.name}_item_dropoff': 1}) + info_dict.update({f'{agent.name}_item_drop_off': 1}) + info_dict.update(item_drop_off=1) self.print(f'{agent.name} just dropped of an item at {drop_off.pos}.') reward += 0.5 else: info_dict.update({f'{agent.name}_item_pickup': 1}) + info_dict.update(item_pickup=1) self.print(f'{agent.name} just picked up an item at {agent.pos}') reward += 0.1 else: - info_dict.update({f'{agent.name}_failed_item_action': 1}) - self.print(f'{agent.name} just tried to pick up an item at {agent.pos}, but failed.') - reward -= 0.1 + if self[c.DROP_OFF].by_pos(agent.pos): + info_dict.update({f'{agent.name}_failed_drop_off': 1}) + info_dict.update(failed_drop_off=1) + self.print(f'{agent.name} just tried to drop off at {agent.pos}, but failed.') + reward -= 0.1 + else: + info_dict.update({f'{agent.name}_failed_item_action': 1}) + info_dict.update(failed_pick_up=1) + self.print(f'{agent.name} just tried to pick up an item at {agent.pos}, but failed.') + reward -= 0.1 return reward, info_dict def render_additional_assets(self, mode='human'): @@ -343,7 +352,7 @@ class ItemFactory(BaseFactory): if __name__ == '__main__': import random - render = False + render = True item_props = ItemProperties() diff --git a/environments/logging/recorder.py b/environments/logging/recorder.py index bca4a8a..c52f91d 100644 --- a/environments/logging/recorder.py +++ b/environments/logging/recorder.py @@ -33,7 +33,8 @@ class RecorderCallback(BaseCallback): def read_done(self, env_idx, done): if done: - self._recorder_out_list.append({'steps': self._recorder_dict[env_idx]}) + self._recorder_out_list.append({'steps': self._recorder_dict[env_idx], + 'episode': len(self._recorder_out_list)}) self._recorder_dict[env_idx] = list() else: pass diff --git a/plotting/compare_runs.py b/plotting/compare_runs.py index 3af498b..5eb408b 
100644 --- a/plotting/compare_runs.py +++ b/plotting/compare_runs.py @@ -10,7 +10,7 @@ from environments.helpers import IGNORED_DF_COLUMNS, MODEL_MAP from plotting.plotting import prepare_plot -def compare_seed_runs(run_path: Union[str, PathLike]): +def compare_seed_runs(run_path: Union[str, PathLike], use_tex: bool = False): run_path = Path(run_path) df_list = list() for run, monitor_file in enumerate(run_path.rglob('monitor*.pick')): @@ -37,11 +37,12 @@ def compare_seed_runs(run_path: Union[str, PathLike]): skip_n = round(df_melted['Episode'].max() * 0.02) df_melted = df_melted[df_melted['Episode'] % skip_n == 0] - prepare_plot(run_path / f'{run_path.name}_monitor_lineplot.png', df_melted) + prepare_plot(run_path / f'{run_path.name}_monitor_lineplot.png', df_melted, use_tex=use_tex) print('Plotting done.') -def compare_model_runs(run_path: Path, run_identifier: Union[str, int], parameter: Union[str, List[str]]): +def compare_model_runs(run_path: Path, run_identifier: Union[str, int], parameter: Union[str, List[str]], + use_tex: bool = False): run_path = Path(run_path) df_list = list() parameter = [parameter] if isinstance(parameter, str) else parameter @@ -75,12 +76,13 @@ def compare_model_runs(run_path: Path, run_identifier: Union[str, int], paramete df_melted = df_melted[df_melted['Episode'] % skip_n == 0] style = 'Measurement' if len(columns) > 1 else None - prepare_plot(run_path / f'{run_identifier}_compare_{parameter}.png', df_melted, hue='Model', style=style) + prepare_plot(run_path / f'{run_identifier}_compare_{parameter}.png', df_melted, hue='Model', style=style, + use_tex=use_tex) print('Plotting done.') def compare_all_parameter_runs(run_root_path: Path, parameter: Union[str, List[str]], - param_names: Union[List[str], None] = None, str_to_ignore=''): + param_names: Union[List[str], None] = None, str_to_ignore='', use_tex: bool = False): run_root_path = Path(run_root_path) df_list = list() parameter = [parameter] if isinstance(parameter, str) else parameter @@ -151,5 +153,6 @@ def compare_all_parameter_runs(run_root_path: Path, parameter: Union[str, List[s value_name="Score") style = 'Measurement' if len(columns) > 1 else None - prepare_plot(run_root_path / f'compare_{parameter}.png', df_melted, hue='Parameter Combination', style=style) + prepare_plot(run_root_path / f'compare_{parameter}.png', df_melted, hue='Parameter Combination', + style=style, use_tex=use_tex) print('Plotting done.') diff --git a/plotting/plotting.py b/plotting/plotting.py index b93bab1..4f0fba0 100644 --- a/plotting/plotting.py +++ b/plotting/plotting.py @@ -26,21 +26,35 @@ def plot(filepath, ext='png'): plt.clf() -def prepare_plot(filepath, results_df, ext='png', hue='Measurement', style=None): +def prepare_tex(df, hue, style, hue_order): + sns.set(rc={'text.usetex': True}, style='whitegrid') + lineplot = sns.lineplot(data=df, x='Episode', y='Score', ci=95, palette=PALETTE, + hue_order=hue_order, hue=hue, style=style) + # lineplot.set_title(f'{sorted(list(df["Measurement"].unique()))}') + return lineplot + + +def prepare_plt(df, hue, style, hue_order): + print('Struggling to plot Figure using LaTeX - going back to normal.') + plt.close('all') + sns.set(rc={'text.usetex': False}, style='whitegrid') + lineplot = sns.lineplot(data=df, x='Episode', y='Score', hue=hue, style=style, + ci=95, palette=PALETTE, hue_order=hue_order) + # lineplot.set_title(f'{sorted(list(df["Measurement"].unique()))}') + return lineplot + + +def prepare_plot(filepath, results_df, ext='png', hue='Measurement', style=None, 
use_tex=False): df = results_df.copy() df[hue] = df[hue].str.replace('_', '-') hue_order = sorted(list(df[hue].unique())) - try: - sns.set(rc={'text.usetex': True}, style='whitegrid') - lineplot = sns.lineplot(data=df, x='Episode', y='Score', ci=95, palette=PALETTE, - hue_order=hue_order, hue=hue, style=style) - # lineplot.set_title(f'{sorted(list(df["Measurement"].unique()))}') - plot(filepath, ext=ext) # plot raises errors not lineplot! - except (FileNotFoundError, RuntimeError): - print('Struggling to plot Figure using LaTeX - going back to normal.') - plt.close('all') - sns.set(rc={'text.usetex': False}, style='whitegrid') - lineplot = sns.lineplot(data=df, x='Episode', y='Score', hue=hue, style=style, - ci=95, palette=PALETTE, hue_order=hue_order) - # lineplot.set_title(f'{sorted(list(df["Measurement"].unique()))}') + if use_tex: + try: + _ = prepare_tex(df, hue, style, hue_order) + plot(filepath, ext=ext) # plot raises errors not lineplot! + except (FileNotFoundError, RuntimeError): + _ = prepare_plt(df, hue, style, hue_order) + plot(filepath, ext=ext) + else: + _ = prepare_plt(df, hue, style, hue_order) plot(filepath, ext=ext) diff --git a/reload_agent.py b/reload_agent.py index 8efe683..9f9342b 100644 --- a/reload_agent.py +++ b/reload_agent.py @@ -1,6 +1,7 @@ import warnings from pathlib import Path +import numpy as np import yaml from environments import helpers as h @@ -14,36 +15,42 @@ warnings.filterwarnings('ignore', category=UserWarning) if __name__ == '__main__': - model_name = 'PPO_1631187073' + model_name = 'DQN_1631187073' run_id = 0 seed = 69 - out_path = Path(__file__).parent / 'study_out' / 'e_1_1631709932' / 'no_obs' / 'dirt' / 'A2C_1631709932' / '0_A2C_1631709932' - model_path = out_path / model_name + out_path = Path('debug_out/DQN_1635176929/0_DQN_1635176929') + model_path = out_path with (out_path / f'env_params.json').open('r') as f: env_kwargs = yaml.load(f, Loader=yaml.FullLoader) - env_kwargs.update(additional_agent_placeholder=None) - # env_kwargs.update(verbose=False, env_seed=seed, record_episodes=True, parse_doors=True) + env_kwargs.update(additional_agent_placeholder=None, n_agents=4) + if gain_amount := env_kwargs.get('dirt_properties', {}).get('gain_amount', None): + env_kwargs['dirt_properties']['max_spawn_amount'] = gain_amount + del env_kwargs['dirt_properties']['gain_amount'] + + env_kwargs.update(record_episodes=True) this_model = out_path / 'model.zip' model_cls = next(val for key, val in h.MODEL_MAP.items() if key in model_name) - model = model_cls.load(this_model) + models = [model_cls.load(this_model) for _ in range(4)] - with RecorderCallback(filepath=Path() / 'recorder_out_doors.json') as recorder: + with RecorderCallback(filepath=Path() / 'recorder_out_DQN.json') as recorder: # Init Env - with DirtFactory(**env_kwargs) as env: + with DirtItemFactory(**env_kwargs) as env: obs_shape = env.observation_space.shape # Evaluation Loop for i in range(n Episodes) for episode in range(5): - obs = env.reset() + env_state = env.reset() rew, done_bool = 0, False while not done_bool: - action = model.predict(obs, deterministic=False)[0] - env_state, step_r, done_bool, info_obj = env.step(action[0]) + actions = [model.predict( + np.stack([env_state[i][j] for i in range(env_state.shape[0])]), + deterministic=True)[0] for j, model in enumerate(models)] + env_state, step_r, done_bool, info_obj = env.step(actions) recorder.read_info(0, info_obj) rew += step_r - env.render() + # env.render() if done_bool: recorder.read_done(0, done_bool) break diff --git 
a/studies/e_1.py b/studies/e_1.py index 221d0e1..5acc40e 100644 --- a/studies/e_1.py +++ b/studies/e_1.py @@ -33,7 +33,7 @@ import pandas as pd import seaborn as sns # Define a global studi save path -start_time = 1634134997 # int(time.time()) +start_time = 163519000 # int(time.time()) study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}' """ @@ -70,7 +70,7 @@ There are further distinctions to be made: def policy_model_kwargs(): - return dict(ent_coef=0.01) + return dict(ent_coef=0.05) def dqn_model_kwargs(): @@ -93,21 +93,23 @@ def encapsule_env_factory(env_fctry, env_kwrgs): if __name__ == '__main__': - train_steps = 5e5 + train_steps = 8e5 # Define Global Env Parameters # Define properties object parameters move_props = MovementProperties(allow_diagonal_movement=True, allow_square_movement=True, allow_no_op=False) - dirt_props = DirtProperties(clean_amount=2, gain_amount=0.1, max_global_amount=20, - max_local_amount=1, spawn_frequency=15, max_spawn_ratio=0.05, + dirt_props = DirtProperties(initial_dirt_ratio=0.35, initial_dirt_spawn_r_var=0.1, + clean_amount=0.34, + max_spawn_amount=0.1, max_global_amount=20, + max_local_amount=1, spawn_frequency=0, max_spawn_ratio=0.05, dirt_smear_amount=0.0, agent_can_interact=True) item_props = ItemProperties(n_items=10, agent_can_interact=True, spawn_frequency=30, n_drop_off_locations=2, max_agent_inventory_capacity=15) factory_kwargs = dict(n_agents=1, - pomdp_r=2, max_steps=400, parse_doors=False, + pomdp_r=2, max_steps=400, parse_doors=True, level_name='rooms', frames_to_stack=3, omit_agent_in_obs=True, combin_agent_obs=True, record_episodes=False, cast_shadows=True, doors_have_area=False, verbose=False, @@ -124,9 +126,9 @@ if __name__ == '__main__': # Define parameter versions according with #1,2[1,0,N],3 observation_modes = { # Fill-value = 0 - 'seperate_0': dict(additional_env_kwargs=dict(additional_agent_placeholder=0)), + # DEACTIVATED 'seperate_0': dict(additional_env_kwargs=dict(additional_agent_placeholder=0)), # Fill-value = 1 - 'seperate_1': dict(additional_env_kwargs=dict(additional_agent_placeholder=1)), + # DEACTIVATED 'seperate_1': dict(additional_env_kwargs=dict(additional_agent_placeholder=1)), # Fill-value = N(0, 1) 'seperate_N': dict(additional_env_kwargs=dict(additional_agent_placeholder='N')), # Further Adjustments are done post-training @@ -137,10 +139,10 @@ if __name__ == '__main__': # Train starts here ############################################################ # Build Major Loop parameters, parameter versions, Env Classes and models - if False: + if True: for observation_mode in observation_modes.keys(): for env_name in env_names: - for model_cls in h.MODEL_MAP.values(): + for model_cls in [h.MODEL_MAP['A2C'], h.MODEL_MAP['DQN']]: # Create an identifier, which is unique for every combination and easy to read in filesystem identifier = f'{model_cls.__name__}_{start_time}' # Train each combination per seed @@ -154,6 +156,8 @@ if __name__ == '__main__': env_kwargs.update(env_seed=seed) # Output folder seed_path = combination_path / f'{str(seed)}_{identifier}' + if (seed_path / 'monitor.pick').exists(): + continue seed_path.mkdir(parents=True, exist_ok=True) # Monitor Init @@ -163,7 +167,7 @@ if __name__ == '__main__': if model_cls.__name__ in ["PPO", "A2C"]: # env_factory = env_class(**env_kwargs) env_factory = SubprocVecEnv([encapsule_env_factory(env_class, env_kwargs) - for _ in range(1)], start_method="spawn") + for _ in range(6)], start_method="spawn") model_kwargs = 
policy_model_kwargs() elif model_cls.__name__ in ["RegDQN", "DQN", "QRDQN"]: @@ -197,15 +201,20 @@ if __name__ == '__main__': gc.collect() # Compare performance runs, for each seed within a model - compare_seed_runs(combination_path) + compare_seed_runs(combination_path, use_tex=False) # Better be save then sorry: Clean up! - del model_kwargs, env_kwargs - import gc - gc.collect() + try: + del env_kwargs + del model_kwargs + import gc + gc.collect() + except NameError: + pass # Compare performance runs, for each model # FIXME: Check THIS!!!! - compare_model_runs(study_root_path / observation_mode / env_name, f'{start_time}', 'step_reward') + compare_model_runs(study_root_path / observation_mode / env_name, f'{start_time}', 'step_reward', + use_tex=False) pass pass pass @@ -215,7 +224,7 @@ if __name__ == '__main__': # Evaluation starts here ##################################################### # First Iterate over every model and monitor "as trained" baseline_monitor_file = 'e_1_baseline_monitor.pick' - if False: + if True: render = False for observation_mode in observation_modes: obs_mode_path = next(x for x in study_root_path.iterdir() if x.is_dir() and x.name == observation_mode) @@ -312,8 +321,9 @@ if __name__ == '__main__': # Plotting if True: # TODO: Plotting - df_list = list() + for observation_folder in (x for x in study_root_path.iterdir() if x.is_dir()): + df_list = list() for env_folder in (x for x in observation_folder.iterdir() if x.is_dir()): for model_folder in (x for x in env_folder.iterdir() if x.is_dir()): # Gather per seed results in this list @@ -334,28 +344,48 @@ if __name__ == '__main__': monitor_df['obs_mode'] = monitor_df['obs_mode'].astype(str) monitor_df['model'] = model_folder.name.split('_')[0] - df_list.append(monitor_df) - id_cols = ['monitor', 'env', 'obs_mode', 'model'] + id_cols = ['monitor', 'env', 'obs_mode', 'model'] - df = pd.concat(df_list, ignore_index=True) - df = df.fillna(0) + df = pd.concat(df_list, ignore_index=True) + df = df.fillna(0) - for id_col in id_cols: - df[id_col] = df[id_col].astype(str) + for id_col in id_cols: + df[id_col] = df[id_col].astype(str) - df_grouped = df.groupby(id_cols + ['seed'] - ).agg({key: 'sum' if "Agent" in key else 'mean' for key in df.columns - if key not in (id_cols + ['seed'])}) - df_melted = df_grouped.reset_index().melt(id_vars=id_cols, - value_vars='step_reward', var_name="Measurement", - value_name="Score") + if True: + # df['fail_sum'] = df.loc[:, df.columns.str.contains("failed")].sum(1) + df['pick_up'] = df.loc[:, df.columns.str.contains("]_item_pickup")].sum(1) + df['drop_off'] = df.loc[:, df.columns.str.contains("]_item_dropoff")].sum(1) + df['failed_item_action'] = df.loc[:, df.columns.str.contains("]_failed_item_action")].sum(1) + df['failed_cleanup'] = df.loc[:, df.columns.str.contains("]_failed_dirt_cleanup")].sum(1) + df['coll_lvl'] = df.loc[:, df.columns.str.contains("]_vs_LEVEL")].sum(1) + df['coll_agent'] = df.loc[:, df.columns.str.contains("]_vs_Agent")].sum(1) / 2 + # df['collisions'] = df['coll_lvl'] + df['coll_agent'] - c = sns.catplot(data=df_melted, x='obs_mode', hue='monitor', row='model', col='env', y='Score', sharey=False, - kind="box", height=4, aspect=.7, legend_out=True) - c.set_xticklabels(rotation=65, horizontalalignment='right') - plt.tight_layout(pad=2) - plt.savefig(study_root_path / f'results_{n_agents}_agents.png') + value_vars = ['pick_up', 'drop_off', 'failed_item_action', 'failed_cleanup', + 'coll_lvl', 'coll_agent', 'dirt_cleaned'] - pass + df_grouped = df.groupby(id_cols + 
['seed'] + ).agg({key: 'sum' if "Agent" in key else 'mean' for key in df.columns + if key not in (id_cols + ['seed'])}) + df_melted = df_grouped.reset_index().melt(id_vars=id_cols, + value_vars=value_vars, # 'step_reward', + var_name="Measurement", + value_name="Score") + # df_melted["Measurements"] = df_melted["Measurement"] + " " + df_melted["monitor"] + + # Plotting + fig, ax = plt.subplots(figsize=(11.7, 8.27)) + + c = sns.catplot(data=df_melted[df_melted['obs_mode'] == observation_folder.name], + x='Measurement', hue='monitor', row='model', col='env', y='Score', + sharey=False, kind="box", height=4, aspect=.7, legend_out=True, + showfliers=False) + c.set_xticklabels(rotation=65, horizontalalignment='right') + c.fig.subplots_adjust(top=0.9) # adjust the Figure in rp + c.fig.suptitle(f"Cat plot for {observation_folder.name}") + plt.tight_layout(pad=2) + plt.savefig(study_root_path / f'results_{n_agents}_agents_{observation_folder.name}.png') + pass
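
The reworked calculate_reward() in environments/factory/base/base_factory.py now collects one info dict per agent and then sums them key-wise into a single combined dict before returning it. A minimal standalone sketch of that merge step, assuming the helper name merge_info_dicts and the sample agent dicts are illustrative only and not part of the patch:

    from collections import defaultdict
    from typing import Dict


    def merge_info_dicts(per_agent_info: Dict[str, Dict[str, float]]) -> Dict[str, float]:
        # Sum every key over all agents, mirroring the key-wise aggregation
        # that calculate_reward() performs on per_agent_info_dict.
        combined = defaultdict(lambda: 0)
        for info_dict in per_agent_info.values():
            for key, value in info_dict.items():
                combined[key] += value
        return dict(combined)


    if __name__ == '__main__':
        example = {
            'Agent[0]': {'valid_action': 1, 'door_used': 1},
            'Agent[1]': {'valid_action': 1, 'Agent[1]_vs_LEVEL': 1},
        }
        print(merge_info_dicts(example))
        # {'valid_action': 2, 'door_used': 1, 'Agent[1]_vs_LEVEL': 1}

Shared keys such as valid_action or collisions accumulate across agents, while per-agent keys (e.g. Agent[1]_vs_LEVEL) pass through unchanged, which is what lets the monitor report both aggregate and per-agent counts from one dict.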