mirror of
				https://github.com/illiumst/marl-factory-grid.git
				synced 2025-10-31 12:37:27 +01:00 
			
		
		
		
	new dirt paradigm -> clean everything up
This commit is contained in:
		| @@ -1,5 +1,6 @@ | ||||
| import abc | ||||
| import time | ||||
| from collections import defaultdict | ||||
| from enum import Enum | ||||
| from pathlib import Path | ||||
| from typing import List, Union, Iterable, Dict | ||||
| @@ -230,8 +231,9 @@ class BaseFactory(gym.Env): | ||||
|                 del this_collisions[i] | ||||
|                 guest.temp_collisions = this_collisions | ||||
|  | ||||
|         if self.done_at_collision and tiles_with_collisions: | ||||
|             done = True | ||||
|         done = self.done_at_collision and tiles_with_collisions | ||||
|  | ||||
|         done = done or self.check_additional_done() | ||||
|  | ||||
|         # Step the door close interval | ||||
|         if self.parse_doors: | ||||
| @@ -440,48 +442,61 @@ class BaseFactory(gym.Env): | ||||
|  | ||||
|     def calculate_reward(self) -> (int, dict): | ||||
|         # Returns: Reward, Info | ||||
|         info_dict = dict() | ||||
|         per_agent_info_dict = defaultdict(dict) | ||||
|         reward = 0 | ||||
|  | ||||
|         for agent in self[c.AGENT]: | ||||
|             if self._actions.is_moving_action(agent.temp_action): | ||||
|                 if agent.temp_valid: | ||||
|                     # info_dict.update(movement=1) | ||||
|                     # info_dict.update({f'{agent.name}_failed_action': 1}) | ||||
|                     # reward += 0.00 | ||||
|                     pass | ||||
|                 else: | ||||
|                     # self.print('collision') | ||||
|                     reward -= 0.01 | ||||
|                     self.print(f'{agent.name} just hit the wall at {agent.pos}.') | ||||
|                     info_dict.update({f'{agent.name}_vs_LEVEL': 1}) | ||||
|                     per_agent_info_dict[agent.name].update({f'{agent.name}_vs_LEVEL': 1}) | ||||
|  | ||||
|             elif h.EnvActions.USE_DOOR == agent.temp_action: | ||||
|                 if agent.temp_valid: | ||||
|                     # reward += 0.00 | ||||
|                     self.print(f'{agent.name} did just use the door at {agent.pos}.') | ||||
|                     info_dict.update(door_used=1) | ||||
|                     per_agent_info_dict[agent.name].update(door_used=1) | ||||
|                 else: | ||||
|                     # reward -= 0.00 | ||||
|                     self.print(f'{agent.name} just tried to use a door at {agent.pos}, but failed.') | ||||
|                     info_dict.update({f'{agent.name}_failed_action': 1}) | ||||
|                     info_dict.update({f'{agent.name}_failed_door_open': 1}) | ||||
|                     per_agent_info_dict[agent.name].update({f'{agent.name}_failed_door_open': 1}) | ||||
|             elif h.EnvActions.NOOP == agent.temp_action: | ||||
|                 info_dict.update(no_op=1) | ||||
|                 per_agent_info_dict[agent.name].update(no_op=1) | ||||
|                 # reward -= 0.00 | ||||
|  | ||||
|             # Monitor Notes | ||||
|             if agent.temp_valid: | ||||
|                 per_agent_info_dict[agent.name].update(valid_action=1) | ||||
|                 per_agent_info_dict[agent.name].update({f'{agent.name}_valid_action': 1}) | ||||
|             else: | ||||
|                 per_agent_info_dict[agent.name].update(failed_action=1) | ||||
|                 per_agent_info_dict[agent.name].update({f'{agent.name}_failed_action': 1}) | ||||
|  | ||||
|             additional_reward, additional_info_dict = self.calculate_additional_reward(agent) | ||||
|             reward += additional_reward | ||||
|             info_dict.update(additional_info_dict) | ||||
|             per_agent_info_dict[agent.name].update(additional_info_dict) | ||||
|  | ||||
|             if agent.temp_collisions: | ||||
|                 self.print(f't = {self._steps}\t{agent.name} has collisions with {agent.temp_collisions}') | ||||
|                 per_agent_info_dict[agent.name].update(collisions=1) | ||||
|  | ||||
|             for other_agent in agent.temp_collisions: | ||||
|                 info_dict.update({f'{agent.name}_vs_{other_agent.name}': 1}) | ||||
|                 for other_agent in agent.temp_collisions: | ||||
|                     per_agent_info_dict[agent.name].update({f'{agent.name}_vs_{other_agent.name}': 1}) | ||||
|  | ||||
|         # Combine the per_agent_info_dict: | ||||
|         combined_info_dict = defaultdict(lambda: 0) | ||||
|         for info_dict in per_agent_info_dict.values(): | ||||
|             for key, value in info_dict.items(): | ||||
|                 combined_info_dict[key] += value | ||||
|         combined_info_dict = dict(combined_info_dict) | ||||
|  | ||||
|         self.print(f"reward is {reward}") | ||||
|         return reward, info_dict | ||||
|         return reward, combined_info_dict | ||||
|  | ||||
|     def render(self, mode='human'): | ||||
|         if not self._renderer:  # lazy init | ||||
| @@ -565,6 +580,10 @@ class BaseFactory(gym.Env): | ||||
|     def do_additional_actions(self, agent: Agent, action: Action) -> Union[None, c]: | ||||
|         return None | ||||
|  | ||||
|     @abc.abstractmethod | ||||
|     def check_additional_done(self) -> bool: | ||||
|         return False | ||||
|  | ||||
|     @abc.abstractmethod | ||||
|     def calculate_additional_reward(self, agent: Agent) -> (int, dict): | ||||
|         return 0, {} | ||||
|   | ||||
| @@ -20,14 +20,17 @@ CLEAN_UP_ACTION = h.EnvActions.CLEAN_UP | ||||
|  | ||||
|  | ||||
| class DirtProperties(NamedTuple): | ||||
|     clean_amount: int = 1               # How much does the robot clean with one actions. | ||||
|     max_spawn_ratio: float = 0.2        # On max how much tiles does the dirt spawn in percent. | ||||
|     gain_amount: float = 0.3            # How much dirt does spawn per tile. | ||||
|     spawn_frequency: int = 5            # Spawn Frequency in Steps. | ||||
|     max_local_amount: int = 2           # Max dirt amount per tile. | ||||
|     max_global_amount: int = 20         # Max dirt amount in the whole environment. | ||||
|     dirt_smear_amount: float = 0.2      # Agents smear dirt, when not cleaning up in place. | ||||
|     agent_can_interact: bool = True     # Whether the agents can interact with the dirt in this environment. | ||||
|     initial_dirt_ratio: float = 0.3         # On INIT, on max how much tiles does the dirt spawn in percent. | ||||
|     initial_dirt_spawn_r_var: float = 0.05   # How much does the dirt spawn amount vary? | ||||
|     clean_amount: float = 1                 # How much does the robot clean with one action. | ||||
|     max_spawn_ratio: float = 0.20           # On max how much tiles does the dirt spawn in percent. | ||||
|     max_spawn_amount: float = 0.3           # How much dirt does spawn per tile at max. | ||||
|     spawn_frequency: int = 0                # Spawn Frequency in Steps. | ||||
|     max_local_amount: int = 2               # Max dirt amount per tile. | ||||
|     max_global_amount: int = 20             # Max dirt amount in the whole environment. | ||||
|     dirt_smear_amount: float = 0.2          # Agents smear dirt, when not cleaning up in place. | ||||
|     agent_can_interact: bool = True         # Whether the agents can interact with the dirt in this environment. | ||||
|     done_when_clean = True | ||||
|  | ||||
|  | ||||
| class Dirt(Entity): | ||||
| @@ -91,10 +94,10 @@ class DirtRegister(MovingEntityObjectRegister): | ||||
|             if not self.amount > self.dirt_properties.max_global_amount: | ||||
|                 dirt = self.by_pos(tile.pos) | ||||
|                 if dirt is None: | ||||
|                     dirt = Dirt(tile, amount=self.dirt_properties.gain_amount) | ||||
|                     dirt = Dirt(tile, amount=self.dirt_properties.max_spawn_amount) | ||||
|                     self.register_item(dirt) | ||||
|                 else: | ||||
|                     new_value = dirt.amount + self.dirt_properties.gain_amount | ||||
|                     new_value = dirt.amount + self.dirt_properties.max_spawn_amount | ||||
|                     dirt.set_new_amount(min(new_value, self.dirt_properties.max_local_amount)) | ||||
|             else: | ||||
|                 return c.NOT_VALID | ||||
| @@ -160,12 +163,17 @@ class DirtFactory(BaseFactory): | ||||
|         else: | ||||
|             return c.NOT_VALID | ||||
|  | ||||
|     def trigger_dirt_spawn(self): | ||||
|     def trigger_dirt_spawn(self, initial_spawn=False): | ||||
|         dirt_rng = self._dirt_rng | ||||
|         free_for_dirt = [x for x in self[c.FLOOR] | ||||
|                          if len(x.guests) == 0 or (len(x.guests) == 1 and isinstance(next(y for y in x.guests), Dirt)) | ||||
|                          ] | ||||
|         self._dirt_rng.shuffle(free_for_dirt) | ||||
|         new_spawn = self._dirt_rng.uniform(0, self.dirt_properties.max_spawn_ratio) | ||||
|         if initial_spawn: | ||||
|             var = self.dirt_properties.initial_dirt_spawn_r_var | ||||
|             new_spawn = self.dirt_properties.initial_dirt_ratio + dirt_rng.uniform(-var, var) | ||||
|         else: | ||||
|             new_spawn = dirt_rng.uniform(0, self.dirt_properties.max_spawn_ratio) | ||||
|         n_dirt_tiles = max(0, int(new_spawn * len(free_for_dirt))) | ||||
|         self[c.DIRT].spawn_dirt(free_for_dirt[:n_dirt_tiles]) | ||||
|  | ||||
| @@ -184,8 +192,9 @@ class DirtFactory(BaseFactory): | ||||
|                                     if self[c.DIRT].spawn_dirt(agent.tile): | ||||
|                                         new_pos_dirt = self[c.DIRT].by_pos(agent.pos) | ||||
|                                         new_pos_dirt.set_new_amount(max(0, new_pos_dirt.amount + smeared_dirt)) | ||||
|  | ||||
|         if not self._next_dirt_spawn: | ||||
|         if self._next_dirt_spawn < 0: | ||||
|             pass  # No Dirt Spawn | ||||
|         elif not self._next_dirt_spawn: | ||||
|             self.trigger_dirt_spawn() | ||||
|             self._next_dirt_spawn = self.dirt_properties.spawn_frequency | ||||
|         else: | ||||
| @@ -208,8 +217,13 @@ class DirtFactory(BaseFactory): | ||||
|  | ||||
|     def do_additional_reset(self) -> None: | ||||
|         super().do_additional_reset() | ||||
|         self.trigger_dirt_spawn() | ||||
|         self._next_dirt_spawn = self.dirt_properties.spawn_frequency | ||||
|         self.trigger_dirt_spawn(initial_spawn=True) | ||||
|         self._next_dirt_spawn = self.dirt_properties.spawn_frequency if self.dirt_properties.spawn_frequency else -1 | ||||
|  | ||||
|     def check_additional_done(self): | ||||
|         super_done = super().check_additional_done() | ||||
|         done = self.dirt_properties.done_when_clean and (len(self[c.DIRT]) == 0) | ||||
|         return super_done or done | ||||
|  | ||||
|     def calculate_additional_reward(self, agent: Agent) -> (int, dict): | ||||
|         reward, info_dict = super().calculate_additional_reward(agent) | ||||
| @@ -233,9 +247,8 @@ class DirtFactory(BaseFactory): | ||||
|             else: | ||||
|                 reward -= 0.01 | ||||
|                 self.print(f'{agent.name} just tried to clean up some dirt at {agent.pos}, but failed.') | ||||
|                 info_dict.update({f'{agent.name}_failed_action': 1}) | ||||
|                 info_dict.update({f'{agent.name}_failed_action': 1}) | ||||
|                 info_dict.update({f'{agent.name}_failed_dirt_cleanup': 1}) | ||||
|                 info_dict.update(failed_dirt_clean=1) | ||||
|  | ||||
|         # Potential based rewards -> | ||||
|         #  track the last reward , minus the current reward = potential | ||||
| @@ -243,12 +256,12 @@ class DirtFactory(BaseFactory): | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     render = False | ||||
|     render = True | ||||
|  | ||||
|     dirt_props = DirtProperties(1, 0.05, 0.1, 3, 1, 20, 0.0) | ||||
|     move_props = {'allow_square_movement': True, | ||||
|   'allow_diagonal_movement': False, | ||||
|   'allow_no_op': False} #MovementProperties(True, True, False) | ||||
|                   'allow_diagonal_movement': False, | ||||
|                   'allow_no_op': False} #MovementProperties(True, True, False) | ||||
|  | ||||
|     with RecorderCallback(filepath=Path('debug_out') / f'recorder_xxxx.json', occupation_map=False, | ||||
|                           trajectory_map=False) as recorder: | ||||
| @@ -272,12 +285,12 @@ if __name__ == '__main__': | ||||
|             r = 0 | ||||
|             for agent_i_action in random_actions: | ||||
|                 env_state, step_r, done_bool, info_obj = factory.step(agent_i_action) | ||||
|                 recorder.read_info(0, info_obj) | ||||
|                 #recorder.read_info(0, info_obj) | ||||
|                 r += step_r | ||||
|                 if render: | ||||
|                     factory.render() | ||||
|                 if done_bool: | ||||
|                     recorder.read_done(0, done_bool) | ||||
|                 #    recorder.read_done(0, done_bool) | ||||
|                     break | ||||
|             print(f'Factory run {epoch} done, reward is:\n    {r}') | ||||
|     pass | ||||
|   | ||||
| @@ -25,7 +25,7 @@ if __name__ == '__main__': | ||||
|                                         allow_square_movement=True, | ||||
|                                         allow_no_op=False) | ||||
|  | ||||
|         render = False | ||||
|         render = True | ||||
|  | ||||
|         factory = DirtItemFactory(n_agents=1, done_at_collision=False, frames_to_stack=0, | ||||
|                               level_name='rooms', max_steps=200, combin_agent_obs=True, | ||||
| @@ -46,12 +46,12 @@ if __name__ == '__main__': | ||||
|             r = 0 | ||||
|             for agent_i_action in random_actions: | ||||
|                 env_state, step_r, done_bool, info_obj = factory.step(agent_i_action) | ||||
|                 recorder.read_info(0, info_obj) | ||||
|                 # recorder.read_info(0, info_obj) | ||||
|                 r += step_r | ||||
|                 if render: | ||||
|                     factory.render() | ||||
|                 if done_bool: | ||||
|                     recorder.read_done(0, done_bool) | ||||
|                     # recorder.read_done(0, done_bool) | ||||
|                     break | ||||
|             print(f'Factory run {epoch} done, reward is:\n    {r}') | ||||
|         pass | ||||
|   | ||||
| @@ -318,17 +318,26 @@ class ItemFactory(BaseFactory): | ||||
|         if h.EnvActions.ITEM_ACTION == agent.temp_action: | ||||
|             if agent.temp_valid: | ||||
|                 if drop_off := self[c.DROP_OFF].by_pos(agent.pos): | ||||
|                     info_dict.update({f'{agent.name}_item_dropoff': 1}) | ||||
|                     info_dict.update({f'{agent.name}_item_drop_off': 1}) | ||||
|                     info_dict.update(item_drop_off=1) | ||||
|                     self.print(f'{agent.name} just dropped of an item at {drop_off.pos}.') | ||||
|                     reward += 0.5 | ||||
|                 else: | ||||
|                     info_dict.update({f'{agent.name}_item_pickup': 1}) | ||||
|                     info_dict.update(item_pickup=1) | ||||
|                     self.print(f'{agent.name} just picked up an item at {agent.pos}') | ||||
|                     reward += 0.1 | ||||
|             else: | ||||
|                 info_dict.update({f'{agent.name}_failed_item_action': 1}) | ||||
|                 self.print(f'{agent.name} just tried to pick up an item at {agent.pos}, but failed.') | ||||
|                 reward -= 0.1 | ||||
|                 if self[c.DROP_OFF].by_pos(agent.pos): | ||||
|                     info_dict.update({f'{agent.name}_failed_drop_off': 1}) | ||||
|                     info_dict.update(failed_drop_off=1) | ||||
|                     self.print(f'{agent.name} just tried to drop off at {agent.pos}, but failed.') | ||||
|                     reward -= 0.1 | ||||
|                 else: | ||||
|                     info_dict.update({f'{agent.name}_failed_item_action': 1}) | ||||
|                     info_dict.update(failed_pick_up=1) | ||||
|                     self.print(f'{agent.name} just tried to pick up an item at {agent.pos}, but failed.') | ||||
|                     reward -= 0.1 | ||||
|         return reward, info_dict | ||||
|  | ||||
|     def render_additional_assets(self, mode='human'): | ||||
| @@ -343,7 +352,7 @@ class ItemFactory(BaseFactory): | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     import random | ||||
|     render = False | ||||
|     render = True | ||||
|  | ||||
|     item_props = ItemProperties() | ||||
|  | ||||
|   | ||||
| @@ -33,7 +33,8 @@ class RecorderCallback(BaseCallback): | ||||
|  | ||||
|     def read_done(self, env_idx, done): | ||||
|         if done: | ||||
|             self._recorder_out_list.append({'steps': self._recorder_dict[env_idx]}) | ||||
|             self._recorder_out_list.append({'steps': self._recorder_dict[env_idx], | ||||
|                                             'episode': len(self._recorder_out_list)}) | ||||
|             self._recorder_dict[env_idx] = list() | ||||
|         else: | ||||
|             pass | ||||
|   | ||||
| @@ -10,7 +10,7 @@ from environments.helpers import IGNORED_DF_COLUMNS, MODEL_MAP | ||||
| from plotting.plotting import prepare_plot | ||||
|  | ||||
|  | ||||
| def compare_seed_runs(run_path: Union[str, PathLike]): | ||||
| def compare_seed_runs(run_path: Union[str, PathLike], use_tex: bool = False): | ||||
|     run_path = Path(run_path) | ||||
|     df_list = list() | ||||
|     for run, monitor_file in enumerate(run_path.rglob('monitor*.pick')): | ||||
| @@ -37,11 +37,12 @@ def compare_seed_runs(run_path: Union[str, PathLike]): | ||||
|         skip_n = round(df_melted['Episode'].max() * 0.02) | ||||
|         df_melted = df_melted[df_melted['Episode'] % skip_n == 0] | ||||
|  | ||||
|     prepare_plot(run_path / f'{run_path.name}_monitor_lineplot.png', df_melted) | ||||
|     prepare_plot(run_path / f'{run_path.name}_monitor_lineplot.png', df_melted, use_tex=use_tex) | ||||
|     print('Plotting done.') | ||||
|  | ||||
|  | ||||
| def compare_model_runs(run_path: Path, run_identifier: Union[str, int], parameter: Union[str, List[str]]): | ||||
| def compare_model_runs(run_path: Path, run_identifier: Union[str, int], parameter: Union[str, List[str]], | ||||
|                        use_tex: bool = False): | ||||
|     run_path = Path(run_path) | ||||
|     df_list = list() | ||||
|     parameter = [parameter] if isinstance(parameter, str) else parameter | ||||
| @@ -75,12 +76,13 @@ def compare_model_runs(run_path: Path, run_identifier: Union[str, int], paramete | ||||
|         df_melted = df_melted[df_melted['Episode'] % skip_n == 0] | ||||
|  | ||||
|     style = 'Measurement' if len(columns) > 1 else None | ||||
|     prepare_plot(run_path / f'{run_identifier}_compare_{parameter}.png', df_melted, hue='Model', style=style) | ||||
|     prepare_plot(run_path / f'{run_identifier}_compare_{parameter}.png', df_melted, hue='Model', style=style, | ||||
|                  use_tex=use_tex) | ||||
|     print('Plotting done.') | ||||
|  | ||||
|  | ||||
| def compare_all_parameter_runs(run_root_path: Path, parameter: Union[str, List[str]], | ||||
|                                param_names: Union[List[str], None] = None, str_to_ignore=''): | ||||
|                                param_names: Union[List[str], None] = None, str_to_ignore='', use_tex: bool = False): | ||||
|     run_root_path = Path(run_root_path) | ||||
|     df_list = list() | ||||
|     parameter = [parameter] if isinstance(parameter, str) else parameter | ||||
| @@ -151,5 +153,6 @@ def compare_all_parameter_runs(run_root_path: Path, parameter: Union[str, List[s | ||||
|                                       value_name="Score") | ||||
|  | ||||
|     style = 'Measurement' if len(columns) > 1 else None | ||||
|     prepare_plot(run_root_path / f'compare_{parameter}.png', df_melted, hue='Parameter Combination', style=style) | ||||
|     prepare_plot(run_root_path / f'compare_{parameter}.png', df_melted, hue='Parameter Combination', | ||||
|                  style=style, use_tex=use_tex) | ||||
|     print('Plotting done.') | ||||
|   | ||||
| @@ -26,21 +26,35 @@ def plot(filepath, ext='png'): | ||||
|     plt.clf() | ||||
|  | ||||
|  | ||||
| def prepare_plot(filepath, results_df, ext='png', hue='Measurement', style=None): | ||||
| def prepare_tex(df, hue, style, hue_order): | ||||
|     sns.set(rc={'text.usetex': True}, style='whitegrid') | ||||
|     lineplot = sns.lineplot(data=df, x='Episode', y='Score', ci=95, palette=PALETTE, | ||||
|                             hue_order=hue_order, hue=hue, style=style) | ||||
|     # lineplot.set_title(f'{sorted(list(df["Measurement"].unique()))}') | ||||
|     return lineplot | ||||
|  | ||||
|  | ||||
| def prepare_plt(df, hue, style, hue_order): | ||||
|     print('Struggling to plot Figure using LaTeX - going back to normal.') | ||||
|     plt.close('all') | ||||
|     sns.set(rc={'text.usetex': False}, style='whitegrid') | ||||
|     lineplot = sns.lineplot(data=df, x='Episode', y='Score', hue=hue, style=style, | ||||
|                             ci=95, palette=PALETTE, hue_order=hue_order) | ||||
|     # lineplot.set_title(f'{sorted(list(df["Measurement"].unique()))}') | ||||
|     return lineplot | ||||
|  | ||||
|  | ||||
| def prepare_plot(filepath, results_df, ext='png', hue='Measurement', style=None, use_tex=False): | ||||
|     df = results_df.copy() | ||||
|     df[hue] = df[hue].str.replace('_', '-') | ||||
|     hue_order = sorted(list(df[hue].unique())) | ||||
|     try: | ||||
|         sns.set(rc={'text.usetex': True}, style='whitegrid') | ||||
|         lineplot = sns.lineplot(data=df, x='Episode', y='Score', ci=95, palette=PALETTE, | ||||
|                                 hue_order=hue_order, hue=hue, style=style) | ||||
|         # lineplot.set_title(f'{sorted(list(df["Measurement"].unique()))}') | ||||
|         plot(filepath, ext=ext)  # plot raises errors not lineplot! | ||||
|     except (FileNotFoundError, RuntimeError): | ||||
|         print('Struggling to plot Figure using LaTeX - going back to normal.') | ||||
|         plt.close('all') | ||||
|         sns.set(rc={'text.usetex': False}, style='whitegrid') | ||||
|         lineplot = sns.lineplot(data=df, x='Episode', y='Score', hue=hue, style=style, | ||||
|                                      ci=95, palette=PALETTE, hue_order=hue_order) | ||||
|         # lineplot.set_title(f'{sorted(list(df["Measurement"].unique()))}') | ||||
|     if use_tex: | ||||
|         try: | ||||
|             _ = prepare_tex(df, hue, style, hue_order) | ||||
|             plot(filepath, ext=ext)  # plot raises errors not lineplot! | ||||
|         except (FileNotFoundError, RuntimeError): | ||||
|             _ = prepare_plt(df, hue, style, hue_order) | ||||
|             plot(filepath, ext=ext) | ||||
|     else: | ||||
|         _ = prepare_plt(df, hue, style, hue_order) | ||||
|         plot(filepath, ext=ext) | ||||
|   | ||||
| @@ -1,6 +1,7 @@ | ||||
| import warnings | ||||
| from pathlib import Path | ||||
|  | ||||
| import numpy as np | ||||
| import yaml | ||||
|  | ||||
| from environments import helpers as h | ||||
| @@ -14,36 +15,42 @@ warnings.filterwarnings('ignore', category=UserWarning) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|  | ||||
|     model_name = 'PPO_1631187073' | ||||
|     model_name = 'DQN_1631187073' | ||||
|     run_id = 0 | ||||
|     seed = 69 | ||||
|     out_path = Path(__file__).parent / 'study_out' / 'e_1_1631709932' / 'no_obs' / 'dirt' / 'A2C_1631709932' / '0_A2C_1631709932' | ||||
|     model_path = out_path / model_name | ||||
|     out_path = Path('debug_out/DQN_1635176929/0_DQN_1635176929') | ||||
|     model_path = out_path | ||||
|  | ||||
|     with (out_path / f'env_params.json').open('r') as f: | ||||
|         env_kwargs = yaml.load(f, Loader=yaml.FullLoader) | ||||
|         env_kwargs.update(additional_agent_placeholder=None) | ||||
|         # env_kwargs.update(verbose=False, env_seed=seed, record_episodes=True, parse_doors=True) | ||||
|         env_kwargs.update(additional_agent_placeholder=None, n_agents=4) | ||||
|         if gain_amount := env_kwargs.get('dirt_properties', {}).get('gain_amount', None): | ||||
|             env_kwargs['dirt_properties']['max_spawn_amount'] = gain_amount | ||||
|             del env_kwargs['dirt_properties']['gain_amount'] | ||||
|  | ||||
|         env_kwargs.update(record_episodes=True) | ||||
|  | ||||
|     this_model = out_path / 'model.zip' | ||||
|  | ||||
|     model_cls = next(val for key, val in h.MODEL_MAP.items() if key in model_name) | ||||
|     model = model_cls.load(this_model) | ||||
|     models = [model_cls.load(this_model) for _ in range(4)] | ||||
|  | ||||
|     with RecorderCallback(filepath=Path() / 'recorder_out_doors.json') as recorder: | ||||
|     with RecorderCallback(filepath=Path() / 'recorder_out_DQN.json') as recorder: | ||||
|         # Init Env | ||||
|         with DirtFactory(**env_kwargs) as env: | ||||
|         with DirtItemFactory(**env_kwargs) as env: | ||||
|             obs_shape = env.observation_space.shape | ||||
|             # Evaluation Loop for i in range(n Episodes) | ||||
|             for episode in range(5): | ||||
|                 obs = env.reset() | ||||
|                 env_state = env.reset() | ||||
|                 rew, done_bool = 0, False | ||||
|                 while not done_bool: | ||||
|                     action = model.predict(obs, deterministic=False)[0] | ||||
|                     env_state, step_r, done_bool, info_obj = env.step(action[0]) | ||||
|                     actions = [model.predict( | ||||
|                         np.stack([env_state[i][j] for i in range(env_state.shape[0])]), | ||||
|                         deterministic=True)[0] for j, model in enumerate(models)] | ||||
|                     env_state, step_r, done_bool, info_obj = env.step(actions) | ||||
|                     recorder.read_info(0, info_obj) | ||||
|                     rew += step_r | ||||
|                     env.render() | ||||
|                     # env.render() | ||||
|                     if done_bool: | ||||
|                         recorder.read_done(0, done_bool) | ||||
|                         break | ||||
|   | ||||
							
								
								
									
										102
									
								
								studies/e_1.py
									
									
									
									
									
								
							
							
						
						
									
										102
									
								
								studies/e_1.py
									
									
									
									
									
								
							| @@ -33,7 +33,7 @@ import pandas as pd | ||||
| import seaborn as sns | ||||
|  | ||||
| # Define a global study save path | ||||
| start_time = 1634134997  # int(time.time()) | ||||
| start_time = 163519000  # int(time.time()) | ||||
| study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}' | ||||
|  | ||||
| """ | ||||
| @@ -70,7 +70,7 @@ There are further distinctions to be made: | ||||
|  | ||||
|  | ||||
| def policy_model_kwargs(): | ||||
|     return dict(ent_coef=0.01) | ||||
|     return dict(ent_coef=0.05) | ||||
|  | ||||
|  | ||||
| def dqn_model_kwargs(): | ||||
| @@ -93,21 +93,23 @@ def encapsule_env_factory(env_fctry, env_kwrgs): | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     train_steps = 5e5 | ||||
|     train_steps = 8e5 | ||||
|  | ||||
|     # Define Global Env Parameters | ||||
|     # Define properties object parameters | ||||
|     move_props = MovementProperties(allow_diagonal_movement=True, | ||||
|                                     allow_square_movement=True, | ||||
|                                     allow_no_op=False) | ||||
|     dirt_props = DirtProperties(clean_amount=2, gain_amount=0.1, max_global_amount=20, | ||||
|                                 max_local_amount=1, spawn_frequency=15, max_spawn_ratio=0.05, | ||||
|     dirt_props = DirtProperties(initial_dirt_ratio=0.35, initial_dirt_spawn_r_var=0.1, | ||||
|                                 clean_amount=0.34, | ||||
|                                 max_spawn_amount=0.1, max_global_amount=20, | ||||
|                                 max_local_amount=1, spawn_frequency=0, max_spawn_ratio=0.05, | ||||
|                                 dirt_smear_amount=0.0, agent_can_interact=True) | ||||
|     item_props = ItemProperties(n_items=10, agent_can_interact=True, | ||||
|                                 spawn_frequency=30, n_drop_off_locations=2, | ||||
|                                 max_agent_inventory_capacity=15) | ||||
|     factory_kwargs = dict(n_agents=1, | ||||
|                           pomdp_r=2, max_steps=400, parse_doors=False, | ||||
|                           pomdp_r=2, max_steps=400, parse_doors=True, | ||||
|                           level_name='rooms', frames_to_stack=3, | ||||
|                           omit_agent_in_obs=True, combin_agent_obs=True, record_episodes=False, | ||||
|                           cast_shadows=True, doors_have_area=False, verbose=False, | ||||
| @@ -124,9 +126,9 @@ if __name__ == '__main__': | ||||
|     # Define parameter versions according with #1,2[1,0,N],3 | ||||
|     observation_modes = { | ||||
|         #  Fill-value = 0 | ||||
|         'seperate_0': dict(additional_env_kwargs=dict(additional_agent_placeholder=0)), | ||||
|          # DEACTIVATED 'seperate_0': dict(additional_env_kwargs=dict(additional_agent_placeholder=0)), | ||||
|         #  Fill-value = 1 | ||||
|         'seperate_1': dict(additional_env_kwargs=dict(additional_agent_placeholder=1)), | ||||
|         # DEACTIVATED 'seperate_1': dict(additional_env_kwargs=dict(additional_agent_placeholder=1)), | ||||
|         #  Fill-value = N(0, 1) | ||||
|         'seperate_N': dict(additional_env_kwargs=dict(additional_agent_placeholder='N')), | ||||
|         #  Further Adjustments are done post-training | ||||
| @@ -137,10 +139,10 @@ if __name__ == '__main__': | ||||
|  | ||||
|     # Train starts here ############################################################ | ||||
|     # Build Major Loop  parameters, parameter versions, Env Classes and models | ||||
|     if False: | ||||
|     if True: | ||||
|         for observation_mode in observation_modes.keys(): | ||||
|             for env_name in env_names: | ||||
|                 for model_cls in h.MODEL_MAP.values(): | ||||
|                 for model_cls in [h.MODEL_MAP['A2C'], h.MODEL_MAP['DQN']]: | ||||
|                     # Create an identifier, which is unique for every combination and easy to read in filesystem | ||||
|                     identifier = f'{model_cls.__name__}_{start_time}' | ||||
|                     # Train each combination per seed | ||||
| @@ -154,6 +156,8 @@ if __name__ == '__main__': | ||||
|                         env_kwargs.update(env_seed=seed) | ||||
|                         # Output folder | ||||
|                         seed_path = combination_path / f'{str(seed)}_{identifier}' | ||||
|                         if (seed_path / 'monitor.pick').exists(): | ||||
|                             continue | ||||
|                         seed_path.mkdir(parents=True, exist_ok=True) | ||||
|  | ||||
|                         # Monitor Init | ||||
| @@ -163,7 +167,7 @@ if __name__ == '__main__': | ||||
|                         if model_cls.__name__ in ["PPO", "A2C"]: | ||||
|                             # env_factory = env_class(**env_kwargs) | ||||
|                             env_factory = SubprocVecEnv([encapsule_env_factory(env_class, env_kwargs) | ||||
|                                                          for _ in range(1)], start_method="spawn") | ||||
|                                                          for _ in range(6)], start_method="spawn") | ||||
|                             model_kwargs = policy_model_kwargs() | ||||
|  | ||||
|                         elif model_cls.__name__ in ["RegDQN", "DQN", "QRDQN"]: | ||||
| @@ -197,15 +201,20 @@ if __name__ == '__main__': | ||||
|                         gc.collect() | ||||
|  | ||||
|                     # Compare performance runs, for each seed within a model | ||||
|                     compare_seed_runs(combination_path) | ||||
|                     compare_seed_runs(combination_path, use_tex=False) | ||||
|                     # Better be save then sorry: Clean up! | ||||
|                     del model_kwargs, env_kwargs | ||||
|                     import gc | ||||
|                     gc.collect() | ||||
|                     try: | ||||
|                         del env_kwargs | ||||
|                         del model_kwargs | ||||
|                         import gc | ||||
|                         gc.collect() | ||||
|                     except NameError: | ||||
|                         pass | ||||
|  | ||||
|                 # Compare performance runs, for each model | ||||
|                 # FIXME: Check THIS!!!! | ||||
|                 compare_model_runs(study_root_path / observation_mode / env_name, f'{start_time}', 'step_reward') | ||||
|                 compare_model_runs(study_root_path / observation_mode / env_name, f'{start_time}', 'step_reward', | ||||
|                                    use_tex=False) | ||||
|                 pass | ||||
|             pass | ||||
|         pass | ||||
| @@ -215,7 +224,7 @@ if __name__ == '__main__': | ||||
|     # Evaluation starts here ##################################################### | ||||
|     # First Iterate over every model and monitor "as trained" | ||||
|     baseline_monitor_file = 'e_1_baseline_monitor.pick' | ||||
|     if False: | ||||
|     if True: | ||||
|         render = False | ||||
|         for observation_mode in observation_modes: | ||||
|             obs_mode_path = next(x for x in study_root_path.iterdir() if x.is_dir() and x.name == observation_mode) | ||||
| @@ -312,8 +321,9 @@ if __name__ == '__main__': | ||||
|     # Plotting | ||||
|     if True: | ||||
|         # TODO: Plotting | ||||
|         df_list = list() | ||||
|  | ||||
|         for observation_folder in (x for x in study_root_path.iterdir() if x.is_dir()): | ||||
|             df_list = list() | ||||
|             for env_folder in (x for x in observation_folder.iterdir() if x.is_dir()): | ||||
|                 for model_folder in (x for x in env_folder.iterdir() if x.is_dir()): | ||||
|                     # Gather per seed results in this list | ||||
| @@ -334,28 +344,48 @@ if __name__ == '__main__': | ||||
|                             monitor_df['obs_mode'] = monitor_df['obs_mode'].astype(str) | ||||
|                             monitor_df['model'] = model_folder.name.split('_')[0] | ||||
|  | ||||
|  | ||||
|                             df_list.append(monitor_df) | ||||
|  | ||||
|         id_cols = ['monitor', 'env', 'obs_mode', 'model'] | ||||
|             id_cols = ['monitor', 'env', 'obs_mode', 'model'] | ||||
|  | ||||
|         df = pd.concat(df_list, ignore_index=True) | ||||
|         df = df.fillna(0) | ||||
|             df = pd.concat(df_list, ignore_index=True) | ||||
|             df = df.fillna(0) | ||||
|  | ||||
|         for id_col in id_cols: | ||||
|             df[id_col] = df[id_col].astype(str) | ||||
|             for id_col in id_cols: | ||||
|                 df[id_col] = df[id_col].astype(str) | ||||
|  | ||||
|         df_grouped = df.groupby(id_cols + ['seed'] | ||||
|                                 ).agg({key: 'sum' if "Agent" in key else 'mean' for key in df.columns | ||||
|                                        if key not in (id_cols + ['seed'])}) | ||||
|         df_melted = df_grouped.reset_index().melt(id_vars=id_cols, | ||||
|                                                   value_vars='step_reward', var_name="Measurement", | ||||
|                                                   value_name="Score") | ||||
|             if True: | ||||
|                 # df['fail_sum'] = df.loc[:, df.columns.str.contains("failed")].sum(1) | ||||
|                 df['pick_up'] = df.loc[:, df.columns.str.contains("]_item_pickup")].sum(1) | ||||
|                 df['drop_off'] = df.loc[:, df.columns.str.contains("]_item_dropoff")].sum(1) | ||||
|                 df['failed_item_action'] = df.loc[:, df.columns.str.contains("]_failed_item_action")].sum(1) | ||||
|                 df['failed_cleanup'] = df.loc[:, df.columns.str.contains("]_failed_dirt_cleanup")].sum(1) | ||||
|                 df['coll_lvl'] = df.loc[:, df.columns.str.contains("]_vs_LEVEL")].sum(1) | ||||
|                 df['coll_agent'] = df.loc[:, df.columns.str.contains("]_vs_Agent")].sum(1) / 2 | ||||
|                 # df['collisions'] = df['coll_lvl'] + df['coll_agent'] | ||||
|  | ||||
|         c = sns.catplot(data=df_melted, x='obs_mode', hue='monitor', row='model', col='env', y='Score', sharey=False, | ||||
|                         kind="box", height=4, aspect=.7, legend_out=True) | ||||
|         c.set_xticklabels(rotation=65, horizontalalignment='right') | ||||
|         plt.tight_layout(pad=2) | ||||
|         plt.savefig(study_root_path / f'results_{n_agents}_agents.png') | ||||
|             value_vars = ['pick_up', 'drop_off', 'failed_item_action', 'failed_cleanup', | ||||
|                           'coll_lvl', 'coll_agent', 'dirt_cleaned'] | ||||
|  | ||||
|     pass | ||||
|             df_grouped = df.groupby(id_cols + ['seed'] | ||||
|                                     ).agg({key: 'sum' if "Agent" in key else 'mean' for key in df.columns | ||||
|                                            if key not in (id_cols + ['seed'])}) | ||||
|             df_melted = df_grouped.reset_index().melt(id_vars=id_cols, | ||||
|                                                       value_vars=value_vars,  # 'step_reward', | ||||
|                                                       var_name="Measurement", | ||||
|                                                       value_name="Score") | ||||
|             # df_melted["Measurements"] = df_melted["Measurement"] + " " + df_melted["monitor"] | ||||
|  | ||||
|             # Plotting | ||||
|             fig, ax = plt.subplots(figsize=(11.7, 8.27)) | ||||
|  | ||||
|             c = sns.catplot(data=df_melted[df_melted['obs_mode'] == observation_folder.name], | ||||
|                             x='Measurement', hue='monitor', row='model', col='env', y='Score', | ||||
|                             sharey=False, kind="box", height=4, aspect=.7, legend_out=True, | ||||
|                             showfliers=False) | ||||
|             c.set_xticklabels(rotation=65, horizontalalignment='right') | ||||
|             c.fig.subplots_adjust(top=0.9)  # adjust the Figure in rp | ||||
|             c.fig.suptitle(f"Cat plot for {observation_folder.name}") | ||||
|             plt.tight_layout(pad=2) | ||||
|             plt.savefig(study_root_path / f'results_{n_agents}_agents_{observation_folder.name}.png') | ||||
|         pass | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Steffen Illium
					Steffen Illium