From 575eec9ee62e295ba30ff5c0418518dbd369cd25 Mon Sep 17 00:00:00 2001 From: steffen-illium Date: Wed, 19 May 2021 15:20:06 +0200 Subject: [PATCH] monitor Callback Monitor Class internal monitor list --- environments/factory/_factory_monitor.py | 47 ---------- environments/factory/base_factory.py | 17 +++- environments/factory/simple_factory.py | 2 +- .../factory/simple_factory_getting_dirty.py | 36 ++++---- environments/logging/__init__.py | 0 environments/logging/monitor.py | 92 +++++++++++++++++++ 6 files changed, 122 insertions(+), 72 deletions(-) create mode 100644 environments/logging/__init__.py create mode 100644 environments/logging/monitor.py diff --git a/environments/factory/_factory_monitor.py b/environments/factory/_factory_monitor.py index 8c71e7b..e69de29 100644 --- a/environments/factory/_factory_monitor.py +++ b/environments/factory/_factory_monitor.py @@ -1,47 +0,0 @@ -from collections import defaultdict - - -class FactoryMonitor: - - def __init__(self, env): - self._env = env - self._monitor = defaultdict(lambda: defaultdict(lambda: 0)) - self._last_vals = defaultdict(lambda: 0) - - def __iter__(self): - for key, value in self._monitor.items(): - yield key, dict(value) - - def add(self, key, value, step=None): - assert step is None or step >= 1 # Is this good practice? - step = step or self._env.steps - self._last_vals[key] = self._last_vals[key] + value - self._monitor[key][step] = self._last_vals[key] - return self._last_vals[key] - - def set(self, key, value, step=None): - assert step is None or step >= 1 # Is this good practice? - step = step or self._env.steps - self._last_vals[key] = value - self._monitor[key][step] = self._last_vals[key] - return self._last_vals[key] - - def remove(self, key, value, step=None): - assert step is None or step >= 1 # Is this good practice? 
- step = step or self._env.steps - self._last_vals[key] = self._last_vals[key] - value - self._monitor[key][step] = self._last_vals[key] - return self._last_vals[key] - - def to_dict(self): - return dict(self) - - def to_pd_dataframe(self): - import pandas as pd - df = pd.DataFrame.from_dict(self.to_dict()) - df.loc[0] = df.iloc[0].fillna(0) - df = df.fillna(method='ffill') - return df - - def reset(self): - raise RuntimeError("DO NOT DO THIS! Always initalize a new Monitor per Env-Run.") \ No newline at end of file diff --git a/environments/factory/base_factory.py b/environments/factory/base_factory.py index 3a1a03a..89c7e8a 100644 --- a/environments/factory/base_factory.py +++ b/environments/factory/base_factory.py @@ -6,7 +6,7 @@ import numpy as np from pathlib import Path from environments import helpers as h -from environments.factory._factory_monitor import FactoryMonitor +from environments.logging.monitor import FactoryMonitor class AgentState: @@ -18,6 +18,7 @@ class AgentState: self.collision_vector = None self.action_valid = None self.pos = None + self.info = {} @property def collisions(self): @@ -41,6 +42,10 @@ class BaseFactory(gym.Env): def observation_space(self): return spaces.Box(low=-1, high=1, shape=self.state.shape, dtype=np.float32) + @property + def monitor_as_df_list(self): + return [x.to_pd_dataframe() for x in self._monitor_list] + @property def movement_actions(self): return (int(self.allow_vertical_movement) + int(self.allow_horizontal_movement)) * 4 @@ -55,15 +60,16 @@ class BaseFactory(gym.Env): self.allow_vertical_movement = True self.allow_horizontal_movement = True self.allow_no_OP = True + self._monitor_list = list() self._registered_actions = self.movement_actions + int(self.allow_no_OP) self.level = h.one_hot_level( h.parse_level(Path(__file__).parent / h.LEVELS_DIR / f'{level}.txt') ) self.slice_strings = {0: 'level', **{i: f'agent#{i}' for i in range(1, self.n_agents+1)}} - if not self.__class__.__subclasses__(): - self.reset() - 
else: - self.register_additional_actions() + self.reset() + + def __init_subclass__(cls): + print(cls) def register_additional_actions(self): raise NotImplementedError('Please register additional actions ') @@ -73,6 +79,7 @@ class BaseFactory(gym.Env): self.steps = 0 self.cumulative_reward = 0 self.monitor = FactoryMonitor(self) + self._monitor_list.append(self.monitor) self.agent_states = [] # Agent placement ... agents = np.zeros((self.n_agents, *self.level.shape), dtype=np.int8) diff --git a/environments/factory/simple_factory.py b/environments/factory/simple_factory.py index b954b43..26bdfec 100644 --- a/environments/factory/simple_factory.py +++ b/environments/factory/simple_factory.py @@ -37,7 +37,7 @@ if __name__ == '__main__': import random factory = SimpleFactory(n_agents=1, max_dirt=8) monitor_list = list() - for epoch in range(100): + for epoch in range(5): random_actions = [random.randint(0, 7) for _ in range(200)] state, r, done, _ = factory.reset() for action in random_actions: diff --git a/environments/factory/simple_factory_getting_dirty.py b/environments/factory/simple_factory_getting_dirty.py index 2906305..533522e 100644 --- a/environments/factory/simple_factory_getting_dirty.py +++ b/environments/factory/simple_factory_getting_dirty.py @@ -10,6 +10,7 @@ from environments import helpers as h from environments.factory.renderer import Renderer from environments.factory.renderer import Entity +from environments.logging.monitor import MonitorCallback DIRT_INDEX = -1 @@ -24,9 +25,12 @@ class DirtProperties: class GettingDirty(BaseFactory): + def register_additional_actions(self): + self._registered_actions += 1 + return True + def _is_clean_up_action(self, action): - # Account for NoOP; remove -1 when activating NoOP - return self.movement_actions + 1 - 1 == action + return self.action_space.n - 1 == action def __init__(self, *args, dirt_properties: DirtProperties, **kwargs): self._dirt_properties = dirt_properties @@ -141,20 +145,14 @@ if __name__ == 
'__main__': dirt_props = DirtProperties() factory = GettingDirty(n_agents=2, dirt_properties=dirt_props) - monitor_list = list() - for epoch in range(100): - random_actions = [(random.randint(0, 8), random.randint(0, 8)) for _ in range(200)] - env_state, reward, done_bool, _ = factory.reset() - for agent_i_action in random_actions: - env_state, reward, done_bool, info_obj = factory.step(agent_i_action) - if render: - factory.render() - monitor_list.append(factory.monitor.to_pd_dataframe()) - print(f'Factory run {epoch} done, reward is:\n {reward}') - - from pathlib import Path - import pickle - out_path = Path('debug_out') - out_path.mkdir(exist_ok=True, parents=True) - with (out_path / 'monitor.pick').open('wb') as f: - pickle.dump(monitor_list, f, protocol=pickle.HIGHEST_PROTOCOL) + with MonitorCallback(factory): + for epoch in range(100): + random_actions = [(random.randint(0, 8), random.randint(0, 8)) for _ in range(200)] + env_state, reward, done_bool, _ = factory.reset() + for agent_i_action in random_actions: + env_state, reward, done_bool, info_obj = factory.step(agent_i_action) + if render: + factory.render() + if done_bool: + break + print(f'Factory run {epoch} done, reward is:\n {reward}') diff --git a/environments/logging/__init__.py b/environments/logging/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/environments/logging/monitor.py b/environments/logging/monitor.py new file mode 100644 index 0000000..f8f5858 --- /dev/null +++ b/environments/logging/monitor.py @@ -0,0 +1,92 @@ +import pickle +from pathlib import Path +from collections import defaultdict + +from stable_baselines3.common.callbacks import BaseCallback + + +class FactoryMonitor: + + def __init__(self, env): + self._env = env + self._monitor = defaultdict(lambda: defaultdict(lambda: 0)) + self._last_vals = defaultdict(lambda: 0) + + def __iter__(self): + for key, value in self._monitor.items(): + yield key, dict(value) + + def add(self, key, value, step=None): + assert 
step is None or step >= 1 # Is this good practice? + step = step or self._env.steps + self._last_vals[key] = self._last_vals[key] + value + self._monitor[key][step] = self._last_vals[key] + return self._last_vals[key] + + def set(self, key, value, step=None): + assert step is None or step >= 1 # Is this good practice? + step = step or self._env.steps + self._last_vals[key] = value + self._monitor[key][step] = self._last_vals[key] + return self._last_vals[key] + + def remove(self, key, value, step=None): + assert step is None or step >= 1 # Is this good practice? + step = step or self._env.steps + self._last_vals[key] = self._last_vals[key] - value + self._monitor[key][step] = self._last_vals[key] + return self._last_vals[key] + + def to_dict(self): + return dict(self) + + def to_pd_dataframe(self): + import pandas as pd + df = pd.DataFrame.from_dict(self.to_dict()) + try: + df.loc[0] = df.iloc[0].fillna(0) + except IndexError: + return None + df = df.fillna(method='ffill') + return df + + def reset(self): + raise RuntimeError("DO NOT DO THIS! 
Always initialize a new Monitor per Env-Run.") + + +class MonitorCallback(BaseCallback): + + def __init__(self, env, outpath='debug_out', filename='monitor'): + super(MonitorCallback, self).__init__() + self._outpath = Path(outpath) + self._filename = filename + self.out_file = self._outpath / f'{self._filename.split(".")[0]}.pick' + self.env = env + self.started = False + self.closed = False + + def __enter__(self): + self._on_training_start() + + def __exit__(self, exc_type, exc_val, exc_tb): + self._on_training_end() + + def _on_training_start(self) -> None: + if self.started: + pass + else: + self.out_file.parent.mkdir(exist_ok=True, parents=True) + self.started = True + pass + + def _on_training_end(self) -> None: + if self.closed: + pass + else: + # self.out_file.unlink(missing_ok=True) + with self.out_file.open('wb') as f: + pickle.dump(self.env.monitor_as_df_list, f, protocol=pickle.HIGHEST_PROTOCOL) + self.closed = True + + def _on_step(self) -> bool: + pass