Monitor Callback

Monitor Class
internal monitor list
This commit is contained in:
steffen-illium 2021-05-19 15:20:06 +02:00
parent 38ffb746e3
commit 575eec9ee6
6 changed files with 122 additions and 72 deletions

View File

@ -1,47 +0,0 @@
from collections import defaultdict
class FactoryMonitor:
    """Tracks running (cumulative) metric values per environment step.

    Each metric key maps to a step-indexed series of running totals,
    exportable as a plain dict or a forward-filled pandas DataFrame.
    """

    def __init__(self, env):
        self._env = env
        # metric -> step -> running total (missing entries default to 0)
        self._monitor = defaultdict(lambda: defaultdict(lambda: 0))
        # metric -> latest running total
        self._last_vals = defaultdict(lambda: 0)

    def __iter__(self):
        # Yield (metric, {step: value}) pairs with plain-dict values.
        for metric, series in self._monitor.items():
            yield metric, dict(series)

    def add(self, key, value, step=None):
        assert step is None or step >= 1  # Is this good practice?
        at = step or self._env.steps
        total = self._last_vals[key] + value
        self._last_vals[key] = total
        self._monitor[key][at] = total
        return total

    def set(self, key, value, step=None):
        assert step is None or step >= 1  # Is this good practice?
        at = step or self._env.steps
        self._last_vals[key] = value
        self._monitor[key][at] = value
        return value

    def remove(self, key, value, step=None):
        assert step is None or step >= 1  # Is this good practice?
        at = step or self._env.steps
        total = self._last_vals[key] - value
        self._last_vals[key] = total
        self._monitor[key][at] = total
        return total

    def to_dict(self):
        # dict(self) drives __iter__, producing {metric: {step: value}}.
        return dict(self)

    def to_pd_dataframe(self):
        import pandas as pd
        frame = pd.DataFrame.from_dict(self.to_dict())
        # Seed a step-0 row from the first recorded row so ffill has a base.
        frame.loc[0] = frame.iloc[0].fillna(0)
        return frame.fillna(method='ffill')

    def reset(self):
        raise RuntimeError("DO NOT DO THIS! Always initalize a new Monitor per Env-Run.")

View File

@ -6,7 +6,7 @@ import numpy as np
from pathlib import Path
from environments import helpers as h
from environments.factory._factory_monitor import FactoryMonitor
from environments.logging.monitor import FactoryMonitor
class AgentState:
@ -18,6 +18,7 @@ class AgentState:
self.collision_vector = None
self.action_valid = None
self.pos = None
self.info = {}
@property
def collisions(self):
@ -41,6 +42,10 @@ class BaseFactory(gym.Env):
def observation_space(self):
return spaces.Box(low=-1, high=1, shape=self.state.shape, dtype=np.float32)
@property
def monitor_as_df_list(self):
return [x.to_pd_dataframe() for x in self._monitor_list]
@property
def movement_actions(self):
return (int(self.allow_vertical_movement) + int(self.allow_horizontal_movement)) * 4
@ -55,15 +60,16 @@ class BaseFactory(gym.Env):
self.allow_vertical_movement = True
self.allow_horizontal_movement = True
self.allow_no_OP = True
self._monitor_list = list()
self._registered_actions = self.movement_actions + int(self.allow_no_OP)
self.level = h.one_hot_level(
h.parse_level(Path(__file__).parent / h.LEVELS_DIR / f'{level}.txt')
)
self.slice_strings = {0: 'level', **{i: f'agent#{i}' for i in range(1, self.n_agents+1)}}
if not self.__class__.__subclasses__():
self.reset()
else:
self.register_additional_actions()
self.reset()
def __init_subclass__(cls):
print(cls)
def register_additional_actions(self):
raise NotImplementedError('Please register additional actions ')
@ -73,6 +79,7 @@ class BaseFactory(gym.Env):
self.steps = 0
self.cumulative_reward = 0
self.monitor = FactoryMonitor(self)
self._monitor_list.append(self.monitor)
self.agent_states = []
# Agent placement ...
agents = np.zeros((self.n_agents, *self.level.shape), dtype=np.int8)

View File

@ -37,7 +37,7 @@ if __name__ == '__main__':
import random
factory = SimpleFactory(n_agents=1, max_dirt=8)
monitor_list = list()
for epoch in range(100):
for epoch in range(5):
random_actions = [random.randint(0, 7) for _ in range(200)]
state, r, done, _ = factory.reset()
for action in random_actions:

View File

@ -10,6 +10,7 @@ from environments import helpers as h
from environments.factory.renderer import Renderer
from environments.factory.renderer import Entity
from environments.logging.monitor import MonitorCallback
DIRT_INDEX = -1
@ -24,9 +25,12 @@ class DirtProperties:
class GettingDirty(BaseFactory):
def register_additional_actions(self):
self._registered_actions += 1
return True
def _is_clean_up_action(self, action):
# Account for NoOP; remove -1 when activating NoOP
return self.movement_actions + 1 - 1 == action
return self.action_space.n - 1 == action
def __init__(self, *args, dirt_properties: DirtProperties, **kwargs):
self._dirt_properties = dirt_properties
@ -141,20 +145,14 @@ if __name__ == '__main__':
dirt_props = DirtProperties()
factory = GettingDirty(n_agents=2, dirt_properties=dirt_props)
monitor_list = list()
for epoch in range(100):
random_actions = [(random.randint(0, 8), random.randint(0, 8)) for _ in range(200)]
env_state, reward, done_bool, _ = factory.reset()
for agent_i_action in random_actions:
env_state, reward, done_bool, info_obj = factory.step(agent_i_action)
if render:
factory.render()
monitor_list.append(factory.monitor.to_pd_dataframe())
print(f'Factory run {epoch} done, reward is:\n {reward}')
from pathlib import Path
import pickle
out_path = Path('debug_out')
out_path.mkdir(exist_ok=True, parents=True)
with (out_path / 'monitor.pick').open('wb') as f:
pickle.dump(monitor_list, f, protocol=pickle.HIGHEST_PROTOCOL)
with MonitorCallback(factory):
for epoch in range(100):
random_actions = [(random.randint(0, 8), random.randint(0, 8)) for _ in range(200)]
env_state, reward, done_bool, _ = factory.reset()
for agent_i_action in random_actions:
env_state, reward, done_bool, info_obj = factory.step(agent_i_action)
if render:
factory.render()
if done_bool:
break
print(f'Factory run {epoch} done, reward is:\n {reward}')

View File

View File

@ -0,0 +1,92 @@
import pickle
from pathlib import Path
from collections import defaultdict
from stable_baselines3.common.callbacks import BaseCallback
class FactoryMonitor:
    """Records cumulative metric values per environment step.

    Values are stored as running totals keyed by metric name and step
    number, so every metric's history can be exported as a plain dict
    or a forward-filled pandas DataFrame.
    """

    def __init__(self, env):
        # env must expose a ``steps`` counter, used as the default step index.
        self._env = env
        # metric -> step -> running total (missing entries default to 0)
        self._monitor = defaultdict(lambda: defaultdict(lambda: 0))
        # metric -> latest running total
        self._last_vals = defaultdict(lambda: 0)

    def __iter__(self):
        """Yield (metric, {step: value}) pairs with plain-dict values."""
        for key, value in self._monitor.items():
            yield key, dict(value)

    def add(self, key, value, step=None):
        """Increase the running total of *key* by *value*; return the new total."""
        # Steps are 1-based; step=0 would be clobbered by ``step or ...`` below.
        assert step is None or step >= 1
        step = step or self._env.steps
        self._last_vals[key] = self._last_vals[key] + value
        self._monitor[key][step] = self._last_vals[key]
        return self._last_vals[key]

    def set(self, key, value, step=None):
        """Overwrite the running total of *key* with *value*; return it."""
        assert step is None or step >= 1
        step = step or self._env.steps
        self._last_vals[key] = value
        self._monitor[key][step] = self._last_vals[key]
        return self._last_vals[key]

    def remove(self, key, value, step=None):
        """Decrease the running total of *key* by *value*; return the new total."""
        assert step is None or step >= 1
        step = step or self._env.steps
        self._last_vals[key] = self._last_vals[key] - value
        self._monitor[key][step] = self._last_vals[key]
        return self._last_vals[key]

    def to_dict(self):
        """Return the monitor as ``{metric: {step: value}}`` of plain dicts."""
        return dict(self)

    def to_pd_dataframe(self):
        """Return the history as a forward-filled DataFrame, or None if empty."""
        import pandas as pd
        df = pd.DataFrame.from_dict(self.to_dict())
        try:
            # Seed a step-0 row from the first recorded row so ffill has a base.
            df.loc[0] = df.iloc[0].fillna(0)
        except IndexError:  # nothing was ever recorded
            return None
        # BUGFIX: fillna(method='ffill') is deprecated since pandas 2.1
        # (removed in 3.0); DataFrame.ffill() is the equivalent replacement.
        df = df.ffill()
        return df

    def reset(self):
        raise RuntimeError("DO NOT DO THIS! Always initalize a new Monitor per Env-Run.")
class MonitorCallback(BaseCallback):
    """Persists the env's collected FactoryMonitor DataFrames to a pickle file.

    Works both as a stable-baselines3 training callback and as a context
    manager (``with MonitorCallback(env): ...``); the monitor list is
    dumped once, when training ends / the context exits.
    """

    def __init__(self, env, outpath='debug_out', filename='monitor'):
        super(MonitorCallback, self).__init__()
        self._outpath = Path(outpath)
        self._filename = filename
        # Strip any extension the caller supplied; output is always *.pick.
        self.out_file = self._outpath / f'{self._filename.split(".")[0]}.pick'
        self.env = env
        self.started = False
        self.closed = False

    def __enter__(self):
        self._on_training_start()
        # BUGFIX: return self so ``with MonitorCallback(env) as cb:`` binds
        # the callback instead of None.
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self._on_training_end()

    def _on_training_start(self) -> None:
        # Idempotent: only create the output directory once.
        if self.started:
            return
        self.out_file.parent.mkdir(exist_ok=True, parents=True)
        self.started = True

    def _on_training_end(self) -> None:
        # Idempotent: only dump the monitor list once.
        if self.closed:
            return
        with self.out_file.open('wb') as f:
            pickle.dump(self.env.monitor_as_df_list, f, protocol=pickle.HIGHEST_PROTOCOL)
        self.closed = True

    def _on_step(self) -> bool:
        # BUGFIX: BaseCallback expects a bool here; returning None (falsy)
        # signals stable-baselines3 to abort training early.
        return True