Monitor and Recorder are now environment wrappers (EnvMonitor, EnvRecorder).

Steffen Illium 2021-11-24 17:39:26 +01:00
parent 59484f49c9
commit b0d6c2e1ef
10 changed files with 241 additions and 350 deletions
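
In short: MonitorCallback and RecorderCallback no longer take a filepath and act as context managers; as EnvMonitor and EnvRecorder they wrap an environment instance, delegate everything else via __getattr__, and only persist data when save_run() / save_records() is called. A rough usage sketch of the monitor side, mirroring the evaluation scripts changed below (env_kwargs and the output path are placeholders, not part of this commit):

    from environments.factory.factory_dirt import DirtFactory
    from environments.logging.envmonitor import EnvMonitor

    env = EnvMonitor(DirtFactory(**env_kwargs))   # wrap; unknown attributes fall through to the factory
    for episode in range(10):
        state, done_bool = env.reset(), False
        while not done_bool:
            action = env.action_space.sample()     # or model.predict(state, ...)
            state, reward, done_bool, info = env.step(action)   # step() records every info dict
    env.save_run('study_out/monitor.pick')         # pickled pandas DataFrame, written on demand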

View File

@@ -65,7 +65,7 @@ class BaseFactory(gym.Env):
     def __init__(self, level_name='simple', n_agents=1, max_steps=int(5e2),
                  mv_prop: MovementProperties = MovementProperties(),
                  obs_prop: ObservationProperties = ObservationProperties(),
-                 parse_doors=False, record_episodes=False, done_at_collision=False,
+                 parse_doors=False, done_at_collision=False,
                  verbose=False, doors_have_area=True, env_seed=time.time_ns(), individual_rewards=False,
                  **kwargs):
@@ -97,7 +97,7 @@ class BaseFactory(gym.Env):
         self._pomdp_r = self.obs_prop.pomdp_r
         self.done_at_collision = done_at_collision
-        self.record_episodes = record_episodes
+        self._record_episodes = False
         self.parse_doors = parse_doors
         self.doors_have_area = doors_have_area
         self.individual_rewards = individual_rewards
@@ -249,7 +249,7 @@ class BaseFactory(gym.Env):
         if self._steps >= self.max_steps:
             done = True
         info.update(step_reward=reward, step=self._steps)
-        if self.record_episodes:
+        if self._record_episodes:
             info.update(self._summarize_state())
         # Post step Hook for later use
@@ -280,7 +280,7 @@ class BaseFactory(gym.Env):
         if self.n_agents == 1:
             obs = self._build_per_agent_obs(self[c.AGENT][0], state_array_dict)
         elif self.n_agents >= 2:
-            obs = np.stack([self._build_per_agent_obs(agent, state_array_dict) for agent in self[c.AGENT]])
+            obs = np.stack(self._build_per_agent_obs(agent, state_array_dict) for agent in self[c.AGENT])
         else:
             raise ValueError('n_agents cannot be smaller than 1!!')
         return obs
@@ -290,9 +290,6 @@ class BaseFactory(gym.Env):
         agent_omit_idx = None
         if self.obs_prop.omit_agent_self and self.n_agents == 1:
-            # There is only a single agent and we want to omit the agent obs, so just remove the array.
-            # del state_array_dict[c.AGENT]
-            # Not Needed any more,
             pass
         elif self.obs_prop.omit_agent_self and self.obs_prop.render_agents in [a_obs.COMBINED, ] and self.n_agents > 1:
             state_array_dict[c.AGENT][0, agent.x, agent.y] -= agent.encoding
@@ -439,7 +436,7 @@ class BaseFactory(gym.Env):
         tiles_with_collisions = list()
         for tile in self[c.FLOOR]:
             if tile.is_occupied():
-                guests = [guest for guest in tile.guests if guest.can_collide]
+                guests = tile.guests_that_can_collide
                 if len(guests) >= 2:
                     tiles_with_collisions.append(tile)
         return tiles_with_collisions
@@ -521,7 +518,7 @@ class BaseFactory(gym.Env):
                 per_agent_info_dict[agent.name].update(no_op=1)
                 # per_agent_reward -= 0.00
-            # Monitor Notes
+            # EnvMonitor Notes
             if agent.temp_valid:
                 per_agent_info_dict[agent.name].update(valid_action=1)
                 per_agent_info_dict[agent.name].update({f'{agent.name}_valid_action': 1})

View File

@@ -209,7 +209,7 @@ class Tile(Object):
         return not len(self._guests)

     def is_occupied(self):
-        return len(self._guests)
+        return bool(len(self._guests))

     def enter(self, guest):
         if guest.name not in self._guests:

View File

@@ -28,7 +28,7 @@ class DirtProperties(NamedTuple):
     max_global_amount: int = 20          # Max dirt amount in the whole environment.
     dirt_smear_amount: float = 0.2       # Agents smear dirt, when not cleaning up in place.
     agent_can_interact: bool = True      # Whether the agents can interact with the dirt in this environment.
-    done_when_clean = True
+    done_when_clean: bool = True


 class Dirt(Entity):
@@ -228,14 +228,14 @@ class DirtFactory(BaseFactory):
         dirt = [dirt.amount for dirt in self[c.DIRT]]
         current_dirt_amount = sum(dirt)
         dirty_tile_count = len(dirt)
-        if dirty_tile_count:
-            dirt_distribution_score = entropy(softmax(np.asarray(dirt)) / dirty_tile_count)
-        else:
-            dirt_distribution_score = 0
+        # if dirty_tile_count:
+        #     dirt_distribution_score = entropy(softmax(np.asarray(dirt)) / dirty_tile_count)
+        #else:
+        #     dirt_distribution_score = 0
         info_dict.update(dirt_amount=current_dirt_amount)
         info_dict.update(dirty_tile_count=dirty_tile_count)
-        info_dict.update(dirt_distribution_score=dirt_distribution_score)
+        # info_dict.update(dirt_distribution_score=dirt_distribution_score)
         if agent.temp_action == CLEAN_UP_ACTION:
             if agent.temp_valid:

View File

@@ -1,7 +1,7 @@
 import pickle
 from collections import defaultdict
 from pathlib import Path
-from typing import List, Dict
+from typing import List, Dict, Union

 from stable_baselines3.common.callbacks import BaseCallback
@@ -10,57 +10,50 @@ from environments.helpers import IGNORED_DF_COLUMNS
 import pandas as pd


-class MonitorCallback(BaseCallback):
+class EnvMonitor(BaseCallback):

     ext = 'png'

-    def __init__(self, filepath=Path('debug_out/monitor.pick')):
-        super(MonitorCallback, self).__init__()
-        self.filepath = Path(filepath)
+    def __init__(self, env):
+        super(EnvMonitor, self).__init__()
+        self.unwrapped = env
         self._monitor_df = pd.DataFrame()
         self._monitor_dicts = defaultdict(dict)
-        self.started = False
-        self.closed = False

-    def __enter__(self):
-        self.start()
-        return self
+    def __getattr__(self, item):
+        return getattr(self.unwrapped, item)

-    def __exit__(self, exc_type, exc_val, exc_tb):
-        self.stop()
+    def step(self, action):
+        obs, reward, done, info = self.unwrapped.step(action)
+        self._read_info(0, info)
+        self._read_done(0, done)
+        return obs, reward, done, info
+
+    def reset(self):
+        return self.unwrapped.reset()

     def _on_training_start(self) -> None:
-        if self.started:
-            pass
-        else:
-            self.start()
         pass

     def _on_training_end(self) -> None:
-        if self.closed:
-            pass
-        else:
-            self.stop()
+        pass

     def _on_step(self, alt_infos: List[Dict] = None, alt_dones: List[bool] = None) -> bool:
-        if self.started:
-            for env_idx, info in enumerate(self.locals.get('infos', [])):
-                self.read_info(env_idx, info)
-            for env_idx, done in list(
-                    enumerate(self.locals.get('dones', []))) + list(enumerate(self.locals.get('done', []))):
-                self.read_done(env_idx, done)
-        else:
-            pass
+        for env_idx, info in enumerate(self.locals.get('infos', [])):
+            self._read_info(env_idx, info)
+        for env_idx, done in list(
+                enumerate(self.locals.get('dones', []))) + list(enumerate(self.locals.get('done', []))):
+            self._read_done(env_idx, done)
         return True

-    def read_info(self, env_idx, info: dict):
+    def _read_info(self, env_idx, info: dict):
         self._monitor_dicts[env_idx][len(self._monitor_dicts[env_idx])] = {
             key: val for key, val in info.items() if
             key not in ['terminal_observation', 'episode'] and not key.startswith('rec_')}
         return

-    def read_done(self, env_idx, done):
+    def _read_done(self, env_idx, done):
         if done:
             env_monitor_df = pd.DataFrame.from_dict(self._monitor_dicts[env_idx], orient='index')
             self._monitor_dicts[env_idx] = dict()
@@ -74,16 +67,8 @@ class MonitorCallback(BaseCallback):
             pass
         return

-    def stop(self):
-        # self.out_file.unlink(missing_ok=True)
-        with self.filepath.open('wb') as f:
+    def save_run(self, filepath: Union[Path, str]):
+        filepath = Path(filepath)
+        filepath.parent.mkdir(exist_ok=True, parents=True)
+        with filepath.open('wb') as f:
             pickle.dump(self._monitor_df.reset_index(), f, protocol=pickle.HIGHEST_PROTOCOL)
-        self.closed = True
-
-    def start(self):
-        if self.started:
-            pass
-        else:
-            self.filepath.parent.mkdir(exist_ok=True, parents=True)
-            self.started = True
-            pass
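
Because EnvMonitor still subclasses stable-baselines3's BaseCallback, the same object also works as a training callback: _on_step() pulls infos/dones from self.locals, and the run is dumped explicitly afterwards. That is how the study script further down uses it; roughly (model class, env_factory and seed_path are illustrative placeholders):

    from stable_baselines3 import A2C

    monitor = EnvMonitor(env_factory)
    model = A2C("MlpPolicy", env_factory, verbose=1, seed=seed, device='cpu')
    model.learn(total_timesteps=int(1e5), callback=[monitor])   # callback list works, SB3 wraps it in a CallbackList
    monitor.save_run(seed_path / 'monitor.pick')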

View File

@@ -1,4 +1,3 @@
-import json
 from collections import defaultdict
 from pathlib import Path
 from typing import Union
@@ -11,22 +10,13 @@ from stable_baselines3.common.callbacks import BaseCallback
 from environments.factory.base.base_factory import REC_TAC

-# noinspection PyAttributeOutsideInit
-from environments.helpers import Constants as c
-
-
-class RecorderCallback(BaseCallback):
-
-    def __init__(self, filepath: Union[str, Path], occupation_map: bool = False, trajectory_map: bool = False,
-                 entities='all'):
-        super(RecorderCallback, self).__init__()
-        self.trajectory_map = trajectory_map
-        self.occupation_map = occupation_map
-        self.filepath = Path(filepath)
+class EnvRecorder(BaseCallback):
+
+    def __init__(self, env, entities='all'):
+        super(EnvRecorder, self).__init__()
+        self.unwrapped = env
         self._recorder_dict = defaultdict(list)
         self._recorder_out_list = list()
-        self._env_params = None
-        self.do_record: bool
         if isinstance(entities, str):
             if entities.lower() == 'all':
                 self._entities = None
@@ -37,10 +27,18 @@ class RecorderCallback(BaseCallback):
         self.started = False
         self.closed = False

-    def read_params(self, params):
-        self._env_params = params
+    def __getattr__(self, item):
+        return getattr(self.unwrapped, item)

-    def read_info(self, env_idx, info: dict):
+    def reset(self):
+        self.unwrapped._record_episodes = True
+        return self.unwrapped.reset()
+
+    def _on_training_start(self) -> None:
+        self.unwrapped._record_episodes = True
+        pass
+
+    def _read_info(self, env_idx, info: dict):
         if info_dict := {key.replace(REC_TAC, ''): val for key, val in info.items() if key.startswith(f'{REC_TAC}')}:
             if self._entities:
                 info_dict = {k: v for k, v in info_dict.items() if k in self._entities}
@@ -51,7 +49,7 @@ class RecorderCallback(BaseCallback):
             pass
         return

-    def read_done(self, env_idx, done):
+    def _read_done(self, env_idx, done):
         if done:
             self._recorder_out_list.append({'steps': self._recorder_dict[env_idx],
                                             'episode': len(self._recorder_out_list)})
@@ -59,77 +57,46 @@ class RecorderCallback(BaseCallback):
         else:
             pass

-    def start(self, force=False):
-        if (hasattr(self.training_env, 'record_episodes') and self.training_env.record_episodes) or force:
-            self.do_record = True
-            self.filepath.parent.mkdir(exist_ok=True, parents=True)
-            self.started = True
-        else:
-            self.do_record = False
-
-    def stop(self):
-        if self.do_record and self.started:
-            # self.out_file.unlink(missing_ok=True)
-            with self.filepath.open('w') as f:
-                out_dict = {'episodes': self._recorder_out_list, 'header': self._env_params}
-                try:
-                    simplejson.dump(out_dict, f, indent=4)
-                except TypeError:
-                    print('Shit')
-
-            if self.occupation_map:
-                a = np.zeros((15, 15))
-                for episode in out_dict['episodes']:
-                    df = pd.DataFrame([y for x in episode['steps'] for y in x['Agents']])
-                    b = list(df[['x', 'y']].to_records(index=False))
-                    np.add.at(a, tuple(zip(*b)), 1)
-
-                # a = np.rot90(a)
-                import seaborn as sns
-                from matplotlib import pyplot as plt
-                hm = sns.heatmap(data=a)
-                hm.set_title('Very Nice Heatmap')
-                plt.show()
-
-            if self.trajectory_map:
-                print('Recorder files were dumped to disk, now plotting the occupation map...')
-
-            self.closed = True
-            self.started = False
-        else:
-            pass
+    def save_records(self, filepath: Union[Path, str], save_occupation_map=False, save_trajectory_map=False):
+        filepath = Path(filepath)
+        filepath.parent.mkdir(exist_ok=True, parents=True)
+        # self.out_file.unlink(missing_ok=True)
+        with filepath.open('w') as f:
+            out_dict = {'episodes': self._recorder_out_list, 'header': self.unwrapped.params}
+            try:
+                simplejson.dump(out_dict, f, indent=4)
+            except TypeError:
+                print('Shit')
+
+        if save_occupation_map:
+            a = np.zeros((15, 15))
+            for episode in out_dict['episodes']:
+                df = pd.DataFrame([y for x in episode['steps'] for y in x['Agents']])
+                b = list(df[['x', 'y']].to_records(index=False))
+                np.add.at(a, tuple(zip(*b)), 1)
+
+            # a = np.rot90(a)
+            import seaborn as sns
+            from matplotlib import pyplot as plt
+            hm = sns.heatmap(data=a)
+            hm.set_title('Very Nice Heatmap')
+            plt.show()
+
+        if save_trajectory_map:
+            raise NotImplementedError('This has not yet been implemented.')

     def _on_step(self) -> bool:
-        if self.do_record and self.started:
-            for env_idx, info in enumerate(self.locals.get('infos', [])):
-                self.read_info(env_idx, info)
-
-            for env_idx, done in list(
-                    enumerate(self.locals.get('dones', []))) + list(
-                enumerate(self.locals.get('done', []))):
-                self.read_done(env_idx, done)
-        else:
-            pass
+        for env_idx, info in enumerate(self.locals.get('infos', [])):
+            self._read_info(env_idx, info)
+
+        dones = list(enumerate(self.locals.get('dones', [])))
+        dones.extend(list(enumerate(self.locals.get('done', []))))
+        for env_idx, done in dones:
+            self._read_done(env_idx, done)
         return True

-    def __enter__(self):
-        self.start(force=True)
-        return self
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        self.stop()
-
-    def _on_training_start(self) -> None:
-        if self.started:
-            pass
-        else:
-            self.start()
-        pass
-
     def _on_training_end(self) -> None:
-        if self.closed:
-            pass
-        else:
-            self.stop()
+        pass
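
The recorder follows the same pattern: wrapping an environment and calling reset() (or starting training) sets _record_episodes on the wrapped BaseFactory, so each step's info dict carries the rec_-prefixed state summary that _read_info() collects; save_records() then serialises the episodes with simplejson and can optionally plot a (currently hard-coded 15x15) occupation heatmap. A sketch of the callback-driven use, with placeholder paths and a model assumed to exist:

    recorder = EnvRecorder(env_factory, entities='all')
    model.learn(total_timesteps=int(1e5), callback=[recorder])   # _on_step() harvests the rec_* keys from info
    recorder.save_records('study_out/recorder.json', save_occupation_map=True)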

View File

@@ -1,54 +0,0 @@
-from collections import defaultdict
-from pathlib import Path
-
-import numpy as np
-import pandas as pd
-from stable_baselines3.common.callbacks import BaseCallback
-
-from environments.logging.plotting import prepare_plot
-
-
-class TraningMonitor(BaseCallback):
-
-    def __init__(self, filepath, flush_interval=None):
-        super(TraningMonitor, self).__init__()
-        self.values = defaultdict(dict)
-        self.rewards = defaultdict(lambda: 0)
-        self.filepath = Path(filepath)
-        self.flush_interval = flush_interval
-        self.next_flush: int
-        pass
-
-    def _on_training_start(self) -> None:
-        self.flush_interval = self.flush_interval or (self.locals['total_timesteps'] * 0.1)
-        self.next_flush = self.flush_interval
-
-    def _flush(self):
-        df = pd.DataFrame.from_dict(self.values, orient='index')
-        if not self.filepath.exists():
-            df.to_csv(self.filepath, mode='wb', header=True)
-        else:
-            df.to_csv(self.filepath, mode='a', header=False)
-
-    def _on_step(self) -> bool:
-        for idx, done in np.ndenumerate(self.locals.get('dones', [])):
-            idx = idx[0]
-            # self.values[self.num_timesteps].update(**{f'reward_env_{idx}': self.locals['rewards'][idx]})
-            self.rewards[idx] += self.locals['rewards'][idx]
-            if done:
-                self.values[self.num_timesteps].update(**{f'acc_epispde_r_env_{idx}': self.rewards[idx]})
-                self.rewards[idx] = 0
-
-        if self.num_timesteps >= self.next_flush and self.values:
-            self._flush()
-            self.values = defaultdict(dict)
-            self.next_flush += self.flush_interval
-        return True
-
-    def on_training_end(self) -> None:
-        self._flush()
-        self.values = defaultdict(dict)
-        # prepare_plot()

View File

@@ -1,4 +1,3 @@
-from enum import Enum
 from typing import NamedTuple, Union

View File

@@ -39,7 +39,9 @@ def prepare_plt(df, hue, style, hue_order):
     plt.close('all')
     sns.set(rc={'text.usetex': False}, style='whitegrid')
     lineplot = sns.lineplot(data=df, x='Episode', y='Score', hue=hue, style=style,
-                            ci=95, palette=PALETTE, hue_order=hue_order)
+                            ci=95, palette=PALETTE, hue_order=hue_order, )
+    plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0)
+    plt.tight_layout()
     # lineplot.set_title(f'{sorted(list(df["Measurement"].unique()))}')
     return lineplot

View File

@@ -8,7 +8,7 @@ from environments import helpers as h
 from environments.helpers import Constants as c
 from environments.factory.factory_dirt import DirtFactory
 from environments.factory.combined_factories import DirtItemFactory
-from environments.logging.recorder import RecorderCallback
+from environments.logging.recorder import EnvRecorder

 warnings.filterwarnings('ignore', category=FutureWarning)
 warnings.filterwarnings('ignore', category=UserWarning)
@@ -16,14 +16,13 @@ warnings.filterwarnings('ignore', category=UserWarning)
 if __name__ == '__main__':
-    model_name = 'A2C_ItsDirt'
-    run_id = 0
-    determin = True
-    render=False
+    determin = False
+    render = True
     record = True
     seed = 67
-    n_agents = 1
-    out_path = Path('study_out/e_1_Now_with_doors/no_obs/dirt/A2C_Now_with_doors/0_A2C_Now_with_doors')
+    n_agents = 2
+    out_path = Path('study_out/e_1_obs_stack_3_gae_0.25_n_steps_16/seperate_N/dirt/A2C_obs_stack_3_gae_0.25_n_steps_16/0_A2C_obs_stack_3_gae_0.25_n_steps_16')
+    out_path_2 = Path('study_out/e_1_obs_stack_3_gae_0.25_n_steps_16/seperate_N/dirt/A2C_obs_stack_3_gae_0.25_n_steps_16/1_A2C_obs_stack_3_gae_0.25_n_steps_16')
     model_path = out_path

     with (out_path / f'env_params.json').open('r') as f:
@@ -33,42 +32,35 @@ if __name__ == '__main__':
     env_kwargs['dirt_prop']['max_spawn_amount'] = gain_amount
     del env_kwargs['dirt_prop']['gain_amount']

-    env_kwargs.update(record_episodes=record)
+    env_kwargs.update(record_episodes=record, done_at_collision=True)

     this_model = out_path / 'model.zip'
+    other_model = out_path / 'model.zip'

-    model_cls = next(val for key, val in h.MODEL_MAP.items() if key in model_name)
-    models = [model_cls.load(this_model) for _ in range(n_agents)]
+    model_cls = next(val for key, val in h.MODEL_MAP.items() if key in out_path.parent.name)
+    models = [model_cls.load(this_model), model_cls.load(other_model)]

-    with RecorderCallback(filepath=Path() / 'recorder_out_DQN.json', occupation_map=True,
-                          entities=['Agents']) as recorder:
-        # Init Env
-        with DirtFactory(**env_kwargs) as env:
-            obs_shape = env.observation_space.shape
-            # Evaluation Loop for i in range(n Episodes)
-            recorder.read_params(env.params)
-            for episode in range(200):
-                env_state = env.reset()
-                rew, done_bool = 0, False
-                while not done_bool:
-                    if n_agents > 1:
-                        actions = [model.predict(
-                            np.stack([env_state[i][j] for i in range(env_state.shape[0])]),
-                            deterministic=determin)[0] for j, model in enumerate(models)]
-                    else:
-                        actions = models[0].predict(env_state, deterministic=determin)[0]
-                    if False:
-                        if any([agent.pos in [door.pos for door in env.unwrapped[c.DOORS]]
-                                for agent in env.unwrapped[c.AGENT]]):
-                            print('On Door')
-                    env_state, step_r, done_bool, info_obj = env.step(actions)
-                    recorder.read_info(0, info_obj)
-                    rew += step_r
-                    if render:
-                        env.render()
-                    if done_bool:
-                        recorder.read_done(0, done_bool)
-                        break
-                print(f'Factory run {episode} done, reward is:\n {rew}')
-    print('all done')
+    # Init Env
+    with DirtFactory(**env_kwargs) as env:
+        env = EnvRecorder(env)
+        obs_shape = env.observation_space.shape
+        # Evaluation Loop for i in range(n Episodes)
+        for episode in range(50):
+            env_state = env.reset()
+            rew, done_bool = 0, False
+            while not done_bool:
+                if n_agents > 1:
+                    actions = [model.predict(
+                        np.stack([env_state[i][j] for i in range(env_state.shape[0])]),
+                        deterministic=determin)[0] for j, model in enumerate(models)]
+                else:
+                    actions = models[0].predict(env_state, deterministic=determin)[0]
+                env_state, step_r, done_bool, info_obj = env.step(actions)
+                rew += step_r
+                if render:
+                    env.render()
+                if done_bool:
+                    break
+            print(f'Factory run {episode} done, reward is:\n {rew}')
+    print('all done')

View File

@@ -18,7 +18,6 @@ except NameError:
 import time
 import simplejson
 from stable_baselines3.common.vec_env import SubprocVecEnv
@@ -26,13 +25,17 @@ from environments import helpers as h
 from environments.factory.factory_dirt import DirtProperties, DirtFactory
 from environments.factory.combined_factories import DirtItemFactory
 from environments.factory.factory_item import ItemProperties, ItemFactory
-from environments.logging.monitor import MonitorCallback
+from environments.logging.envmonitor import EnvMonitor
 from environments.utility_classes import MovementProperties, ObservationProperties, AgentRenderOptions
 import pickle
 from plotting.compare_runs import compare_seed_runs, compare_model_runs, compare_all_parameter_runs
 import pandas as pd
 import seaborn as sns
+import multiprocessing as mp
+# mp.set_start_method("spawn")

 """
 In this studie, we want to explore the macro behaviour of multi agents which are trained on the same task,
 but never saw each other in training.
@@ -69,9 +72,10 @@ n_agents = 4
 ood_monitor_file = f'e_1_{n_agents}_agents'
 baseline_monitor_file = 'e_1_baseline'

+from stable_baselines3 import A2C
+

 def policy_model_kwargs():
-    return dict()
+    return dict(gae_lambda=0.25, n_steps=16, max_grad_norm=0, use_rms_prop=False)


 def dqn_model_kwargs():
@@ -102,27 +106,23 @@ def load_model_run_baseline(seed_path, env_to_run):
     with next(seed_path.glob('*.json')).open('r') as f:
         env_kwargs = simplejson.load(f)
     env_kwargs.update(done_at_collision=True)
-    # Monitor Init
-    with MonitorCallback(filepath=seed_path / f'{baseline_monitor_file}.pick') as monitor:
-        # Init Env
-        with env_to_run(**env_kwargs) as env_factory:
-            # Evaluation Loop for i in range(n Episodes)
-            for episode in range(100):
-                env_state = env_factory.reset()
-                rew, done_bool = 0, False
-                while not done_bool:
-                    action = model.predict(env_state, deterministic=True)[0]
-                    env_state, step_r, done_bool, info_obj = env_factory.step(action)
-                    monitor.read_info(0, info_obj)
-                    rew += step_r
-                    if done_bool:
-                        monitor.read_done(0, done_bool)
-                        break
-                print(f'Factory run {episode} done, reward is:\n {rew}')
-    # Eval monitor outputs are automatically stored by the monitor object
-    # del model, env_kwargs, env_factory
-    # import gc
-    # gc.collect()
+    # Init Env
+    with env_to_run(**env_kwargs) as env_factory:
+        monitored_env_factory = EnvMonitor(env_factory)
+        # Evaluation Loop for i in range(n Episodes)
+        for episode in range(100):
+            env_state = monitored_env_factory.reset()
+            rew, done_bool = 0, False
+            while not done_bool:
+                action = model.predict(env_state, deterministic=True)[0]
+                env_state, step_r, done_bool, info_obj = monitored_env_factory.step(action)
+                rew += step_r
+                if done_bool:
+                    break
+            print(f'Factory run {episode} done, reward is:\n {rew}')
+        monitored_env_factory.save_run(filepath=seed_path / f'{ood_monitor_file}.pick')
def load_model_run_study(seed_path, env_to_run, additional_kwargs_dict): def load_model_run_study(seed_path, env_to_run, additional_kwargs_dict):
@ -138,33 +138,31 @@ def load_model_run_study(seed_path, env_to_run, additional_kwargs_dict):
n_agents=n_agents, n_agents=n_agents,
done_at_collision=True, done_at_collision=True,
**additional_kwargs_dict.get('post_training_kwargs', {})) **additional_kwargs_dict.get('post_training_kwargs', {}))
# Monitor Init # Init Env
with MonitorCallback(filepath=seed_path / f'{ood_monitor_file}.pick') as monitor: with env_to_run(**env_kwargs) as env_factory:
# Init Env monitored_factory_env = EnvMonitor(env_factory)
with env_to_run(**env_kwargs) as env_factory: # Evaluation Loop for i in range(n Episodes)
# Evaluation Loop for i in range(n Episodes) for episode in range(50):
for episode in range(50): env_state = monitored_factory_env.reset()
env_state = env_factory.reset() rew, done_bool = 0, False
rew, done_bool = 0, False while not done_bool:
while not done_bool: try:
try: actions = [model.predict(
actions = [model.predict( np.stack([env_state[i][j] for i in range(env_state.shape[0])]),
np.stack([env_state[i][j] for i in range(env_state.shape[0])]), deterministic=True)[0] for j, model in enumerate(models)]
deterministic=True)[0] for j, model in enumerate(models)] except ValueError as e:
except ValueError as e: print(e)
print(e) print('Env_Kwargs are:\n')
print('Env_Kwargs are:\n') print(env_kwargs)
print(env_kwargs) print('Path is:\n')
print('Path is:\n') print(seed_path)
print(seed_path) exit()
exit() env_state, step_r, done_bool, info_obj = monitored_factory_env.step(actions)
env_state, step_r, done_bool, info_obj = env_factory.step(actions) rew += step_r
monitor.read_info(0, info_obj) if done_bool:
rew += step_r break
if done_bool: print(f'Factory run {episode} done, reward is:\n {rew}')
monitor.read_done(0, done_bool) monitored_factory_env.save_run(filepath=seed_path / f'{ood_monitor_file}.pick')
break
print(f'Factory run {episode} done, reward is:\n {rew}')
# Eval monitor outputs are automatically stored by the monitor object # Eval monitor outputs are automatically stored by the monitor object
del models, env_kwargs, env_factory del models, env_kwargs, env_factory
import gc import gc
@@ -174,27 +172,25 @@ def load_model_run_study(seed_path, env_to_run, additional_kwargs_dict):
 def start_mp_study_run(envs_map, policies_path):
     paths = list(y for y in policies_path.iterdir() if y.is_dir() and not (y / f'{ood_monitor_file}.pick').exists())
     if paths:
-        import multiprocessing as mp
-        pool = mp.Pool(mp.cpu_count())
-        print("Starting MP with: ", pool._processes, " Processes")
-        _ = pool.starmap(load_model_run_study,
-                         it.product(paths,
-                                    (envs_map[policies_path.parent.name][0],),
-                                    (observation_modes[policies_path.parent.parent.name],))
-                         )
+        with mp.get_context("spawn").Pool(mp.cpu_count()) as pool:
+            print("Starting MP with: ", pool._processes, " Processes")
+            _ = pool.starmap(load_model_run_study,
+                             it.product(paths,
                                         (envs_map[policies_path.parent.name][0],),
+                                        (observation_modes[policies_path.parent.parent.name],))
+                             )


 def start_mp_baseline_run(envs_map, policies_path):
     paths = list(y for y in policies_path.iterdir() if y.is_dir() and
                  not (y / f'{baseline_monitor_file}.pick').exists())
     if paths:
-        import multiprocessing as mp
-        pool = mp.Pool(mp.cpu_count())
-        print("Starting MP with: ", pool._processes, " Processes")
-        _ = pool.starmap(load_model_run_baseline,
-                         it.product(paths,
-                                    (envs_map[policies_path.parent.name][0],))
-                         )
+        with mp.get_context("spawn").Pool(mp.cpu_count()) as pool:
+            print("Starting MP with: ", pool._processes, " Processes")
+            _ = pool.starmap(load_model_run_baseline,
+                             it.product(paths,
+                                        (envs_map[policies_path.parent.name][0],))
+                             )


 if __name__ == '__main__':
@@ -206,9 +202,10 @@ if __name__ == '__main__':
     train_steps = 5e6
     n_seeds = 3
+    frames_to_stack = 3

     # Define a global studi save path
-    start_time = 'exploring_obs_stack'  # int(time.time())
+    start_time = 'obs_stack_3_gae_0.25_n_steps_16'  # int(time.time())
     study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}'

     # Define Global Env Parameters
@@ -216,7 +213,7 @@ if __name__ == '__main__':
     obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT,
                                       omit_agent_self=True,
                                       additional_agent_placeholder=None,
-                                      frames_to_stack=6,
+                                      frames_to_stack=frames_to_stack,
                                       pomdp_r=2
                                       )
     move_props = MovementProperties(allow_diagonal_movement=True,
@@ -234,7 +231,8 @@ if __name__ == '__main__':
                       level_name='rooms', record_episodes=False, doors_have_area=True,
                       verbose=False,
                       mv_prop=move_props,
-                      obs_prop=obs_props
+                      obs_prop=obs_props,
+                      done_at_collision=True
                       )

     # Bundle both environments with global kwargs and parameters
@@ -250,44 +248,45 @@ if __name__ == '__main__':
     # Define parameter versions according with #1,2[1,0,N],3
     observation_modes = {}
-    observation_modes.update({
-        'seperate_1': dict(
-            post_training_kwargs=
-            dict(obs_prop=ObservationProperties(
-                render_agents=AgentRenderOptions.COMBINED,
-                additional_agent_placeholder=None,
-                omit_agent_self=True,
-                frames_to_stack=3,
-                pomdp_r=2)
-            ),
-            additional_env_kwargs=
-            dict(obs_prop=ObservationProperties(
-                render_agents=AgentRenderOptions.NOT,
-                additional_agent_placeholder=1,
-                omit_agent_self=True,
-                frames_to_stack=3,
-                pomdp_r=2)
-            )
-        )})
-    observation_modes.update({
-        'seperate_0': dict(
-            post_training_kwargs=
-            dict(obs_prop=ObservationProperties(
-                render_agents=AgentRenderOptions.COMBINED,
-                additional_agent_placeholder=None,
-                omit_agent_self=True,
-                frames_to_stack=3,
-                pomdp_r=2)
-            ),
-            additional_env_kwargs=
-            dict(obs_prop=ObservationProperties(
-                render_agents=AgentRenderOptions.NOT,
-                additional_agent_placeholder=0,
-                omit_agent_self=True,
-                frames_to_stack=3,
-                pomdp_r=2)
-            )
-        )})
+    if False:
+        observation_modes.update({
+            'seperate_1': dict(
+                post_training_kwargs=
+                dict(obs_prop=ObservationProperties(
+                    render_agents=AgentRenderOptions.COMBINED,
+                    additional_agent_placeholder=None,
+                    omit_agent_self=True,
+                    frames_to_stack=frames_to_stack,
+                    pomdp_r=2)
+                ),
+                additional_env_kwargs=
+                dict(obs_prop=ObservationProperties(
+                    render_agents=AgentRenderOptions.NOT,
+                    additional_agent_placeholder=1,
+                    omit_agent_self=True,
+                    frames_to_stack=frames_to_stack,
+                    pomdp_r=2)
+                )
+            )})
+        observation_modes.update({
+            'seperate_0': dict(
+                post_training_kwargs=
+                dict(obs_prop=ObservationProperties(
+                    render_agents=AgentRenderOptions.COMBINED,
+                    additional_agent_placeholder=None,
+                    omit_agent_self=True,
+                    frames_to_stack=frames_to_stack,
+                    pomdp_r=2)
+                ),
+                additional_env_kwargs=
+                dict(obs_prop=ObservationProperties(
+                    render_agents=AgentRenderOptions.NOT,
+                    additional_agent_placeholder=0,
+                    omit_agent_self=True,
+                    frames_to_stack=frames_to_stack,
+                    pomdp_r=2)
+                )
+            )})
     observation_modes.update({
         'seperate_N': dict(
             post_training_kwargs=
@@ -295,7 +294,7 @@ if __name__ == '__main__':
                 render_agents=AgentRenderOptions.COMBINED,
                 additional_agent_placeholder=None,
                 omit_agent_self=True,
-                frames_to_stack=3,
+                frames_to_stack=frames_to_stack,
                 pomdp_r=2)
             ),
             additional_env_kwargs=
@@ -303,7 +302,7 @@ if __name__ == '__main__':
                 render_agents=AgentRenderOptions.NOT,
                 additional_agent_placeholder='N',
                 omit_agent_self=True,
-                frames_to_stack=3,
+                frames_to_stack=frames_to_stack,
                 pomdp_r=2)
             )
         )})
@@ -314,7 +313,7 @@ if __name__ == '__main__':
                 render_agents=AgentRenderOptions.LEVEL,
                 omit_agent_self=True,
                 additional_agent_placeholder=None,
-                frames_to_stack=3,
+                frames_to_stack=frames_to_stack,
                 pomdp_r=2)
             )
         )})
@@ -326,7 +325,7 @@ if __name__ == '__main__':
                 render_agents=AgentRenderOptions.NOT,
                 additional_agent_placeholder=None,
                 omit_agent_self=True,
-                frames_to_stack=3,
+                frames_to_stack=frames_to_stack,
                 pomdp_r=2)
             )
         )
@@ -355,9 +354,6 @@ if __name__ == '__main__':
                     continue
                 seed_path.mkdir(parents=True, exist_ok=True)

-                # Monitor Init
-                callbacks = [MonitorCallback(seed_path / 'monitor.pick')]
-
                 # Env Init & Model kwargs definition
                 if model_cls.__name__ in ["PPO", "A2C"]:
                     # env_factory = env_class(**env_kwargs)
@@ -378,6 +374,9 @@ if __name__ == '__main__':
                 except AttributeError:
                     env_factory.save_params(param_path)

+                # EnvMonitor Init
+                callbacks = [EnvMonitor(env_factory)]
+
                 # Model Init
                 model = model_cls("MlpPolicy", env_factory,
                                   verbose=1, seed=seed, device='cpu',
@@ -390,6 +389,9 @@ if __name__ == '__main__':
                 save_path = seed_path / f'model.zip'
                 model.save(save_path)

+                # Monitor Save
+                callbacks[0].save_run(seed_path / 'monitor.pick')
+
                 # Better be save then sorry: Clean up!
                 del env_factory, model
                 import gc
@@ -500,13 +502,14 @@ if __name__ == '__main__':
         df['failed_cleanup'] = df.loc[:, df.columns.str.contains("]_failed_dirt_cleanup")].sum(1)
         df['coll_lvl'] = df.loc[:, df.columns.str.contains("]_vs_LEVEL")].sum(1)
         df['coll_agent'] = df.loc[:, df.columns.str.contains("]_vs_Agent")].sum(1) / 2
-        # df['collisions'] = df['coll_lvl'] + df['coll_agent']
+        # df['`collis`ions'] = df['coll_lvl'] + df['coll_agent']
         value_vars = ['pick_up', 'drop_off', 'failed_item_action', 'failed_cleanup',
                       'coll_lvl', 'coll_agent', 'dirt_cleaned']

         df_grouped = df.groupby(id_cols + ['seed']
-                                ).agg({key: 'sum' if "Agent" in key else 'mean' for key in df.columns
+                                # 'sum' if "agent" in key else 'mean'
+                                ).agg({key: 'sum' for key in df.columns
                                        if key not in (id_cols + ['seed'])})
         df_melted = df_grouped.reset_index().melt(id_vars=id_cols,
                                                   value_vars=value_vars,  # 'step_reward',