Occupation Map

This commit is contained in:
Steffen Illium 2021-11-18 18:32:42 +01:00
parent 65056b2c61
commit 59484f49c9
5 changed files with 119 additions and 62 deletions

View File

@ -50,6 +50,11 @@ class BaseFactory(gym.Env):
def movement_actions(self): def movement_actions(self):
return self._actions.movement_actions return self._actions.movement_actions
@property
def params(self) -> dict:
d = {key: val for key, val in self.__dict__.items() if not key.startswith('_') and not key.startswith('__')}
return d
def __enter__(self): def __enter__(self):
return self if self.obs_prop.frames_to_stack == 0 else \ return self if self.obs_prop.frames_to_stack == 0 else \
FrameStack(self, self.obs_prop.frames_to_stack) FrameStack(self, self.obs_prop.frames_to_stack)
@ -576,8 +581,7 @@ class BaseFactory(gym.Env):
def save_params(self, filepath: Path): def save_params(self, filepath: Path):
# noinspection PyProtectedMember # noinspection PyProtectedMember
# d = {key: val._asdict() if hasattr(val, '_asdict') else val for key, val in self.__dict__.items() d = self.params
d = {key: val for key, val in self.__dict__.items() if not key.startswith('_') and not key.startswith('__')}
filepath.parent.mkdir(parents=True, exist_ok=True) filepath.parent.mkdir(parents=True, exist_ok=True)
with filepath.open('w') as f: with filepath.open('w') as f:
simplejson.dump(d, f, indent=4, namedtuple_as_object=True) simplejson.dump(d, f, indent=4, namedtuple_as_object=True)
@ -587,6 +591,7 @@ class BaseFactory(gym.Env):
for entity_group in self._entities: for entity_group in self._entities:
summary.update({f'{REC_TAC}{entity_group.name}': entity_group.summarize_states(n_steps=self._steps)}) summary.update({f'{REC_TAC}{entity_group.name}': entity_group.summarize_states(n_steps=self._steps)})
return summary return summary
def print(self, string): def print(self, string):

View File

@ -239,6 +239,11 @@ class DirtFactory(BaseFactory):
if agent.temp_action == CLEAN_UP_ACTION: if agent.temp_action == CLEAN_UP_ACTION:
if agent.temp_valid: if agent.temp_valid:
# Reward if pickup succeeds,
# 0.5 on every pickup
reward += 0.5
if self.dirt_prop.done_when_clean and (len(self[c.DIRT]) == 0):
# 0.5 additional reward for the very last pickup
reward += 0.5 reward += 0.5
self.print(f'{agent.name} did just clean up some dirt at {agent.pos}.') self.print(f'{agent.name} did just clean up some dirt at {agent.pos}.')
info_dict.update(dirt_cleaned=1) info_dict.update(dirt_cleaned=1)

View File

@ -3,6 +3,8 @@ from collections import defaultdict
from pathlib import Path from pathlib import Path
from typing import Union from typing import Union
import numpy as np
import pandas as pd
import simplejson import simplejson
from stable_baselines3.common.callbacks import BaseCallback from stable_baselines3.common.callbacks import BaseCallback
@ -10,21 +12,39 @@ from environments.factory.base.base_factory import REC_TAC
# noinspection PyAttributeOutsideInit # noinspection PyAttributeOutsideInit
from environments.helpers import Constants as c
class RecorderCallback(BaseCallback): class RecorderCallback(BaseCallback):
def __init__(self, filepath: Union[str, Path], occupation_map: bool = False, trajectory_map: bool = False): def __init__(self, filepath: Union[str, Path], occupation_map: bool = False, trajectory_map: bool = False,
entities='all'):
super(RecorderCallback, self).__init__() super(RecorderCallback, self).__init__()
self.trajectory_map = trajectory_map self.trajectory_map = trajectory_map
self.occupation_map = occupation_map self.occupation_map = occupation_map
self.filepath = Path(filepath) self.filepath = Path(filepath)
self._recorder_dict = defaultdict(list) self._recorder_dict = defaultdict(list)
self._recorder_out_list = list() self._recorder_out_list = list()
self._env_params = None
self.do_record: bool self.do_record: bool
if isinstance(entities, str):
if entities.lower() == 'all':
self._entities = None
else:
self._entities = [entities]
else:
self._entities = entities
self.started = False self.started = False
self.closed = False self.closed = False
def read_params(self, params):
self._env_params = params
def read_info(self, env_idx, info: dict): def read_info(self, env_idx, info: dict):
if info_dict := {key.replace(REC_TAC, ''): val for key, val in info.items() if key.startswith(f'{REC_TAC}')}: if info_dict := {key.replace(REC_TAC, ''): val for key, val in info.items() if key.startswith(f'{REC_TAC}')}:
if self._entities:
info_dict = {k: v for k, v in info_dict.items() if k in self._entities}
info_dict.update(episode=(self.num_timesteps + env_idx)) info_dict.update(episode=(self.num_timesteps + env_idx))
self._recorder_dict[env_idx].append(info_dict) self._recorder_dict[env_idx].append(info_dict)
else: else:
@ -51,14 +71,27 @@ class RecorderCallback(BaseCallback):
if self.do_record and self.started: if self.do_record and self.started:
# self.out_file.unlink(missing_ok=True) # self.out_file.unlink(missing_ok=True)
with self.filepath.open('w') as f: with self.filepath.open('w') as f:
out_dict = {'episodes': self._recorder_out_list} out_dict = {'episodes': self._recorder_out_list, 'header': self._env_params}
try: try:
simplejson.dump(out_dict, f, indent=4) simplejson.dump(out_dict, f, indent=4)
except TypeError: except TypeError:
print('Shit') print('Shit')
if self.occupation_map: if self.occupation_map:
print('Recorder files were dumped to disk, now plotting the occupation map...') a = np.zeros((15, 15))
for episode in out_dict['episodes']:
df = pd.DataFrame([y for x in episode['steps'] for y in x['Agents']])
b = list(df[['x', 'y']].to_records(index=False))
np.add.at(a, tuple(zip(*b)), 1)
# a = np.rot90(a)
import seaborn as sns
from matplotlib import pyplot as plt
hm = sns.heatmap(data=a)
hm.set_title('Very Nice Heatmap')
plt.show()
if self.trajectory_map: if self.trajectory_map:
print('Recorder files were dumped to disk, now plotting the occupation map...') print('Recorder files were dumped to disk, now plotting the occupation map...')

View File

@ -19,6 +19,8 @@ if __name__ == '__main__':
model_name = 'A2C_ItsDirt' model_name = 'A2C_ItsDirt'
run_id = 0 run_id = 0
determin = True determin = True
render=False
record = True
seed = 67 seed = 67
n_agents = 1 n_agents = 1
out_path = Path('study_out/e_1_Now_with_doors/no_obs/dirt/A2C_Now_with_doors/0_A2C_Now_with_doors') out_path = Path('study_out/e_1_Now_with_doors/no_obs/dirt/A2C_Now_with_doors/0_A2C_Now_with_doors')
@ -31,19 +33,21 @@ if __name__ == '__main__':
env_kwargs['dirt_prop']['max_spawn_amount'] = gain_amount env_kwargs['dirt_prop']['max_spawn_amount'] = gain_amount
del env_kwargs['dirt_prop']['gain_amount'] del env_kwargs['dirt_prop']['gain_amount']
env_kwargs.update(record_episodes=False) env_kwargs.update(record_episodes=record)
this_model = out_path / 'model.zip' this_model = out_path / 'model.zip'
model_cls = next(val for key, val in h.MODEL_MAP.items() if key in model_name) model_cls = next(val for key, val in h.MODEL_MAP.items() if key in model_name)
models = [model_cls.load(this_model) for _ in range(n_agents)] models = [model_cls.load(this_model) for _ in range(n_agents)]
with RecorderCallback(filepath=Path() / 'recorder_out_DQN.json') as recorder: with RecorderCallback(filepath=Path() / 'recorder_out_DQN.json', occupation_map=True,
entities=['Agents']) as recorder:
# Init Env # Init Env
with DirtFactory(**env_kwargs) as env: with DirtFactory(**env_kwargs) as env:
obs_shape = env.observation_space.shape obs_shape = env.observation_space.shape
# Evaluation Loop for i in range(n Episodes) # Evaluation Loop for i in range(n Episodes)
for episode in range(5): recorder.read_params(env.params)
for episode in range(200):
env_state = env.reset() env_state = env.reset()
rew, done_bool = 0, False rew, done_bool = 0, False
while not done_bool: while not done_bool:
@ -53,6 +57,7 @@ if __name__ == '__main__':
deterministic=determin)[0] for j, model in enumerate(models)] deterministic=determin)[0] for j, model in enumerate(models)]
else: else:
actions = models[0].predict(env_state, deterministic=determin)[0] actions = models[0].predict(env_state, deterministic=determin)[0]
if False:
if any([agent.pos in [door.pos for door in env.unwrapped[c.DOORS]] if any([agent.pos in [door.pos for door in env.unwrapped[c.DOORS]]
for agent in env.unwrapped[c.AGENT]]): for agent in env.unwrapped[c.AGENT]]):
print('On Door') print('On Door')
@ -60,6 +65,7 @@ if __name__ == '__main__':
recorder.read_info(0, info_obj) recorder.read_info(0, info_obj)
rew += step_r rew += step_r
if render:
env.render() env.render()
if done_bool: if done_bool:
recorder.read_done(0, done_bool) recorder.read_done(0, done_bool)

View File

@ -66,8 +66,8 @@ There are further distinctions to be made:
""" """
n_agents = 4 n_agents = 4
ood_monitor_file = f'e_1_monitor_{n_agents}_agents.pick' ood_monitor_file = f'e_1_{n_agents}_agents'
baseline_monitor_file = 'e_1_baseline_monitor.pick' baseline_monitor_file = 'e_1_baseline'
def policy_model_kwargs(): def policy_model_kwargs():
@ -103,7 +103,7 @@ def load_model_run_baseline(seed_path, env_to_run):
env_kwargs = simplejson.load(f) env_kwargs = simplejson.load(f)
env_kwargs.update(done_at_collision=True) env_kwargs.update(done_at_collision=True)
# Monitor Init # Monitor Init
with MonitorCallback(filepath=seed_path / baseline_monitor_file) as monitor: with MonitorCallback(filepath=seed_path / f'{baseline_monitor_file}.pick') as monitor:
# Init Env # Init Env
with env_to_run(**env_kwargs) as env_factory: with env_to_run(**env_kwargs) as env_factory:
# Evaluation Loop for i in range(n Episodes) # Evaluation Loop for i in range(n Episodes)
@ -139,7 +139,7 @@ def load_model_run_study(seed_path, env_to_run, additional_kwargs_dict):
done_at_collision=True, done_at_collision=True,
**additional_kwargs_dict.get('post_training_kwargs', {})) **additional_kwargs_dict.get('post_training_kwargs', {}))
# Monitor Init # Monitor Init
with MonitorCallback(filepath=seed_path / ood_monitor_file) as monitor: with MonitorCallback(filepath=seed_path / f'{ood_monitor_file}.pick') as monitor:
# Init Env # Init Env
with env_to_run(**env_kwargs) as env_factory: with env_to_run(**env_kwargs) as env_factory:
# Evaluation Loop for i in range(n Episodes) # Evaluation Loop for i in range(n Episodes)
@ -172,7 +172,7 @@ def load_model_run_study(seed_path, env_to_run, additional_kwargs_dict):
def start_mp_study_run(envs_map, policies_path): def start_mp_study_run(envs_map, policies_path):
paths = list(y for y in policies_path.iterdir() if y.is_dir() and not (y / ood_monitor_file).exists()) paths = list(y for y in policies_path.iterdir() if y.is_dir() and not (y / f'{ood_monitor_file}.pick').exists())
if paths: if paths:
import multiprocessing as mp import multiprocessing as mp
pool = mp.Pool(mp.cpu_count()) pool = mp.Pool(mp.cpu_count())
@ -185,7 +185,8 @@ def start_mp_study_run(envs_map, policies_path):
def start_mp_baseline_run(envs_map, policies_path): def start_mp_baseline_run(envs_map, policies_path):
paths = list(y for y in policies_path.iterdir() if y.is_dir() and not (y / baseline_monitor_file).exists()) paths = list(y for y in policies_path.iterdir() if y.is_dir() and
not (y / f'{baseline_monitor_file}.pick').exists())
if paths: if paths:
import multiprocessing as mp import multiprocessing as mp
pool = mp.Pool(mp.cpu_count()) pool = mp.Pool(mp.cpu_count())
@ -197,11 +198,17 @@ def start_mp_baseline_run(envs_map, policies_path):
if __name__ == '__main__': if __name__ == '__main__':
# What to do:
train = True
baseline_run = True
ood_run = True
plotting = True
train_steps = 5e6 train_steps = 5e6
n_seeds = 3 n_seeds = 3
# Define a global study save path # Define a global study save path
start_time = 'Now_with_doors' # int(time.time()) start_time = 'exploring_obs_stack' # int(time.time())
study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}' study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}'
# Define Global Env Parameters # Define Global Env Parameters
@ -209,7 +216,7 @@ if __name__ == '__main__':
obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT, obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT,
omit_agent_self=True, omit_agent_self=True,
additional_agent_placeholder=None, additional_agent_placeholder=None,
frames_to_stack=3, frames_to_stack=6,
pomdp_r=2 pomdp_r=2
) )
move_props = MovementProperties(allow_diagonal_movement=True, move_props = MovementProperties(allow_diagonal_movement=True,
@ -327,7 +334,7 @@ if __name__ == '__main__':
# Train starts here ############################################################ # Train starts here ############################################################
# Build Major Loop parameters, parameter versions, Env Classes and models # Build Major Loop parameters, parameter versions, Env Classes and models
if False: if train:
for obs_mode in observation_modes.keys(): for obs_mode in observation_modes.keys():
for env_name in env_names: for env_name in env_names:
for model_cls in [h.MODEL_MAP['A2C']]: for model_cls in [h.MODEL_MAP['A2C']]:
@ -417,7 +424,7 @@ if __name__ == '__main__':
# Evaluation starts here ##################################################### # Evaluation starts here #####################################################
# First Iterate over every model and monitor "as trained" # First Iterate over every model and monitor "as trained"
if True: if baseline_run:
print('Start Baseline Tracking') print('Start Baseline Tracking')
for obs_mode in observation_modes: for obs_mode in observation_modes:
obs_mode_path = next(x for x in study_root_path.iterdir() if x.is_dir() and x.name == obs_mode) obs_mode_path = next(x for x in study_root_path.iterdir() if x.is_dir() and x.name == obs_mode)
@ -432,7 +439,7 @@ if __name__ == '__main__':
print('Baseline Tracking done') print('Baseline Tracking done')
# Then iterate over every model and monitor "ood behavior" - "is it ood?" # Then iterate over every model and monitor "ood behavior" - "is it ood?"
if True: if ood_run:
print('Start OOD Tracking') print('Start OOD Tracking')
for obs_mode in observation_modes: for obs_mode in observation_modes:
obs_mode_path = next(x for x in study_root_path.iterdir() if x.is_dir() and x.name == obs_mode) obs_mode_path = next(x for x in study_root_path.iterdir() if x.is_dir() and x.name == obs_mode)
@ -449,17 +456,18 @@ if __name__ == '__main__':
print('OOD Tracking Done') print('OOD Tracking Done')
# Plotting # Plotting
if True: if plotting:
# TODO: Plotting # TODO: Plotting
print('Start Plotting') print('Start Plotting')
for observation_folder in (x for x in study_root_path.iterdir() if x.is_dir()):
df_list = list() df_list = list()
for observation_folder in (x for x in study_root_path.iterdir() if x.is_dir()):
for env_folder in (x for x in observation_folder.iterdir() if x.is_dir()): for env_folder in (x for x in observation_folder.iterdir() if x.is_dir()):
for model_folder in (x for x in env_folder.iterdir() if x.is_dir()): for model_folder in (x for x in env_folder.iterdir() if x.is_dir()):
# Gather per seed results in this list # Gather per seed results in this list
for seed_folder in (x for x in model_folder.iterdir() if x.is_dir()): for seed_folder in (x for x in model_folder.iterdir() if x.is_dir()):
for monitor_file in [baseline_monitor_file, ood_monitor_file]: for monitor_file in [f'{baseline_monitor_file}.pick', f'{ood_monitor_file}.pick']:
with (seed_folder / monitor_file).open('rb') as f: with (seed_folder / monitor_file).open('rb') as f:
monitor_df = pickle.load(f) monitor_df = pickle.load(f)
@ -480,7 +488,7 @@ if __name__ == '__main__':
df = pd.concat(df_list, ignore_index=True) df = pd.concat(df_list, ignore_index=True)
df = df.fillna(0) df = df.fillna(0)
for env_name in env_names:
for id_col in id_cols: for id_col in id_cols:
df[id_col] = df[id_col].astype(str) df[id_col] = df[id_col].astype(str)
@ -509,14 +517,14 @@ if __name__ == '__main__':
# Plotting # Plotting
# fig, ax = plt.subplots(figsize=(11.7, 8.27)) # fig, ax = plt.subplots(figsize=(11.7, 8.27))
c = sns.catplot(data=df_melted[df_melted['obs_mode'] == observation_folder.name], c = sns.catplot(data=df_melted[df_melted['env'] == env_name],
x='Measurement', hue='monitor', row='model', col='env', y='Score', x='Measurement', hue='monitor', row='model', col='obs_mode', y='Score',
sharey=False, kind="box", height=4, aspect=.7, legend_out=False, legend=False, sharey=True, kind="box", height=4, aspect=.7, legend_out=False, legend=False,
showfliers=False) showfliers=False)
c.set_xticklabels(rotation=65, horizontalalignment='right') c.set_xticklabels(rotation=65, horizontalalignment='right')
# c.fig.subplots_adjust(top=0.9) # adjust the Figure in rp # c.fig.subplots_adjust(top=0.9) # adjust the Figure in rp
c.fig.suptitle(f"Cat plot for {observation_folder.name}") c.fig.suptitle(f"Cat plot for {env_name}")
# plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.) # plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.tight_layout() plt.tight_layout()
plt.savefig(study_root_path / f'results_{n_agents}_agents_{observation_folder.name}.png') plt.savefig(study_root_path / f'results_{n_agents}_agents_{env_name}.png')
pass pass