new dirt paradigm -> clean everything up

Steffen Illium
2021-10-27 18:47:57 +02:00
parent 35eae72a8d
commit b5c6105b7b
9 changed files with 210 additions and 114 deletions

View File

@@ -1,5 +1,6 @@
 import abc
 import time
+from collections import defaultdict
 from enum import Enum
 from pathlib import Path
 from typing import List, Union, Iterable, Dict
@@ -230,8 +231,9 @@ class BaseFactory(gym.Env):
                 del this_collisions[i]
             guest.temp_collisions = this_collisions

-        if self.done_at_collision and tiles_with_collisions:
-            done = True
+        done = self.done_at_collision and tiles_with_collisions
+        done = done or self.check_additional_done()

         # Step the door close intervall
         if self.parse_doors:
@@ -440,48 +442,61 @@ class BaseFactory(gym.Env):
     def calculate_reward(self) -> (int, dict):
         # Returns: Reward, Info
-        info_dict = dict()
+        per_agent_info_dict = defaultdict(dict)
         reward = 0
         for agent in self[c.AGENT]:
             if self._actions.is_moving_action(agent.temp_action):
                 if agent.temp_valid:
                     # info_dict.update(movement=1)
+                    # info_dict.update({f'{agent.name}_failed_action': 1})
                     # reward += 0.00
                     pass
                 else:
+                    # self.print('collision')
                     reward -= 0.01
                     self.print(f'{agent.name} just hit the wall at {agent.pos}.')
-                    info_dict.update({f'{agent.name}_vs_LEVEL': 1})
+                    per_agent_info_dict[agent.name].update({f'{agent.name}_vs_LEVEL': 1})

             elif h.EnvActions.USE_DOOR == agent.temp_action:
                 if agent.temp_valid:
                     # reward += 0.00
                     self.print(f'{agent.name} did just use the door at {agent.pos}.')
-                    info_dict.update(door_used=1)
+                    per_agent_info_dict[agent.name].update(door_used=1)
                 else:
                     # reward -= 0.00
                     self.print(f'{agent.name} just tried to use a door at {agent.pos}, but failed.')
-                    info_dict.update({f'{agent.name}_failed_action': 1})
-                    info_dict.update({f'{agent.name}_failed_door_open': 1})
+                    per_agent_info_dict[agent.name].update({f'{agent.name}_failed_door_open': 1})
             elif h.EnvActions.NOOP == agent.temp_action:
-                info_dict.update(no_op=1)
+                per_agent_info_dict[agent.name].update(no_op=1)
                 # reward -= 0.00

+            # Monitor Notes
+            if agent.temp_valid:
+                per_agent_info_dict[agent.name].update(valid_action=1)
+                per_agent_info_dict[agent.name].update({f'{agent.name}_valid_action': 1})
+            else:
+                per_agent_info_dict[agent.name].update(failed_action=1)
+                per_agent_info_dict[agent.name].update({f'{agent.name}_failed_action': 1})
+
             additional_reward, additional_info_dict = self.calculate_additional_reward(agent)
             reward += additional_reward
-            info_dict.update(additional_info_dict)
+            per_agent_info_dict[agent.name].update(additional_info_dict)

             if agent.temp_collisions:
                 self.print(f't = {self._steps}\t{agent.name} has collisions with {agent.temp_collisions}')
+                per_agent_info_dict[agent.name].update(collisions=1)

                 for other_agent in agent.temp_collisions:
-                    info_dict.update({f'{agent.name}_vs_{other_agent.name}': 1})
+                    per_agent_info_dict[agent.name].update({f'{agent.name}_vs_{other_agent.name}': 1})
+
+        # Combine the per_agent_info_dict:
+        combined_info_dict = defaultdict(lambda: 0)
+        for info_dict in per_agent_info_dict.values():
+            for key, value in info_dict.items():
+                combined_info_dict[key] += value
+        combined_info_dict = dict(combined_info_dict)

         self.print(f"reward is {reward}")
-        return reward, info_dict
+        return reward, combined_info_dict

     def render(self, mode='human'):
         if not self._renderer:  # lazy init
@@ -565,6 +580,10 @@ class BaseFactory(gym.Env):
     def do_additional_actions(self, agent: Agent, action: Action) -> Union[None, c]:
         return None

+    @abc.abstractmethod
+    def check_additional_done(self) -> bool:
+        return False
+
     @abc.abstractmethod
     def calculate_additional_reward(self, agent: Agent) -> (int, dict):
         return 0, {}
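Note: the reworked calculate_reward first collects counters per agent and only flattens them into one gym-style info dict at the end. A minimal, self-contained sketch of that aggregation pattern (the dict contents here are made up for illustration):

from collections import defaultdict

# Illustrative per-agent counters, mirroring per_agent_info_dict above.
per_agent_info = {
    'Agent[0]': {'door_used': 1, 'Agent[0]_vs_LEVEL': 1},
    'Agent[1]': {'door_used': 1},
}

# Flatten by summing values over agents, exactly the combine step above.
combined = defaultdict(lambda: 0)
for info in per_agent_info.values():
    for key, value in info.items():
        combined[key] += value
combined = dict(combined)  # plain dict for the info return
print(combined)  # {'door_used': 2, 'Agent[0]_vs_LEVEL': 1}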

View File

@@ -20,14 +20,17 @@ CLEAN_UP_ACTION = h.EnvActions.CLEAN_UP

 class DirtProperties(NamedTuple):
-    clean_amount: int = 1               # How much does the robot clean with one actions.
-    max_spawn_ratio: float = 0.2        # On max how much tiles does the dirt spawn in percent.
-    gain_amount: float = 0.3            # How much dirt does spawn per tile.
-    spawn_frequency: int = 5            # Spawn Frequency in Steps.
-    max_local_amount: int = 2           # Max dirt amount per tile.
-    max_global_amount: int = 20         # Max dirt amount in the whole environment.
-    dirt_smear_amount: float = 0.2      # Agents smear dirt, when not cleaning up in place.
-    agent_can_interact: bool = True     # Whether the agents can interact with the dirt in this environment.
+    initial_dirt_ratio: float = 0.3         # On INIT, on max how much tiles does the dirt spawn in percent.
+    initial_dirt_spawn_r_var: float = 0.05  # How much does the dirt spawn amount vary?
+    clean_amount: float = 1                 # How much does the robot clean with one actions.
+    max_spawn_ratio: float = 0.20           # On max how much tiles does the dirt spawn in percent.
+    max_spawn_amount: float = 0.3           # How much dirt does spawn per tile at max.
+    spawn_frequency: int = 0                # Spawn Frequency in Steps.
+    max_local_amount: int = 2               # Max dirt amount per tile.
+    max_global_amount: int = 20             # Max dirt amount in the whole environment.
+    dirt_smear_amount: float = 0.2          # Agents smear dirt, when not cleaning up in place.
+    agent_can_interact: bool = True         # Whether the agents can interact with the dirt in this environment.
+    done_when_clean = True


 class Dirt(Entity):
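Note: callers now configure the spawn model by keyword. A hedged sketch of a construction; the trimmed stand-in class below only re-declares a few fields so the snippet runs on its own, it is not the full NamedTuple above:

from typing import NamedTuple

class DirtProperties(NamedTuple):  # trimmed stand-in for the class above
    initial_dirt_ratio: float = 0.3
    initial_dirt_spawn_r_var: float = 0.05
    clean_amount: float = 1
    max_spawn_ratio: float = 0.20
    max_spawn_amount: float = 0.3
    spawn_frequency: int = 0
    done_when_clean: bool = True

# spawn_frequency=0 disables periodic respawn entirely (see the step logic below).
props = DirtProperties(initial_dirt_ratio=0.35, initial_dirt_spawn_r_var=0.1, spawn_frequency=0)
assert props.done_when_clean  # the episode may end once all dirt is cleaned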
@@ -91,10 +94,10 @@ class DirtRegister(MovingEntityObjectRegister):
             if not self.amount > self.dirt_properties.max_global_amount:
                 dirt = self.by_pos(tile.pos)
                 if dirt is None:
-                    dirt = Dirt(tile, amount=self.dirt_properties.gain_amount)
+                    dirt = Dirt(tile, amount=self.dirt_properties.max_spawn_amount)
                     self.register_item(dirt)
                 else:
-                    new_value = dirt.amount + self.dirt_properties.gain_amount
+                    new_value = dirt.amount + self.dirt_properties.max_spawn_amount
                     dirt.set_new_amount(min(new_value, self.dirt_properties.max_local_amount))
             else:
                 return c.NOT_VALID
@@ -160,12 +163,17 @@ class DirtFactory(BaseFactory):
         else:
             return c.NOT_VALID

-    def trigger_dirt_spawn(self):
+    def trigger_dirt_spawn(self, initial_spawn=False):
+        dirt_rng = self._dirt_rng
         free_for_dirt = [x for x in self[c.FLOOR]
                          if len(x.guests) == 0 or (len(x.guests) == 1 and isinstance(next(y for y in x.guests), Dirt))
                          ]
         self._dirt_rng.shuffle(free_for_dirt)
-        new_spawn = self._dirt_rng.uniform(0, self.dirt_properties.max_spawn_ratio)
+        if initial_spawn:
+            var = self.dirt_properties.initial_dirt_spawn_r_var
+            new_spawn = self.dirt_properties.initial_dirt_ratio + dirt_rng.uniform(-var, var)
+        else:
+            new_spawn = dirt_rng.uniform(0, self.dirt_properties.max_spawn_ratio)
         n_dirt_tiles = max(0, int(new_spawn * len(free_for_dirt)))
         self[c.DIRT].spawn_dirt(free_for_dirt[:n_dirt_tiles])
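Note: trigger_dirt_spawn now distinguishes the initial spawn (coverage drawn around initial_dirt_ratio) from periodic respawns (uniform up to max_spawn_ratio). A quick standalone sketch of the two draws; numpy's generator stands in for self._dirt_rng:

import numpy as np

rng = np.random.default_rng(0)       # stand-in for self._dirt_rng
initial_dirt_ratio, var = 0.3, 0.05  # DirtProperties defaults above
max_spawn_ratio = 0.20
n_free_tiles = 100                   # assume 100 tiles are free for dirt

new_spawn = initial_dirt_ratio + rng.uniform(-var, var)  # initial spawn: in [0.25, 0.35]
respawn = rng.uniform(0, max_spawn_ratio)                # periodic respawn: in [0.0, 0.20]

n_dirt_tiles = max(0, int(new_spawn * n_free_tiles))     # here: between 25 and 35 tiles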
@@ -184,8 +192,9 @@ class DirtFactory(BaseFactory):
                     if self[c.DIRT].spawn_dirt(agent.tile):
                         new_pos_dirt = self[c.DIRT].by_pos(agent.pos)
                         new_pos_dirt.set_new_amount(max(0, new_pos_dirt.amount + smeared_dirt))
-        if not self._next_dirt_spawn:
+        if self._next_dirt_spawn < 0:
+            pass  # No Dirt Spawn
+        elif not self._next_dirt_spawn:
             self.trigger_dirt_spawn()
             self._next_dirt_spawn = self.dirt_properties.spawn_frequency
         else:
@@ -208,8 +217,13 @@ class DirtFactory(BaseFactory):
     def do_additional_reset(self) -> None:
         super().do_additional_reset()
-        self.trigger_dirt_spawn()
-        self._next_dirt_spawn = self.dirt_properties.spawn_frequency
+        self.trigger_dirt_spawn(initial_spawn=True)
+        self._next_dirt_spawn = self.dirt_properties.spawn_frequency if self.dirt_properties.spawn_frequency else -1
+
+    def check_additional_done(self):
+        super_done = super().check_additional_done()
+        done = self.dirt_properties.done_when_clean and (len(self[c.DIRT]) == 0)
+        return super_done or done

     def calculate_additional_reward(self, agent: Agent) -> (int, dict):
         reward, info_dict = super().calculate_additional_reward(agent)
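Note: check_additional_done is the hook added to BaseFactory in the first file; DirtFactory ORs its "everything clean" condition into the base result. The inheritance pattern in isolation (class names here are illustrative, not the factory API):

import abc

class Base(abc.ABC):
    # Mirrors BaseFactory.check_additional_done: the base contributes no condition.
    def check_additional_done(self) -> bool:
        return False

class CleanableEnv(Base):
    def __init__(self, dirt_left: int, done_when_clean: bool = True):
        self.dirt_left = dirt_left
        self.done_when_clean = done_when_clean

    # Mirrors DirtFactory: the episode may also end once all dirt is gone.
    def check_additional_done(self) -> bool:
        super_done = super().check_additional_done()
        return super_done or (self.done_when_clean and self.dirt_left == 0)

assert CleanableEnv(dirt_left=0).check_additional_done() is True
assert CleanableEnv(dirt_left=3).check_additional_done() is False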
@@ -233,9 +247,8 @@ class DirtFactory(BaseFactory):
             else:
                 reward -= 0.01
                 self.print(f'{agent.name} just tried to clean up some dirt at {agent.pos}, but failed.')
-                info_dict.update({f'{agent.name}_failed_action': 1})
-                info_dict.update({f'{agent.name}_failed_action': 1})
                 info_dict.update({f'{agent.name}_failed_dirt_cleanup': 1})
+                info_dict.update(failed_dirt_clean=1)

         # Potential based rewards ->
         # track the last reward , minus the current reward = potential
@@ -243,12 +256,12 @@

 if __name__ == '__main__':
-    render = False
+    render = True

     dirt_props = DirtProperties(1, 0.05, 0.1, 3, 1, 20, 0.0)
     move_props = {'allow_square_movement': True,
                   'allow_diagonal_movement': False,
                   'allow_no_op': False}  # MovementProperties(True, True, False)

     with RecorderCallback(filepath=Path('debug_out') / f'recorder_xxxx.json', occupation_map=False,
                           trajectory_map=False) as recorder:
@@ -272,12 +285,12 @@ if __name__ == '__main__':
             r = 0
             for agent_i_action in random_actions:
                 env_state, step_r, done_bool, info_obj = factory.step(agent_i_action)
-                recorder.read_info(0, info_obj)
+                # recorder.read_info(0, info_obj)
                 r += step_r
                 if render:
                     factory.render()
                 if done_bool:
-                    recorder.read_done(0, done_bool)
+                    # recorder.read_done(0, done_bool)
                     break
             print(f'Factory run {epoch} done, reward is:\n    {r}')
     pass

View File

@@ -25,7 +25,7 @@ if __name__ == '__main__':
                                         allow_square_movement=True,
                                         allow_no_op=False)
-    render = False
+    render = True

     factory = DirtItemFactory(n_agents=1, done_at_collision=False, frames_to_stack=0,
                               level_name='rooms', max_steps=200, combin_agent_obs=True,
@@ -46,12 +46,12 @@ if __name__ == '__main__':
             r = 0
             for agent_i_action in random_actions:
                 env_state, step_r, done_bool, info_obj = factory.step(agent_i_action)
-                recorder.read_info(0, info_obj)
+                # recorder.read_info(0, info_obj)
                 r += step_r
                 if render:
                     factory.render()
                 if done_bool:
-                    recorder.read_done(0, done_bool)
+                    # recorder.read_done(0, done_bool)
                     break
             print(f'Factory run {epoch} done, reward is:\n    {r}')
     pass

View File

@@ -318,17 +318,26 @@ class ItemFactory(BaseFactory):
         if h.EnvActions.ITEM_ACTION == agent.temp_action:
             if agent.temp_valid:
                 if drop_off := self[c.DROP_OFF].by_pos(agent.pos):
-                    info_dict.update({f'{agent.name}_item_dropoff': 1})
+                    info_dict.update({f'{agent.name}_item_drop_off': 1})
+                    info_dict.update(item_drop_off=1)
                     self.print(f'{agent.name} just dropped of an item at {drop_off.pos}.')
                     reward += 0.5
                 else:
                     info_dict.update({f'{agent.name}_item_pickup': 1})
+                    info_dict.update(item_pickup=1)
                     self.print(f'{agent.name} just picked up an item at {agent.pos}')
                     reward += 0.1
             else:
-                info_dict.update({f'{agent.name}_failed_item_action': 1})
-                self.print(f'{agent.name} just tried to pick up an item at {agent.pos}, but failed.')
-                reward -= 0.1
+                if self[c.DROP_OFF].by_pos(agent.pos):
+                    info_dict.update({f'{agent.name}_failed_drop_off': 1})
+                    info_dict.update(failed_drop_off=1)
+                    self.print(f'{agent.name} just tried to drop off at {agent.pos}, but failed.')
+                    reward -= 0.1
+                else:
+                    info_dict.update({f'{agent.name}_failed_item_action': 1})
+                    info_dict.update(failed_pick_up=1)
+                    self.print(f'{agent.name} just tried to pick up an item at {agent.pos}, but failed.')
+                    reward -= 0.1
         return reward, info_dict

     def render_additional_assets(self, mode='human'):
@@ -343,7 +352,7 @@ class ItemFactory(BaseFactory):

 if __name__ == '__main__':
     import random
-    render = False
+    render = True

     item_props = ItemProperties()

View File

@@ -33,7 +33,8 @@ class RecorderCallback(BaseCallback):
     def read_done(self, env_idx, done):
         if done:
-            self._recorder_out_list.append({'steps': self._recorder_dict[env_idx]})
+            self._recorder_out_list.append({'steps': self._recorder_dict[env_idx],
+                                            'episode': len(self._recorder_out_list)})
             self._recorder_dict[env_idx] = list()
         else:
             pass

View File

@@ -10,7 +10,7 @@ from environments.helpers import IGNORED_DF_COLUMNS, MODEL_MAP
 from plotting.plotting import prepare_plot


-def compare_seed_runs(run_path: Union[str, PathLike]):
+def compare_seed_runs(run_path: Union[str, PathLike], use_tex: bool = False):
     run_path = Path(run_path)
     df_list = list()
     for run, monitor_file in enumerate(run_path.rglob('monitor*.pick')):
@@ -37,11 +37,12 @@ def compare_seed_runs(run_path: Union[str, PathLike]):
         skip_n = round(df_melted['Episode'].max() * 0.02)
         df_melted = df_melted[df_melted['Episode'] % skip_n == 0]

-    prepare_plot(run_path / f'{run_path.name}_monitor_lineplot.png', df_melted)
+    prepare_plot(run_path / f'{run_path.name}_monitor_lineplot.png', df_melted, use_tex=use_tex)
     print('Plotting done.')


-def compare_model_runs(run_path: Path, run_identifier: Union[str, int], parameter: Union[str, List[str]]):
+def compare_model_runs(run_path: Path, run_identifier: Union[str, int], parameter: Union[str, List[str]],
+                       use_tex: bool = False):
     run_path = Path(run_path)
     df_list = list()
     parameter = [parameter] if isinstance(parameter, str) else parameter
@@ -75,12 +76,13 @@ def compare_model_runs(run_path: Path, run_identifier: Union[str, int], paramete
         df_melted = df_melted[df_melted['Episode'] % skip_n == 0]

     style = 'Measurement' if len(columns) > 1 else None
-    prepare_plot(run_path / f'{run_identifier}_compare_{parameter}.png', df_melted, hue='Model', style=style)
+    prepare_plot(run_path / f'{run_identifier}_compare_{parameter}.png', df_melted, hue='Model', style=style,
+                 use_tex=use_tex)
     print('Plotting done.')


 def compare_all_parameter_runs(run_root_path: Path, parameter: Union[str, List[str]],
-                               param_names: Union[List[str], None] = None, str_to_ignore=''):
+                               param_names: Union[List[str], None] = None, str_to_ignore='', use_tex: bool = False):
     run_root_path = Path(run_root_path)
     df_list = list()
     parameter = [parameter] if isinstance(parameter, str) else parameter
@@ -151,5 +153,6 @@ def compare_all_parameter_runs(run_root_path: Path, parameter: Union[str, List[s
                                                value_name="Score")

     style = 'Measurement' if len(columns) > 1 else None
-    prepare_plot(run_root_path / f'compare_{parameter}.png', df_melted, hue='Parameter Combination', style=style)
+    prepare_plot(run_root_path / f'compare_{parameter}.png', df_melted, hue='Parameter Combination',
+                 style=style, use_tex=use_tex)
     print('Plotting done.')
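Note: all three comparison helpers now accept use_tex and forward it to prepare_plot. A hedged usage sketch, assuming the helpers are imported from this module; the run folders below are placeholders, not real paths:

from pathlib import Path

compare_seed_runs(Path('study_out') / 'some_combination', use_tex=False)
compare_model_runs(Path('study_out') / 'some_env_folder', run_identifier=1635176929,
                   parameter='step_reward', use_tex=False)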

View File

@@ -26,21 +26,35 @@ def plot(filepath, ext='png'):
     plt.clf()


-def prepare_plot(filepath, results_df, ext='png', hue='Measurement', style=None):
+def prepare_tex(df, hue, style, hue_order):
+    sns.set(rc={'text.usetex': True}, style='whitegrid')
+    lineplot = sns.lineplot(data=df, x='Episode', y='Score', ci=95, palette=PALETTE,
+                            hue_order=hue_order, hue=hue, style=style)
+    # lineplot.set_title(f'{sorted(list(df["Measurement"].unique()))}')
+    return lineplot
+
+
+def prepare_plt(df, hue, style, hue_order):
+    print('Struggling to plot Figure using LaTeX - going back to normal.')
+    plt.close('all')
+    sns.set(rc={'text.usetex': False}, style='whitegrid')
+    lineplot = sns.lineplot(data=df, x='Episode', y='Score', hue=hue, style=style,
+                            ci=95, palette=PALETTE, hue_order=hue_order)
+    # lineplot.set_title(f'{sorted(list(df["Measurement"].unique()))}')
+    return lineplot
+
+
+def prepare_plot(filepath, results_df, ext='png', hue='Measurement', style=None, use_tex=False):
     df = results_df.copy()
     df[hue] = df[hue].str.replace('_', '-')
     hue_order = sorted(list(df[hue].unique()))
-    try:
-        sns.set(rc={'text.usetex': True}, style='whitegrid')
-        lineplot = sns.lineplot(data=df, x='Episode', y='Score', ci=95, palette=PALETTE,
-                                hue_order=hue_order, hue=hue, style=style)
-        # lineplot.set_title(f'{sorted(list(df["Measurement"].unique()))}')
-        plot(filepath, ext=ext)  # plot raises errors not lineplot!
-    except (FileNotFoundError, RuntimeError):
-        print('Struggling to plot Figure using LaTeX - going back to normal.')
-        plt.close('all')
-        sns.set(rc={'text.usetex': False}, style='whitegrid')
-        lineplot = sns.lineplot(data=df, x='Episode', y='Score', hue=hue, style=style,
-                                ci=95, palette=PALETTE, hue_order=hue_order)
-        # lineplot.set_title(f'{sorted(list(df["Measurement"].unique()))}')
-        plot(filepath, ext=ext)
+    if use_tex:
+        try:
+            _ = prepare_tex(df, hue, style, hue_order)
+            plot(filepath, ext=ext)  # plot raises errors not lineplot!
+        except (FileNotFoundError, RuntimeError):
+            _ = prepare_plt(df, hue, style, hue_order)
+            plot(filepath, ext=ext)
+    else:
+        _ = prepare_plt(df, hue, style, hue_order)
+        plot(filepath, ext=ext)
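Note: prepare_plot now only attempts the LaTeX backend when use_tex=True and falls back to prepare_plt on FileNotFoundError/RuntimeError (typically a missing LaTeX install). A sketch of a call with the long-form frame the lineplot expects; the data values are made up:

import pandas as pd
from plotting.plotting import prepare_plot  # import path as used in compare_runs above

df = pd.DataFrame({'Episode': [0, 1, 2, 0, 1, 2],
                   'Score': [0.1, 0.4, 0.5, 0.2, 0.3, 0.6],
                   'Measurement': ['step_reward'] * 3 + ['dirt_cleaned'] * 3})
prepare_plot('lineplot.png', df)                    # default: plain matplotlib, no LaTeX involved
prepare_plot('lineplot_tex.png', df, use_tex=True)  # tries LaTeX first, falls back on error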

View File

@@ -1,6 +1,7 @@
 import warnings
 from pathlib import Path

+import numpy as np
 import yaml

 from environments import helpers as h
@@ -14,36 +15,42 @@ warnings.filterwarnings('ignore', category=UserWarning)

 if __name__ == '__main__':

-    model_name = 'PPO_1631187073'
+    model_name = 'DQN_1631187073'
     run_id = 0
     seed = 69
-    out_path = Path(__file__).parent / 'study_out' / 'e_1_1631709932' / 'no_obs' / 'dirt' / 'A2C_1631709932' / '0_A2C_1631709932'
-    model_path = out_path / model_name
+    out_path = Path('debug_out/DQN_1635176929/0_DQN_1635176929')
+    model_path = out_path

     with (out_path / f'env_params.json').open('r') as f:
         env_kwargs = yaml.load(f, Loader=yaml.FullLoader)
-        env_kwargs.update(additional_agent_placeholder=None)
-        # env_kwargs.update(verbose=False, env_seed=seed, record_episodes=True, parse_doors=True)
+        env_kwargs.update(additional_agent_placeholder=None, n_agents=4)
+        if gain_amount := env_kwargs.get('dirt_properties', {}).get('gain_amount', None):
+            env_kwargs['dirt_properties']['max_spawn_amount'] = gain_amount
+            del env_kwargs['dirt_properties']['gain_amount']
+        env_kwargs.update(record_episodes=True)

     this_model = out_path / 'model.zip'

     model_cls = next(val for key, val in h.MODEL_MAP.items() if key in model_name)
-    model = model_cls.load(this_model)
+    models = [model_cls.load(this_model) for _ in range(4)]

-    with RecorderCallback(filepath=Path() / 'recorder_out_doors.json') as recorder:
+    with RecorderCallback(filepath=Path() / 'recorder_out_DQN.json') as recorder:
         # Init Env
-        with DirtFactory(**env_kwargs) as env:
+        with DirtItemFactory(**env_kwargs) as env:
             obs_shape = env.observation_space.shape
             # Evaluation Loop for i in range(n Episodes)
             for episode in range(5):
-                obs = env.reset()
+                env_state = env.reset()
                 rew, done_bool = 0, False
                 while not done_bool:
-                    action = model.predict(obs, deterministic=False)[0]
-                    env_state, step_r, done_bool, info_obj = env.step(action[0])
+                    actions = [model.predict(
+                        np.stack([env_state[i][j] for i in range(env_state.shape[0])]),
+                        deterministic=True)[0] for j, model in enumerate(models)]
+                    env_state, step_r, done_bool, info_obj = env.step(actions)
                     recorder.read_info(0, info_obj)
                     rew += step_r
-                    env.render()
+                    # env.render()
                     if done_bool:
                         recorder.read_done(0, done_bool)
                         break
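Note: the evaluation script now drives n_agents=4 with one loaded model per agent, and each model j receives only agent j's slice of the stacked observation. The slicing in isolation; the observation layout here ([frame][agent] -> plane) is an assumption for illustration, not confirmed by this diff:

import numpy as np

n_frames, n_agents, h, w = 3, 4, 5, 5
env_state = np.zeros((n_frames, n_agents, h, w))  # assumed layout: env_state[frame][agent]

j = 2  # agent index
obs_j = np.stack([env_state[i][j] for i in range(env_state.shape[0])])
print(obs_j.shape)  # (3, 5, 5): agent j's own frame stack, as fed to models[j].predict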

View File

@@ -33,7 +33,7 @@ import pandas as pd
 import seaborn as sns

 # Define a global studi save path
-start_time = 1634134997  # int(time.time())
+start_time = 163519000  # int(time.time())
 study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}'

 """
@@ -70,7 +70,7 @@ There are further distinctions to be made:

 def policy_model_kwargs():
-    return dict(ent_coef=0.01)
+    return dict(ent_coef=0.05)


 def dqn_model_kwargs():
@@ -93,21 +93,23 @@ def encapsule_env_factory(env_fctry, env_kwrgs):

 if __name__ == '__main__':

-    train_steps = 5e5
+    train_steps = 8e5

     # Define Global Env Parameters
     # Define properties object parameters
     move_props = MovementProperties(allow_diagonal_movement=True,
                                     allow_square_movement=True,
                                     allow_no_op=False)
-    dirt_props = DirtProperties(clean_amount=2, gain_amount=0.1, max_global_amount=20,
-                                max_local_amount=1, spawn_frequency=15, max_spawn_ratio=0.05,
+    dirt_props = DirtProperties(initial_dirt_ratio=0.35, initial_dirt_spawn_r_var=0.1,
+                                clean_amount=0.34,
+                                max_spawn_amount=0.1, max_global_amount=20,
+                                max_local_amount=1, spawn_frequency=0, max_spawn_ratio=0.05,
                                 dirt_smear_amount=0.0, agent_can_interact=True)
     item_props = ItemProperties(n_items=10, agent_can_interact=True,
                                 spawn_frequency=30, n_drop_off_locations=2,
                                 max_agent_inventory_capacity=15)
     factory_kwargs = dict(n_agents=1,
-                          pomdp_r=2, max_steps=400, parse_doors=False,
+                          pomdp_r=2, max_steps=400, parse_doors=True,
                           level_name='rooms', frames_to_stack=3,
                           omit_agent_in_obs=True, combin_agent_obs=True, record_episodes=False,
                           cast_shadows=True, doors_have_area=False, verbose=False,
@@ -124,9 +126,9 @@ if __name__ == '__main__':
     # Define parameter versions according with #1,2[1,0,N],3
     observation_modes = {
         # Fill-value = 0
-        'seperate_0': dict(additional_env_kwargs=dict(additional_agent_placeholder=0)),  # DEACTIVATED
+        'seperate_0': dict(additional_env_kwargs=dict(additional_agent_placeholder=0)),
         # Fill-value = 1
-        'seperate_1': dict(additional_env_kwargs=dict(additional_agent_placeholder=1)),  # DEACTIVATED
+        'seperate_1': dict(additional_env_kwargs=dict(additional_agent_placeholder=1)),
         # Fill-value = N(0, 1)
         'seperate_N': dict(additional_env_kwargs=dict(additional_agent_placeholder='N')),
         # Further Adjustments are done post-training
@@ -137,10 +139,10 @@ if __name__ == '__main__':

     # Train starts here ############################################################
     # Build Major Loop parameters, parameter versions, Env Classes and models
-    if False:
+    if True:
         for observation_mode in observation_modes.keys():
             for env_name in env_names:
-                for model_cls in h.MODEL_MAP.values():
+                for model_cls in [h.MODEL_MAP['A2C'], h.MODEL_MAP['DQN']]:
                     # Create an identifier, which is unique for every combination and easy to read in filesystem
                     identifier = f'{model_cls.__name__}_{start_time}'
                     # Train each combination per seed
@@ -154,6 +156,8 @@ if __name__ == '__main__':
                         env_kwargs.update(env_seed=seed)
                         # Output folder
                         seed_path = combination_path / f'{str(seed)}_{identifier}'
+                        if (seed_path / 'monitor.pick').exists():
+                            continue
                         seed_path.mkdir(parents=True, exist_ok=True)

                         # Monitor Init
@@ -163,7 +167,7 @@ if __name__ == '__main__':
                         if model_cls.__name__ in ["PPO", "A2C"]:
                             # env_factory = env_class(**env_kwargs)
                             env_factory = SubprocVecEnv([encapsule_env_factory(env_class, env_kwargs)
-                                                         for _ in range(1)], start_method="spawn")
+                                                         for _ in range(6)], start_method="spawn")
                             model_kwargs = policy_model_kwargs()

                         elif model_cls.__name__ in ["RegDQN", "DQN", "QRDQN"]:
@@ -197,15 +201,20 @@ if __name__ == '__main__':
                         gc.collect()

                     # Compare performance runs, for each seed within a model
-                    compare_seed_runs(combination_path)
+                    compare_seed_runs(combination_path, use_tex=False)
                     # Better be save then sorry: Clean up!
-                    del model_kwargs, env_kwargs
-                    import gc
-                    gc.collect()
+                    try:
+                        del env_kwargs
+                        del model_kwargs
+                        import gc
+                        gc.collect()
+                    except NameError:
+                        pass

                 # Compare performance runs, for each model
                 # FIXME: Check THIS!!!!
-                compare_model_runs(study_root_path / observation_mode / env_name, f'{start_time}', 'step_reward')
+                compare_model_runs(study_root_path / observation_mode / env_name, f'{start_time}', 'step_reward',
+                                   use_tex=False)
                 pass
             pass
         pass
@@ -215,7 +224,7 @@ if __name__ == '__main__':
     # Evaluation starts here #####################################################
     # First Iterate over every model and monitor "as trained"
     baseline_monitor_file = 'e_1_baseline_monitor.pick'
-    if False:
+    if True:
         render = False
         for observation_mode in observation_modes:
             obs_mode_path = next(x for x in study_root_path.iterdir() if x.is_dir() and x.name == observation_mode)
@@ -312,8 +321,9 @@ if __name__ == '__main__':
     # Plotting
     if True:
         # TODO: Plotting
-        df_list = list()
         for observation_folder in (x for x in study_root_path.iterdir() if x.is_dir()):
+            df_list = list()
             for env_folder in (x for x in observation_folder.iterdir() if x.is_dir()):
                 for model_folder in (x for x in env_folder.iterdir() if x.is_dir()):
                     # Gather per seed results in this list
@@ -334,28 +344,48 @@ if __name__ == '__main__':
                     monitor_df['obs_mode'] = monitor_df['obs_mode'].astype(str)
                     monitor_df['model'] = model_folder.name.split('_')[0]
                     df_list.append(monitor_df)

             id_cols = ['monitor', 'env', 'obs_mode', 'model']
             df = pd.concat(df_list, ignore_index=True)
             df = df.fillna(0)
             for id_col in id_cols:
                 df[id_col] = df[id_col].astype(str)

-            df_grouped = df.groupby(id_cols + ['seed']
-                                    ).agg({key: 'sum' if "Agent" in key else 'mean' for key in df.columns
-                                           if key not in (id_cols + ['seed'])})
-            df_melted = df_grouped.reset_index().melt(id_vars=id_cols,
-                                                      value_vars='step_reward', var_name="Measurement",
-                                                      value_name="Score")
+            if True:
+                # df['fail_sum'] = df.loc[:, df.columns.str.contains("failed")].sum(1)
+                df['pick_up'] = df.loc[:, df.columns.str.contains("]_item_pickup")].sum(1)
+                df['drop_off'] = df.loc[:, df.columns.str.contains("]_item_dropoff")].sum(1)
+                df['failed_item_action'] = df.loc[:, df.columns.str.contains("]_failed_item_action")].sum(1)
+                df['failed_cleanup'] = df.loc[:, df.columns.str.contains("]_failed_dirt_cleanup")].sum(1)
+                df['coll_lvl'] = df.loc[:, df.columns.str.contains("]_vs_LEVEL")].sum(1)
+                df['coll_agent'] = df.loc[:, df.columns.str.contains("]_vs_Agent")].sum(1) / 2
+                # df['collisions'] = df['coll_lvl'] + df['coll_agent']
+
+                value_vars = ['pick_up', 'drop_off', 'failed_item_action', 'failed_cleanup',
+                              'coll_lvl', 'coll_agent', 'dirt_cleaned']
+
+                df_grouped = df.groupby(id_cols + ['seed']
+                                        ).agg({key: 'sum' if "Agent" in key else 'mean' for key in df.columns
+                                               if key not in (id_cols + ['seed'])})
+                df_melted = df_grouped.reset_index().melt(id_vars=id_cols,
+                                                          value_vars=value_vars,  # 'step_reward',
+                                                          var_name="Measurement",
+                                                          value_name="Score")
+                # df_melted["Measurements"] = df_melted["Measurement"] + " " + df_melted["monitor"]

-            c = sns.catplot(data=df_melted, x='obs_mode', hue='monitor', row='model', col='env', y='Score', sharey=False,
-                            kind="box", height=4, aspect=.7, legend_out=True)
-            c.set_xticklabels(rotation=65, horizontalalignment='right')
-            plt.tight_layout(pad=2)
-            plt.savefig(study_root_path / f'results_{n_agents}_agents.png')
-        pass
+                # Plotting
+                fig, ax = plt.subplots(figsize=(11.7, 8.27))
+                c = sns.catplot(data=df_melted[df_melted['obs_mode'] == observation_folder.name],
+                                x='Measurement', hue='monitor', row='model', col='env', y='Score',
+                                sharey=False, kind="box", height=4, aspect=.7, legend_out=True,
+                                showfliers=False)
+                c.set_xticklabels(rotation=65, horizontalalignment='right')
+                c.fig.subplots_adjust(top=0.9)  # adjust the Figure in rp
+                c.fig.suptitle(f"Cat plot for {observation_folder.name}")
+                plt.tight_layout(pad=2)
+                plt.savefig(study_root_path / f'results_{n_agents}_agents_{observation_folder.name}.png')
+            pass
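Note: the new plotting block derives its box-plot measurements by summing every per-agent monitor column whose name matches a substring. A self-contained sketch of that df.columns.str.contains pattern; the column names and values are made up:

import pandas as pd

df = pd.DataFrame({'Agent[0]_item_pickup': [1, 0],
                   'Agent[1]_item_pickup': [2, 1],
                   'Agent[0]_vs_LEVEL': [0, 1]})
# Sum all matching per-agent counters into one summary column, as the study script does.
df['pick_up'] = df.loc[:, df.columns.str.contains("]_item_pickup")].sum(1)
df['coll_lvl'] = df.loc[:, df.columns.str.contains("]_vs_LEVEL")].sum(1)
print(df[['pick_up', 'coll_lvl']])
#    pick_up  coll_lvl
# 0        3         0
# 1        1         1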