diff --git a/environments/policy_adaption/__init__.py b/algorithms/policy_adaption/__init__.py
similarity index 100%
rename from environments/policy_adaption/__init__.py
rename to algorithms/policy_adaption/__init__.py
diff --git a/environments/policy_adaption/natural_rl_environment/__init__.py b/algorithms/policy_adaption/natural_rl_environment/__init__.py
similarity index 100%
rename from environments/policy_adaption/natural_rl_environment/__init__.py
rename to algorithms/policy_adaption/natural_rl_environment/__init__.py
diff --git a/environments/policy_adaption/natural_rl_environment/imgsource.py b/algorithms/policy_adaption/natural_rl_environment/imgsource.py
similarity index 100%
rename from environments/policy_adaption/natural_rl_environment/imgsource.py
rename to algorithms/policy_adaption/natural_rl_environment/imgsource.py
diff --git a/environments/policy_adaption/natural_rl_environment/matting.py b/algorithms/policy_adaption/natural_rl_environment/matting.py
similarity index 100%
rename from environments/policy_adaption/natural_rl_environment/matting.py
rename to algorithms/policy_adaption/natural_rl_environment/matting.py
diff --git a/environments/policy_adaption/natural_rl_environment/natural_env.py b/algorithms/policy_adaption/natural_rl_environment/natural_env.py
old mode 100755
new mode 100644
similarity index 100%
rename from environments/policy_adaption/natural_rl_environment/natural_env.py
rename to algorithms/policy_adaption/natural_rl_environment/natural_env.py
diff --git a/environments/policy_adaption/natural_rl_environment/videos/stars.mp4 b/algorithms/policy_adaption/natural_rl_environment/videos/stars.mp4
similarity index 100%
rename from environments/policy_adaption/natural_rl_environment/videos/stars.mp4
rename to algorithms/policy_adaption/natural_rl_environment/videos/stars.mp4
diff --git a/environments/policy_adaption/natural_rl_environment/videos/waves.mp4 b/algorithms/policy_adaption/natural_rl_environment/videos/waves.mp4
similarity index 100%
rename from environments/policy_adaption/natural_rl_environment/videos/waves.mp4
rename to algorithms/policy_adaption/natural_rl_environment/videos/waves.mp4
diff --git a/environments/policy_adaption/test.py b/algorithms/policy_adaption/test.py
similarity index 100%
rename from environments/policy_adaption/test.py
rename to algorithms/policy_adaption/test.py
diff --git a/environments/factory/base/base_factory.py b/environments/factory/base/base_factory.py
index a9b3c1d..907ca61 100644
--- a/environments/factory/base/base_factory.py
+++ b/environments/factory/base/base_factory.py
@@ -61,7 +61,8 @@ class BaseFactory(gym.Env):
                  mv_prop: MovementProperties = MovementProperties(),
                  obs_prop: ObservationProperties = ObservationProperties(),
                  parse_doors=False, record_episodes=False, done_at_collision=False,
-                 verbose=False, doors_have_area=True, env_seed=time.time_ns(), **kwargs):
+                 verbose=False, doors_have_area=True, env_seed=time.time_ns(), individual_rewards=False,
+                 **kwargs):
 
         if isinstance(mv_prop, dict):
             mv_prop = MovementProperties(**mv_prop)
@@ -94,6 +95,7 @@ class BaseFactory(gym.Env):
         self.record_episodes = record_episodes
         self.parse_doors = parse_doors
         self.doors_have_area = doors_have_area
+        self.individual_rewards = individual_rewards
 
         # Reset
         self.reset()
@@ -487,31 +489,32 @@ class BaseFactory(gym.Env):
     def calculate_reward(self) -> (int, dict):
         # Returns: Reward, Info
         per_agent_info_dict = defaultdict(dict)
-        reward = 0
+        reward = {}
         for agent in self[c.AGENT]:
+            per_agent_reward = 0
             if self._actions.is_moving_action(agent.temp_action):
                 if agent.temp_valid:
                     # info_dict.update(movement=1)
-                    reward -= 0.01
+                    per_agent_reward -= 0.01
                     pass
                 else:
-                    reward -= 0.05
+                    per_agent_reward -= 0.05
                     self.print(f'{agent.name} just hit the wall at {agent.pos}.')
                     per_agent_info_dict[agent.name].update({f'{agent.name}_vs_LEVEL': 1})
 
            elif h.EnvActions.USE_DOOR == agent.temp_action:
                 if agent.temp_valid:
-                    # reward += 0.00
+                    # per_agent_reward += 0.00
                     self.print(f'{agent.name} did just use the door at {agent.pos}.')
                     per_agent_info_dict[agent.name].update(door_used=1)
                 else:
-                    # reward -= 0.00
+                    # per_agent_reward -= 0.00
                     self.print(f'{agent.name} just tried to use a door at {agent.pos}, but failed.')
                     per_agent_info_dict[agent.name].update({f'{agent.name}_failed_door_open': 1})
 
             elif h.EnvActions.NOOP == agent.temp_action:
                 per_agent_info_dict[agent.name].update(no_op=1)
-                # reward -= 0.00
+                # per_agent_reward -= 0.00
 
             # Monitor Notes
             if agent.temp_valid:
@@ -522,7 +525,7 @@ class BaseFactory(gym.Env):
                 per_agent_info_dict[agent.name].update({f'{agent.name}_failed_action': 1})
 
             additional_reward, additional_info_dict = self.calculate_additional_reward(agent)
-            reward += additional_reward
+            per_agent_reward += additional_reward
             per_agent_info_dict[agent.name].update(additional_info_dict)
 
             if agent.temp_collisions:
@@ -531,6 +534,7 @@ class BaseFactory(gym.Env):
 
                 for other_agent in agent.temp_collisions:
                     per_agent_info_dict[agent.name].update({f'{agent.name}_vs_{other_agent.name}': 1})
+            reward[agent.name] = per_agent_reward
 
         # Combine the per_agent_info_dict:
         combined_info_dict = defaultdict(lambda: 0)
@@ -539,7 +543,13 @@ class BaseFactory(gym.Env):
                 combined_info_dict[key] += value
         combined_info_dict = dict(combined_info_dict)
 
-        self.print(f"reward is {reward}")
+        if self.individual_rewards:
+            self.print(f"rewards are {reward}")
+            reward = list(reward.values())
+            return reward, combined_info_dict
+        else:
+            reward = sum(reward.values())
+            self.print(f"reward is {reward}")
         return reward, combined_info_dict
 
     def render(self, mode='human'):
diff --git a/reload_agent.py b/reload_agent.py
index be2387f..b43f781 100644
--- a/reload_agent.py
+++ b/reload_agent.py
@@ -18,14 +18,15 @@ if __name__ == '__main__':
 
     model_name = 'A2C_ItsDirt'
     run_id = 0
+    determin = True
    seed = 67
     n_agents = 1
-    out_path = Path('study_out/e_1_ItsDirt/no_obs/dirt/A2C_ItsDirt/0_A2C_ItsDirt')
+    out_path = Path('study_out/e_1_Now_with_doors/no_obs/dirt/A2C_Now_with_doors/0_A2C_Now_with_doors')
     model_path = out_path
 
     with (out_path / f'env_params.json').open('r') as f:
         env_kwargs = yaml.load(f, Loader=yaml.FullLoader)
-        env_kwargs.update(additional_agent_placeholder=None, n_agents=n_agents)
+        env_kwargs.update(additional_agent_placeholder=None, n_agents=n_agents, max_steps=150)
     if gain_amount := env_kwargs.get('dirt_prop', {}).get('gain_amount', None):
         env_kwargs['dirt_prop']['max_spawn_amount'] = gain_amount
         del env_kwargs['dirt_prop']['gain_amount']
@@ -49,9 +50,9 @@ if __name__ == '__main__':
             if n_agents > 1:
                 actions = [model.predict(
                     np.stack([env_state[i][j] for i in range(env_state.shape[0])]),
-                    deterministic=True)[0] for j, model in enumerate(models)]
+                    deterministic=determin)[0] for j, model in enumerate(models)]
             else:
-                actions = models[0].predict(env_state, deterministic=True)[0]
+                actions = models[0].predict(env_state, deterministic=determin)[0]
             if any([agent.pos in [door.pos for door in env.unwrapped[c.DOORS]] for agent in env.unwrapped[c.AGENT]]):
                 print('On Door')
diff --git a/studies/e_1.py b/studies/e_1.py
index 07b8321..bf5e1eb 100644
--- a/studies/e_1.py
+++ b/studies/e_1.py
@@ -2,6 +2,7 @@ import sys
 from pathlib import Path
 
 from matplotlib import pyplot as plt
 import numpy as np
+import itertools as it
 
 try:
     # noinspection PyUnboundLocalVariable
@@ -70,7 +71,7 @@
 baseline_monitor_file = 'e_1_baseline_monitor.pick'
 
 def policy_model_kwargs():
-    return dict(ent_coef=0.05)
+    return dict()
 
 
 def dqn_model_kwargs():
@@ -100,6 +101,7 @@ def load_model_run_baseline(seed_path, env_to_run):
     # Load old env kwargs
     with next(seed_path.glob('*.json')).open('r') as f:
         env_kwargs = simplejson.load(f)
+        env_kwargs.update(done_at_collision=True)
     # Monitor Init
     with MonitorCallback(filepath=seed_path / baseline_monitor_file) as monitor:
         # Init Env
@@ -134,6 +136,7 @@ def load_model_run_study(seed_path, env_to_run, additional_kwargs_dict):
         env_kwargs = simplejson.load(f)
     env_kwargs.update(
         n_agents=n_agents,
+        done_at_collision=True,
         **additional_kwargs_dict.get('post_training_kwargs', {}))
     # Monitor Init
     with MonitorCallback(filepath=seed_path / ood_monitor_file) as monitor:
@@ -168,6 +171,31 @@
             gc.collect()
 
 
+def start_mp_study_run(envs_map, policies_path):
+    paths = list(y for y in policies_path.iterdir() if y.is_dir() and not (y / ood_monitor_file).exists())
+    if paths:
+        import multiprocessing as mp
+        pool = mp.Pool(mp.cpu_count())
+        print("Starting MP with: ", pool._processes, " Processes")
+        _ = pool.starmap(load_model_run_study,
+                         it.product(paths,
+                                    (envs_map[policies_path.parent.name][0],),
+                                    (observation_modes[policies_path.parent.parent.name],))
+                         )
+
+
+def start_mp_baseline_run(envs_map, policies_path):
+    paths = list(y for y in policies_path.iterdir() if y.is_dir() and not (y / baseline_monitor_file).exists())
+    if paths:
+        import multiprocessing as mp
+        pool = mp.Pool(mp.cpu_count())
+        print("Starting MP with: ", pool._processes, " Processes")
+        _ = pool.starmap(load_model_run_baseline,
+                         it.product(paths,
+                                    (envs_map[policies_path.parent.name][0],))
+                         )
+
+
 if __name__ == '__main__':
     train_steps = 5e6
     n_seeds = 3
@@ -215,75 +243,74 @@
 
     # Define parameter versions according with #1,2[1,0,N],3
     observation_modes = {}
-    if False:
-        observation_modes.update({
-            'seperate_1': dict(
-                post_training_kwargs=
-                dict(obs_prop=ObservationProperties(
-                    render_agents=AgentRenderOptions.COMBINED,
-                    additional_agent_placeholder=None,
-                    omit_agent_self=True,
-                    frames_to_stack=3,
-                    pomdp_r=2)
-                ),
-                additional_env_kwargs=
-                dict(obs_prop=ObservationProperties(
-                    render_agents=AgentRenderOptions.NOT,
-                    additional_agent_placeholder=1,
-                    omit_agent_self=True,
-                    frames_to_stack=3,
-                    pomdp_r=2)
-                )
-            )})
-        observation_modes.update({
-            'seperate_0': dict(
-                post_training_kwargs=
-                dict(obs_prop=ObservationProperties(
-                    render_agents=AgentRenderOptions.COMBINED,
-                    additional_agent_placeholder=None,
-                    omit_agent_self=True,
-                    frames_to_stack=3,
-                    pomdp_r=2)
-                ),
-                additional_env_kwargs=
-                dict(obs_prop=ObservationProperties(
-                    render_agents=AgentRenderOptions.NOT,
-                    additional_agent_placeholder=0,
-                    omit_agent_self=True,
-                    frames_to_stack=3,
-                    pomdp_r=2)
-                )
-            )})
-        observation_modes.update({
-            'seperate_N': dict(
-                post_training_kwargs=
-                dict(obs_prop=ObservationProperties(
-                    render_agents=AgentRenderOptions.COMBINED,
-                    additional_agent_placeholder=None,
-                    omit_agent_self=True,
-                    frames_to_stack=3,
-                    pomdp_r=2)
-                ),
-                additional_env_kwargs=
-                dict(obs_prop=ObservationProperties(
-                    render_agents=AgentRenderOptions.NOT,
-                    additional_agent_placeholder='N',
-                    omit_agent_self=True,
-                    frames_to_stack=3,
-                    pomdp_r=2)
-                )
-            )})
-        observation_modes.update({
-            'in_lvl_obs': dict(
-                post_training_kwargs=
-                dict(obs_prop=ObservationProperties(
-                    render_agents=AgentRenderOptions.LEVEL,
-                    omit_agent_self=True,
-                    additional_agent_placeholder=None,
-                    frames_to_stack=3,
-                    pomdp_r=2)
-                )
-            )})
+    observation_modes.update({
+        'seperate_1': dict(
+            post_training_kwargs=
+            dict(obs_prop=ObservationProperties(
+                render_agents=AgentRenderOptions.COMBINED,
+                additional_agent_placeholder=None,
+                omit_agent_self=True,
+                frames_to_stack=3,
+                pomdp_r=2)
+            ),
+            additional_env_kwargs=
+            dict(obs_prop=ObservationProperties(
+                render_agents=AgentRenderOptions.NOT,
+                additional_agent_placeholder=1,
+                omit_agent_self=True,
+                frames_to_stack=3,
+                pomdp_r=2)
+            )
+        )})
+    observation_modes.update({
+        'seperate_0': dict(
+            post_training_kwargs=
+            dict(obs_prop=ObservationProperties(
+                render_agents=AgentRenderOptions.COMBINED,
+                additional_agent_placeholder=None,
+                omit_agent_self=True,
+                frames_to_stack=3,
+                pomdp_r=2)
+            ),
+            additional_env_kwargs=
+            dict(obs_prop=ObservationProperties(
+                render_agents=AgentRenderOptions.NOT,
+                additional_agent_placeholder=0,
+                omit_agent_self=True,
+                frames_to_stack=3,
+                pomdp_r=2)
+            )
+        )})
+    observation_modes.update({
+        'seperate_N': dict(
+            post_training_kwargs=
+            dict(obs_prop=ObservationProperties(
+                render_agents=AgentRenderOptions.COMBINED,
+                additional_agent_placeholder=None,
+                omit_agent_self=True,
+                frames_to_stack=3,
+                pomdp_r=2)
+            ),
+            additional_env_kwargs=
+            dict(obs_prop=ObservationProperties(
+                render_agents=AgentRenderOptions.NOT,
+                additional_agent_placeholder='N',
+                omit_agent_self=True,
+                frames_to_stack=3,
+                pomdp_r=2)
+            )
+        )})
+    observation_modes.update({
+        'in_lvl_obs': dict(
+            post_training_kwargs=
+            dict(obs_prop=ObservationProperties(
+                render_agents=AgentRenderOptions.LEVEL,
+                omit_agent_self=True,
+                additional_agent_placeholder=None,
+                frames_to_stack=3,
+                pomdp_r=2)
+            )
+        )})
     observation_modes.update({
         # No further adjustment needed
         'no_obs': dict(
@@ -398,15 +425,7 @@
         for env_path in [x for x in obs_mode_path.iterdir() if x.is_dir()]:
             for policy_path in [x for x in env_path.iterdir() if x.is_dir()]:
                 # Iteration
-                paths = list(y for y in policy_path.iterdir() if y.is_dir() \
-                             and not (y / baseline_monitor_file).exists())
-                import multiprocessing as mp
-                import itertools as it
-                pool = mp.Pool(mp.cpu_count())
-                result = pool.starmap(load_model_run_baseline,
-                                      it.product(paths,
-                                                 (env_map[env_path.name][0],))
-                                      )
+                start_mp_baseline_run(env_map, policy_path)
                 # for seed_path in (y for y in policy_path.iterdir() if y.is_dir()):
                 #     load_model_run_baseline(seed_path)
 
@@ -424,18 +443,9 @@
                 # First seed path version
                 # seed_path = next((y for y in policy_path.iterdir() if y.is_dir()))
                 # Iteration
-                import multiprocessing as mp
-                import itertools as it
-                pool = mp.Pool(mp.cpu_count())
-                paths = list(y for y in policy_path.iterdir() if y.is_dir() \
-                             and not (y / ood_monitor_file).exists())
-                # result = pool.starmap(load_model_run_study,
-                #                       it.product(paths,
-                #                                  (env_map[env_path.name][0],),
-                #                                  (observation_modes[obs_mode],))
-                #                       )
-                for seed_path in (y for y in policy_path.iterdir() if y.is_dir()):
-                    load_model_run_study(seed_path, env_map[env_path.name][0], observation_modes[obs_mode])
+                start_mp_study_run(env_map, policy_path)
+                #for seed_path in (y for y in policy_path.iterdir() if y.is_dir()):
+                #    load_model_run_study(seed_path, env_map[env_path.name][0], observation_modes[obs_mode])
             print('OOD Tracking Done')
 
     # Plotting
@@ -497,15 +507,16 @@
             # df_melted["Measurements"] = df_melted["Measurement"] + " " + df_melted["monitor"]
 
             # Plotting
-            fig, ax = plt.subplots(figsize=(11.7, 8.27))
+            # fig, ax = plt.subplots(figsize=(11.7, 8.27))
             c = sns.catplot(data=df_melted[df_melted['obs_mode'] == observation_folder.name],
                             x='Measurement', hue='monitor', row='model', col='env', y='Score',
-                            sharey=False, kind="box", height=4, aspect=.7, legend_out=True,
+                            sharey=False, kind="box", height=4, aspect=.7, legend_out=False, legend=False,
                             showfliers=False)
             c.set_xticklabels(rotation=65, horizontalalignment='right')
-            c.fig.subplots_adjust(top=0.9)  # adjust the Figure in rp
+            # c.fig.subplots_adjust(top=0.9)  # adjust the Figure in rp
             c.fig.suptitle(f"Cat plot for {observation_folder.name}")
-            plt.tight_layout(pad=2)
+            # plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
+            plt.tight_layout()
             plt.savefig(study_root_path / f'results_{n_agents}_agents_{observation_folder.name}.png')
             pass
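
The central behavioural change in BaseFactory.calculate_reward() above is that rewards are now accumulated per agent and, depending on the new individual_rewards flag, either returned as a list (one entry per agent) or summed into a single scalar as before. The standalone sketch below isolates just that aggregation branch; the function name aggregate_rewards and the example agent names and values are hypothetical and only illustrate the logic added in the diff.

# Minimal sketch of the reward aggregation introduced in calculate_reward().
# aggregate_rewards() and the example values below are hypothetical stand-ins.
from typing import Dict, List, Union


def aggregate_rewards(per_agent: Dict[str, float],
                      individual_rewards: bool) -> Union[List[float], float]:
    if individual_rewards:
        # One reward per agent, in agent order (multi-agent case).
        return list(per_agent.values())
    # Single scalar reward, summed over all agents (previous behaviour).
    return sum(per_agent.values())


rewards = {'Agent[0]': -0.5, 'Agent[1]': -0.25}
print(aggregate_rewards(rewards, individual_rewards=True))   # [-0.5, -0.25]
print(aggregate_rewards(rewards, individual_rewards=False))  # -0.75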
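
Similarly, studies/e_1.py now routes the per-seed evaluation through the start_mp_baseline_run and start_mp_study_run helpers, which pair every seed directory with one shared environment argument via itertools.product and dispatch the work with multiprocessing.Pool.starmap. A rough, self-contained sketch of that dispatch pattern, using a dummy worker in place of load_model_run_baseline, could look like this:

# Sketch of the Pool.starmap + itertools.product dispatch used by the new
# start_mp_* helpers; run_one and the seed_dirs list are hypothetical stand-ins.
import itertools as it
import multiprocessing as mp


def run_one(seed_path, env_name):
    # Stand-in for load_model_run_baseline(seed_path, env_to_run).
    return f'{seed_path} evaluated on {env_name}'


if __name__ == '__main__':
    seed_dirs = ['seed_0', 'seed_1', 'seed_2']
    with mp.Pool(mp.cpu_count()) as pool:
        # product() pairs every seed dir with the single shared env argument,
        # matching the per-call argument tuples that starmap expects.
        results = pool.starmap(run_one, it.product(seed_dirs, ('dirt',)))
    print(results)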