Mirror of https://github.com/illiumst/marl-factory-grid.git (synced 2025-10-31 04:37:25 +01:00)
	Individual Rewards
		| @@ -61,7 +61,8 @@ class BaseFactory(gym.Env): | ||||
|                  mv_prop: MovementProperties = MovementProperties(), | ||||
|                  obs_prop: ObservationProperties = ObservationProperties(), | ||||
|                  parse_doors=False, record_episodes=False, done_at_collision=False, | ||||
|                  verbose=False, doors_have_area=True, env_seed=time.time_ns(), **kwargs): | ||||
|                  verbose=False, doors_have_area=True, env_seed=time.time_ns(), individual_rewards=False, | ||||
|                  **kwargs): | ||||
|  | ||||
|         if isinstance(mv_prop, dict): | ||||
|             mv_prop = MovementProperties(**mv_prop) | ||||
| @@ -94,6 +95,7 @@ class BaseFactory(gym.Env): | ||||
|         self.record_episodes = record_episodes | ||||
|         self.parse_doors = parse_doors | ||||
|         self.doors_have_area = doors_have_area | ||||
|         self.individual_rewards = individual_rewards | ||||
|  | ||||
|         # Reset | ||||
|         self.reset() | ||||
| @@ -487,31 +489,32 @@ class BaseFactory(gym.Env): | ||||
|     def calculate_reward(self) -> (int, dict): | ||||
|         # Returns: Reward, Info | ||||
|         per_agent_info_dict = defaultdict(dict) | ||||
|         reward = 0 | ||||
|         reward = {} | ||||
|  | ||||
|         for agent in self[c.AGENT]: | ||||
|             per_agent_reward = 0 | ||||
|             if self._actions.is_moving_action(agent.temp_action): | ||||
|                 if agent.temp_valid: | ||||
|                     # info_dict.update(movement=1) | ||||
|                     reward -= 0.01 | ||||
|                     per_agent_reward -= 0.01 | ||||
|                     pass | ||||
|                 else: | ||||
|                     reward -= 0.05 | ||||
|                     per_agent_reward -= 0.05 | ||||
|                     self.print(f'{agent.name} just hit the wall at {agent.pos}.') | ||||
|                     per_agent_info_dict[agent.name].update({f'{agent.name}_vs_LEVEL': 1}) | ||||
|  | ||||
|             elif h.EnvActions.USE_DOOR == agent.temp_action: | ||||
|                 if agent.temp_valid: | ||||
|                     # reward += 0.00 | ||||
|                     # per_agent_reward += 0.00 | ||||
|                     self.print(f'{agent.name} did just use the door at {agent.pos}.') | ||||
|                     per_agent_info_dict[agent.name].update(door_used=1) | ||||
|                 else: | ||||
|                     # reward -= 0.00 | ||||
|                     # per_agent_reward -= 0.00 | ||||
|                     self.print(f'{agent.name} just tried to use a door at {agent.pos}, but failed.') | ||||
|                     per_agent_info_dict[agent.name].update({f'{agent.name}_failed_door_open': 1}) | ||||
|             elif h.EnvActions.NOOP == agent.temp_action: | ||||
|                 per_agent_info_dict[agent.name].update(no_op=1) | ||||
|                 # reward -= 0.00 | ||||
|                 # per_agent_reward -= 0.00 | ||||
|  | ||||
|             # Monitor Notes | ||||
|             if agent.temp_valid: | ||||
| @@ -522,7 +525,7 @@ class BaseFactory(gym.Env): | ||||
|                 per_agent_info_dict[agent.name].update({f'{agent.name}_failed_action': 1}) | ||||
|  | ||||
|             additional_reward, additional_info_dict = self.calculate_additional_reward(agent) | ||||
|             reward += additional_reward | ||||
|             per_agent_reward += additional_reward | ||||
|             per_agent_info_dict[agent.name].update(additional_info_dict) | ||||
|  | ||||
|             if agent.temp_collisions: | ||||
| @@ -531,6 +534,7 @@ class BaseFactory(gym.Env): | ||||
|  | ||||
|                 for other_agent in agent.temp_collisions: | ||||
|                     per_agent_info_dict[agent.name].update({f'{agent.name}_vs_{other_agent.name}': 1}) | ||||
|             reward[agent.name] = per_agent_reward | ||||
|  | ||||
|         # Combine the per_agent_info_dict: | ||||
|         combined_info_dict = defaultdict(lambda: 0) | ||||
| @@ -539,7 +543,13 @@ class BaseFactory(gym.Env): | ||||
|                 combined_info_dict[key] += value | ||||
|         combined_info_dict = dict(combined_info_dict) | ||||
|  | ||||
|         self.print(f"reward is {reward}") | ||||
|         if self.individual_rewards: | ||||
|             self.print(f"rewards are {reward}") | ||||
|             reward = list(reward.values()) | ||||
|             return reward, combined_info_dict | ||||
|         else: | ||||
|             reward = sum(reward.values()) | ||||
|             self.print(f"reward is {reward}") | ||||
|         return reward, combined_info_dict | ||||
|  | ||||
|     def render(self, mode='human'): | ||||
|   | ||||
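The hunk above reworks calculate_reward to accumulate a separate reward per agent (keyed by agent name) and, when individual_rewards is set, to return those values as a list instead of a single summed scalar. A minimal standalone sketch of that final aggregation step, using a hypothetical helper function rather than the actual BaseFactory method:

# Hedged sketch: the real logic lives inside BaseFactory.calculate_reward and
# operates on agent objects; this only mirrors the aggregation at the end.
def aggregate_rewards(per_agent_reward, individual_rewards):
    if individual_rewards:
        # One entry per agent, in the order the agents were processed.
        return list(per_agent_reward.values())
    # Previous behaviour: a single scalar shared by all agents.
    return sum(per_agent_reward.values())

rewards = {'Agent[0]': -1.0, 'Agent[1]': -0.5}   # hypothetical agent names
assert aggregate_rewards(rewards, individual_rewards=True) == [-1.0, -0.5]
assert aggregate_rewards(rewards, individual_rewards=False) == -1.5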
| @@ -18,14 +18,15 @@ if __name__ == '__main__': | ||||
|  | ||||
|     model_name = 'A2C_ItsDirt' | ||||
|     run_id = 0 | ||||
|     determin = True | ||||
|     seed = 67 | ||||
|     n_agents = 1 | ||||
|     out_path = Path('study_out/e_1_ItsDirt/no_obs/dirt/A2C_ItsDirt/0_A2C_ItsDirt') | ||||
|     out_path = Path('study_out/e_1_Now_with_doors/no_obs/dirt/A2C_Now_with_doors/0_A2C_Now_with_doors') | ||||
|     model_path = out_path | ||||
|  | ||||
|     with (out_path / f'env_params.json').open('r') as f: | ||||
|         env_kwargs = yaml.load(f, Loader=yaml.FullLoader) | ||||
|         env_kwargs.update(additional_agent_placeholder=None, n_agents=n_agents) | ||||
|         env_kwargs.update(additional_agent_placeholder=None, n_agents=n_agents, max_steps=150) | ||||
|         if gain_amount := env_kwargs.get('dirt_prop', {}).get('gain_amount', None): | ||||
|             env_kwargs['dirt_prop']['max_spawn_amount'] = gain_amount | ||||
|             del env_kwargs['dirt_prop']['gain_amount'] | ||||
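The walrus expression in the hunk above migrates a legacy key in the loaded kwargs: an old dirt_prop gain_amount entry is renamed to max_spawn_amount. The same pattern on a plain dict, sketched with hypothetical values:

# Hypothetical kwargs dict mimicking the structure loaded from env_params.json.
env_kwargs = {'dirt_prop': {'gain_amount': 0.3, 'clean_amount': 1}}

# Rename the legacy key only if it is present and truthy (Python 3.8+).
if gain_amount := env_kwargs.get('dirt_prop', {}).get('gain_amount', None):
    env_kwargs['dirt_prop']['max_spawn_amount'] = gain_amount
    del env_kwargs['dirt_prop']['gain_amount']

print(env_kwargs)  # {'dirt_prop': {'clean_amount': 1, 'max_spawn_amount': 0.3}}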
| @@ -49,9 +50,9 @@ if __name__ == '__main__': | ||||
|                     if n_agents > 1: | ||||
|                         actions = [model.predict( | ||||
|                             np.stack([env_state[i][j] for i in range(env_state.shape[0])]), | ||||
|                             deterministic=True)[0] for j, model in enumerate(models)] | ||||
|                             deterministic=determin)[0] for j, model in enumerate(models)] | ||||
|                     else: | ||||
|                         actions = models[0].predict(env_state, deterministic=True)[0] | ||||
|                         actions = models[0].predict(env_state, deterministic=determin)[0] | ||||
|                     if any([agent.pos in [door.pos for door in env.unwrapped[c.DOORS]] | ||||
|                             for agent in env.unwrapped[c.AGENT]]): | ||||
|                         print('On Door') | ||||
|   | ||||
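The remaining hunks of this file only lift the hard-coded deterministic=True into a determin variable (and cap max_steps at 150). A self-contained sketch of what that toggle does for a stable-baselines3 policy, using a toy environment rather than the factory env:

# Hedged sketch: assumes gym and stable-baselines3 are installed; the actual
# study script loads pre-trained factory models instead of training CartPole.
import gym
from stable_baselines3 import A2C

determin = True                                  # True -> greedy action, False -> sampled action
env = gym.make('CartPole-v1')
model = A2C('MlpPolicy', env).learn(total_timesteps=100)
obs = env.observation_space.sample()             # any valid observation works here
action, _state = model.predict(obs, deterministic=determin)
print(action)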
studies/e_1.py
							| @@ -2,6 +2,7 @@ import sys | ||||
| from pathlib import Path | ||||
| from matplotlib import pyplot as plt | ||||
| import numpy as np | ||||
| import itertools as it | ||||
|  | ||||
| try: | ||||
|     # noinspection PyUnboundLocalVariable | ||||
| @@ -70,7 +71,7 @@ baseline_monitor_file = 'e_1_baseline_monitor.pick' | ||||
|  | ||||
|  | ||||
| def policy_model_kwargs(): | ||||
|     return dict(ent_coef=0.05) | ||||
|     return dict() | ||||
|  | ||||
|  | ||||
| def dqn_model_kwargs(): | ||||
| @@ -100,6 +101,7 @@ def load_model_run_baseline(seed_path, env_to_run): | ||||
|     # Load old env kwargs | ||||
|     with next(seed_path.glob('*.json')).open('r') as f: | ||||
|         env_kwargs = simplejson.load(f) | ||||
|         env_kwargs.update(done_at_collision=True) | ||||
|     # Monitor Init | ||||
|     with MonitorCallback(filepath=seed_path / baseline_monitor_file) as monitor: | ||||
|         # Init Env | ||||
| @@ -134,6 +136,7 @@ def load_model_run_study(seed_path, env_to_run, additional_kwargs_dict): | ||||
|         env_kwargs = simplejson.load(f) | ||||
|         env_kwargs.update( | ||||
|             n_agents=n_agents, | ||||
|             done_at_collision=True, | ||||
|             **additional_kwargs_dict.get('post_training_kwargs', {})) | ||||
|     # Monitor Init | ||||
|     with MonitorCallback(filepath=seed_path / ood_monitor_file) as monitor: | ||||
| @@ -168,6 +171,31 @@ def load_model_run_study(seed_path, env_to_run, additional_kwargs_dict): | ||||
|     gc.collect() | ||||
|  | ||||
|  | ||||
| def start_mp_study_run(envs_map, policies_path): | ||||
|     paths = list(y for y in policies_path.iterdir() if y.is_dir() and not (y / ood_monitor_file).exists()) | ||||
|     if paths: | ||||
|         import multiprocessing as mp | ||||
|         pool = mp.Pool(mp.cpu_count()) | ||||
|         print("Starting MP with: ", pool._processes, " Processes") | ||||
|         _ = pool.starmap(load_model_run_study, | ||||
|                          it.product(paths, | ||||
|                                     (envs_map[policies_path.parent.name][0],), | ||||
|                                     (observation_modes[policies_path.parent.parent.name],)) | ||||
|                          ) | ||||
|  | ||||
|  | ||||
| def start_mp_baseline_run(envs_map, policies_path): | ||||
|     paths = list(y for y in policies_path.iterdir() if y.is_dir() and not (y / baseline_monitor_file).exists()) | ||||
|     if paths: | ||||
|         import multiprocessing as mp | ||||
|         pool = mp.Pool(mp.cpu_count()) | ||||
|         print("Starting MP with: ", pool._processes, " Processes") | ||||
|         _ = pool.starmap(load_model_run_baseline, | ||||
|                          it.product(paths, | ||||
|                                     (envs_map[policies_path.parent.name][0],)) | ||||
|                          ) | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     train_steps = 5e6 | ||||
|     n_seeds = 3 | ||||
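The two helpers added above fan the per-seed evaluation out over a process pool; it.product with single-element tuples simply pairs every seed directory with one fixed environment name and observation mode. A small sketch of that argument expansion, with hypothetical names standing in for the real loader functions:

import itertools as it
import multiprocessing as mp

def run_one(seed_path, env_name, obs_mode):
    # Stand-in for load_model_run_baseline / load_model_run_study.
    return f'{seed_path}:{env_name}:{obs_mode}'

if __name__ == '__main__':
    seed_paths = ['seed_0', 'seed_1', 'seed_2']            # hypothetical
    # Every path is combined with the single env / obs-mode entry.
    args = list(it.product(seed_paths, ('dirt',), ('no_obs',)))
    # -> [('seed_0', 'dirt', 'no_obs'), ('seed_1', 'dirt', 'no_obs'), ...]
    with mp.Pool(mp.cpu_count()) as pool:
        results = pool.starmap(run_one, args)
    print(results)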
| @@ -215,75 +243,74 @@ if __name__ == '__main__': | ||||
|  | ||||
|     # Define parameter versions according with #1,2[1,0,N],3 | ||||
|     observation_modes = {} | ||||
|     if False: | ||||
|         observation_modes.update({ | ||||
|             'seperate_1': dict( | ||||
|                 post_training_kwargs= | ||||
|                 dict(obs_prop=ObservationProperties( | ||||
|                     render_agents=AgentRenderOptions.COMBINED, | ||||
|                     additional_agent_placeholder=None, | ||||
|                     omit_agent_self=True, | ||||
|                     frames_to_stack=3, | ||||
|                     pomdp_r=2) | ||||
|                 ), | ||||
|                 additional_env_kwargs= | ||||
|                 dict(obs_prop=ObservationProperties( | ||||
|                     render_agents=AgentRenderOptions.NOT, | ||||
|                     additional_agent_placeholder=1, | ||||
|                     omit_agent_self=True, | ||||
|                     frames_to_stack=3, | ||||
|                     pomdp_r=2) | ||||
|                 ) | ||||
|             )}) | ||||
|         observation_modes.update({ | ||||
|             'seperate_0': dict( | ||||
|                 post_training_kwargs= | ||||
|                 dict(obs_prop=ObservationProperties( | ||||
|                     render_agents=AgentRenderOptions.COMBINED, | ||||
|                     additional_agent_placeholder=None, | ||||
|                     omit_agent_self=True, | ||||
|                     frames_to_stack=3, | ||||
|                     pomdp_r=2) | ||||
|                 ), | ||||
|                 additional_env_kwargs= | ||||
|                 dict(obs_prop=ObservationProperties( | ||||
|                     render_agents=AgentRenderOptions.NOT, | ||||
|                     additional_agent_placeholder=0, | ||||
|                     omit_agent_self=True, | ||||
|                     frames_to_stack=3, | ||||
|                     pomdp_r=2) | ||||
|                 ) | ||||
|             )}) | ||||
|         observation_modes.update({ | ||||
|             'seperate_N': dict( | ||||
|                 post_training_kwargs= | ||||
|                 dict(obs_prop=ObservationProperties( | ||||
|                     render_agents=AgentRenderOptions.COMBINED, | ||||
|                     additional_agent_placeholder=None, | ||||
|                     omit_agent_self=True, | ||||
|                     frames_to_stack=3, | ||||
|                     pomdp_r=2) | ||||
|                 ), | ||||
|                 additional_env_kwargs= | ||||
|                 dict(obs_prop=ObservationProperties( | ||||
|                     render_agents=AgentRenderOptions.NOT, | ||||
|                     additional_agent_placeholder='N', | ||||
|                     omit_agent_self=True, | ||||
|                     frames_to_stack=3, | ||||
|                     pomdp_r=2) | ||||
|                 ) | ||||
|             )}) | ||||
|         observation_modes.update({ | ||||
|             'in_lvl_obs': dict( | ||||
|                 post_training_kwargs= | ||||
|                 dict(obs_prop=ObservationProperties( | ||||
|                     render_agents=AgentRenderOptions.LEVEL, | ||||
|                     omit_agent_self=True, | ||||
|                     additional_agent_placeholder=None, | ||||
|                     frames_to_stack=3, | ||||
|                     pomdp_r=2) | ||||
|                 ) | ||||
|             )}) | ||||
|     observation_modes.update({ | ||||
|         'seperate_1': dict( | ||||
|             post_training_kwargs= | ||||
|             dict(obs_prop=ObservationProperties( | ||||
|                 render_agents=AgentRenderOptions.COMBINED, | ||||
|                 additional_agent_placeholder=None, | ||||
|                 omit_agent_self=True, | ||||
|                 frames_to_stack=3, | ||||
|                 pomdp_r=2) | ||||
|             ), | ||||
|             additional_env_kwargs= | ||||
|             dict(obs_prop=ObservationProperties( | ||||
|                 render_agents=AgentRenderOptions.NOT, | ||||
|                 additional_agent_placeholder=1, | ||||
|                 omit_agent_self=True, | ||||
|                 frames_to_stack=3, | ||||
|                 pomdp_r=2) | ||||
|             ) | ||||
|         )}) | ||||
|     observation_modes.update({ | ||||
|         'seperate_0': dict( | ||||
|             post_training_kwargs= | ||||
|             dict(obs_prop=ObservationProperties( | ||||
|                 render_agents=AgentRenderOptions.COMBINED, | ||||
|                 additional_agent_placeholder=None, | ||||
|                 omit_agent_self=True, | ||||
|                 frames_to_stack=3, | ||||
|                 pomdp_r=2) | ||||
|             ), | ||||
|             additional_env_kwargs= | ||||
|             dict(obs_prop=ObservationProperties( | ||||
|                 render_agents=AgentRenderOptions.NOT, | ||||
|                 additional_agent_placeholder=0, | ||||
|                 omit_agent_self=True, | ||||
|                 frames_to_stack=3, | ||||
|                 pomdp_r=2) | ||||
|             ) | ||||
|         )}) | ||||
|     observation_modes.update({ | ||||
|         'seperate_N': dict( | ||||
|             post_training_kwargs= | ||||
|             dict(obs_prop=ObservationProperties( | ||||
|                 render_agents=AgentRenderOptions.COMBINED, | ||||
|                 additional_agent_placeholder=None, | ||||
|                 omit_agent_self=True, | ||||
|                 frames_to_stack=3, | ||||
|                 pomdp_r=2) | ||||
|             ), | ||||
|             additional_env_kwargs= | ||||
|             dict(obs_prop=ObservationProperties( | ||||
|                 render_agents=AgentRenderOptions.NOT, | ||||
|                 additional_agent_placeholder='N', | ||||
|                 omit_agent_self=True, | ||||
|                 frames_to_stack=3, | ||||
|                 pomdp_r=2) | ||||
|             ) | ||||
|         )}) | ||||
|     observation_modes.update({ | ||||
|         'in_lvl_obs': dict( | ||||
|             post_training_kwargs= | ||||
|             dict(obs_prop=ObservationProperties( | ||||
|                 render_agents=AgentRenderOptions.LEVEL, | ||||
|                 omit_agent_self=True, | ||||
|                 additional_agent_placeholder=None, | ||||
|                 frames_to_stack=3, | ||||
|                 pomdp_r=2) | ||||
|             ) | ||||
|         )}) | ||||
|     observation_modes.update({ | ||||
|         #  No further adjustment needed | ||||
|         'no_obs': dict( | ||||
| @@ -398,15 +425,7 @@ if __name__ == '__main__': | ||||
|             for env_path in [x for x in obs_mode_path.iterdir() if x.is_dir()]: | ||||
|                 for policy_path in [x for x in env_path.iterdir() if x. is_dir()]: | ||||
|                     # Iteration | ||||
|                     paths = list(y for y in policy_path.iterdir() if y.is_dir() \ | ||||
|                                  and not (y / baseline_monitor_file).exists()) | ||||
|                     import multiprocessing as mp | ||||
|                     import itertools as it | ||||
|                     pool = mp.Pool(mp.cpu_count()) | ||||
|                     result = pool.starmap(load_model_run_baseline, | ||||
|                                           it.product(paths, | ||||
|                                                      (env_map[env_path.name][0],)) | ||||
|                                           ) | ||||
|                     start_mp_baseline_run(env_map, policy_path) | ||||
|  | ||||
|                     # for seed_path in (y for y in policy_path.iterdir() if y.is_dir()): | ||||
|                     #    load_model_run_baseline(seed_path) | ||||
| @@ -424,18 +443,9 @@ if __name__ == '__main__': | ||||
|                     # First seed path version | ||||
|                     # seed_path = next((y for y in policy_path.iterdir() if y.is_dir())) | ||||
|                     # Iteration | ||||
|                     import multiprocessing as mp | ||||
|                     import itertools as it | ||||
|                     pool = mp.Pool(mp.cpu_count()) | ||||
|                     paths = list(y for y in policy_path.iterdir() if y.is_dir() \ | ||||
|                                  and not (y / ood_monitor_file).exists()) | ||||
|                     # result = pool.starmap(load_model_run_study, | ||||
|                     #                      it.product(paths, | ||||
|                     #                                 (env_map[env_path.name][0],), | ||||
|                     #                                 (observation_modes[obs_mode],)) | ||||
|                     #                      ) | ||||
|                     for seed_path in (y for y in policy_path.iterdir() if y.is_dir()): | ||||
|                         load_model_run_study(seed_path, env_map[env_path.name][0], observation_modes[obs_mode]) | ||||
|                     start_mp_study_run(env_map, policy_path) | ||||
|                     #for seed_path in (y for y in policy_path.iterdir() if y.is_dir()): | ||||
|                     #    load_model_run_study(seed_path, env_map[env_path.name][0], observation_modes[obs_mode]) | ||||
|         print('OOD Tracking Done') | ||||
|  | ||||
|     # Plotting | ||||
| @@ -497,15 +507,16 @@ if __name__ == '__main__': | ||||
|             # df_melted["Measurements"] = df_melted["Measurement"] + " " + df_melted["monitor"] | ||||
|  | ||||
|             # Plotting | ||||
|             fig, ax = plt.subplots(figsize=(11.7, 8.27)) | ||||
|             # fig, ax = plt.subplots(figsize=(11.7, 8.27)) | ||||
|  | ||||
|             c = sns.catplot(data=df_melted[df_melted['obs_mode'] == observation_folder.name], | ||||
|                             x='Measurement', hue='monitor', row='model', col='env', y='Score', | ||||
|                             sharey=False, kind="box", height=4, aspect=.7, legend_out=True, | ||||
|                             sharey=False, kind="box", height=4, aspect=.7, legend_out=False, legend=False, | ||||
|                             showfliers=False) | ||||
|             c.set_xticklabels(rotation=65, horizontalalignment='right') | ||||
|             c.fig.subplots_adjust(top=0.9)  # adjust the Figure in rp | ||||
|             # c.fig.subplots_adjust(top=0.9)  # adjust the Figure in rp | ||||
|             c.fig.suptitle(f"Cat plot for {observation_folder.name}") | ||||
|             plt.tight_layout(pad=2) | ||||
|             # plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.) | ||||
|             plt.tight_layout() | ||||
|             plt.savefig(study_root_path / f'results_{n_agents}_agents_{observation_folder.name}.png') | ||||
|         pass | ||||
|   | ||||
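The plotting hunk disables the catplot legend and replaces the manual subplots_adjust with tight_layout. A minimal sketch of a comparable call on dummy data (hypothetical columns standing in for the melted monitor dataframe):

import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

# Dummy data with the same column roles as the study's melted dataframe.
df = pd.DataFrame({
    'Measurement': ['step_reward', 'step_reward', 'dirt_amount', 'dirt_amount'],
    'monitor': ['baseline', 'ood', 'baseline', 'ood'],
    'Score': [0.1, -0.2, 3.0, 5.0],
})
g = sns.catplot(data=df, x='Measurement', hue='monitor', y='Score',
                kind='box', height=4, aspect=.7, legend=False, showfliers=False)
g.set_xticklabels(rotation=65, horizontalalignment='right')
g.fig.suptitle('Cat plot sketch')
plt.tight_layout(pad=2)
plt.savefig('catplot_sketch.png')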
Steffen Illium