From 6287380f60f6091103264998c873c85f1bf876e7 Mon Sep 17 00:00:00 2001
From: Steffen Illium
Date: Thu, 11 Nov 2021 10:58:11 +0100
Subject: [PATCH] Debugging and collision rendering

---
 environments/factory/base/base_factory.py |   4 +-
 environments/factory/factory_dirt.py      |   2 +-
 environments/helpers.py                   |   2 +-
 reload_agent.py                           |  21 ++-
 studies/e_1.py                            | 155 ++++++++++++++--------
 5 files changed, 117 insertions(+), 67 deletions(-)

diff --git a/environments/factory/base/base_factory.py b/environments/factory/base/base_factory.py
index 36d58ed..f788ebe 100644
--- a/environments/factory/base/base_factory.py
+++ b/environments/factory/base/base_factory.py
@@ -494,10 +494,10 @@ class BaseFactory(gym.Env):
         if self._actions.is_moving_action(agent.temp_action):
             if agent.temp_valid:
                 # info_dict.update(movement=1)
-                reward -= 0.001
+                reward -= 0.01
                 pass
             else:
-                reward -= 0.01
+                reward -= 0.05
                 self.print(f'{agent.name} just hit the wall at {agent.pos}.')
                 per_agent_info_dict[agent.name].update({f'{agent.name}_vs_LEVEL': 1})
diff --git a/environments/factory/factory_dirt.py b/environments/factory/factory_dirt.py
index 449e5aa..32cc80e 100644
--- a/environments/factory/factory_dirt.py
+++ b/environments/factory/factory_dirt.py
@@ -65,7 +65,7 @@ class DirtRegister(MovingEntityObjectRegister):
     def as_array(self):
         if self._array is not None:
             self._array[:] = c.FREE_CELL.value
-            for dirt in self.values():
+            for dirt in list(self.values()):
                 if dirt.amount == 0:
                     self.delete_item(dirt)
                 self._array[0, dirt.x, dirt.y] = dirt.amount
diff --git a/environments/helpers.py b/environments/helpers.py
index 64a0c87..119b860 100644
--- a/environments/helpers.py
+++ b/environments/helpers.py
@@ -136,7 +136,7 @@ def asset_str(agent):
     # if any([x is None for x in [self._slices[j] for j in agent.collisions]]):
     #     print('error')
     col_names = [x.name for x in agent.temp_collisions]
-    if c.AGENT.value in col_names:
+    if any(c.AGENT.value in name for name in col_names):
         return 'agent_collision', 'blank'
     elif not agent.temp_valid or c.LEVEL.name in col_names or c.AGENT.name in col_names:
         return c.AGENT.value, 'invalid'
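Note on the factory_dirt.py hunk above: `as_array` deletes exhausted dirt entries via `delete_item` while iterating the register, and iterating a live dict view during deletion raises `RuntimeError: dictionary changed size during iteration`; snapshotting with `list(self.values())` avoids that. A minimal sketch of the failure mode and the fix, using a plain dict as a hypothetical stand-in for the register's storage:

    # Stand-in for the DirtRegister's dict-backed storage (names are illustrative).
    register = {'dirt_0': 0, 'dirt_1': 3, 'dirt_2': 0}

    # for amount in register.values():   # would raise: dict changed size during iteration
    #     ...

    for name, amount in list(register.items()):   # iterate a snapshot, mutate freely
        if amount == 0:
            del register[name]

    print(register)   # -> {'dirt_1': 3}

The helpers.py hunk looks like related hardening: if collision entries carry enumerated names (e.g. 'agent_1'), an exact membership test against c.AGENT.value misses them, while `any(c.AGENT.value in name ...)` matches by substring.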
diff --git a/reload_agent.py b/reload_agent.py
index acc75a9..5849469 100644
--- a/reload_agent.py
+++ b/reload_agent.py
@@ -16,11 +16,11 @@ warnings.filterwarnings('ignore', category=UserWarning)
 
 if __name__ == '__main__':
 
-    model_name = 'DQN_163519000'
+    model_name = 'A2C_ItsDirt'
     run_id = 0
-    seed = 69
-    n_agents = 2
-    out_path = Path('debug_out/DQN_163519000/1_DQN_163519000')
+    seed = 67
+    n_agents = 1
+    out_path = Path('study_out/e_1_ItsDirt/no_obs/dirt/A2C_ItsDirt/0_A2C_ItsDirt')
     model_path = out_path
 
     with (out_path / f'env_params.json').open('r') as f:
@@ -46,10 +46,17 @@ if __name__ == '__main__':
         env_state = env.reset()
         rew, done_bool = 0, False
         while not done_bool:
-            actions = [model.predict(
-                np.stack([env_state[i][j] for i in range(env_state.shape[0])]),
-                deterministic=False)[0] for j, model in enumerate(models)]
+            if n_agents > 1:
+                actions = [model.predict(
+                    np.stack([env_state[i][j] for i in range(env_state.shape[0])]),
+                    deterministic=True)[0] for j, model in enumerate(models)]
+            else:
+                actions = models[0].predict(env_state, deterministic=True)[0]
+            if any([agent.pos in [door.pos for door in env.unwrapped[c.DOORS]]
+                    for agent in env.unwrapped[c.AGENT]]):
+                print('On Door')
             env_state, step_r, done_bool, info_obj = env.step(actions)
+            recorder.read_info(0, info_obj)
             rew += step_r
             env.render()
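The new branch in reload_agent.py above regroups the stacked observation only when several policies share the environment. Assuming `env_state` is laid out as `(frames_to_stack, n_agents, height, width)` — inferred from the indexing, not stated by the patch — the comprehension collects agent j's frames into one observation per policy; with a single agent the state is fed through unchanged. A standalone numpy sketch of that regrouping:

    import numpy as np

    # Hypothetical layout inferred from the patch: (frames, n_agents, height, width).
    frames, n_agents, h, w = 3, 2, 5, 5
    env_state = np.zeros((frames, n_agents, h, w))

    # One (frames, h, w) stack per agent, fed to that agent's own policy.
    per_agent_obs = [np.stack([env_state[i][j] for i in range(env_state.shape[0])])
                     for j in range(n_agents)]
    assert per_agent_obs[0].shape == (frames, h, w)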
diff --git a/studies/e_1.py b/studies/e_1.py
index 7e12d6c..3370204 100644
--- a/studies/e_1.py
+++ b/studies/e_1.py
@@ -147,7 +147,7 @@ def load_model_run_study(seed_path, env_to_run, additional_kwargs_dict):
             try:
                 actions = [model.predict(
                     np.stack([env_state[i][j] for i in range(env_state.shape[0])]),
-                    deterministic=False)[0] for j, model in enumerate(models)]
+                    deterministic=True)[0] for j, model in enumerate(models)]
             except ValueError as e:
                 print(e)
                 print('Env_Kwargs are:\n')
@@ -169,10 +169,11 @@
 
 if __name__ == '__main__':
 
-    train_steps = 8e5
+    train_steps = 5e6
+    n_seeds = 3
 
     # Define a global studi save path
-    start_time = '900000'  # int(time.time())
+    start_time = 'Now_with_doors'  # int(time.time())
     study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}'
 
     # Define Global Env Parameters
@@ -195,57 +196,95 @@ if __name__ == '__main__':
                           spawn_frequency=30, n_drop_off_locations=2,
                           max_agent_inventory_capacity=15)
     factory_kwargs = dict(n_agents=1, max_steps=400, parse_doors=True,
-                          level_name='rooms', record_episodes=False, doors_have_area=False,
+                          level_name='rooms', record_episodes=False, doors_have_area=True,
                           verbose=False,
                           mv_prop=move_props,
                           obs_prop=obs_props
                           )
 
     # Bundle both environments with global kwargs and parameters
-    env_map = {'dirt': (DirtFactory, dict(dirt_prop=dirt_props,
-                                          **factory_kwargs.copy())),
-               'item': (ItemFactory, dict(item_prop=item_props,
-                                          **factory_kwargs.copy())),
-               'itemdirt': (DirtItemFactory, dict(dirt_prop=dirt_props,
-                                                  item_prop=item_props,
-                                                  **factory_kwargs.copy()))}
+    env_map = {}
+    env_map.update({'dirt': (DirtFactory, dict(dirt_prop=dirt_props,
+                                               **factory_kwargs.copy()))})
+    if False:
+        env_map.update({'item': (ItemFactory, dict(item_prop=item_props,
+                                                   **factory_kwargs.copy()))})
+        env_map.update({'itemdirt': (DirtItemFactory, dict(dirt_prop=dirt_props, item_prop=item_props,
+                                                           **factory_kwargs.copy()))})
     env_names = list(env_map.keys())
 
     # Define parameter versions according with #1,2[1,0,N],3
-    observation_modes = {
-        # Fill-value = 0
-        # DEACTIVATED 'seperate_0': dict(additional_env_kwargs=dict(additional_agent_placeholder=0)),
-        # Fill-value = 1
-        # DEACTIVATED 'seperate_1': dict(additional_env_kwargs=dict(additional_agent_placeholder=1)),
-        # Fill-value = N(0, 1)
-        'seperate_N': dict(
-            post_training_kwargs=
-            dict(obs_prop=ObservationProperties(
-                render_agents=AgentRenderOptions.COMBINED,
-                additional_agent_placeholder=None,
-                omit_agent_self=True,
-                frames_to_stack=3,
-                pomdp_r=2)
-            ),
-            additional_env_kwargs=
-            dict(obs_prop=ObservationProperties(
-                render_agents=AgentRenderOptions.NOT,
-                additional_agent_placeholder='N',
-                omit_agent_self=True,
-                frames_to_stack=3,
-                pomdp_r=2)
-            )
-        ),
-        'in_lvl_obs': dict(
-            post_training_kwargs=
-            dict(obs_prop=ObservationProperties(
-                render_agents=AgentRenderOptions.LEVEL,
-                omit_agent_self=True,
-                additional_agent_placeholder=None,
-                frames_to_stack=3,
-                pomdp_r=2)
-            )
-        ),
+    observation_modes = {}
+    if False:
+        observation_modes.update({
+            'seperate_1': dict(
+                post_training_kwargs=
+                dict(obs_prop=ObservationProperties(
+                    render_agents=AgentRenderOptions.COMBINED,
+                    additional_agent_placeholder=None,
+                    omit_agent_self=True,
+                    frames_to_stack=3,
+                    pomdp_r=2)
+                ),
+                additional_env_kwargs=
+                dict(obs_prop=ObservationProperties(
+                    render_agents=AgentRenderOptions.NOT,
+                    additional_agent_placeholder=1,
+                    omit_agent_self=True,
+                    frames_to_stack=3,
+                    pomdp_r=2)
+                )
+            )})
+        observation_modes.update({
+            'seperate_0': dict(
+                post_training_kwargs=
+                dict(obs_prop=ObservationProperties(
+                    render_agents=AgentRenderOptions.COMBINED,
+                    additional_agent_placeholder=None,
+                    omit_agent_self=True,
+                    frames_to_stack=3,
+                    pomdp_r=2)
+                ),
+                additional_env_kwargs=
+                dict(obs_prop=ObservationProperties(
+                    render_agents=AgentRenderOptions.NOT,
+                    additional_agent_placeholder=0,
+                    omit_agent_self=True,
+                    frames_to_stack=3,
+                    pomdp_r=2)
+                )
+            )})
+        observation_modes.update({
+            'seperate_N': dict(
+                post_training_kwargs=
+                dict(obs_prop=ObservationProperties(
+                    render_agents=AgentRenderOptions.COMBINED,
+                    additional_agent_placeholder=None,
+                    omit_agent_self=True,
+                    frames_to_stack=3,
+                    pomdp_r=2)
+                ),
+                additional_env_kwargs=
+                dict(obs_prop=ObservationProperties(
+                    render_agents=AgentRenderOptions.NOT,
+                    additional_agent_placeholder='N',
+                    omit_agent_self=True,
+                    frames_to_stack=3,
+                    pomdp_r=2)
+                )
+            )})
+        observation_modes.update({
+            'in_lvl_obs': dict(
+                post_training_kwargs=
+                dict(obs_prop=ObservationProperties(
+                    render_agents=AgentRenderOptions.LEVEL,
+                    omit_agent_self=True,
+                    additional_agent_placeholder=None,
+                    frames_to_stack=3,
+                    pomdp_r=2)
+                )
+            )})
+    observation_modes.update({
         # No further adjustment needed
         'no_obs': dict(
             post_training_kwargs=
             dict(obs_prop=ObservationProperties(
@@ -257,14 +296,14 @@ if __name__ == '__main__':
                 pomdp_r=2)
             )
         )
-    }
+    })
 
     # Train starts here ############################################################
     # Build Major Loop parameters, parameter versions, Env Classes and models
     if True:
         for obs_mode in observation_modes.keys():
             for env_name in env_names:
-                for model_cls in [h.MODEL_MAP['A2C'], h.MODEL_MAP['DQN']]:
+                for model_cls in [h.MODEL_MAP['A2C']]:
                     # Create an identifier, which is unique for every combination and easy to read in filesystem
                     identifier = f'{model_cls.__name__}_{start_time}'
                     # Train each combination per seed
@@ -274,7 +313,7 @@ if __name__ == '__main__':
                     # Retrieve and set the observation mode specific env parameters
                     additional_kwargs = observation_modes.get(obs_mode, {}).get("additional_env_kwargs", {})
                     env_kwargs.update(additional_kwargs)
-                    for seed in range(5):
+                    for seed in range(n_seeds):
                         env_kwargs.update(env_seed=seed)
                         # Output folder
                         seed_path = combination_path / f'{str(seed)}_{identifier}'
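The `if False:` guards in the hunks above park the item/itemdirt environments and the three placeholder observation modes without deleting their configuration, so only the 'dirt' environment and the 'no_obs' mode stay active for this debugging run. A small sketch of the same toggle with a named flag (the flag name is made up; the patch itself hard-codes `if False:`):

    # Named toggle instead of a bare `if False:` -- same effect, easier to grep.
    DIRT_ONLY_DEBUG = True

    env_map = {}
    env_map.update({'dirt': 'DirtFactory config'})        # always active
    if not DIRT_ONLY_DEBUG:                               # parked variants
        env_map.update({'item': 'ItemFactory config'})
        env_map.update({'itemdirt': 'DirtItemFactory config'})

    print(list(env_map))   # -> ['dirt']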
@@ -352,6 +391,7 @@ if __name__ == '__main__':
     # Evaluation starts here #####################################################
     # First Iterate over every model and monitor "as trained"
     if True:
+        print('Start Baseline Tracking')
         for obs_mode in observation_modes:
             obs_mode_path = next(x for x in study_root_path.iterdir() if x.is_dir() and x.name == obs_mode)
             # For trained policy in study_root_path / identifier
@@ -370,9 +410,11 @@ if __name__ == '__main__':
 
             # for seed_path in (y for y in policy_path.iterdir() if y.is_dir()):
             #     load_model_run_baseline(seed_path)
+        print('Baseline Tracking done')
 
     # Then iterate over every model and monitor "ood behavior" - "is it ood?"
     if True:
+        print('Start OOD Tracking')
         for obs_mode in observation_modes:
             obs_mode_path = next(x for x in study_root_path.iterdir() if x.is_dir() and x.name == obs_mode)
             # For trained policy in study_root_path / identifier
@@ -387,18 +429,19 @@ if __name__ == '__main__':
                     pool = mp.Pool(mp.cpu_count())
                     paths = list(y for y in policy_path.iterdir() if y.is_dir() \
                                  and not (y / ood_monitor_file).exists())
-                    result = pool.starmap(load_model_run_study,
-                                          it.product(paths,
-                                                     (env_map[env_path.name][0],),
-                                                     (observation_modes[obs_mode],))
-                                          )
-                    # for seed_path in (y for y in policy_path.iterdir() if y.is_dir()):
-                    #     load_model_run_study(seed_path)
+                    # result = pool.starmap(load_model_run_study,
+                    #                       it.product(paths,
+                    #                                  (env_map[env_path.name][0],),
+                    #                                  (observation_modes[obs_mode],))
+                    #                       )
+                    for seed_path in (y for y in policy_path.iterdir() if y.is_dir()):
+                        load_model_run_study(seed_path, env_map[env_path.name][0], observation_modes[obs_mode])
+        print('OOD Tracking Done')
 
     # Plotting
     if True:
         # TODO: Plotting
-
+        print('Start Plotting')
         for observation_folder in (x for x in study_root_path.iterdir() if x.is_dir()):
             df_list = list()
             for env_folder in (x for x in observation_folder.iterdir() if x.is_dir()):
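The last hunk trades the `pool.starmap` fan-out for a serial loop: a failure inside a worker process only surfaces afterwards as a re-raised, pickled exception, while the plain loop fails at the offending seed with a direct traceback and works under a debugger. A minimal sketch of both modes behind one flag; `run_study` is a hypothetical stand-in for `load_model_run_study`:

    import itertools as it
    import multiprocessing as mp

    def run_study(seed_path, env_cls, obs_mode):
        # Stand-in for load_model_run_study(seed_path, env_to_run, additional_kwargs_dict).
        return f'{seed_path}/{env_cls}/{obs_mode}'

    if __name__ == '__main__':
        paths = ['seed_0', 'seed_1']
        serial = True   # flip to fan out over processes again
        if serial:      # direct tracebacks, pdb-friendly
            results = [run_study(p, 'DirtFactory', 'no_obs') for p in paths]
        else:
            with mp.Pool(mp.cpu_count()) as pool:
                results = pool.starmap(run_study,
                                       it.product(paths, ('DirtFactory',), ('no_obs',)))
        print(results)

Note also that with the serial loop in place, the `mp.Pool(mp.cpu_count())` created just above it is no longer used; creating the pool only in the parallel branch would avoid spawning idle workers.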