diff --git a/environments/factory/base_factory.py b/environments/factory/base_factory.py index db05acd..b8d5da6 100644 --- a/environments/factory/base_factory.py +++ b/environments/factory/base_factory.py @@ -265,7 +265,6 @@ class BaseFactory(gym.Env): # d = {key: val._asdict() if hasattr(val, '_asdict') else val for key, val in self.__dict__.items() d = {key: val for key, val in self.__dict__.items() if not key.startswith('_') and not key.startswith('__')} filepath.parent.mkdir(parents=True, exist_ok=True) - super(BaseFactory, self).save_params() with filepath.open('w') as f: yaml.dump(d, f) # pickle.dump(d, f, protocol=pickle.HIGHEST_PROTOCOL) diff --git a/environments/factory/simple_factory.py b/environments/factory/simple_factory.py index 22e7a4b..15f9d6d 100644 --- a/environments/factory/simple_factory.py +++ b/environments/factory/simple_factory.py @@ -192,11 +192,6 @@ if __name__ == '__main__': factory = SimpleFactory(movement_properties=move_props, dirt_properties=dirt_props, n_agents=10, combin_agent_slices_in_obs=True, omit_agent_slice_in_obs=False, level_name='rooms') - # dirt_props = DirtProperties() - # move_props = MovementProperties(allow_diagonal_movement=False, allow_no_op=False) - # factory = SimpleFactory(n_agents=2, dirt_properties=dirt_props, movement_properties=move_props, level='rooms', - # pomdp_radius=2) - n_actions = factory.action_space.n - 1 for epoch in range(100): diff --git a/environments/logging/plotting.py b/environments/logging/plotting.py index 323b99f..1fa5b15 100644 --- a/environments/logging/plotting.py +++ b/environments/logging/plotting.py @@ -41,6 +41,6 @@ def prepare_plot(filepath, results_df, ext='png', hue='Measurement', style=None) plt.close('all') sns.set(rc={'text.usetex': False}, style='whitegrid') lineplot = sns.lineplot(data=df, x='Episode', y='Score', hue=hue, style=style, - ci=95, palette=PALETTE, hue_order=hue_order) + ci=95, palette=PALETTE, hue_order=hue_order) 
lineplot.set_title(f'{sorted(list(df["Measurement"].unique()))}') plot(filepath, ext=ext) diff --git a/main.py b/main.py index 937bd51..e844f4e 100644 --- a/main.py +++ b/main.py @@ -101,14 +101,20 @@ if __name__ == '__main__': out_path = None - for modeL_type in [PPO, A2C]: # , RegDQN, DQN]: + for modeL_type in [A2C, PPO, RegDQN, DQN]: # , QRDQN]: for seed in range(3): with SimpleFactory(n_agents=1, dirt_properties=dirt_props, pomdp_radius=2, max_steps=400, movement_properties=move_props, level_name='rooms', frames_to_stack=4, omit_agent_slice_in_obs=False, combin_agent_slices_in_obs=True) as env: - kwargs = dict(ent_coef=0.01) if isinstance(modeL_type, (PPO, A2C)) else {} + if modeL_type.__name__ in ["PPO", "A2C"]: + kwargs = dict(ent_coef=0.01) + elif modeL_type.__name__ in ["RegDQN", "DQN", "QRDQN"]: + kwargs = dict(target_update_interval=500, buffer_size=30000, learning_starts=5000, + exploration_final_eps=0.01, batch_size=96) + else: + raise NameError(f'The model "{modeL_type.__name__}" has the wrong name.') model = modeL_type("MlpPolicy", env, verbose=1, seed=seed, device='cpu', **kwargs) out_path = Path('debug_out') / f'{model.__class__.__name__}_{time_stamp}'