added simple example

2022-03-28 16:01:55 +02:00
parent a9a4274370
commit 4fe43a23b8
3 changed files with 133 additions and 14 deletions
--- a/environments/factory/factory_dirt.py
+++ b/environments/factory/factory_dirt.py
@ -198,19 +198,19 @@ class DirtFactory(BaseFactory):
    def step_hook(self) -> (List[dict], dict):
        super_reward_info = super().step_hook()
-        if smear_amount := self.dirt_prop.dirt_smear_amount:
+        # if smear_amount := self.dirt_prop.dirt_smear_amount:
-            for agent in self[c.AGENT]:
+        #     for agent in self[c.AGENT]:
-                if agent.temp_valid and agent.last_pos != c.NO_POS:
+        #         if agent.temp_valid and agent.last_pos != c.NO_POS:
-                    if self._actions.is_moving_action(agent.temp_action):
+        #             if self._actions.is_moving_action(agent.temp_action):
-                        if old_pos_dirt := self[c.DIRT].by_pos(agent.last_pos):
+        #                 if old_pos_dirt := self[c.DIRT].by_pos(agent.last_pos):
-                            if smeared_dirt := round(old_pos_dirt.amount * smear_amount, 2):
+        #                     if smeared_dirt := round(old_pos_dirt.amount * smear_amount, 2):
-                                old_pos_dirt.set_new_amount(max(0, old_pos_dirt.amount-smeared_dirt))
+        #                         old_pos_dirt.set_new_amount(max(0, old_pos_dirt.amount-smeared_dirt))
-                                if new_pos_dirt := self[c.DIRT].by_pos(agent.pos):
+        #                         if new_pos_dirt := self[c.DIRT].by_pos(agent.pos):
-                                    new_pos_dirt.set_new_amount(max(0, new_pos_dirt.amount + smeared_dirt))
+        #                             new_pos_dirt.set_new_amount(max(0, new_pos_dirt.amount + smeared_dirt))
-                                else:
+        #                         else:
-                                    if self[c.DIRT].spawn_dirt(agent.tile):
+        #                             if self[c.DIRT].spawn_dirt(agent.tile):
-                                        new_pos_dirt = self[c.DIRT].by_pos(agent.pos)
+        #                                 new_pos_dirt = self[c.DIRT].by_pos(agent.pos)
-                                        new_pos_dirt.set_new_amount(max(0, new_pos_dirt.amount + smeared_dirt))
+        #                                 new_pos_dirt.set_new_amount(max(0, new_pos_dirt.amount + smeared_dirt))
        if self._next_dirt_spawn < 0:
            pass  # No Dirt Spawn
        elif not self._next_dirt_spawn:
--- a/environments/logging/envmonitor.py
+++ b/environments/logging/envmonitor.py
@ -47,7 +47,7 @@ class EnvMonitor(BaseCallback):
            self._read_info(env_idx, info)
        for env_idx, done in list(
-                enumerate(self.locals.get('dones', []))) + list(enumerate(self.locals.get('done', []))):
+                enumerate(self.locals.get('dones', []))): # + list(enumerate(self.locals.get('done', []))):
            self._read_done(env_idx, done)
        return True
--- a/experiments/simple_example.py
+++ b/experiments/simple_example.py
@ -0,0 +1,119 @@
 import warnings
 from pathlib import Path
 import yaml
 from stable_baselines3 import PPO
 from environments.factory.factory_dirt import DirtProperties, DirtFactory, RewardsDirt
 from environments.logging.envmonitor import EnvMonitor
 from environments.logging.recorder import EnvRecorder
 from environments.utility_classes import MovementProperties, ObservationProperties, AgentRenderOptions
 from environments.factory.factory_dirt import Constants as c
 warnings.filterwarnings('ignore', category=FutureWarning)
 warnings.filterwarnings('ignore', category=UserWarning)
 if __name__ == '__main__':
    # Minimal end-to-end example: optionally train a PPO agent on a DirtFactory
    # environment, then optionally reload the saved model and replay it for a
    # few episodes while monitoring (and, if enabled, recording) the run.

    # --- Switches -----------------------------------------------------------
    TRAIN_AGENT = True        # train a fresh model and save it to `save_path`
    LOAD_AND_REPLAY = True    # load the model from `save_path` and run evaluation episodes
    record = True             # wrap the replay environment in an EnvRecorder
    render = False            # call env.render() after every replay step

    # --- Paths --------------------------------------------------------------
    project_root = Path(__file__).parent.parent
    study_root_path = project_root / 'experiment_out'
    parameter_path = (project_root / 'environments' / 'factory' / 'levels'
                      / 'parameters' / 'DirtyFactory-v0.yaml')
    save_path = study_root_path / 'model.zip'

    # Output folder
    study_root_path.mkdir(parents=True, exist_ok=True)

    # --- Training / environment configuration -------------------------------
    # `total_timesteps` is documented as an int, so do not pass the float 2*1e5.
    train_steps = int(2e5)
    frames_to_stack = 0

    # Settings shared between the property objects and the factory kwargs below.
    u = dict(
        show_global_position_info=True,
        pomdp_r=3,
        cast_shadows=True,
        allow_diagonal_movement=False,
        parse_doors=True,
        doors_have_area=False,
        done_at_collision=True
    )
    obs_props = ObservationProperties(render_agents=AgentRenderOptions.SEPERATE,
                                      additional_agent_placeholder=None,
                                      omit_agent_self=True,
                                      frames_to_stack=frames_to_stack,
                                      pomdp_r=u['pomdp_r'], cast_shadows=u['cast_shadows'],
                                      show_global_position_info=u['show_global_position_info'])
    move_props = MovementProperties(allow_diagonal_movement=u['allow_diagonal_movement'],
                                    allow_square_movement=True,
                                    allow_no_op=False)
    # NOTE(review): dirt_props is built but never passed into factory_kwargs,
    # so the factory presumably falls back to its own defaults — confirm intent.
    dirt_props = DirtProperties(initial_dirt_ratio=0.35, initial_dirt_spawn_r_var=0.1,
                                clean_amount=0.34,
                                max_spawn_amount=0.1, max_global_amount=20,
                                max_local_amount=1, spawn_frequency=0, max_spawn_ratio=0.05,
                                dirt_smear_amount=0.0)
    rewards_dirt = RewardsDirt(CLEAN_UP_FAIL=-0.5, CLEAN_UP_VALID=1, CLEAN_UP_LAST_PIECE=5)
    factory_kwargs = dict(n_agents=1, max_steps=500, parse_doors=u['parse_doors'],
                          level_name='rooms', doors_have_area=u['doors_have_area'],
                          verbose=True,
                          mv_prop=move_props,
                          obs_prop=obs_props,
                          rewards_dirt=rewards_dirt,
                          done_at_collision=u['done_at_collision']
                          )
    # Alternative: read the factory configuration from `parameter_path` instead.
    # with (parameter_path).open('r') as f:
    #     factory_kwargs = yaml.load(f, Loader=yaml.FullLoader)
    #     factory_kwargs.update(n_agents=1, done_at_collision=False, verbose=True)

    # --- Training -----------------------------------------------------------
    if TRAIN_AGENT:
        env = DirtFactory(**factory_kwargs)
        # EnvMonitor is passed to `learn` as the training callback.
        callbacks = EnvMonitor(env)
        # NOTE(review): obs_shape is unused; kept because the property access
        # may have side effects — confirm and drop if not.
        obs_shape = env.observation_space.shape
        model = PPO("MlpPolicy", env, verbose=1, device='cpu')
        model.learn(total_timesteps=train_steps, callback=callbacks)
        callbacks.save_run(study_root_path / 'monitor.pick',
                           auto_plotting_keys=['step_reward', 'collision',
                                               'cleanup_valid', 'cleanup_fail'])  # + env_plot_keys)
        model.save(save_path)

    # --- Replay / evaluation ------------------------------------------------
    if LOAD_AND_REPLAY:
        with DirtFactory(**factory_kwargs) as env:
            # Here EnvMonitor (and optionally EnvRecorder) wrap the environment.
            env = EnvMonitor(env)
            env = EnvRecorder(env) if record else env
            # NOTE(review): unused, see the note in the training branch.
            obs_shape = env.observation_space.shape
            model = PPO.load(save_path)
            # Evaluation Loop for i in range(n Episodes)
            for episode in range(10):
                env_state = env.reset()
                rew, done_bool = 0, False
                # The loop condition alone ends the episode; no explicit break needed.
                while not done_bool:
                    actions = model.predict(env_state, deterministic=True)[0]
                    env_state, step_r, done_bool, info_obj = env.step(actions)
                    rew += step_r
                    if render:
                        env.render()
                    # Report whenever at least one door is open after this step.
                    if any(x.is_open for x in env.unwrapped.unwrapped.unwrapped[c.DOORS]):
                        print('openDoor found')
                print(
                    f'Factory run {episode} done, steps taken {env.unwrapped.unwrapped.unwrapped._steps}, reward is:\n    {rew}')
            # Records only exist when the EnvRecorder wrapper was applied above.
            if record:
                env.save_records(study_root_path / 'reload_recorder.pick', save_occupation_map=False)
            #env.save_run(study_root_path / 'reload_monitor.pick',
            #             auto_plotting_keys=['step_reward', 'cleanup_valid', 'cleanup_fail'])