diff --git a/environments/factory/factory_dirt.py b/environments/factory/factory_dirt.py
index bdf1834..fec7985 100644
--- a/environments/factory/factory_dirt.py
+++ b/environments/factory/factory_dirt.py
@@ -198,19 +198,19 @@ class DirtFactory(BaseFactory):
     def step_hook(self) -> (List[dict], dict):
         super_reward_info = super().step_hook()

-        if smear_amount := self.dirt_prop.dirt_smear_amount:
-            for agent in self[c.AGENT]:
-                if agent.temp_valid and agent.last_pos != c.NO_POS:
-                    if self._actions.is_moving_action(agent.temp_action):
-                        if old_pos_dirt := self[c.DIRT].by_pos(agent.last_pos):
-                            if smeared_dirt := round(old_pos_dirt.amount * smear_amount, 2):
-                                old_pos_dirt.set_new_amount(max(0, old_pos_dirt.amount-smeared_dirt))
-                                if new_pos_dirt := self[c.DIRT].by_pos(agent.pos):
-                                    new_pos_dirt.set_new_amount(max(0, new_pos_dirt.amount + smeared_dirt))
-                                else:
-                                    if self[c.DIRT].spawn_dirt(agent.tile):
-                                        new_pos_dirt = self[c.DIRT].by_pos(agent.pos)
-                                        new_pos_dirt.set_new_amount(max(0, new_pos_dirt.amount + smeared_dirt))
+        # if smear_amount := self.dirt_prop.dirt_smear_amount:
+        #     for agent in self[c.AGENT]:
+        #         if agent.temp_valid and agent.last_pos != c.NO_POS:
+        #             if self._actions.is_moving_action(agent.temp_action):
+        #                 if old_pos_dirt := self[c.DIRT].by_pos(agent.last_pos):
+        #                     if smeared_dirt := round(old_pos_dirt.amount * smear_amount, 2):
+        #                         old_pos_dirt.set_new_amount(max(0, old_pos_dirt.amount-smeared_dirt))
+        #                         if new_pos_dirt := self[c.DIRT].by_pos(agent.pos):
+        #                             new_pos_dirt.set_new_amount(max(0, new_pos_dirt.amount + smeared_dirt))
+        #                         else:
+        #                             if self[c.DIRT].spawn_dirt(agent.tile):
+        #                                 new_pos_dirt = self[c.DIRT].by_pos(agent.pos)
+        #                                 new_pos_dirt.set_new_amount(max(0, new_pos_dirt.amount + smeared_dirt))
         if self._next_dirt_spawn < 0:
             pass  # No Dirt Spawn
         elif not self._next_dirt_spawn:
diff --git a/environments/logging/envmonitor.py b/environments/logging/envmonitor.py
index 2e18922..925b995 100644
--- a/environments/logging/envmonitor.py
+++ b/environments/logging/envmonitor.py
@@ -47,7 +47,7 @@ class EnvMonitor(BaseCallback):
             self._read_info(env_idx, info)

         for env_idx, done in list(
-                enumerate(self.locals.get('dones', []))) + list(enumerate(self.locals.get('done', []))):
+                enumerate(self.locals.get('dones', []))):  # + list(enumerate(self.locals.get('done', []))):
             self._read_done(env_idx, done)
         return True

diff --git a/experiments/simple_example.py b/experiments/simple_example.py
new file mode 100644
index 0000000..af05105
--- /dev/null
+++ b/experiments/simple_example.py
@@ -0,0 +1,119 @@
+import warnings
+from pathlib import Path
+
+import yaml
+
+from stable_baselines3 import PPO
+
+from environments.factory.factory_dirt import DirtProperties, DirtFactory, RewardsDirt
+from environments.logging.envmonitor import EnvMonitor
+from environments.logging.recorder import EnvRecorder
+from environments.utility_classes import MovementProperties, ObservationProperties, AgentRenderOptions
+from environments.factory.factory_dirt import Constants as c
+
+warnings.filterwarnings('ignore', category=FutureWarning)
+warnings.filterwarnings('ignore', category=UserWarning)
+
+if __name__ == '__main__':
+    TRAIN_AGENT = True
+    LOAD_AND_REPLAY = True
+    record = True
+    render = False
+
+    study_root_path = Path(__file__).parent.parent / 'experiment_out'
+
+    parameter_path = Path(__file__).parent.parent / 'environments' / 'factory' / 'levels' / 'parameters' / 'DirtyFactory-v0.yaml'
+
+    save_path = study_root_path / f'model.zip'
+
+    # Output folder
+    study_root_path.mkdir(parents=True, exist_ok=True)
+
+    train_steps = 2*1e5
+    frames_to_stack = 0
+
+    u = dict(
+        show_global_position_info=True,
+        pomdp_r=3,
+        cast_shadows=True,
+        allow_diagonal_movement=False,
+        parse_doors=True,
+        doors_have_area=False,
+        done_at_collision=True
+    )
+    obs_props = ObservationProperties(render_agents=AgentRenderOptions.SEPERATE,
+                                      additional_agent_placeholder=None,
+                                      omit_agent_self=True,
+                                      frames_to_stack=frames_to_stack,
+                                      pomdp_r=u['pomdp_r'], cast_shadows=u['cast_shadows'],
+                                      show_global_position_info=u['show_global_position_info'])
+    move_props = MovementProperties(allow_diagonal_movement=u['allow_diagonal_movement'],
+                                    allow_square_movement=True,
+                                    allow_no_op=False)
+    dirt_props = DirtProperties(initial_dirt_ratio=0.35, initial_dirt_spawn_r_var=0.1,
+                                clean_amount=0.34,
+                                max_spawn_amount=0.1, max_global_amount=20,
+                                max_local_amount=1, spawn_frequency=0, max_spawn_ratio=0.05,
+                                dirt_smear_amount=0.0)
+    rewards_dirt = RewardsDirt(CLEAN_UP_FAIL=-0.5, CLEAN_UP_VALID=1, CLEAN_UP_LAST_PIECE=5)
+    factory_kwargs = dict(n_agents=1, max_steps=500, parse_doors=u['parse_doors'],
+                          level_name='rooms', doors_have_area=u['doors_have_area'],
+                          verbose=True,
+                          mv_prop=move_props,
+                          obs_prop=obs_props,
+                          rewards_dirt=rewards_dirt,
+                          done_at_collision=u['done_at_collision']
+                          )
+
+    # with (parameter_path).open('r') as f:
+    #     factory_kwargs = yaml.load(f, Loader=yaml.FullLoader)
+    #     factory_kwargs.update(n_agents=1, done_at_collision=False, verbose=True)
+
+    if TRAIN_AGENT:
+        env = DirtFactory(**factory_kwargs)
+        callbacks = EnvMonitor(env)
+        obs_shape = env.observation_space.shape
+
+        model = PPO("MlpPolicy", env, verbose=1, device='cpu')
+
+        model.learn(total_timesteps=train_steps, callback=callbacks)
+
+        callbacks.save_run(study_root_path / 'monitor.pick',
+                           auto_plotting_keys=['step_reward', 'collision'] + ['cleanup_valid', 'cleanup_fail'])  # + env_plot_keys)
+
+        model.save(save_path)
+
+    if LOAD_AND_REPLAY:
+        with DirtFactory(**factory_kwargs) as env:
+            env = EnvMonitor(env)
+            env = EnvRecorder(env) if record else env
+            obs_shape = env.observation_space.shape
+            model = PPO.load(save_path)
+            # Evaluation loop over n episodes
+            for episode in range(10):
+                env_state = env.reset()
+                rew, done_bool = 0, False
+                while not done_bool:
+                    actions = model.predict(env_state, deterministic=True)[0]
+                    env_state, step_r, done_bool, info_obj = env.step(actions)
+
+                    rew += step_r
+
+                    if render:
+                        env.render()
+
+                    try:
+                        door = next(x for x in env.unwrapped.unwrapped.unwrapped[c.DOORS] if x.is_open)
+                        print('openDoor found')
+                    except StopIteration:
+                        pass
+
+                    if done_bool:
+                        break
+                print(
+                    f'Factory run {episode} done, steps taken {env.unwrapped.unwrapped.unwrapped._steps}, reward is:\n {rew}')
+
+            env.save_records(study_root_path / 'reload_recorder.pick', save_occupation_map=False)
+            # env.save_run(study_root_path / 'reload_monitor.pick',
+            #              auto_plotting_keys=['step_reward', 'cleanup_valid', 'cleanup_fail'])
\ No newline at end of file
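
Note on the replay loop in simple_example.py above: the chained env.unwrapped.unwrapped.unwrapped hard-codes the wrapper depth (EnvRecorder around EnvMonitor around DirtFactory), so it breaks as soon as a wrapper is added or removed. A minimal depth-independent sketch, assuming each wrapper exposes its wrapped env as .unwrapped the way the loop above relies on (the helper name innermost is illustrative, not part of the codebase):

    def innermost(env):
        # Follow the .unwrapped chain until it stops making progress,
        # i.e. until we reach an object whose .unwrapped is itself
        # (or which has no .unwrapped attribute at all).
        while getattr(env, 'unwrapped', env) is not env:
            env = env.unwrapped
        return env

    # factory = innermost(env)  # replaces env.unwrapped.unwrapped.unwrapped
    # open_door = next((x for x in factory[c.DOORS] if x.is_open), None)

This keeps the door check and the step counter access working regardless of whether record toggled the EnvRecorder wrapper on or off.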