Compare commits: main...jannis_exp (1 commit)

Commit 4fe43a23b8
environments/factory/factory_dirt.py
@@ -198,19 +198,19 @@ class DirtFactory(BaseFactory):
     def step_hook(self) -> (List[dict], dict):
         super_reward_info = super().step_hook()
-        if smear_amount := self.dirt_prop.dirt_smear_amount:
-            for agent in self[c.AGENT]:
-                if agent.temp_valid and agent.last_pos != c.NO_POS:
-                    if self._actions.is_moving_action(agent.temp_action):
-                        if old_pos_dirt := self[c.DIRT].by_pos(agent.last_pos):
-                            if smeared_dirt := round(old_pos_dirt.amount * smear_amount, 2):
-                                old_pos_dirt.set_new_amount(max(0, old_pos_dirt.amount - smeared_dirt))
-                                if new_pos_dirt := self[c.DIRT].by_pos(agent.pos):
-                                    new_pos_dirt.set_new_amount(max(0, new_pos_dirt.amount + smeared_dirt))
-                                else:
-                                    if self[c.DIRT].spawn_dirt(agent.tile):
-                                        new_pos_dirt = self[c.DIRT].by_pos(agent.pos)
-                                        new_pos_dirt.set_new_amount(max(0, new_pos_dirt.amount + smeared_dirt))
+        # if smear_amount := self.dirt_prop.dirt_smear_amount:
+        #     for agent in self[c.AGENT]:
+        #         if agent.temp_valid and agent.last_pos != c.NO_POS:
+        #             if self._actions.is_moving_action(agent.temp_action):
+        #                 if old_pos_dirt := self[c.DIRT].by_pos(agent.last_pos):
+        #                     if smeared_dirt := round(old_pos_dirt.amount * smear_amount, 2):
+        #                         old_pos_dirt.set_new_amount(max(0, old_pos_dirt.amount - smeared_dirt))
+        #                         if new_pos_dirt := self[c.DIRT].by_pos(agent.pos):
+        #                             new_pos_dirt.set_new_amount(max(0, new_pos_dirt.amount + smeared_dirt))
+        #                         else:
+        #                             if self[c.DIRT].spawn_dirt(agent.tile):
+        #                                 new_pos_dirt = self[c.DIRT].by_pos(agent.pos)
+        #                                 new_pos_dirt.set_new_amount(max(0, new_pos_dirt.amount + smeared_dirt))
         if self._next_dirt_spawn < 0:
             pass  # No Dirt Spawn
         elif not self._next_dirt_spawn:
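Note: the block disabled above implemented dirt smearing: when an agent's move action succeeded, a fraction (dirt_smear_amount) of the dirt on its previous tile was dragged onto its new tile. A minimal standalone sketch of that update rule, using plain floats instead of the factory's Dirt entities (names here are hypothetical):

    # Sketch of the (now disabled) smear rule with plain floats.
    def smear(old_tile_dirt: float, new_tile_dirt: float, smear_amount: float):
        smeared = round(old_tile_dirt * smear_amount, 2)    # fraction dragged along
        old_tile_dirt = max(0.0, old_tile_dirt - smeared)   # source tile loses it
        new_tile_dirt = max(0.0, new_tile_dirt + smeared)   # target tile gains it
        return old_tile_dirt, new_tile_dirt

    print(smear(1.0, 0.0, 0.2))  # -> (0.8, 0.2)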
environments/logging/envmonitor.py
@@ -47,7 +47,7 @@ class EnvMonitor(BaseCallback):
             self._read_info(env_idx, info)

         for env_idx, done in list(
-                enumerate(self.locals.get('dones', []))) + list(enumerate(self.locals.get('done', []))):
+                enumerate(self.locals.get('dones', []))):  # + list(enumerate(self.locals.get('done', []))):
             self._read_done(env_idx, done)
         return True
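Note: the one-line change above keeps only the 'dones' key. In stable-baselines3, a BaseCallback reads the rollout variables via self.locals, where the per-sub-env episode-end flags are exposed under 'dones'; the dropped 'done' lookup fell back to an empty list whenever that key was absent. A minimal sketch of the same iteration pattern, with a hypothetical locals_dict standing in for self.locals:

    import numpy as np

    # Hypothetical stand-in for self.locals inside a stable-baselines3 callback.
    locals_dict = {'dones': np.array([False, True, False])}

    # Same pattern as the patched line: iterate (env_idx, done) pairs,
    # defaulting to an empty list if the key is missing.
    for env_idx, done in enumerate(locals_dict.get('dones', [])):
        if done:
            print(f'sub-env {env_idx} finished an episode')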
experiments/simple_example.py (new file, 119 lines)
@@ -0,0 +1,119 @@
import warnings
from pathlib import Path

import yaml

from stable_baselines3 import PPO

from environments.factory.factory_dirt import DirtProperties, DirtFactory, RewardsDirt
from environments.logging.envmonitor import EnvMonitor
from environments.logging.recorder import EnvRecorder
from environments.utility_classes import MovementProperties, ObservationProperties, AgentRenderOptions
from environments.factory.factory_dirt import Constants as c

warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning)
if __name__ == '__main__':
    TRAIN_AGENT = True
    LOAD_AND_REPLAY = True
    record = True
    render = False

    study_root_path = Path(__file__).parent.parent / 'experiment_out'
    parameter_path = Path(__file__).parent.parent / 'environments' / 'factory' / 'levels' / 'parameters' / 'DirtyFactory-v0.yaml'
    save_path = study_root_path / 'model.zip'

    # Output folder
    study_root_path.mkdir(parents=True, exist_ok=True)

    train_steps = int(2e5)  # PPO.learn() expects an integer step count
    frames_to_stack = 0

    # Shared toggles, reused by the property objects and factory kwargs below
    u = dict(
        show_global_position_info=True,
        pomdp_r=3,
        cast_shadows=True,
        allow_diagonal_movement=False,
        parse_doors=True,
        doors_have_area=False,
        done_at_collision=True
    )
    obs_props = ObservationProperties(render_agents=AgentRenderOptions.SEPERATE,
                                      additional_agent_placeholder=None,
                                      omit_agent_self=True,
                                      frames_to_stack=frames_to_stack,
                                      pomdp_r=u['pomdp_r'], cast_shadows=u['cast_shadows'],
                                      show_global_position_info=u['show_global_position_info'])
    move_props = MovementProperties(allow_diagonal_movement=u['allow_diagonal_movement'],
                                    allow_square_movement=True,
                                    allow_no_op=False)
    dirt_props = DirtProperties(initial_dirt_ratio=0.35, initial_dirt_spawn_r_var=0.1,
                                clean_amount=0.34,
                                max_spawn_amount=0.1, max_global_amount=20,
                                max_local_amount=1, spawn_frequency=0, max_spawn_ratio=0.05,
                                dirt_smear_amount=0.0)
    rewards_dirt = RewardsDirt(CLEAN_UP_FAIL=-0.5, CLEAN_UP_VALID=1, CLEAN_UP_LAST_PIECE=5)
    factory_kwargs = dict(n_agents=1, max_steps=500, parse_doors=u['parse_doors'],
                          level_name='rooms', doors_have_area=u['doors_have_area'],
                          verbose=True,
                          mv_prop=move_props,
                          obs_prop=obs_props,
                          rewards_dirt=rewards_dirt,
                          done_at_collision=u['done_at_collision']
                          )

    # with (parameter_path).open('r') as f:
    #     factory_kwargs = yaml.load(f, Loader=yaml.FullLoader)
    #     factory_kwargs.update(n_agents=1, done_at_collision=False, verbose=True)
    if TRAIN_AGENT:
        env = DirtFactory(**factory_kwargs)
        callbacks = EnvMonitor(env)
        obs_shape = env.observation_space.shape

        model = PPO("MlpPolicy", env, verbose=1, device='cpu')
        model.learn(total_timesteps=train_steps, callback=callbacks)

        callbacks.save_run(study_root_path / 'monitor.pick',
                           auto_plotting_keys=['step_reward', 'collision'] + ['cleanup_valid', 'cleanup_fail'])  # + env_plot_keys)
        model.save(save_path)
    if LOAD_AND_REPLAY:
        with DirtFactory(**factory_kwargs) as env:
            env = EnvMonitor(env)
            env = EnvRecorder(env) if record else env
            obs_shape = env.observation_space.shape
            model = PPO.load(save_path)
            # Evaluation loop over n episodes
            for episode in range(10):
                env_state = env.reset()
                rew, done_bool = 0, False
                while not done_bool:
                    # predict() returns (action, hidden_state); keep the action
                    actions = model.predict(env_state, deterministic=True)[0]
                    env_state, step_r, done_bool, info_obj = env.step(actions)
                    rew += step_r

                    if render:
                        env.render()

                    try:
                        door = next(x for x in env.unwrapped.unwrapped.unwrapped[c.DOORS] if x.is_open)
                        print('openDoor found')
                    except StopIteration:
                        pass

                    if done_bool:
                        break
                print(f'Factory run {episode} done, steps taken {env.unwrapped.unwrapped.unwrapped._steps}, reward is:\n {rew}')

            # save_records comes from the EnvRecorder wrapper (requires record=True above)
            env.save_records(study_root_path / 'reload_recorder.pick', save_occupation_map=False)
            # env.save_run(study_root_path / 'reload_monitor.pick',
            #              auto_plotting_keys=['step_reward', 'cleanup_valid', 'cleanup_fail'])
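Note: the chained env.unwrapped.unwrapped.unwrapped in the replay loop reaches through the EnvMonitor/EnvRecorder wrappers back to the underlying DirtFactory. If the wrapper depth ever changes, a small helper that follows .unwrapped to a fixed point is more robust; this is a hypothetical sketch, not part of the commit:

    def innermost(env):
        # Follow .unwrapped until it stops changing (hypothetical helper).
        while getattr(env, 'unwrapped', env) is not env:
            env = env.unwrapped
        return env

    # e.g. innermost(env)[c.DOORS] instead of env.unwrapped.unwrapped.unwrapped[c.DOORS]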