Debugging and collision rendering
This commit is contained in:
parent
ab9cde7853
commit
6287380f60
@ -494,10 +494,10 @@ class BaseFactory(gym.Env):
|
|||||||
if self._actions.is_moving_action(agent.temp_action):
|
if self._actions.is_moving_action(agent.temp_action):
|
||||||
if agent.temp_valid:
|
if agent.temp_valid:
|
||||||
# info_dict.update(movement=1)
|
# info_dict.update(movement=1)
|
||||||
reward -= 0.001
|
reward -= 0.01
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
reward -= 0.01
|
reward -= 0.05
|
||||||
self.print(f'{agent.name} just hit the wall at {agent.pos}.')
|
self.print(f'{agent.name} just hit the wall at {agent.pos}.')
|
||||||
per_agent_info_dict[agent.name].update({f'{agent.name}_vs_LEVEL': 1})
|
per_agent_info_dict[agent.name].update({f'{agent.name}_vs_LEVEL': 1})
|
||||||
|
|
||||||
|
@ -65,7 +65,7 @@ class DirtRegister(MovingEntityObjectRegister):
|
|||||||
def as_array(self):
|
def as_array(self):
|
||||||
if self._array is not None:
|
if self._array is not None:
|
||||||
self._array[:] = c.FREE_CELL.value
|
self._array[:] = c.FREE_CELL.value
|
||||||
for dirt in self.values():
|
for dirt in list(self.values()):
|
||||||
if dirt.amount == 0:
|
if dirt.amount == 0:
|
||||||
self.delete_item(dirt)
|
self.delete_item(dirt)
|
||||||
self._array[0, dirt.x, dirt.y] = dirt.amount
|
self._array[0, dirt.x, dirt.y] = dirt.amount
|
||||||
|
@ -136,7 +136,7 @@ def asset_str(agent):
|
|||||||
# if any([x is None for x in [self._slices[j] for j in agent.collisions]]):
|
# if any([x is None for x in [self._slices[j] for j in agent.collisions]]):
|
||||||
# print('error')
|
# print('error')
|
||||||
col_names = [x.name for x in agent.temp_collisions]
|
col_names = [x.name for x in agent.temp_collisions]
|
||||||
if c.AGENT.value in col_names:
|
if any(c.AGENT.value in name for name in col_names):
|
||||||
return 'agent_collision', 'blank'
|
return 'agent_collision', 'blank'
|
||||||
elif not agent.temp_valid or c.LEVEL.name in col_names or c.AGENT.name in col_names:
|
elif not agent.temp_valid or c.LEVEL.name in col_names or c.AGENT.name in col_names:
|
||||||
return c.AGENT.value, 'invalid'
|
return c.AGENT.value, 'invalid'
|
||||||
|
@ -16,11 +16,11 @@ warnings.filterwarnings('ignore', category=UserWarning)
|
|||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
model_name = 'DQN_163519000'
|
model_name = 'A2C_ItsDirt'
|
||||||
run_id = 0
|
run_id = 0
|
||||||
seed = 69
|
seed = 67
|
||||||
n_agents = 2
|
n_agents = 1
|
||||||
out_path = Path('debug_out/DQN_163519000/1_DQN_163519000')
|
out_path = Path('study_out/e_1_ItsDirt/no_obs/dirt/A2C_ItsDirt/0_A2C_ItsDirt')
|
||||||
model_path = out_path
|
model_path = out_path
|
||||||
|
|
||||||
with (out_path / f'env_params.json').open('r') as f:
|
with (out_path / f'env_params.json').open('r') as f:
|
||||||
@ -46,10 +46,17 @@ if __name__ == '__main__':
|
|||||||
env_state = env.reset()
|
env_state = env.reset()
|
||||||
rew, done_bool = 0, False
|
rew, done_bool = 0, False
|
||||||
while not done_bool:
|
while not done_bool:
|
||||||
|
if n_agents > 1:
|
||||||
actions = [model.predict(
|
actions = [model.predict(
|
||||||
np.stack([env_state[i][j] for i in range(env_state.shape[0])]),
|
np.stack([env_state[i][j] for i in range(env_state.shape[0])]),
|
||||||
deterministic=False)[0] for j, model in enumerate(models)]
|
deterministic=True)[0] for j, model in enumerate(models)]
|
||||||
|
else:
|
||||||
|
actions = models[0].predict(env_state, deterministic=True)[0]
|
||||||
|
if any([agent.pos in [door.pos for door in env.unwrapped[c.DOORS]]
|
||||||
|
for agent in env.unwrapped[c.AGENT]]):
|
||||||
|
print('On Door')
|
||||||
env_state, step_r, done_bool, info_obj = env.step(actions)
|
env_state, step_r, done_bool, info_obj = env.step(actions)
|
||||||
|
|
||||||
recorder.read_info(0, info_obj)
|
recorder.read_info(0, info_obj)
|
||||||
rew += step_r
|
rew += step_r
|
||||||
env.render()
|
env.render()
|
||||||
|
103
studies/e_1.py
103
studies/e_1.py
@ -147,7 +147,7 @@ def load_model_run_study(seed_path, env_to_run, additional_kwargs_dict):
|
|||||||
try:
|
try:
|
||||||
actions = [model.predict(
|
actions = [model.predict(
|
||||||
np.stack([env_state[i][j] for i in range(env_state.shape[0])]),
|
np.stack([env_state[i][j] for i in range(env_state.shape[0])]),
|
||||||
deterministic=False)[0] for j, model in enumerate(models)]
|
deterministic=True)[0] for j, model in enumerate(models)]
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
print(e)
|
print(e)
|
||||||
print('Env_Kwargs are:\n')
|
print('Env_Kwargs are:\n')
|
||||||
@ -169,10 +169,11 @@ def load_model_run_study(seed_path, env_to_run, additional_kwargs_dict):
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
train_steps = 8e5
|
train_steps = 5e6
|
||||||
|
n_seeds = 3
|
||||||
|
|
||||||
# Define a global study save path
|
# Define a global study save path
|
||||||
start_time = '900000' # int(time.time())
|
start_time = 'Now_with_doors' # int(time.time())
|
||||||
study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}'
|
study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}'
|
||||||
|
|
||||||
# Define Global Env Parameters
|
# Define Global Env Parameters
|
||||||
@ -195,29 +196,65 @@ if __name__ == '__main__':
|
|||||||
spawn_frequency=30, n_drop_off_locations=2,
|
spawn_frequency=30, n_drop_off_locations=2,
|
||||||
max_agent_inventory_capacity=15)
|
max_agent_inventory_capacity=15)
|
||||||
factory_kwargs = dict(n_agents=1, max_steps=400, parse_doors=True,
|
factory_kwargs = dict(n_agents=1, max_steps=400, parse_doors=True,
|
||||||
level_name='rooms', record_episodes=False, doors_have_area=False,
|
level_name='rooms', record_episodes=False, doors_have_area=True,
|
||||||
verbose=False,
|
verbose=False,
|
||||||
mv_prop=move_props,
|
mv_prop=move_props,
|
||||||
obs_prop=obs_props
|
obs_prop=obs_props
|
||||||
)
|
)
|
||||||
|
|
||||||
# Bundle both environments with global kwargs and parameters
|
# Bundle both environments with global kwargs and parameters
|
||||||
env_map = {'dirt': (DirtFactory, dict(dirt_prop=dirt_props,
|
env_map = {}
|
||||||
**factory_kwargs.copy())),
|
env_map.update({'dirt': (DirtFactory, dict(dirt_prop=dirt_props,
|
||||||
'item': (ItemFactory, dict(item_prop=item_props,
|
**factory_kwargs.copy()))})
|
||||||
**factory_kwargs.copy())),
|
if False:
|
||||||
'itemdirt': (DirtItemFactory, dict(dirt_prop=dirt_props,
|
env_map.update({'item': (ItemFactory, dict(item_prop=item_props,
|
||||||
item_prop=item_props,
|
**factory_kwargs.copy()))})
|
||||||
**factory_kwargs.copy()))}
|
env_map.update({'itemdirt': (DirtItemFactory, dict(dirt_prop=dirt_props, item_prop=item_props,
|
||||||
|
**factory_kwargs.copy()))})
|
||||||
env_names = list(env_map.keys())
|
env_names = list(env_map.keys())
|
||||||
|
|
||||||
# Define parameter versions according to #1,2[1,0,N],3
|
# Define parameter versions according to #1,2[1,0,N],3
|
||||||
observation_modes = {
|
observation_modes = {}
|
||||||
# Fill-value = 0
|
if False:
|
||||||
# DEACTIVATED 'seperate_0': dict(additional_env_kwargs=dict(additional_agent_placeholder=0)),
|
observation_modes.update({
|
||||||
# Fill-value = 1
|
'seperate_1': dict(
|
||||||
# DEACTIVATED 'seperate_1': dict(additional_env_kwargs=dict(additional_agent_placeholder=1)),
|
post_training_kwargs=
|
||||||
# Fill-value = N(0, 1)
|
dict(obs_prop=ObservationProperties(
|
||||||
|
render_agents=AgentRenderOptions.COMBINED,
|
||||||
|
additional_agent_placeholder=None,
|
||||||
|
omit_agent_self=True,
|
||||||
|
frames_to_stack=3,
|
||||||
|
pomdp_r=2)
|
||||||
|
),
|
||||||
|
additional_env_kwargs=
|
||||||
|
dict(obs_prop=ObservationProperties(
|
||||||
|
render_agents=AgentRenderOptions.NOT,
|
||||||
|
additional_agent_placeholder=1,
|
||||||
|
omit_agent_self=True,
|
||||||
|
frames_to_stack=3,
|
||||||
|
pomdp_r=2)
|
||||||
|
)
|
||||||
|
)})
|
||||||
|
observation_modes.update({
|
||||||
|
'seperate_0': dict(
|
||||||
|
post_training_kwargs=
|
||||||
|
dict(obs_prop=ObservationProperties(
|
||||||
|
render_agents=AgentRenderOptions.COMBINED,
|
||||||
|
additional_agent_placeholder=None,
|
||||||
|
omit_agent_self=True,
|
||||||
|
frames_to_stack=3,
|
||||||
|
pomdp_r=2)
|
||||||
|
),
|
||||||
|
additional_env_kwargs=
|
||||||
|
dict(obs_prop=ObservationProperties(
|
||||||
|
render_agents=AgentRenderOptions.NOT,
|
||||||
|
additional_agent_placeholder=0,
|
||||||
|
omit_agent_self=True,
|
||||||
|
frames_to_stack=3,
|
||||||
|
pomdp_r=2)
|
||||||
|
)
|
||||||
|
)})
|
||||||
|
observation_modes.update({
|
||||||
'seperate_N': dict(
|
'seperate_N': dict(
|
||||||
post_training_kwargs=
|
post_training_kwargs=
|
||||||
dict(obs_prop=ObservationProperties(
|
dict(obs_prop=ObservationProperties(
|
||||||
@ -235,7 +272,8 @@ if __name__ == '__main__':
|
|||||||
frames_to_stack=3,
|
frames_to_stack=3,
|
||||||
pomdp_r=2)
|
pomdp_r=2)
|
||||||
)
|
)
|
||||||
),
|
)})
|
||||||
|
observation_modes.update({
|
||||||
'in_lvl_obs': dict(
|
'in_lvl_obs': dict(
|
||||||
post_training_kwargs=
|
post_training_kwargs=
|
||||||
dict(obs_prop=ObservationProperties(
|
dict(obs_prop=ObservationProperties(
|
||||||
@ -245,7 +283,8 @@ if __name__ == '__main__':
|
|||||||
frames_to_stack=3,
|
frames_to_stack=3,
|
||||||
pomdp_r=2)
|
pomdp_r=2)
|
||||||
)
|
)
|
||||||
),
|
)})
|
||||||
|
observation_modes.update({
|
||||||
# No further adjustment needed
|
# No further adjustment needed
|
||||||
'no_obs': dict(
|
'no_obs': dict(
|
||||||
post_training_kwargs=
|
post_training_kwargs=
|
||||||
@ -257,14 +296,14 @@ if __name__ == '__main__':
|
|||||||
pomdp_r=2)
|
pomdp_r=2)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
}
|
})
|
||||||
|
|
||||||
# Train starts here ############################################################
|
# Train starts here ############################################################
|
||||||
# Build Major Loop parameters, parameter versions, Env Classes and models
|
# Build Major Loop parameters, parameter versions, Env Classes and models
|
||||||
if True:
|
if True:
|
||||||
for obs_mode in observation_modes.keys():
|
for obs_mode in observation_modes.keys():
|
||||||
for env_name in env_names:
|
for env_name in env_names:
|
||||||
for model_cls in [h.MODEL_MAP['A2C'], h.MODEL_MAP['DQN']]:
|
for model_cls in [h.MODEL_MAP['A2C']]:
|
||||||
# Create an identifier, which is unique for every combination and easy to read in filesystem
|
# Create an identifier, which is unique for every combination and easy to read in filesystem
|
||||||
identifier = f'{model_cls.__name__}_{start_time}'
|
identifier = f'{model_cls.__name__}_{start_time}'
|
||||||
# Train each combination per seed
|
# Train each combination per seed
|
||||||
@ -274,7 +313,7 @@ if __name__ == '__main__':
|
|||||||
# Retrieve and set the observation mode specific env parameters
|
# Retrieve and set the observation mode specific env parameters
|
||||||
additional_kwargs = observation_modes.get(obs_mode, {}).get("additional_env_kwargs", {})
|
additional_kwargs = observation_modes.get(obs_mode, {}).get("additional_env_kwargs", {})
|
||||||
env_kwargs.update(additional_kwargs)
|
env_kwargs.update(additional_kwargs)
|
||||||
for seed in range(5):
|
for seed in range(n_seeds):
|
||||||
env_kwargs.update(env_seed=seed)
|
env_kwargs.update(env_seed=seed)
|
||||||
# Output folder
|
# Output folder
|
||||||
seed_path = combination_path / f'{str(seed)}_{identifier}'
|
seed_path = combination_path / f'{str(seed)}_{identifier}'
|
||||||
@ -352,6 +391,7 @@ if __name__ == '__main__':
|
|||||||
# Evaluation starts here #####################################################
|
# Evaluation starts here #####################################################
|
||||||
# First Iterate over every model and monitor "as trained"
|
# First Iterate over every model and monitor "as trained"
|
||||||
if True:
|
if True:
|
||||||
|
print('Start Baseline Tracking')
|
||||||
for obs_mode in observation_modes:
|
for obs_mode in observation_modes:
|
||||||
obs_mode_path = next(x for x in study_root_path.iterdir() if x.is_dir() and x.name == obs_mode)
|
obs_mode_path = next(x for x in study_root_path.iterdir() if x.is_dir() and x.name == obs_mode)
|
||||||
# For trained policy in study_root_path / identifier
|
# For trained policy in study_root_path / identifier
|
||||||
@ -370,9 +410,11 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
# for seed_path in (y for y in policy_path.iterdir() if y.is_dir()):
|
# for seed_path in (y for y in policy_path.iterdir() if y.is_dir()):
|
||||||
# load_model_run_baseline(seed_path)
|
# load_model_run_baseline(seed_path)
|
||||||
|
print('Baseline Tracking done')
|
||||||
|
|
||||||
# Then iterate over every model and monitor "ood behavior" - "is it ood?"
|
# Then iterate over every model and monitor "ood behavior" - "is it ood?"
|
||||||
if True:
|
if True:
|
||||||
|
print('Start OOD Tracking')
|
||||||
for obs_mode in observation_modes:
|
for obs_mode in observation_modes:
|
||||||
obs_mode_path = next(x for x in study_root_path.iterdir() if x.is_dir() and x.name == obs_mode)
|
obs_mode_path = next(x for x in study_root_path.iterdir() if x.is_dir() and x.name == obs_mode)
|
||||||
# For trained policy in study_root_path / identifier
|
# For trained policy in study_root_path / identifier
|
||||||
@ -387,18 +429,19 @@ if __name__ == '__main__':
|
|||||||
pool = mp.Pool(mp.cpu_count())
|
pool = mp.Pool(mp.cpu_count())
|
||||||
paths = list(y for y in policy_path.iterdir() if y.is_dir() \
|
paths = list(y for y in policy_path.iterdir() if y.is_dir() \
|
||||||
and not (y / ood_monitor_file).exists())
|
and not (y / ood_monitor_file).exists())
|
||||||
result = pool.starmap(load_model_run_study,
|
# result = pool.starmap(load_model_run_study,
|
||||||
it.product(paths,
|
# it.product(paths,
|
||||||
(env_map[env_path.name][0],),
|
# (env_map[env_path.name][0],),
|
||||||
(observation_modes[obs_mode],))
|
# (observation_modes[obs_mode],))
|
||||||
)
|
# )
|
||||||
# for seed_path in (y for y in policy_path.iterdir() if y.is_dir()):
|
for seed_path in (y for y in policy_path.iterdir() if y.is_dir()):
|
||||||
# load_model_run_study(seed_path)
|
load_model_run_study(seed_path, env_map[env_path.name][0], observation_modes[obs_mode])
|
||||||
|
print('OOD Tracking Done')
|
||||||
|
|
||||||
# Plotting
|
# Plotting
|
||||||
if True:
|
if True:
|
||||||
# TODO: Plotting
|
# TODO: Plotting
|
||||||
|
print('Start Plotting')
|
||||||
for observation_folder in (x for x in study_root_path.iterdir() if x.is_dir()):
|
for observation_folder in (x for x in study_root_path.iterdir() if x.is_dir()):
|
||||||
df_list = list()
|
df_list = list()
|
||||||
for env_folder in (x for x in observation_folder.iterdir() if x.is_dir()):
|
for env_folder in (x for x in observation_folder.iterdir() if x.is_dir()):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user