Debugging

Steffen Illium 2022-01-11 10:54:02 +01:00
parent 435056f373
commit 3150757347
6 changed files with 67 additions and 58 deletions

View File

@@ -35,7 +35,7 @@ class BaseFactory(gym.Env):
     @property
     def named_action_space(self):
-        return {x.identifier.value: idx for idx, x in enumerate(self._actions.values())}
+        return {x.identifier: idx for idx, x in enumerate(self._actions.values())}

     @property
     def observation_space(self):
@@ -287,7 +287,7 @@ class BaseFactory(gym.Env):
             doors.tick_doors()
         # Finalize
-        reward, reward_info = self.build_reward_result()
+        reward, reward_info = self.build_reward_result(rewards)
         info.update(reward_info)
         if self._steps >= self.max_steps:
@@ -313,8 +313,8 @@ class BaseFactory(gym.Env):
         if door is not None:
             door.use()
             valid = c.VALID
-            self.print(f'{agent.name} just used a door {door.name}')
-            info_dict = {f'{agent.name}_door_use_{door.name}': 1}
+            self.print(f'{agent.name} just used a {door.name} at {door.pos}')
+            info_dict = {f'{agent.name}_door_use': 1}
         # When he doesn't...
         else:
             valid = c.NOT_VALID
@@ -478,8 +478,7 @@ class BaseFactory(gym.Env):
         return oobs

     def get_all_tiles_with_collisions(self) -> List[Tile]:
-        tiles = [x.tile for y in self._entities for x in y if
-                 y.can_collide and not isinstance(y, WallTiles) and x.can_collide and len(x.tile.guests) > 1]
+        tiles = [x for x in self[c.FLOOR] if len(x.guests_that_can_collide) > 1]
         if False:
             tiles_with_collisions = list()
             for tile in self[c.FLOOR]:
@@ -503,11 +502,11 @@ class BaseFactory(gym.Env):
             else:
                 valid = c.NOT_VALID
                 self.print(f'{agent.name} just hit the wall at {agent.pos}.')
-                info_dict.update({f'{agent.pos}_wall_collide': 1})
+                info_dict.update({f'{agent.name}_wall_collide': 1})
         else:
             # Agent seems to be trying to Leave the level
             self.print(f'{agent.name} tried to leave the level {agent.pos}.')
-            info_dict.update({f'{agent.pos}_wall_collide': 1})
+            info_dict.update({f'{agent.name}_wall_collide': 1})
         reward_value = r.MOVEMENTS_VALID if valid else r.MOVEMENTS_FAIL
         reward = {'value': reward_value, 'reason': action.identifier, 'info': info_dict}
         return valid, reward
@@ -554,7 +553,7 @@ class BaseFactory(gym.Env):
     def additional_per_agent_rewards(self, agent) -> List[dict]:
         return []

-    def build_reward_result(self) -> (int, dict):
+    def build_reward_result(self, global_env_rewards: list) -> (int, dict):
         # Returns: Reward, Info
         info = defaultdict(lambda: 0.0)
@@ -584,12 +583,14 @@ class BaseFactory(gym.Env):
         combined_info_dict = dict(combined_info_dict)
         combined_info_dict.update(info)

+        global_reward_sum = sum(global_env_rewards)
         if self.individual_rewards:
             self.print(f"rewards are {comb_rewards}")
             reward = list(comb_rewards.values())
+            reward = [x + global_reward_sum for x in reward]
             return reward, combined_info_dict
         else:
-            reward = sum(comb_rewards.values())
+            reward = sum(comb_rewards.values()) + global_reward_sum
             self.print(f"reward is {reward}")
             return reward, combined_info_dict
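
Taken together, the hunks above change build_reward_result() so it receives the list of global per-step rewards collected during step() and folds their sum into either the per-agent reward list or the single team reward. A minimal sketch of that aggregation, written outside the environment class (the names follow the diff; the example values are invented):

    # Sketch only, not part of the commit: how the global reward term is folded in.
    def aggregate_rewards(comb_rewards: dict, global_env_rewards: list, individual_rewards: bool):
        global_reward_sum = sum(global_env_rewards)
        if individual_rewards:
            # One entry per agent, each shifted by the shared global term.
            return [r + global_reward_sum for r in comb_rewards.values()]
        # Single scalar reward for the whole team.
        return sum(comb_rewards.values()) + global_reward_sum

    # Example: two agents plus one global step penalty.
    print(aggregate_rewards({'agent_0': 0.5, 'agent_1': -0.25}, [-0.5], individual_rewards=True))
    # -> [0.0, -0.75]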

View File

@@ -268,7 +268,7 @@ class DirtFactory(BaseFactory):
 if __name__ == '__main__':
     from environments.utility_classes import AgentRenderOptions as aro
-    render = False
+    render = True

     dirt_props = DirtProperties(
         initial_dirt_ratio=0.35,
@@ -293,11 +293,11 @@ if __name__ == '__main__':
     global_timings = []
     for i in range(10):
-        factory = DirtFactory(n_agents=2, done_at_collision=False,
+        factory = DirtFactory(n_agents=4, done_at_collision=False,
                               level_name='rooms', max_steps=1000,
                               doors_have_area=False,
                               obs_prop=obs_props, parse_doors=True,
-                              verbose=False,
+                              verbose=True,
                               mv_prop=move_props, dirt_prop=dirt_props,
                               # inject_agents=[TSPDirtAgent],
                               )
@@ -307,6 +307,7 @@ if __name__ == '__main__':
         _ = factory.observation_space
         obs_space = factory.observation_space
         obs_space_named = factory.named_observation_space
+        action_space_named = factory.named_action_space
         times = []
         for epoch in range(10):
             start_time = time.time()

View File

@@ -78,12 +78,12 @@ class EnvActions:

 class Rewards:

-    MOVEMENTS_VALID = -0.001
-    MOVEMENTS_FAIL = -0.001
-    NOOP = -0.1
-    USE_DOOR_VALID = -0.001
-    USE_DOOR_FAIL = -0.001
-    COLLISION = -1
+    MOVEMENTS_VALID = -0.01
+    MOVEMENTS_FAIL = -0.1
+    NOOP = -0.01
+    USE_DOOR_VALID = -0.01
+    USE_DOOR_FAIL = -0.1
+    COLLISION = -0.5

 m = EnvActions
@@ -120,7 +120,7 @@ class ObservationTranslator:
     def translate_observation(self, agent_idx: int, obs: np.ndarray):
         target_obs_space = self._per_agent_named_obs_space[agent_idx]
-        translation = [idx_space_dict['explained_idxs'] for name, idx_space_dict in target_obs_space.items()]
+        translation = [idx_space_dict for name, idx_space_dict in target_obs_space.items()]
         flat_translation = [x for y in translation for x in y]
         return np.take(obs, flat_translation, axis=1 if obs.ndim == 4 else 0)
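
After this change the per-agent named observation space is expected to map each entry name directly to its list of channel indices; translate_observation flattens those lists and gathers the channels with np.take. A small self-contained sketch of that gather (the example space and array shape are invented):

    import numpy as np

    # Hypothetical named observation space: name -> list of channel indices.
    named_obs_space = {'walls': [0], 'agents': [1, 2], 'dirt': [3]}

    translation = [idxs for name, idxs in named_obs_space.items()]
    flat_translation = [i for group in translation for i in group]   # [0, 1, 2, 3]

    obs = np.zeros((4, 5, 5))                                        # channels x height x width
    reordered = np.take(obs, flat_translation, axis=1 if obs.ndim == 4 else 0)
    assert reordered.shape == (4, 5, 5)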

View File

@@ -22,6 +22,7 @@ if __name__ == '__main__':
     record = False
     seed = 67
     n_agents = 1
+    # out_path = Path('study_out/e_1_new_reward/no_obs/dirt/A2C_new_reward/0_A2C_new_reward')
     out_path = Path('study_out/single_run_with_export/dirt')
     model_path = out_path
@@ -49,7 +50,7 @@ if __name__ == '__main__':
         rew, done_bool = 0, False
         while not done_bool:
             if n_agents > 1:
-                actions = [model.predict(env_state[model_idx], deterministic=True)[0]
+                actions = [model.predict(env_state[model_idx], deterministic=determin)[0]
                            for model_idx, model in enumerate(models)]
             else:
                 actions = models[0].predict(env_state, deterministic=determin)[0]
@@ -58,8 +59,6 @@ if __name__ == '__main__':
             rew += step_r
             if render:
                 env.render()
-            if not env.unwrapped.unwrapped[c.AGENT][0].temp_valid:
-                print('Invalid ACtions')
             if done_bool:
                 break
         print(f'Factory run {episode} done, reward is:\n {rew}')

View File

@@ -1,7 +1,6 @@
 import sys
 from pathlib import Path
 from matplotlib import pyplot as plt
-import numpy as np
 import itertools as it

 try:
@@ -16,8 +15,6 @@ except NameError:
     DIR = None
     pass

-import time
 import simplejson

 from stable_baselines3.common.vec_env import SubprocVecEnv
@@ -28,14 +25,12 @@ from environments.factory.factory_item import ItemProperties, ItemFactory
 from environments.logging.envmonitor import EnvMonitor
 from environments.utility_classes import MovementProperties, ObservationProperties, AgentRenderOptions
 import pickle
-from plotting.compare_runs import compare_seed_runs, compare_model_runs, compare_all_parameter_runs
+from plotting.compare_runs import compare_seed_runs, compare_model_runs
 import pandas as pd
 import seaborn as sns
 import multiprocessing as mp

-# mp.set_start_method("spawn")

 """
 In this studie, we want to explore the macro behaviour of multi agents which are trained on the same task,
 but never saw each other in training.
@@ -72,10 +67,9 @@ n_agents = 4
 ood_monitor_file = f'e_1_{n_agents}_agents'
 baseline_monitor_file = 'e_1_baseline'

-from stable_baselines3 import A2C

 def policy_model_kwargs():
     return dict()  # gae_lambda=0.25, n_steps=16, max_grad_norm=0.25, use_rms_prop=True)

 def dqn_model_kwargs():
@@ -198,7 +192,7 @@ if __name__ == '__main__':
     ood_run = True
     plotting = True

-    train_steps = 1e7
+    train_steps = 1e6
     n_seeds = 3
     frames_to_stack = 3
@@ -222,7 +216,7 @@ if __name__ == '__main__':
                                     max_spawn_amount=0.1, max_global_amount=20,
                                     max_local_amount=1, spawn_frequency=0, max_spawn_ratio=0.05,
                                     dirt_smear_amount=0.0, agent_can_interact=True)
-        item_props = ItemProperties(n_items=10, agent_can_interact=True,
+        item_props = ItemProperties(n_items=10,
                                     spawn_frequency=30, n_drop_off_locations=2,
                                     max_agent_inventory_capacity=15)
         factory_kwargs = dict(n_agents=1, max_steps=400, parse_doors=True,

View File

@@ -1,6 +1,8 @@
 import sys
 from pathlib import Path

+from stable_baselines3.common.vec_env import SubprocVecEnv
+
 try:
     # noinspection PyUnboundLocalVariable
     if __package__ is None:
@@ -44,7 +46,7 @@ def load_model_run_baseline(policy_path, env_to_run):
     # Load both agents
     model = model_cls.load(policy_path / 'model.zip', device='cpu')
     # Load old env kwargs
-    with next(policy_path.glob('*.json')).open('r') as f:
+    with next(policy_path.glob('*params.json')).open('r') as f:
         env_kwargs = simplejson.load(f)
         env_kwargs.update(done_at_collision=True)
     # Init Env
@@ -103,8 +105,8 @@ def load_model_run_combined(root_path, env_to_run, env_kwargs):
             if done_bool:
                 break
         print(f'Factory run {episode} done, reward is:\n {rew}')
-    recorded_env_factory.save_run(filepath=policy_path / f'monitor.pick')
-    recorded_env_factory.save_records(filepath=policy_path / f'recorder.json')
+    recorded_env_factory.save_run(filepath=root_path / f'monitor.pick')
+    recorded_env_factory.save_records(filepath=root_path / f'recorder.json')

 if __name__ == '__main__':
@@ -113,12 +115,15 @@ if __name__ == '__main__':
     individual_run = True
     combined_run = True

-    train_steps = 2e6
+    train_steps = 2e5
     frames_to_stack = 3

     # Define a global studi save path
     study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}'

+    def policy_model_kwargs():
+        return dict(learning_rate=0.0003, n_steps=10, gamma=0.95, gae_lambda=0.0, ent_coef=0.01, vf_coef=0.5)
+
     # Define Global Env Parameters
     # Define properties object parameters
     obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT,
@@ -138,11 +143,11 @@ if __name__ == '__main__':
                                 max_agent_inventory_capacity=15)
     dest_props = DestProperties(n_dests=4, spawn_mode=DestModeOptions.GROUPED, spawn_frequency=1)
     factory_kwargs = dict(n_agents=1, max_steps=400, parse_doors=True,
-                          level_name='rooms', doors_have_area=True,
+                          level_name='rooms', doors_have_area=False,
                           verbose=False,
                           mv_prop=move_props,
                           obs_prop=obs_props,
-                          done_at_collision=False
+                          done_at_collision=True
                           )

     # Bundle both environments with global kwargs and parameters
@@ -172,33 +177,42 @@ if __name__ == '__main__':
                 continue
             combination_path.mkdir(parents=True, exist_ok=True)

-            with env_class(**env_kwargs) as env_factory:
-                param_path = combination_path / f'env_params.json'
+            env_factory = SubprocVecEnv([encapsule_env_factory(env_class, env_kwargs)
+                                         for _ in range(6)], start_method="spawn")
+            param_path = combination_path / f'env_params.json'
+            try:
+                env_factory.env_method('save_params', param_path)
+            except AttributeError:
                 env_factory.save_params(param_path)

             # EnvMonitor Init
             callbacks = [EnvMonitor(env_factory)]

             # Model Init
-            model = model_cls("MlpPolicy", env_factory,
+            model = model_cls("MlpPolicy", env_factory, **policy_model_kwargs(),
                               verbose=1, seed=69, device='cpu')

             # Model train
             model.learn(total_timesteps=int(train_steps), callback=callbacks)

             # Model save
+            try:
                 model.named_action_space = env_factory.unwrapped.named_action_space
                 model.named_observation_space = env_factory.unwrapped.named_observation_space
-            save_path = combination_path / f'model.zip'
-            model.save(save_path)
+            except AttributeError:
+                model.named_action_space = env_factory.get_attr("named_action_space")[0]
+                model.named_observation_space = env_factory.get_attr("named_observation_space")[0]
+            save_path = combination_path / f'model.zip'
+            model.save(save_path)

             # Monitor Save
             callbacks[0].save_run(combination_path / 'monitor.pick')

             # Better be save then sorry: Clean up!
             del env_factory, model
             import gc
             gc.collect()

     # Train ends here ############################################################
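
The training block above now builds the environment as a stable-baselines3 SubprocVecEnv and falls back between the vectorized and plain-env interfaces when saving parameters and reading the named spaces. A sketch of that fallback pattern, assuming the plain environment exposes save_params() and the named spaces directly (only the helper name encapsule_env_factory is taken from the diff; its body here is illustrative):

    from stable_baselines3.common.vec_env import SubprocVecEnv

    def encapsule_env_factory(env_class, env_kwargs):
        # SubprocVecEnv expects one zero-argument callable per worker process.
        return lambda: env_class(**env_kwargs)

    def save_env_params(env_factory, param_path):
        try:
            # Vectorized case: forward the call to every worker environment.
            env_factory.env_method('save_params', param_path)
        except AttributeError:
            # Plain environment exposes save_params directly.
            env_factory.save_params(param_path)

    def read_named_spaces(env_factory):
        try:
            return (env_factory.unwrapped.named_action_space,
                    env_factory.unwrapped.named_observation_space)
        except AttributeError:
            # A VecEnv does not expose these attributes itself; query worker 0 instead.
            return (env_factory.get_attr('named_action_space')[0],
                    env_factory.get_attr('named_observation_space')[0])

    # Usage sketch (env_class / env_kwargs are hypothetical here):
    # env_factory = SubprocVecEnv([encapsule_env_factory(env_class, env_kwargs)
    #                              for _ in range(6)], start_method='spawn')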
@@ -213,7 +227,7 @@ if __name__ == '__main__':
         # for policy_path in (y for y in policy_path.iterdir() if y.is_dir()):
         #     load_model_run_baseline(policy_path)
-        print('Start Individual Training')
+        print('Done Individual Recording')

     # Then iterate over every model and monitor "ood behavior" - "is it ood?"
     if combined_run: