Debugging

This commit is contained in:
Steffen Illium 2022-01-11 13:45:00 +01:00
parent 3150757347
commit b6c8cbd2e3
5 changed files with 70 additions and 47 deletions

View File

@ -5,11 +5,25 @@ from networkx.algorithms.approximation import traveling_salesman as tsp
from environments.factory.base.objects import Agent from environments.factory.base.objects import Agent
from environments.helpers import points_to_graph from environments.helpers import points_to_graph
from environments import helpers as h from environments import helpers as h
from environments.helpers import Constants as c
from environments.helpers import Constants as BaseConstants
from environments.helpers import EnvActions as BaseActions
class Constants(BaseConstants):
DIRT = 'Dirt'
class Actions(BaseActions):
CLEAN_UP = 'do_cleanup_action'
a = Actions
c = Constants
future_planning = 7 future_planning = 7
class TSPDirtAgent(Agent): class TSPDirtAgent(Agent):
def __init__(self, env, *args, def __init__(self, env, *args,
@ -26,7 +40,7 @@ class TSPDirtAgent(Agent):
def predict(self, *_, **__): def predict(self, *_, **__):
if self._env[c.DIRT].by_pos(self.pos) is not None: if self._env[c.DIRT].by_pos(self.pos) is not None:
# Translate the action_object to an integer to have the same output as any other model # Translate the action_object to an integer to have the same output as any other model
action = h.EnvActions.CLEAN_UP action = a.CLEAN_UP
elif any('door' in x.name.lower() for x in self.tile.guests): elif any('door' in x.name.lower() for x in self.tile.guests):
door = next(x for x in self.tile.guests if 'door' in x.name.lower()) door = next(x for x in self.tile.guests if 'door' in x.name.lower())
if door.is_closed: if door.is_closed:
@ -37,7 +51,7 @@ class TSPDirtAgent(Agent):
else: else:
action = self._predict_move() action = self._predict_move()
# Translate the action_object to an integer to have the same output as any other model # Translate the action_object to an integer to have the same output as any other model
action_obj = next(action_i for action_i, action_obj in enumerate(self._env._actions) if action_obj == action) action_obj = next(action_i for action_name, action_i in self._env.named_action_space.items() if action_name == action)
return action_obj return action_obj
def _predict_move(self): def _predict_move(self):

View File

@ -181,11 +181,11 @@ class BaseFactory(gym.Env):
if agents_to_spawn: if agents_to_spawn:
agents = Agents.from_tiles(floor.empty_tiles[:agents_to_spawn], self._level_shape, **agents_kwargs) agents = Agents.from_tiles(floor.empty_tiles[:agents_to_spawn], self._level_shape, **agents_kwargs)
else: else:
agents = Agents(**agents_kwargs) agents = Agents(self._level_shape, **agents_kwargs)
if self._injected_agents: if self._injected_agents:
initialized_injections = list() initialized_injections = list()
for i, injection in enumerate(self._injected_agents): for i, injection in enumerate(self._injected_agents):
agents.register_item(injection(self, floor.empty_tiles[agents_to_spawn+i+1], static_problem=False)) agents.register_item(injection(self, floor.empty_tiles[0], agents, static_problem=False))
initialized_injections.append(agents[-1]) initialized_injections.append(agents[-1])
self._initialized_injections = initialized_injections self._initialized_injections = initialized_injections
self._entities.register_additional_items({c.AGENT: agents}) self._entities.register_additional_items({c.AGENT: agents})
@ -335,7 +335,12 @@ class BaseFactory(gym.Env):
# General Observations # General Observations
lvl_obs = self[c.WALLS].as_array() lvl_obs = self[c.WALLS].as_array()
door_obs = self[c.DOORS].as_array() door_obs = self[c.DOORS].as_array()
global_agent_obs = self[c.AGENT].as_array() if self.obs_prop.render_agents != a_obs.NOT else None if self.obs_prop.render_agents == a_obs.NOT:
global_agent_obs = None
elif self.obs_prop.omit_agent_self and self.n_agents == 1:
global_agent_obs = None
else:
global_agent_obs = self[c.AGENT].as_array().copy()
placeholder_obs = self[c.AGENT_PLACEHOLDER].as_array() if self[c.AGENT_PLACEHOLDER] else None placeholder_obs = self[c.AGENT_PLACEHOLDER].as_array() if self[c.AGENT_PLACEHOLDER] else None
add_obs_dict = self._additional_observations() add_obs_dict = self._additional_observations()
@ -343,7 +348,7 @@ class BaseFactory(gym.Env):
obs_dict = dict() obs_dict = dict()
# Build Agent Observations # Build Agent Observations
if self.obs_prop.render_agents != a_obs.NOT: if self.obs_prop.render_agents != a_obs.NOT:
if self.obs_prop.omit_agent_self: if self.obs_prop.omit_agent_self and self.n_agents >= 2:
if self.obs_prop.render_agents == a_obs.SEPERATE: if self.obs_prop.render_agents == a_obs.SEPERATE:
other_agent_obs_idx = [x for x in range(self.n_agents) if x != agent_idx] other_agent_obs_idx = [x for x in range(self.n_agents) if x != agent_idx]
agent_obs = np.take(global_agent_obs, other_agent_obs_idx, axis=0) agent_obs = np.take(global_agent_obs, other_agent_obs_idx, axis=0)
@ -361,11 +366,12 @@ class BaseFactory(gym.Env):
lvl_obs += global_agent_obs lvl_obs += global_agent_obs
obs_dict[c.WALLS] = lvl_obs obs_dict[c.WALLS] = lvl_obs
if self.obs_prop.render_agents in [a_obs.SEPERATE, a_obs.COMBINED]: if self.obs_prop.render_agents in [a_obs.SEPERATE, a_obs.COMBINED] and agent_obs is not None:
obs_dict[c.AGENT] = agent_obs obs_dict[c.AGENT] = agent_obs
if self[c.AGENT_PLACEHOLDER]: if self[c.AGENT_PLACEHOLDER] and placeholder_obs is not None:
obs_dict[c.AGENT_PLACEHOLDER] = placeholder_obs obs_dict[c.AGENT_PLACEHOLDER] = placeholder_obs
obs_dict[c.DOORS] = door_obs if self.parse_doors and door_obs is not None:
obs_dict[c.DOORS] = door_obs
obs_dict.update(add_obs_dict) obs_dict.update(add_obs_dict)
obsn = np.vstack(list(obs_dict.values())) obsn = np.vstack(list(obs_dict.values()))
if self.obs_prop.pomdp_r: if self.obs_prop.pomdp_r:
@ -381,20 +387,21 @@ class BaseFactory(gym.Env):
zip(keys, idxs, list(idxs[1:]) + [idxs[-1]+1, ])} zip(keys, idxs, list(idxs[1:]) + [idxs[-1]+1, ])}
# Shadow Casting # Shadow Casting
try:
light_block_obs = [obs_idx for key, obs_idx in per_agent_expl_idx[agent.name].items()
if self[key].is_blocking_light]
# Flatten
light_block_obs = [x for y in light_block_obs for x in y]
shadowed_obs = [obs_idx for key, obs_idx in per_agent_expl_idx[agent.name].items()
if self[key].can_be_shadowed]
# Flatten
shadowed_obs = [x for y in shadowed_obs for x in y]
except AttributeError as e:
print('Check your Keys! Only use Constants as Keys!')
print(e)
raise e
if self.obs_prop.cast_shadows: if self.obs_prop.cast_shadows:
try:
light_block_obs = [obs_idx for key, obs_idx in per_agent_expl_idx[agent.name].items()
if self[key].is_blocking_light]
# Flatten
light_block_obs = [x for y in light_block_obs for x in y]
shadowed_obs = [obs_idx for key, obs_idx in per_agent_expl_idx[agent.name].items()
if self[key].can_be_shadowed]
# Flatten
shadowed_obs = [x for y in shadowed_obs for x in y]
except AttributeError as e:
print('Check your Keys! Only use Constants as Keys!')
print(e)
raise e
obs_block_light = obsn[light_block_obs] != c.OCCUPIED_CELL obs_block_light = obsn[light_block_obs] != c.OCCUPIED_CELL
door_shadowing = False door_shadowing = False
if self.parse_doors: if self.parse_doors:

View File

@ -6,6 +6,7 @@ import random
import numpy as np import numpy as np
# from algorithms.TSP_dirt_agent import TSPDirtAgent # from algorithms.TSP_dirt_agent import TSPDirtAgent
from algorithms.TSP_dirt_agent import TSPDirtAgent
from environments.helpers import Constants as BaseConstants from environments.helpers import Constants as BaseConstants
from environments.helpers import EnvActions as BaseActions from environments.helpers import EnvActions as BaseActions
from environments.helpers import Rewards as BaseRewards from environments.helpers import Rewards as BaseRewards
@ -27,9 +28,9 @@ class Actions(BaseActions):
class Rewards(BaseRewards): class Rewards(BaseRewards):
CLEAN_UP_VALID = 0.5 CLEAN_UP_VALID = 1
CLEAN_UP_FAIL = -0.1 CLEAN_UP_FAIL = -0.1
CLEAN_UP_LAST_PIECE = 4.5 CLEAN_UP_LAST_PIECE = 4
class DirtProperties(NamedTuple): class DirtProperties(NamedTuple):
@ -293,13 +294,13 @@ if __name__ == '__main__':
global_timings = [] global_timings = []
for i in range(10): for i in range(10):
factory = DirtFactory(n_agents=4, done_at_collision=False, factory = DirtFactory(n_agents=1, done_at_collision=False,
level_name='rooms', max_steps=1000, level_name='rooms', max_steps=1000,
doors_have_area=False, doors_have_area=False,
obs_prop=obs_props, parse_doors=True, obs_prop=obs_props, parse_doors=True,
verbose=True, verbose=True,
mv_prop=move_props, dirt_prop=dirt_props, mv_prop=move_props, dirt_prop=dirt_props,
# inject_agents=[TSPDirtAgent], inject_agents=[TSPDirtAgent],
) )
# noinspection DuplicatedCode # noinspection DuplicatedCode
@ -317,10 +318,11 @@ if __name__ == '__main__':
env_state = factory.reset() env_state = factory.reset()
if render: if render:
factory.render() factory.render()
# tsp_agent = factory.get_injected_agents()[0] tsp_agent = factory.get_injected_agents()[0]
rwrd = 0 rwrd = 0
for agent_i_action in random_actions: for agent_i_action in random_actions:
agent_i_action = tsp_agent.predict()
env_state, step_rwrd, done_bool, info_obj = factory.step(agent_i_action) env_state, step_rwrd, done_bool, info_obj = factory.step(agent_i_action)
rwrd += step_rwrd rwrd += step_rwrd
if render: if render:

View File

@ -1,14 +1,10 @@
import warnings import warnings
from pathlib import Path from pathlib import Path
import numpy as np
import yaml import yaml
from stable_baselines3 import A2C from stable_baselines3 import A2C, PPO, DQN
from environments import helpers as h
from environments.helpers import Constants as c
from environments.factory.factory_dirt import DirtFactory from environments.factory.factory_dirt import DirtFactory
from environments.factory.combined_factories import DirtItemFactory
from environments.logging.recorder import EnvRecorder from environments.logging.recorder import EnvRecorder
warnings.filterwarnings('ignore', category=FutureWarning) warnings.filterwarnings('ignore', category=FutureWarning)
@ -17,7 +13,7 @@ warnings.filterwarnings('ignore', category=UserWarning)
if __name__ == '__main__': if __name__ == '__main__':
determin = True determin = False
render = True render = True
record = False record = False
seed = 67 seed = 67
@ -37,7 +33,7 @@ if __name__ == '__main__':
this_model = out_path / 'model.zip' this_model = out_path / 'model.zip'
model_cls = A2C # next(val for key, val in h.MODEL_MAP.items() if key in out_path.parent.name) model_cls = PPO # next(val for key, val in h.MODEL_MAP.items() if key in out_path.parent.name)
models = [model_cls.load(this_model)] models = [model_cls.load(this_model)]
# Init Env # Init Env

View File

@ -114,6 +114,7 @@ if __name__ == '__main__':
train = True train = True
individual_run = True individual_run = True
combined_run = True combined_run = True
multi_env = False
train_steps = 2e5 train_steps = 2e5
frames_to_stack = 3 frames_to_stack = 3
@ -122,7 +123,7 @@ if __name__ == '__main__':
study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}' study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}'
def policy_model_kwargs(): def policy_model_kwargs():
return dict(learning_rate=0.0003, n_steps=10, gamma=0.95, gae_lambda=0.0, ent_coef=0.01, vf_coef=0.5) return dict()
# Define Global Env Parameters # Define Global Env Parameters
# Define properties object parameters # Define properties object parameters
@ -142,22 +143,22 @@ if __name__ == '__main__':
item_props = ItemProperties(n_items=10, spawn_frequency=30, n_drop_off_locations=2, item_props = ItemProperties(n_items=10, spawn_frequency=30, n_drop_off_locations=2,
max_agent_inventory_capacity=15) max_agent_inventory_capacity=15)
dest_props = DestProperties(n_dests=4, spawn_mode=DestModeOptions.GROUPED, spawn_frequency=1) dest_props = DestProperties(n_dests=4, spawn_mode=DestModeOptions.GROUPED, spawn_frequency=1)
factory_kwargs = dict(n_agents=1, max_steps=400, parse_doors=True, factory_kwargs = dict(n_agents=1, max_steps=500, parse_doors=True,
level_name='rooms', doors_have_area=False, level_name='rooms', doors_have_area=True,
verbose=False, verbose=False,
mv_prop=move_props, mv_prop=move_props,
obs_prop=obs_props, obs_prop=obs_props,
done_at_collision=True done_at_collision=False
) )
# Bundle both environments with global kwargs and parameters # Bundle both environments with global kwargs and parameters
env_map = {} env_map = {}
env_map.update({'dirt': (DirtFactory, dict(dirt_prop=dirt_props, env_map.update({'dirt': (DirtFactory, dict(dirt_prop=dirt_props,
**factory_kwargs.copy()))}) **factory_kwargs.copy()))})
env_map.update({'item': (ItemFactory, dict(item_prop=item_props, # env_map.update({'item': (ItemFactory, dict(item_prop=item_props,
**factory_kwargs.copy()))}) # **factory_kwargs.copy()))})
env_map.update({'dest': (DestFactory, dict(dest_prop=dest_props, # env_map.update({'dest': (DestFactory, dict(dest_prop=dest_props,
**factory_kwargs.copy()))}) # **factory_kwargs.copy()))})
env_map.update({'combined': (DirtDestItemFactory, dict(dest_prop=dest_props, env_map.update({'combined': (DirtDestItemFactory, dict(dest_prop=dest_props,
item_prop=item_props, item_prop=item_props,
dirt_prop=dirt_props, dirt_prop=dirt_props,
@ -168,7 +169,7 @@ if __name__ == '__main__':
# Build Major Loop parameters, parameter versions, Env Classes and models # Build Major Loop parameters, parameter versions, Env Classes and models
if train: if train:
for env_key in (env_key for env_key in env_map if 'combined' != env_key): for env_key in (env_key for env_key in env_map if 'combined' != env_key):
model_cls = h.MODEL_MAP['A2C'] model_cls = h.MODEL_MAP['PPO']
combination_path = study_root_path / env_key combination_path = study_root_path / env_key
env_class, env_kwargs = env_map[env_key] env_class, env_kwargs = env_map[env_key]
@ -177,8 +178,11 @@ if __name__ == '__main__':
continue continue
combination_path.mkdir(parents=True, exist_ok=True) combination_path.mkdir(parents=True, exist_ok=True)
env_factory = SubprocVecEnv([encapsule_env_factory(env_class, env_kwargs) if not multi_env:
for _ in range(6)], start_method="spawn") env_factory = encapsule_env_factory(env_class, env_kwargs)()
else:
env_factory = SubprocVecEnv([encapsule_env_factory(env_class, env_kwargs)
for _ in range(6)], start_method="spawn")
param_path = combination_path / f'env_params.json' param_path = combination_path / f'env_params.json'
try: try: