diff --git a/algorithms/TSP_dirt_agent.py b/algorithms/TSP_dirt_agent.py index ed564bf..c11c2b5 100644 --- a/algorithms/TSP_dirt_agent.py +++ b/algorithms/TSP_dirt_agent.py @@ -5,11 +5,25 @@ from networkx.algorithms.approximation import traveling_salesman as tsp from environments.factory.base.objects import Agent from environments.helpers import points_to_graph from environments import helpers as h -from environments.helpers import Constants as c +from environments.helpers import Constants as BaseConstants +from environments.helpers import EnvActions as BaseActions + + +class Constants(BaseConstants): + DIRT = 'Dirt' + + +class Actions(BaseActions): + CLEAN_UP = 'do_cleanup_action' + + +a = Actions +c = Constants future_planning = 7 + class TSPDirtAgent(Agent): def __init__(self, env, *args, @@ -26,7 +40,7 @@ class TSPDirtAgent(Agent): def predict(self, *_, **__): if self._env[c.DIRT].by_pos(self.pos) is not None: # Translate the action_object to an integer to have the same output as any other model - action = h.EnvActions.CLEAN_UP + action = a.CLEAN_UP elif any('door' in x.name.lower() for x in self.tile.guests): door = next(x for x in self.tile.guests if 'door' in x.name.lower()) if door.is_closed: @@ -37,7 +51,7 @@ class TSPDirtAgent(Agent): else: action = self._predict_move() # Translate the action_object to an integer to have the same output as any other model - action_obj = next(action_i for action_i, action_obj in enumerate(self._env._actions) if action_obj == action) + action_obj = next(action_i for action_name, action_i in self._env.named_action_space.items() if action_name == action) return action_obj def _predict_move(self): diff --git a/environments/factory/base/base_factory.py b/environments/factory/base/base_factory.py index 6270d52..f2ec6e0 100644 --- a/environments/factory/base/base_factory.py +++ b/environments/factory/base/base_factory.py @@ -181,11 +181,11 @@ class BaseFactory(gym.Env): if agents_to_spawn: agents = Agents.from_tiles(floor.empty_tiles[:agents_to_spawn], self._level_shape, **agents_kwargs) else: - agents = Agents(**agents_kwargs) + agents = Agents(self._level_shape, **agents_kwargs) if self._injected_agents: initialized_injections = list() for i, injection in enumerate(self._injected_agents): - agents.register_item(injection(self, floor.empty_tiles[agents_to_spawn+i+1], static_problem=False)) + agents.register_item(injection(self, floor.empty_tiles[0], agents, static_problem=False)) initialized_injections.append(agents[-1]) self._initialized_injections = initialized_injections self._entities.register_additional_items({c.AGENT: agents}) @@ -335,7 +335,12 @@ class BaseFactory(gym.Env): # Generel Observations lvl_obs = self[c.WALLS].as_array() door_obs = self[c.DOORS].as_array() - global_agent_obs = self[c.AGENT].as_array() if self.obs_prop.render_agents != a_obs.NOT else None + if self.obs_prop.render_agents == a_obs.NOT: + global_agent_obs = None + elif self.obs_prop.omit_agent_self and self.n_agents == 1: + global_agent_obs = None + else: + global_agent_obs = self[c.AGENT].as_array().copy() placeholder_obs = self[c.AGENT_PLACEHOLDER].as_array() if self[c.AGENT_PLACEHOLDER] else None add_obs_dict = self._additional_observations() @@ -343,7 +348,7 @@ class BaseFactory(gym.Env): obs_dict = dict() # Build Agent Observations if self.obs_prop.render_agents != a_obs.NOT: - if self.obs_prop.omit_agent_self: + if self.obs_prop.omit_agent_self and self.n_agents >= 2: if self.obs_prop.render_agents == a_obs.SEPERATE: other_agent_obs_idx = [x for x in range(self.n_agents) if x != agent_idx] agent_obs = np.take(global_agent_obs, other_agent_obs_idx, axis=0) @@ -361,11 +366,12 @@ class BaseFactory(gym.Env): lvl_obs += global_agent_obs obs_dict[c.WALLS] = lvl_obs - if self.obs_prop.render_agents in [a_obs.SEPERATE, a_obs.COMBINED]: + if self.obs_prop.render_agents in [a_obs.SEPERATE, a_obs.COMBINED] and agent_obs is not None: obs_dict[c.AGENT] = agent_obs - if self[c.AGENT_PLACEHOLDER]: + if self[c.AGENT_PLACEHOLDER] and placeholder_obs is not None: obs_dict[c.AGENT_PLACEHOLDER] = placeholder_obs - obs_dict[c.DOORS] = door_obs + if self.parse_doors and door_obs is not None: + obs_dict[c.DOORS] = door_obs obs_dict.update(add_obs_dict) obsn = np.vstack(list(obs_dict.values())) if self.obs_prop.pomdp_r: @@ -381,20 +387,21 @@ class BaseFactory(gym.Env): zip(keys, idxs, list(idxs[1:]) + [idxs[-1]+1, ])} # Shadow Casting - try: - light_block_obs = [obs_idx for key, obs_idx in per_agent_expl_idx[agent.name].items() - if self[key].is_blocking_light] - # Flatten - light_block_obs = [x for y in light_block_obs for x in y] - shadowed_obs = [obs_idx for key, obs_idx in per_agent_expl_idx[agent.name].items() - if self[key].can_be_shadowed] - # Flatten - shadowed_obs = [x for y in shadowed_obs for x in y] - except AttributeError as e: - print('Check your Keys! Only use Constants as Keys!') - print(e) - raise e if self.obs_prop.cast_shadows: + try: + light_block_obs = [obs_idx for key, obs_idx in per_agent_expl_idx[agent.name].items() + if self[key].is_blocking_light] + # Flatten + light_block_obs = [x for y in light_block_obs for x in y] + shadowed_obs = [obs_idx for key, obs_idx in per_agent_expl_idx[agent.name].items() + if self[key].can_be_shadowed] + # Flatten + shadowed_obs = [x for y in shadowed_obs for x in y] + except AttributeError as e: + print('Check your Keys! Only use Constants as Keys!') + print(e) + raise e + obs_block_light = obsn[light_block_obs] != c.OCCUPIED_CELL door_shadowing = False if self.parse_doors: diff --git a/environments/factory/factory_dirt.py b/environments/factory/factory_dirt.py index f5c8d2a..077b44e 100644 --- a/environments/factory/factory_dirt.py +++ b/environments/factory/factory_dirt.py @@ -6,6 +6,7 @@ import random import numpy as np # from algorithms.TSP_dirt_agent import TSPDirtAgent +from algorithms.TSP_dirt_agent import TSPDirtAgent from environments.helpers import Constants as BaseConstants from environments.helpers import EnvActions as BaseActions from environments.helpers import Rewards as BaseRewards @@ -27,9 +28,9 @@ class Actions(BaseActions): class Rewards(BaseRewards): - CLEAN_UP_VALID = 0.5 - CLEAN_UP_FAIL = -0.1 - CLEAN_UP_LAST_PIECE = 4.5 + CLEAN_UP_VALID = 1 + CLEAN_UP_FAIL = -0.1 + CLEAN_UP_LAST_PIECE = 4 class DirtProperties(NamedTuple): @@ -293,13 +294,13 @@ if __name__ == '__main__': global_timings = [] for i in range(10): - factory = DirtFactory(n_agents=4, done_at_collision=False, + factory = DirtFactory(n_agents=1, done_at_collision=False, level_name='rooms', max_steps=1000, doors_have_area=False, obs_prop=obs_props, parse_doors=True, verbose=True, mv_prop=move_props, dirt_prop=dirt_props, - # inject_agents=[TSPDirtAgent], + inject_agents=[TSPDirtAgent], ) # noinspection DuplicatedCode @@ -317,10 +318,11 @@ if __name__ == '__main__': env_state = factory.reset() if render: factory.render() - # tsp_agent = factory.get_injected_agents()[0] + tsp_agent = factory.get_injected_agents()[0] rwrd = 0 for agent_i_action in random_actions: + agent_i_action = tsp_agent.predict() env_state, step_rwrd, done_bool, info_obj = factory.step(agent_i_action) rwrd += step_rwrd if render: diff --git a/reload_agent.py b/reload_agent.py index 45b5e88..9a2e7e7 100644 --- a/reload_agent.py +++ b/reload_agent.py @@ -1,14 +1,10 @@ import warnings from pathlib import Path -import numpy as np import yaml -from stable_baselines3 import A2C +from stable_baselines3 import A2C, PPO, DQN -from environments import helpers as h -from environments.helpers import Constants as c from environments.factory.factory_dirt import DirtFactory -from environments.factory.combined_factories import DirtItemFactory from environments.logging.recorder import EnvRecorder warnings.filterwarnings('ignore', category=FutureWarning) @@ -17,7 +13,7 @@ warnings.filterwarnings('ignore', category=UserWarning) if __name__ == '__main__': - determin = True + determin = False render = True record = False seed = 67 @@ -37,7 +33,7 @@ if __name__ == '__main__': this_model = out_path / 'model.zip' - model_cls = A2C # next(val for key, val in h.MODEL_MAP.items() if key in out_path.parent.name) + model_cls = PPO # next(val for key, val in h.MODEL_MAP.items() if key in out_path.parent.name) models = [model_cls.load(this_model)] # Init Env diff --git a/studies/single_run_with_export.py b/studies/single_run_with_export.py index 62a8f04..5d9ff88 100644 --- a/studies/single_run_with_export.py +++ b/studies/single_run_with_export.py @@ -114,6 +114,7 @@ if __name__ == '__main__': train = True individual_run = True combined_run = True + multi_env = False train_steps = 2e5 frames_to_stack = 3 @@ -122,7 +123,7 @@ if __name__ == '__main__': study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}' def policy_model_kwargs(): - return dict(learning_rate=0.0003, n_steps=10, gamma=0.95, gae_lambda=0.0, ent_coef=0.01, vf_coef=0.5) + return dict() # Define Global Env Parameters # Define properties object parameters @@ -142,22 +143,22 @@ if __name__ == '__main__': item_props = ItemProperties(n_items=10, spawn_frequency=30, n_drop_off_locations=2, max_agent_inventory_capacity=15) dest_props = DestProperties(n_dests=4, spawn_mode=DestModeOptions.GROUPED, spawn_frequency=1) - factory_kwargs = dict(n_agents=1, max_steps=400, parse_doors=True, - level_name='rooms', doors_have_area=False, + factory_kwargs = dict(n_agents=1, max_steps=500, parse_doors=True, + level_name='rooms', doors_have_area=True, verbose=False, mv_prop=move_props, obs_prop=obs_props, - done_at_collision=True + done_at_collision=False ) # Bundle both environments with global kwargs and parameters env_map = {} env_map.update({'dirt': (DirtFactory, dict(dirt_prop=dirt_props, **factory_kwargs.copy()))}) - env_map.update({'item': (ItemFactory, dict(item_prop=item_props, - **factory_kwargs.copy()))}) - env_map.update({'dest': (DestFactory, dict(dest_prop=dest_props, - **factory_kwargs.copy()))}) + # env_map.update({'item': (ItemFactory, dict(item_prop=item_props, + # **factory_kwargs.copy()))}) + # env_map.update({'dest': (DestFactory, dict(dest_prop=dest_props, + # **factory_kwargs.copy()))}) env_map.update({'combined': (DirtDestItemFactory, dict(dest_prop=dest_props, item_prop=item_props, dirt_prop=dirt_props, @@ -168,7 +169,7 @@ if __name__ == '__main__': # Build Major Loop parameters, parameter versions, Env Classes and models if train: for env_key in (env_key for env_key in env_map if 'combined' != env_key): - model_cls = h.MODEL_MAP['A2C'] + model_cls = h.MODEL_MAP['PPO'] combination_path = study_root_path / env_key env_class, env_kwargs = env_map[env_key] @@ -177,8 +178,11 @@ if __name__ == '__main__': continue combination_path.mkdir(parents=True, exist_ok=True) - env_factory = SubprocVecEnv([encapsule_env_factory(env_class, env_kwargs) - for _ in range(6)], start_method="spawn") + if not multi_env: + env_factory = encapsule_env_factory(env_class, env_kwargs)() + else: + env_factory = SubprocVecEnv([encapsule_env_factory(env_class, env_kwargs) + for _ in range(6)], start_method="spawn") param_path = combination_path / f'env_params.json' try: