diff --git a/algorithms/TSP_dirt_agent.py b/algorithms/TSP_dirt_agent.py
index ed564bf..c11c2b5 100644
--- a/algorithms/TSP_dirt_agent.py
+++ b/algorithms/TSP_dirt_agent.py
@@ -5,11 +5,25 @@ from networkx.algorithms.approximation import traveling_salesman as tsp
 from environments.factory.base.objects import Agent
 from environments.helpers import points_to_graph
 from environments import helpers as h
-from environments.helpers import Constants as c
 
+from environments.helpers import Constants as BaseConstants
+from environments.helpers import EnvActions as BaseActions
+
+
+class Constants(BaseConstants):
+    DIRT = 'Dirt'  # key used to fetch the dirt collection from the env (self._env[c.DIRT])
+
+
+class Actions(BaseActions):
+    CLEAN_UP = 'do_cleanup_action'  # action name chosen by predict() when dirt is at the agent's position
+
+
+a = Actions  # short alias; predict() refers to a.CLEAN_UP
+c = Constants  # short alias; predict() refers to c.DIRT
 
 future_planning = 7
 
+
 class TSPDirtAgent(Agent):
 
     def __init__(self, env, *args,
@@ -26,7 +40,7 @@ class TSPDirtAgent(Agent):
     def predict(self, *_, **__):
         if self._env[c.DIRT].by_pos(self.pos) is not None:
             # Translate the action_object to an integer to have the same output as any other model
-            action = h.EnvActions.CLEAN_UP
+            action = a.CLEAN_UP
         elif any('door' in x.name.lower() for x in self.tile.guests):
             door = next(x for x in self.tile.guests if 'door' in x.name.lower())
             if door.is_closed:
@@ -37,7 +51,7 @@ class TSPDirtAgent(Agent):
         else:
             action = self._predict_move()
         # Translate the action_object to an integer to have the same output as any other model
-        action_obj = next(action_i for action_i, action_obj in enumerate(self._env._actions) if action_obj == action)
+        action_obj = next(action_i for action_name, action_i in self._env.named_action_space.items() if action_name == action)
         return action_obj
 
     def _predict_move(self):
diff --git a/environments/factory/base/base_factory.py b/environments/factory/base/base_factory.py
index 6270d52..f2ec6e0 100644
--- a/environments/factory/base/base_factory.py
+++ b/environments/factory/base/base_factory.py
@@ -181,11 +181,11 @@ class BaseFactory(gym.Env):
         if agents_to_spawn:
             agents = Agents.from_tiles(floor.empty_tiles[:agents_to_spawn], self._level_shape, **agents_kwargs)
         else:
-            agents = Agents(**agents_kwargs)
+            agents = Agents(self._level_shape, **agents_kwargs)
         if self._injected_agents:
             initialized_injections = list()
             for i, injection in enumerate(self._injected_agents):
-                agents.register_item(injection(self, floor.empty_tiles[agents_to_spawn+i+1], static_problem=False))
+                agents.register_item(injection(self, floor.empty_tiles[0], agents, static_problem=False))
                 initialized_injections.append(agents[-1])
             self._initialized_injections = initialized_injections
         self._entities.register_additional_items({c.AGENT: agents})
@@ -335,7 +335,12 @@ class BaseFactory(gym.Env):
         # Generel Observations
         lvl_obs = self[c.WALLS].as_array()
         door_obs = self[c.DOORS].as_array()
-        global_agent_obs = self[c.AGENT].as_array() if self.obs_prop.render_agents != a_obs.NOT else None
+        if self.obs_prop.render_agents == a_obs.NOT:
+            global_agent_obs = None
+        elif self.obs_prop.omit_agent_self and self.n_agents == 1:
+            global_agent_obs = None
+        else:
+            global_agent_obs = self[c.AGENT].as_array().copy()
         placeholder_obs = self[c.AGENT_PLACEHOLDER].as_array() if self[c.AGENT_PLACEHOLDER] else None
         add_obs_dict = self._additional_observations()
 
@@ -343,7 +348,7 @@ class BaseFactory(gym.Env):
             obs_dict = dict()
             # Build Agent Observations
             if self.obs_prop.render_agents != a_obs.NOT:
-                if self.obs_prop.omit_agent_self:
+                if self.obs_prop.omit_agent_self and self.n_agents >= 2:
                     if self.obs_prop.render_agents == a_obs.SEPERATE:
                         other_agent_obs_idx = [x for x in range(self.n_agents) if x != agent_idx]
                         agent_obs = np.take(global_agent_obs, other_agent_obs_idx, axis=0)
@@ -361,11 +366,12 @@ class BaseFactory(gym.Env):
                 lvl_obs += global_agent_obs
 
             obs_dict[c.WALLS] = lvl_obs
-            if self.obs_prop.render_agents in [a_obs.SEPERATE, a_obs.COMBINED]:
+            if self.obs_prop.render_agents in [a_obs.SEPERATE, a_obs.COMBINED] and agent_obs is not None:
                 obs_dict[c.AGENT] = agent_obs
-            if self[c.AGENT_PLACEHOLDER]:
+            if self[c.AGENT_PLACEHOLDER] and placeholder_obs is not None:
                 obs_dict[c.AGENT_PLACEHOLDER] = placeholder_obs
-            obs_dict[c.DOORS] = door_obs
+            if self.parse_doors and door_obs is not None:
+                obs_dict[c.DOORS] = door_obs
             obs_dict.update(add_obs_dict)
             obsn = np.vstack(list(obs_dict.values()))
             if self.obs_prop.pomdp_r:
@@ -381,20 +387,21 @@ class BaseFactory(gym.Env):
                                               zip(keys, idxs, list(idxs[1:]) + [idxs[-1]+1, ])}
 
             # Shadow Casting
-            try:
-                light_block_obs = [obs_idx for key, obs_idx in per_agent_expl_idx[agent.name].items()
-                                   if self[key].is_blocking_light]
-                # Flatten
-                light_block_obs = [x for y in light_block_obs for x in y]
-                shadowed_obs = [obs_idx for key, obs_idx in per_agent_expl_idx[agent.name].items()
-                                if self[key].can_be_shadowed]
-                # Flatten
-                shadowed_obs = [x for y in shadowed_obs for x in y]
-            except AttributeError as e:
-                print('Check your Keys! Only use Constants as Keys!')
-                print(e)
-                raise e
             if self.obs_prop.cast_shadows:
+                try:
+                    light_block_obs = [obs_idx for key, obs_idx in per_agent_expl_idx[agent.name].items()
+                                       if self[key].is_blocking_light]
+                    # Flatten
+                    light_block_obs = [x for y in light_block_obs for x in y]
+                    shadowed_obs = [obs_idx for key, obs_idx in per_agent_expl_idx[agent.name].items()
+                                    if self[key].can_be_shadowed]
+                    # Flatten
+                    shadowed_obs = [x for y in shadowed_obs for x in y]
+                except AttributeError as e:
+                    print('Check your Keys! Only use Constants as Keys!')
+                    print(e)
+                    raise e
+
                 obs_block_light = obsn[light_block_obs] != c.OCCUPIED_CELL
                 door_shadowing = False
                 if self.parse_doors:
diff --git a/environments/factory/factory_dirt.py b/environments/factory/factory_dirt.py
index f5c8d2a..077b44e 100644
--- a/environments/factory/factory_dirt.py
+++ b/environments/factory/factory_dirt.py
@@ -6,6 +6,7 @@ import random
 import numpy as np
 
 # from algorithms.TSP_dirt_agent import TSPDirtAgent
+from algorithms.TSP_dirt_agent import TSPDirtAgent
 from environments.helpers import Constants as BaseConstants
 from environments.helpers import EnvActions as BaseActions
 from environments.helpers import Rewards as BaseRewards
@@ -27,9 +28,9 @@ class Actions(BaseActions):
 
 
 class Rewards(BaseRewards):
-    CLEAN_UP_VALID          = 0.5
-    CLEAN_UP_FAIL          = -0.1
-    CLEAN_UP_LAST_PIECE     = 4.5
+    CLEAN_UP_VALID          = 1
+    CLEAN_UP_FAIL           = -0.1
+    CLEAN_UP_LAST_PIECE     = 4
 
 
 class DirtProperties(NamedTuple):
@@ -293,13 +294,13 @@ if __name__ == '__main__':
     global_timings = []
     for i in range(10):
 
-        factory = DirtFactory(n_agents=4, done_at_collision=False,
+        factory = DirtFactory(n_agents=1, done_at_collision=False,
                               level_name='rooms', max_steps=1000,
                               doors_have_area=False,
                               obs_prop=obs_props, parse_doors=True,
                               verbose=True,
                               mv_prop=move_props, dirt_prop=dirt_props,
-                              # inject_agents=[TSPDirtAgent],
+                              inject_agents=[TSPDirtAgent],
                               )
 
         # noinspection DuplicatedCode
@@ -317,10 +318,11 @@ if __name__ == '__main__':
             env_state = factory.reset()
             if render:
                 factory.render()
-            # tsp_agent = factory.get_injected_agents()[0]
+            tsp_agent = factory.get_injected_agents()[0]
 
             rwrd = 0
             for agent_i_action in random_actions:
+                agent_i_action = tsp_agent.predict()
                 env_state, step_rwrd, done_bool, info_obj = factory.step(agent_i_action)
                 rwrd += step_rwrd
                 if render:
diff --git a/reload_agent.py b/reload_agent.py
index 45b5e88..9a2e7e7 100644
--- a/reload_agent.py
+++ b/reload_agent.py
@@ -1,14 +1,10 @@
 import warnings
 from pathlib import Path
 
-import numpy as np
 import yaml
-from stable_baselines3 import A2C
+from stable_baselines3 import A2C, PPO, DQN
 
-from environments import helpers as h
-from environments.helpers import Constants as c
 from environments.factory.factory_dirt import DirtFactory
-from environments.factory.combined_factories import DirtItemFactory
 from environments.logging.recorder import EnvRecorder
 
 warnings.filterwarnings('ignore', category=FutureWarning)
@@ -17,7 +13,7 @@ warnings.filterwarnings('ignore', category=UserWarning)
 
 if __name__ == '__main__':
 
-    determin = True
+    determin = False
     render = True
     record = False
     seed = 67
@@ -37,7 +33,7 @@ if __name__ == '__main__':
 
     this_model = out_path / 'model.zip'
 
-    model_cls = A2C  # next(val for key, val in h.MODEL_MAP.items() if key in out_path.parent.name)
+    model_cls = PPO  # next(val for key, val in h.MODEL_MAP.items() if key in out_path.parent.name)
     models = [model_cls.load(this_model)]
 
     # Init Env
diff --git a/studies/single_run_with_export.py b/studies/single_run_with_export.py
index 62a8f04..5d9ff88 100644
--- a/studies/single_run_with_export.py
+++ b/studies/single_run_with_export.py
@@ -114,6 +114,7 @@ if __name__ == '__main__':
     train = True
     individual_run = True
     combined_run = True
+    multi_env = False
 
     train_steps = 2e5
     frames_to_stack = 3
@@ -122,7 +123,7 @@ if __name__ == '__main__':
     study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}'
 
     def policy_model_kwargs():
-        return dict(learning_rate=0.0003, n_steps=10, gamma=0.95, gae_lambda=0.0, ent_coef=0.01, vf_coef=0.5)
+        return dict()
 
     # Define Global Env Parameters
     # Define properties object parameters
@@ -142,22 +143,22 @@ if __name__ == '__main__':
     item_props = ItemProperties(n_items=10, spawn_frequency=30, n_drop_off_locations=2,
                                 max_agent_inventory_capacity=15)
     dest_props = DestProperties(n_dests=4, spawn_mode=DestModeOptions.GROUPED, spawn_frequency=1)
-    factory_kwargs = dict(n_agents=1, max_steps=400, parse_doors=True,
-                          level_name='rooms', doors_have_area=False,
+    factory_kwargs = dict(n_agents=1, max_steps=500, parse_doors=True,
+                          level_name='rooms', doors_have_area=True,
                           verbose=False,
                           mv_prop=move_props,
                           obs_prop=obs_props,
-                          done_at_collision=True
+                          done_at_collision=False
                           )
 
     # Bundle both environments with global kwargs and parameters
     env_map = {}
     env_map.update({'dirt': (DirtFactory, dict(dirt_prop=dirt_props,
                                                **factory_kwargs.copy()))})
-    env_map.update({'item': (ItemFactory, dict(item_prop=item_props,
-                                               **factory_kwargs.copy()))})
-    env_map.update({'dest': (DestFactory, dict(dest_prop=dest_props,
-                                               **factory_kwargs.copy()))})
+    # env_map.update({'item': (ItemFactory, dict(item_prop=item_props,
+    #                                            **factory_kwargs.copy()))})
+    # env_map.update({'dest': (DestFactory, dict(dest_prop=dest_props,
+    #                                           **factory_kwargs.copy()))})
     env_map.update({'combined': (DirtDestItemFactory, dict(dest_prop=dest_props,
                                                            item_prop=item_props,
                                                            dirt_prop=dirt_props,
@@ -168,7 +169,7 @@ if __name__ == '__main__':
     # Build Major Loop  parameters, parameter versions, Env Classes and models
     if train:
         for env_key in (env_key for env_key in env_map if 'combined' != env_key):
-            model_cls = h.MODEL_MAP['A2C']
+            model_cls = h.MODEL_MAP['PPO']
             combination_path = study_root_path / env_key
             env_class, env_kwargs = env_map[env_key]
 
@@ -177,8 +178,11 @@ if __name__ == '__main__':
                 continue
             combination_path.mkdir(parents=True, exist_ok=True)
 
-            env_factory = SubprocVecEnv([encapsule_env_factory(env_class, env_kwargs)
-                                         for _ in range(6)], start_method="spawn")
+            if not multi_env:
+                env_factory = encapsule_env_factory(env_class, env_kwargs)()
+            else:
+                env_factory = SubprocVecEnv([encapsule_env_factory(env_class, env_kwargs)
+                                             for _ in range(6)], start_method="spawn")
 
             param_path = combination_path / f'env_params.json'
             try: