Agents now smear Dirt

2021-07-13 17:12:01 +02:00
parent 01e7b752b8
commit 4841336e31
6 changed files with 47 additions and 31 deletions
--- a/environments/factory/base/base_factory.py
+++ b/environments/factory/base/base_factory.py
@ -10,7 +10,7 @@ from gym.wrappers import FrameStack
 from environments.helpers import Constants as c, Constants
 from environments import helpers as h
-from environments.factory.base.objects import Slice, Agent, Tile, Action, MoveableEntity
+from environments.factory.base.objects import Slice, Agent, Tile, Action
 from environments.factory.base.registers import StateSlices, Actions, Entities, Agents, Doors, FloorTiles
 from environments.utility_classes import MovementProperties
@ -85,9 +85,6 @@ class BaseFactory(gym.Env):
                 movement_properties: MovementProperties = MovementProperties(), parse_doors=False,
                 combin_agent_slices_in_obs: bool = False, frames_to_stack=0, record_episodes=False,
                 omit_agent_slice_in_obs=False, done_at_collision=False, **kwargs):
        assert (combin_agent_slices_in_obs != omit_agent_slice_in_obs) or \
               (not combin_agent_slices_in_obs and not omit_agent_slice_in_obs), \
               'Both options are exclusive'
        assert frames_to_stack != 1 and frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1."
        # Attribute Assignment
@ -125,7 +122,7 @@ class BaseFactory(gym.Env):
        # Doors
        parsed_doors = h.one_hot_level(parsed_level, c.DOOR)
-        doors = [Slice(c.DOORS.value, parsed_doors)] if parsed_doors.any() and self.parse_doors else []
+        doors = [Slice(c.DOORS.name, parsed_doors)] if parsed_doors.any() and self.parse_doors else []
        # Agents
        agents = []
@ -283,15 +280,17 @@ class BaseFactory(gym.Env):
            obs = self._padded_obs_cube[:, x0:x1, y0:y1]
        else:
            obs = self._obs_cube
-        if self.omit_agent_slice_in_obs:
+
-            obs_new = obs[[key for key, val in self._slices.items() if c.AGENT.value not in val]]
+        if self.combin_agent_slices_in_obs and self.n_agents >= 1:
-            return obs_new
+            agent_obs = np.sum(obs[[key for key, slice in self._slices.items() if c.AGENT.name in slice.name and
                                    (not self.omit_agent_slice_in_obs and slice.name != agent.name)]],
                               axis=0, keepdims=True)
            obs = np.concatenate((obs[:first_agent_slice], agent_obs, obs[first_agent_slice+self.n_agents:]))
            return obs
        else:
-            if self.combin_agent_slices_in_obs:
+            if self.omit_agent_slice_in_obs:
-                agent_obs = np.sum(obs[[key for key, slice in self._slices.items() if c.AGENT.name in slice.name]],
+                obs_new = obs[[key for key, val in self._slices.items() if c.AGENT.value not in val.name]]
-                                   axis=0, keepdims=True)
+                return obs_new
                obs = np.concatenate((obs[:first_agent_slice], agent_obs, obs[first_agent_slice+self.n_agents:]))
                return obs
            else:
                return obs
--- a/environments/factory/base/objects.py
+++ b/environments/factory/base/objects.py
@ -196,7 +196,7 @@ class Door(Entity):
    def encoding(self):
        return 1 if self.is_closed else -1
-    def __init__(self, *args, context, closed_on_init=True, auto_close_interval=500):
+    def __init__(self, *args, context, closed_on_init=True, auto_close_interval=10):
        super(Door, self).__init__(*args)
        self._state = c.IS_CLOSED_DOOR
        self.auto_close_interval = auto_close_interval
--- a/environments/factory/renderer.py
+++ b/environments/factory/renderer.py
@ -39,7 +39,7 @@ class Renderer:
        now = time.time()
        self.font = pygame.font.Font(None, 20)
-        self.font.set_bold(1.0)
+        self.font.set_bold(1)
        print('Loading System font with pygame.font.Font took', time.time() - now)
    def fill_bg(self):
--- a/environments/factory/simple_factory.py
+++ b/environments/factory/simple_factory.py
@ -3,9 +3,8 @@ import random
 import numpy as np
 from environments import helpers as h
 from environments.helpers import Constants as c
 from environments import helpers as h
 from environments.factory.base.base_factory import BaseFactory
 from environments.factory.base.objects import Agent, Action, Object, Slice
 from environments.factory.base.registers import Entities
@ -18,12 +17,13 @@ CLEAN_UP_ACTION = 'clean_up'
 class DirtProperties(NamedTuple):
-    clean_amount: int = 2            # How much does the robot clean with one actions.
+    clean_amount: int = 1               # How much does the robot clean with one actions.
-    max_spawn_ratio: float = 0.2       # On max how much tiles does the dirt spawn in percent.
+    max_spawn_ratio: float = 0.2        # On max how much tiles does the dirt spawn in percent.
-    gain_amount: float = 0.5           # How much dirt does spawn per tile
+    gain_amount: float = 0.3            # How much dirt does spawn per tile
-    spawn_frequency: int = 5         # Spawn Frequency in Steps
+    spawn_frequency: int = 5            # Spawn Frequency in Steps
-    max_local_amount: int = 1        # Max dirt amount per tile.
+    max_local_amount: int = 2           # Max dirt amount per tile.
-    max_global_amount: int = 20      # Max dirt amount in the whole environment.
+    max_global_amount: int = 20         # Max dirt amount in the whole environment.
    dirt_smear_amount: float = 0.2      # Agents smear dirt, when not cleaning up in place
 # noinspection PyAttributeOutsideInit
@ -116,6 +116,17 @@ class SimpleFactory(BaseFactory):
            return False
    def post_step(self) -> dict:
        if smear_amount := self.dirt_properties.dirt_smear_amount:
            dirt_slice = self._slices.by_name(DIRT).slice
            for agent in self._agents:
                if agent.temp_valid and agent.last_pos != h.NO_POS:
                    if dirt := dirt_slice[agent.last_pos]:
                        if smeared_dirt := round(dirt * smear_amount, 2):
                            dirt_slice[agent.last_pos] = max(0, dirt_slice[agent.last_pos]-smeared_dirt)
                            dirt_slice[agent.pos] = min((self.dirt_properties.max_local_amount,
                                                         dirt_slice[agent.pos] + smeared_dirt)
                                                        )
        if not self._next_dirt_spawn:
            self.spawn_dirt()
            self._next_dirt_spawn = self.dirt_properties.spawn_frequency
@ -170,6 +181,7 @@ class SimpleFactory(BaseFactory):
                    reward -= 0.01
                    self.print(f'{agent.name} just tried to clean up some dirt at {agent.pos}, but failed.')
                    info_dict.update({f'{agent.name}_failed_action': 1})
                    info_dict.update({f'{agent.name}_failed_action': 1})
                    info_dict.update({f'{agent.name}_failed_dirt_cleanup': 1})
            elif self._actions.is_moving_action(agent.temp_action):
@ -210,11 +222,11 @@ class SimpleFactory(BaseFactory):
 if __name__ == '__main__':
-    render = False
+    render = True
    move_props = MovementProperties(allow_diagonal_movement=True, allow_square_movement=True)
-    dirt_props = DirtProperties()
+    dirt_props = DirtProperties(dirt_smear_amount=0.2)
-    factory = SimpleFactory(movement_properties=move_props, dirt_properties=dirt_props, n_agents=10,
+    factory = SimpleFactory(movement_properties=move_props, dirt_properties=dirt_props, n_agents=1,
                            combin_agent_slices_in_obs=False, level_name='rooms', parse_doors=True,
                            pomdp_radius=3)
--- a/main.py
+++ b/main.py
@ -92,8 +92,9 @@ if __name__ == '__main__':
    from algorithms.reg_dqn import RegDQN
    # from sb3_contrib import QRDQN
-    dirt_props = DirtProperties(clean_amount=3, gain_amount=1, max_global_amount=30,
+    dirt_props = DirtProperties(clean_amount=1, gain_amount=0.3, max_global_amount=20,
-                                max_local_amount=5, spawn_frequency=3, max_spawn_ratio=0.05)
+                                max_local_amount=2, spawn_frequency=3, max_spawn_ratio=0.05,
                                dirt_smear_amount=0.2)
    move_props = MovementProperties(allow_diagonal_movement=True,
                                    allow_square_movement=True,
                                    allow_no_op=False)
@ -106,7 +107,7 @@ if __name__ == '__main__':
            with SimpleFactory(n_agents=1, dirt_properties=dirt_props, pomdp_radius=2, max_steps=400, parse_doors=False,
                               movement_properties=move_props, level_name='rooms', frames_to_stack=4,
-                               omit_agent_slice_in_obs=False, combin_agent_slices_in_obs=True, record_episodes=False
+                               omit_agent_slice_in_obs=True, combin_agent_slices_in_obs=True, record_episodes=False
                               ) as env:
                if modeL_type.__name__ in ["PPO", "A2C"]:
--- a/reload_agent.py
+++ b/reload_agent.py
@ -14,13 +14,17 @@ warnings.filterwarnings('ignore', category=UserWarning)
 if __name__ == '__main__':
-    model_name = 'PPO_1626075586'
+    model_name = 'A2C_1626103200'
    run_id = 0
    out_path = Path(__file__).parent / 'debug_out'
    model_path = out_path / model_name
    with (model_path / f'env_{model_name}.yaml').open('r') as f:
        env_kwargs = yaml.load(f, Loader=yaml.FullLoader)
    env_kwargs.update(dirt_properties=DirtProperties(clean_amount=1, gain_amount=0.3, max_global_amount=20,
                                                     max_local_amount=2, spawn_frequency=5, max_spawn_ratio=0.05,
                                                     dirt_smear_amount=0.2),
                      combin_agent_slices_in_obs=True, omit_agent_slice_in_obs=True)
    with SimpleFactory(**env_kwargs) as env:
        # Edit THIS:
@ -28,5 +32,5 @@ if __name__ == '__main__':
        this_model = model_files[0]
        model = PPO.load(this_model)
-        evaluation_result = evaluate_policy(model, env, n_eval_episodes=100, deterministic=False, render=True)
+        evaluation_result = evaluate_policy(model, env, n_eval_episodes=100, deterministic=True, render=True)
        print(evaluation_result)