Code Comments, Getting Dirty Env, Naming

2025-12-30 10:18:50 +01:00 · 2021-05-11 10:31:35 +02:00
parent f98f689f5e
commit 7d926c403d
5 changed files with 98 additions and 42 deletions
--- a/environments/factory/base_factory.py
+++ b/environments/factory/base_factory.py
@@ -3,28 +3,35 @@ from pathlib import Path
 from environments import helpers as h


-class BaseFactory(object):
+class BaseFactory:
    LEVELS_DIR = 'levels'
+    _level_idx = 0
+    _agent_start_idx = 1
+    _is_free_cell = 0
+    _is_occupied_cell = 1

    def __init__(self, level='simple', n_agents=1, max_steps=1e3):
        self.n_agents = n_agents
        self.max_steps = max_steps
        self.level = h.one_hot_level(
            h.parse_level(Path(__file__).parent / self.LEVELS_DIR / f'{level}.txt')
-        )#[np.newaxis, ...]
+        )
        self.slice_strings = {0: 'level', **{i: f'agent#{i}' for i in range(1, self.n_agents+1)}}
        self.reset()

    def reset(self):
        self.done = False
        self.steps = 0
-        self.agents = np.zeros((self.n_agents, *self.level.shape), dtype=np.int8)
-        free_cells = np.argwhere(self.level == 0)
-        np.random.shuffle(free_cells)
-        for i in range(self.n_agents):
-            r, c = free_cells[i]
-            self.agents[i, r, c] = 1
-        self.state = np.concatenate((self.level[np.newaxis, ...], self.agents), 0)
+        # Agent placement ...
+        agents = np.zeros((self.n_agents, *self.level.shape), dtype=np.int8)
+        floor_tiles = np.argwhere(self.level == self._is_free_cell)
+        # ... on random positions
+        np.random.shuffle(floor_tiles)
+        for i, (x, y) in enumerate(floor_tiles[:self.n_agents]):
+            agents[i, x, y] = self._is_occupied_cell
+        # state.shape = level, agent 1,..., agent n,
+        self.state = np.concatenate((np.expand_dims(self.level, axis=0), agents), axis=0)
+        # Returns State, Reward, Done, Info
        return self.state, 0, self.done, {}

    def step(self, actions):
@@ -33,21 +40,22 @@ class BaseFactory(object):
            actions = [actions]
        self.steps += 1
        r = 0
-        # level, agent 1,..., agent n,
        collision_vecs = np.zeros((self.n_agents, self.state.shape[0]))  # n_agents x n_slices
        for i, a in enumerate(actions):
-            old_pos, new_pos, valid = h.check_agent_move(state=self.state, dim=i+1, action=a)
-            if valid:
+            old_pos, new_pos, valid = h.check_agent_move(state=self.state, dim=i+self._agent_start_idx, action=a)
+            if valid:  # Does not collide with level boundaries
                self.make_move(i, old_pos, new_pos)
-            else:  # trying to leave the level
-                collision_vecs[i, 0] = 1
-        for i in range(self.n_agents):  # might as well save the positions (redundant)
-            agent_slice = self.state[i+1]
-            x, y = np.argwhere(agent_slice == 1)[0]
-            collisions_vec = self.state[:, x, y].copy()  # otherwise you overwrite the grid/state
-            collisions_vec[i+1] = 0  # no self-collisions
+            else:  # Trying to leave the level
+                collision_vecs[i, self._level_idx] = self._is_occupied_cell  # Collides with level boundaries
+
+        # For each agent check for arbitrary collisions:
+        for i in range(self.n_agents):  # Note: might as well save the positions (redundant): return value of make_move
+            agent_slice = self.state[i+self._agent_start_idx]
+            x, y = np.argwhere(agent_slice == self._is_occupied_cell)[0]    # current position of agent i
+            collisions_vec = self.state[:, x, y].copy()                     # "vertical fiber" at position of agent i
+            collisions_vec[i+self._agent_start_idx] = self._is_free_cell    # no self-collisions
            collision_vecs[i] += collisions_vec
-        reward, info = self.step_core(np.array(collision_vecs), actions, r)
+        reward, info = self.step_core(collision_vecs, actions, r)
        r += reward
        if self.steps >= self.max_steps:
            self.done = True
@@ -55,14 +63,18 @@ class BaseFactory(object):

    def make_move(self, agent_i, old_pos, new_pos):
        (x, y), (x_new, y_new) = old_pos, new_pos
-        self.state[agent_i+1, x, y] = 0
-        self.state[agent_i+1, x_new, y_new] = 1
+        self.state[agent_i+self._agent_start_idx, x, y] = self._is_free_cell
+        self.state[agent_i+self._agent_start_idx, x_new, y_new] = self._is_occupied_cell
+        return new_pos

-    def free_cells(self):
+    @property
+    def free_cells(self) -> np.ndarray:
        free_cells = self.state.sum(0)
-        free_cells = np.argwhere(free_cells == 0)
+        free_cells = np.argwhere(free_cells == self._is_free_cell)
        np.random.shuffle(free_cells)
        return free_cells

    def step_core(self, collisions_vec, actions, r):
-        return 0, {}
+        # Returns: Reward, Info
+        # Set to "raise NotImplementedError"
+        return 0, {}  # What is returned here?
--- a/environments/factory/simple_factory.py
+++ b/environments/factory/simple_factory.py
@@ -8,8 +8,9 @@ class SimpleFactory(BaseFactory):
        super(SimpleFactory, self).__init__(*args, **kwargs)
        self.slice_strings.update({self.state.shape[0]-1: 'dirt'})

+
    def spawn_dirt(self):
-        free_for_dirt = self.free_cells()
+        free_for_dirt = self.free_cells
        for x, y in free_for_dirt[:self.max_dirt]:  # randomly distribute dirt across the grid
            self.state[-1, x, y] = 1

@@ -27,10 +28,9 @@ class SimpleFactory(BaseFactory):
        return 0, {}


-
 if __name__ == '__main__':
    import random
    factory = SimpleFactory(n_agents=1, max_dirt=8)
    random_actions = [random.randint(0, 8) for _ in range(200)]
    for action in random_actions:
-        state, r, done, _ = factory.step(action)
+        state, r, done, _ = factory.step(action)
--- a/environments/factory/simple_factory_getting_dirty.py
+++ b/environments/factory/simple_factory_getting_dirty.py
@@ -0,0 +1,43 @@
+import numpy as np
+from environments.factory.base_factory import BaseFactory
+from collections import namedtuple
+
+
+DirtProperties = namedtuple('DirtProperties', ['clean_amount', 'max_spawn_ratio', 'gain_amount'])
+
+
+class GettingDirty(BaseFactory):
+    """BaseFactory variant that appends a dirt slice as the last slice of the state on every reset."""
+    _dirt_indx = -1  # the dirt slice is concatenated last in reset()
+
+    def __init__(self, *args, dirt_properties, **kwargs):
+        self._dirt_properties = dirt_properties  # must exist before BaseFactory.__init__ calls reset()
+        super(GettingDirty, self).__init__(*args, **kwargs)
+        self.slice_strings.update({self.state.shape[0]-1: 'dirt'})
+
+    def spawn_dirt(self):  # adds 0.1 dirt to the first max_spawn_ratio share of the shuffled free cells
+        free_for_dirt = self.free_cells
+        for x, y in free_for_dirt[:int(self._dirt_properties.max_spawn_ratio * len(free_for_dirt))]:
+            self.state[self._dirt_indx, x, y] += 0.1
+
+    def reset(self):
+        # ToDo: When self.reset returns the new states and stuff, use it here!
+        super().reset()  # state, agents, ... =
+        self.state = np.concatenate((self.state, np.zeros((1, *self.state.shape[1:]))))  # dirt is now the last slice
+        self.spawn_dirt()
+        return self.state, 0, self.done, {}  # State, Reward, Done, Info - same tuple as BaseFactory.reset
+
+    def step_core(self, collisions_vecs, actions, r):
+        for agent_i, cols in enumerate(collisions_vecs):
+            cols = np.argwhere(cols != 0).flatten()
+            print(f't = {self.steps}\tAgent {agent_i} has collisions with '
+                  f'{[self.slice_strings[entity] for entity in cols]}')
+        return 0, {}
+
+
+if __name__ == '__main__':
+    import random
+    dirt_props = DirtProperties(clean_amount=0.1, max_spawn_ratio=0.1, gain_amount=0.1)  # example values only
+    factory = GettingDirty(n_agents=1, dirt_properties=dirt_props)  # `max_dirt` is not an accepted argument
+    for action in (random.randint(0, 8) for _ in range(200)):
+        state, r, done, _ = factory.step(action)
--- a/environments/helpers.py
+++ b/environments/helpers.py
@@ -29,26 +29,28 @@ def check_agent_move(state, dim, action):
    x, y = agent_pos[0]
    x_new, y_new = x, y
    # Actions
-    if action == 0: # North
+    if action == 0:  # North
        x_new -= 1
-    elif action == 1: # East
+    elif action == 1:  # East
        y_new += 1
-    elif action == 2: # South
+    elif action == 2:  # South
        x_new += 1
-    elif action == 3: # West
+    elif action == 3:  # West
        y_new -= 1
-    elif action == 4: # NE
+    elif action == 4:  # NE
        x_new -= 1
        y_new += 1
-    elif action == 5: # SE
+    elif action == 5:  # SE
        x_new += 1
        y_new += 1
-    elif action == 6: # SW
+    elif action == 6:  # SW
        x_new += 1
        y_new -= 1
-    elif action == 7: # NW
+    elif action == 7:  # NW
        x_new -= 1
        y_new -= 1
+    else:
+        pass
    # Check validity
    valid = not (
            x_new < 0 or y_new < 0
@@ -58,10 +60,7 @@ def check_agent_move(state, dim, action):
    return (x, y), (x_new, y_new), valid


-
-
-
 if __name__ == '__main__':
-    x = parse_level(Path(__file__).parent / 'factory' / 'levels' / 'simple.txt')
-    y = one_hot_level(x)
-    print(np.argwhere(y == 0))
+    parsed_level = parse_level(Path(__file__).parent / 'factory' / 'levels' / 'simple.txt')
+    y = one_hot_level(parsed_level)
+    print(np.argwhere(y == 0))