mirror of
https://github.com/illiumst/marl-factory-grid.git
synced 2025-09-18 00:21:58 +02:00
major redesign of observations and entities
@@ -1,100 +0,0 @@
import numpy as np

from networkx.algorithms.approximation import traveling_salesman as tsp

from environments.factory.base.objects import Agent
from environments.helpers import points_to_graph
from environments import helpers as h

from environments.helpers import Constants as BaseConstants
from environments.helpers import EnvActions as BaseActions


class Constants(BaseConstants):
    DIRT = 'DirtPile'


class Actions(BaseActions):
    CLEAN_UP = 'do_cleanup_action'


a = Actions
c = Constants

future_planning = 7


class TSPDirtAgent(Agent):

    def __init__(self, env, *args,
                 static_problem: bool = True, **kwargs):
        super().__init__(*args, **kwargs)
        self.static_problem = static_problem
        self.local_optimization = True
        self._env = env
        self._floortile_graph = points_to_graph(self._env[c.FLOOR].positions,
                                                allow_euclidean_connections=self._env._actions.allow_diagonal_movement,
                                                allow_manhattan_connections=self._env._actions.allow_square_movement)
        self._static_route = None

    def predict(self, *_, **__):
        if self._env[c.DIRT].by_pos(self.pos) is not None:
            # Translate the action_object to an integer to have the same output as any other model
            action = a.CLEAN_UP
        elif any('door' in x.name.lower() for x in self.tile.guests):
            door = next(x for x in self.tile.guests if 'door' in x.name.lower())
            if door.is_closed:
                # Translate the action_object to an integer to have the same output as any other model
                action = h.EnvActions.USE_DOOR
            else:
                action = self._predict_move()
        else:
            action = self._predict_move()
        # Translate the action_object to an integer to have the same output as any other model
        action_obj = next(action_i for action_name, action_i in self._env.named_action_space.items()
                          if action_name == action)
        return action_obj

    def _predict_move(self):
        if len(self._env[c.DIRT]) >= 1:
            if self.static_problem:
                if not self._static_route:
                    self._static_route = self.calculate_tsp_route()
                else:
                    pass
                next_pos = self._static_route.pop(0)
                while next_pos == self.pos:
                    next_pos = self._static_route.pop(0)
            else:
                if not self._static_route:
                    self._static_route = self.calculate_tsp_route()[:7]
                next_pos = self._static_route.pop(0)
                while next_pos == self.pos:
                    next_pos = self._static_route.pop(0)

            diff = np.subtract(next_pos, self.pos)
            # Retrieve the action based on the position diff (as in: what do I have to do to get there?)
            try:
                action = next(action for action, pos_diff in h.ACTIONMAP.items()
                              if (diff == pos_diff).all())
            except StopIteration:
                print('This should not happen!')
        else:
            action = int(np.random.randint(self._env.action_space.n))
        return action

    def calculate_tsp_route(self):
        if self.local_optimization:
            nodes = \
                [self.pos] + \
                [x for x in self._env[c.DIRT].positions if max(abs(np.subtract(x, self.pos))) < 3]
            try:
                while len(nodes) < 7:
                    nodes += [next(x for x in self._env[c.DIRT].positions if x not in nodes)]
            except StopIteration:
                nodes = [self.pos] + self._env[c.DIRT].positions
        else:
            nodes = [self.pos] + self._env[c.DIRT].positions
        route = tsp.traveling_salesman_problem(self._floortile_graph,
                                               nodes=nodes, cycle=True, method=tsp.greedy_tsp)
        return route
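Note: the route planning above reduces to a greedy TSP approximation over the floor-tile graph. A minimal self-contained sketch of the same idea, assuming only numpy and networkx (the 4-connected grid below is a hypothetical stand-in for the project's points_to_graph helper):

    import networkx as nx
    from networkx.algorithms.approximation import traveling_salesman as tsp

    # Build a small 4-connected "floor tile" graph (Manhattan connections only).
    positions = [(x, y) for x in range(4) for y in range(4)]
    graph = nx.Graph()
    graph.add_nodes_from(positions)
    for x, y in positions:
        for neighbor in ((x + 1, y), (x, y + 1)):
            if neighbor in graph:
                graph.add_edge((x, y), neighbor)

    # Visit the agent position plus all dirt positions; greedy_tsp approximates
    # the tour, and the returned route already contains the intermediate tiles.
    agent_pos, dirt_positions = (0, 0), [(3, 1), (1, 3), (2, 2)]
    route = tsp.traveling_salesman_problem(graph, nodes=[agent_pos] + dirt_positions,
                                           cycle=True, method=tsp.greedy_tsp)
    print(route)  # e.g. [(0, 0), (1, 0), (2, 0), (3, 0), (3, 1), ...]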
@@ -1,5 +1,5 @@
 import torch
-from typing import Union, List
+from typing import Union, List, Dict
 import numpy as np
 from torch.distributions import Categorical
 from algorithms.marl.memory import MARLActorCriticMemory
@@ -74,7 +74,7 @@ class BaseActorCritic:
         actions = [Categorical(logits=logits).sample().item() for logits in out[nms.LOGITS]]
         return actions
 
-    def init_hidden(self) -> dict[ListOrTensor]:
+    def init_hidden(self) -> Dict[str, ListOrTensor]:
         pass
 
     def forward(self,
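Note: this annotation fix matters beyond style: dict[ListOrTensor] leaves the key type unspecified, and subscripting the builtin dict in an annotation raises a TypeError at definition time on Python < 3.9, while typing.Dict works everywhere. A minimal sketch (names assumed from the surrounding class):

    from typing import Dict, List, Union
    import torch

    ListOrTensor = Union[List, torch.Tensor]

    # The annotation is evaluated when the function is defined;
    # dict[ListOrTensor] would crash right here on Python 3.8.
    def init_hidden() -> Dict[str, ListOrTensor]:
        return {'hidden_actor': torch.zeros(1, 3), 'hidden_critic': torch.zeros(1, 3)}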
@@ -82,7 +82,7 @@ class BaseActorCritic:
                 actions: ListOrTensor,
                 hidden_actor: ListOrTensor,
                 hidden_critic: ListOrTensor
-                ) -> dict[ListOrTensor]:
+                ) -> Dict[str, ListOrTensor]:
         pass
 
     @torch.no_grad()
@@ -39,7 +39,7 @@ class LoopIAC(BaseActorCritic):
     def forward(self, observations, actions, hidden_actor, hidden_critic):
         outputs = [
             net(
-                self._as_torch(observations[ag_i]).unsqueeze(0).unsqueeze(0),  # agents x time
+                self._as_torch(observations[ag_i]).unsqueeze(0).unsqueeze(0),  # agent x time
                 self._as_torch(actions[ag_i]).unsqueeze(0),
                 hidden_actor[ag_i],
                 hidden_critic[ag_i]
@@ -46,7 +46,7 @@ class LoopMAPPO(LoopSNAC):
 
         # monte carlo returns
         mc_returns = self.monte_carlo_returns(batch[nms.REWARD], batch[nms.DONE], gamma)
-        mc_returns = (mc_returns - mc_returns.mean()) / (mc_returns.std() + 1e-8)  # todo: norm across agents ok?
+        mc_returns = (mc_returns - mc_returns.mean()) / (mc_returns.std() + 1e-8)  # todo: norm across agent ok?
         advantages = mc_returns - out[nms.CRITIC][:, :-1]
 
         # policy loss
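Note: the normalized returns form the advantage baseline, and the in-code todo asks whether a single mean/std across all agents is appropriate. A sketch of the computation under that assumption (monte_carlo_returns below is an illustrative reimplementation, not the repository's):

    import torch

    def monte_carlo_returns(rewards, dones, gamma):
        # rewards, dones: [agents x time]; G_t = r_t + gamma * G_{t+1},
        # with the running return reset at episode boundaries (done == 1).
        returns = torch.zeros_like(rewards)
        running = torch.zeros(rewards.shape[0])
        for t in reversed(range(rewards.shape[1])):
            running = rewards[:, t] + gamma * (1 - dones[:, t]) * running
            returns[:, t] = running
        return returns

    rewards = torch.tensor([[0., 0., 1.], [0., 1., 0.]])
    dones = torch.tensor([[0., 0., 1.], [0., 1., 1.]])
    mc_returns = monte_carlo_returns(rewards, dones, gamma=0.99)
    # One mean/std across ALL agents and timesteps, exactly as in the diff:
    mc_returns = (mc_returns - mc_returns.mean()) / (mc_returns.std() + 1e-8)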
@@ -120,7 +120,7 @@ class MARLActorCriticMemory(object):
 
     def __getattr__(self, attr):
         all_attrs = [getattr(mem, attr) for mem in self.memories]
-        return torch.cat(all_attrs, 0)  # agents x time ...
+        return torch.cat(all_attrs, 0)  # agent x time ...
 
     def chunk_dataloader(self, chunk_len, k):
         datasets = [ExperienceChunks(mem, chunk_len, k) for mem in self.memories]
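Note: __getattr__ here implements attribute delegation: any field looked up on the stacked memory is fetched from every per-agent memory and concatenated along dim 0. A minimal sketch of the pattern (class names are illustrative):

    import torch

    class PerAgentMemory:
        def __init__(self, rewards):
            self.rewards = rewards  # [1 x time]

    class StackedMemory:
        def __init__(self, memories):
            self.memories = memories

        def __getattr__(self, attr):
            # Only called for attributes missing on the instance itself,
            # so self.memories never recurses into this method.
            return torch.cat([getattr(mem, attr) for mem in self.memories], 0)

    stacked = StackedMemory([PerAgentMemory(torch.ones(1, 4)),
                             PerAgentMemory(torch.zeros(1, 4))])
    print(stacked.rewards.shape)  # torch.Size([2, 4]) -> agents x time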
95  algorithms/static/TSP_base_agent.py  Normal file
@@ -0,0 +1,95 @@
from random import choice

import numpy as np

from networkx.algorithms.approximation import traveling_salesman as tsp

from environment.utils.helpers import points_to_graph

from modules.doors import constants as do
from environment import constants as c
from environment.utils.helpers import MOVEMAP

from abc import abstractmethod, ABC

future_planning = 7


class TSPBaseAgent(ABC):

    def __init__(self, state, agent_i, static_problem: bool = True):
        self.static_problem = static_problem
        self.local_optimization = True
        self._env = state
        self.state = self._env.state[c.AGENT][agent_i]
        self._floortile_graph = points_to_graph(self._env[c.FLOOR].positions)
        self._static_route = None

    @abstractmethod
    def predict(self, *_, **__) -> int:
        return 0

    def _use_door_or_move(self, door, target):
        if door.is_closed:
            # Translate the action_object to an integer to have the same output as any other model
            action = do.ACTION_DOOR_USE
        else:
            action = self._predict_move(target)
        return action

    def calculate_tsp_route(self, target_identifier):
        positions = [x for x in self._env.state[target_identifier].positions if x != c.VALUE_NO_POS]
        if self.local_optimization:
            nodes = \
                [self.state.pos] + \
                [x for x in positions if max(abs(np.subtract(x, self.state.pos))) < 3]
            try:
                while len(nodes) < 7:
                    nodes += [next(x for x in positions if x not in nodes)]
            except StopIteration:
                nodes = [self.state.pos] + positions
        else:
            nodes = [self.state.pos] + positions
        route = tsp.traveling_salesman_problem(self._floortile_graph,
                                               nodes=nodes, cycle=True, method=tsp.greedy_tsp)
        return route

    def _door_is_close(self):
        try:
            return next(y for x in self.state.tile.neighboring_floor for y in x.guests if do.DOOR in y.name)
        except StopIteration:
            return None

    def _has_targets(self, target_identifier):
        return bool(len([x for x in self._env.state[target_identifier] if x.pos != c.VALUE_NO_POS]) >= 1)

    def _predict_move(self, target_identifier):
        if self._has_targets(target_identifier):
            if self.static_problem:
                if not self._static_route:
                    self._static_route = self.calculate_tsp_route(target_identifier)
                else:
                    pass
                next_pos = self._static_route.pop(0)
                while next_pos == self.state.pos:
                    next_pos = self._static_route.pop(0)
            else:
                if not self._static_route:
                    self._static_route = self.calculate_tsp_route(target_identifier)[:7]
                next_pos = self._static_route.pop(0)
                while next_pos == self.state.pos:
                    next_pos = self._static_route.pop(0)

            diff = np.subtract(next_pos, self.state.pos)
            # Retrieve the action based on the position diff (as in: what do I have to do to get there?)
            try:
                action = next(action for action, pos_diff in MOVEMAP.items() if np.all(diff == pos_diff))
            except StopIteration:
                print(f'diff: {diff}')
                print('This should not happen!')
                action = choice(self.state.actions).name
        else:
            action = choice(self.state.actions).name
        # noinspection PyUnboundLocalVariable
        return action
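Note: _predict_move inverts a position difference into a movement action via MOVEMAP. A sketch with hypothetical map contents (the real MOVEMAP lives in environment.utils.helpers and also covers diagonal moves):

    import numpy as np

    MOVEMAP = {'north': (-1, 0), 'south': (1, 0), 'west': (0, -1), 'east': (0, 1)}

    def action_for_step(pos, next_pos):
        # "What do I have to do to get there?" -- match the offset to an action.
        diff = np.subtract(next_pos, pos)
        return next(action for action, pos_diff in MOVEMAP.items() if np.all(diff == pos_diff))

    print(action_for_step((2, 2), (2, 3)))  # east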
27  algorithms/static/TSP_dirt_agent.py  Normal file
@@ -0,0 +1,27 @@
from algorithms.static.TSP_base_agent import TSPBaseAgent

from modules.clean_up import constants as di

future_planning = 7


class TSPDirtAgent(TSPBaseAgent):

    def __init__(self, *args, **kwargs):
        super(TSPDirtAgent, self).__init__(*args, **kwargs)

    def predict(self, *_, **__):
        if self._env.state[di.DIRT].by_pos(self.state.pos) is not None:
            # Translate the action_object to an integer to have the same output as any other model
            action = di.CLEAN_UP
        elif door := self._door_is_close():
            action = self._use_door_or_move(door, di.DIRT)
        else:
            action = self._predict_move(di.DIRT)
        # Translate the action_object to an integer to have the same output as any other model
        try:
            action_obj = next(action_i for action_i, a in enumerate(self.state.actions) if a.name == action)
        except (StopIteration, UnboundLocalError):
            print('Will not happen')
            raise EnvironmentError
        return action_obj
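Note: every agent ends predict() with the same name-to-index translation so that it returns plain integers like any learned policy would. A self-contained sketch of that lookup (the Action dataclass stands in for the environment's action objects):

    from dataclasses import dataclass

    @dataclass
    class Action:
        name: str

    actions = [Action('north'), Action('use_door'), Action('do_cleanup_action')]
    chosen = 'do_cleanup_action'
    action_obj = next(i for i, a in enumerate(actions) if a.name == chosen)
    print(action_obj)  # 2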
59  algorithms/static/TSP_item_agent.py  Normal file
@@ -0,0 +1,59 @@
import numpy as np

from algorithms.static.TSP_base_agent import TSPBaseAgent

from modules.items import constants as i

future_planning = 7
inventory_size = 3

MODE_GET = 'Mode_Get'
MODE_BRING = 'Mode_Bring'


class TSPItemAgent(TSPBaseAgent):

    def __init__(self, *args, mode=MODE_GET, **kwargs):
        super(TSPItemAgent, self).__init__(*args, **kwargs)
        self.mode = mode

    def predict(self, *_, **__):
        if self._env.state[i.ITEM].by_pos(self.state.pos) is not None:
            # Translate the action_object to an integer to have the same output as any other model
            action = i.ITEM_ACTION
        elif self._env.state[i.DROP_OFF].by_pos(self.state.pos) is not None:
            # Translate the action_object to an integer to have the same output as any other model
            action = i.ITEM_ACTION
        elif door := self._door_is_close():
            action = self._use_door_or_move(door, i.DROP_OFF if self.mode == MODE_BRING else i.ITEM)
        else:
            action = self._choose()
        # Translate the action_object to an integer to have the same output as any other model
        try:
            action_obj = next(action_i for action_i, a in enumerate(self.state.actions) if a.name == action)
        except (StopIteration, UnboundLocalError):
            print('Will not happen')
            raise EnvironmentError
        # noinspection PyUnboundLocalVariable
        if self.mode == MODE_BRING and len(self._env[i.INVENTORY].by_entity(self.state)):
            pass
        elif self.mode == MODE_BRING and not len(self._env[i.INVENTORY].by_entity(self.state)):
            self.mode = MODE_GET
        elif self.mode == MODE_GET and len(self._env[i.INVENTORY].by_entity(self.state)) > inventory_size:
            self.mode = MODE_BRING
        else:
            pass
        return action_obj

    def _choose(self):
        target = i.DROP_OFF if self.mode == MODE_BRING else i.ITEM
        if len(self._env.state[i.ITEM]) >= 1:
            action = self._predict_move(target)
        elif len(self._env[i.INVENTORY].by_entity(self.state)):
            self.mode = MODE_BRING
            action = self._predict_move(target)
        else:
            action = int(np.random.randint(self._env.action_space.n))
        # noinspection PyUnboundLocalVariable
        return action
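Note: the mode bookkeeping at the end of predict() is a two-state machine driven by inventory load. Sketched in isolation (the carried-item count stands in for self._env[i.INVENTORY].by_entity(...)):

    MODE_GET, MODE_BRING, inventory_size = 'Mode_Get', 'Mode_Bring', 3

    def next_mode(mode, n_carried):
        if mode == MODE_BRING and n_carried == 0:
            return MODE_GET    # everything dropped off -> collect again
        if mode == MODE_GET and n_carried > inventory_size:
            return MODE_BRING  # over capacity -> head for the drop-off
        return mode

    assert next_mode(MODE_GET, 4) == MODE_BRING
    assert next_mode(MODE_BRING, 0) == MODE_GET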
32  algorithms/static/TSP_target_agent.py  Normal file
@@ -0,0 +1,32 @@
from algorithms.static.TSP_base_agent import TSPBaseAgent

from modules.destinations import constants as d
from modules.doors import constants as do

future_planning = 7


class TSPTargetAgent(TSPBaseAgent):

    def __init__(self, *args, **kwargs):
        super(TSPTargetAgent, self).__init__(*args, **kwargs)

    def _handle_doors(self):
        try:
            return next(y for x in self.state.tile.neighboring_floor for y in x.guests if do.DOOR in y.name)
        except StopIteration:
            return None

    def predict(self, *_, **__):
        if door := self._door_is_close():
            action = self._use_door_or_move(door, d.DESTINATION)
        else:
            action = self._predict_move(d.DESTINATION)
        # Translate the action_object to an integer to have the same output as any other model
        try:
            action_obj = next(action_i for action_i, a in enumerate(self.state.actions) if a.name == action)
        except (StopIteration, UnboundLocalError):
            print('Will not happen')
            raise EnvironmentError  # keep behaviour consistent with the other TSP agents
        return action_obj
0  algorithms/static/__init__.py  Normal file
15  algorithms/static/random_agent.py  Normal file
@@ -0,0 +1,15 @@
from random import randint

from algorithms.static.TSP_base_agent import TSPBaseAgent

future_planning = 7


class TSPRandomAgent(TSPBaseAgent):

    def __init__(self, n_actions, *args, **kwargs):
        super(TSPRandomAgent, self).__init__(*args, **kwargs)
        self.n_action = n_actions

    def predict(self, *_, **__):
        return randint(0, self.n_action - 1)
@@ -1,4 +1,3 @@
-import re
 import torch
 import numpy as np
 import yaml