Merge branch 'main' into unit_testing

2025-07-06 01:21:36 +02:00 · 2023-11-13 11:00:14 +01:00
parent 961b5e36f7 c17da78440
commit f25f90a78b
22 changed files with 205 additions and 114 deletions
--- a/marl_factory_grid/utils/config_parser.py
+++ b/marl_factory_grid/utils/config_parser.py
@ -1,4 +1,5 @@
 import ast
+from collections import defaultdict

 from os import PathLike
 from pathlib import Path
@ -24,13 +25,21 @@ class FactoryConfigParser(object):
        self.config_path = Path(config_path)
        self.custom_modules_path = Path(custom_modules_path) if custom_modules_path is not None else custom_modules_path
        self.config = yaml.safe_load(self.config_path.open())
+        self._n_abbr_dict = None

    def __getattr__(self, item):
        # Attributes not found the normal way are looked up under this parser's 'General' key.
        general_section = self['General']
        return general_section[item]

    def _get_sub_list(self, primary_key: str, sub_key: str):
        # For every mapping listed under ``primary_key`` in the config, build a dict that keeps the
        # outer keys and maps each of them to the flattened items stored under ``sub_key`` of its value.
        return [{key: [s for k, v in val.items() if k == sub_key for s in v] for key, val in x.items()
-                 } for x in self.config[primary_key]]
+                 } for x in self.config.get(primary_key, [])]
+
+    def _n_abbr(self, n):
+        """
+        Return the English ordinal suffix for the integer ``n``.
+
+        :param n: The number to find the suffix for (e.g. 1 -> 'st', 2 -> 'nd', 3 -> 'rd', 4 -> 'th').
+        :return: One of 'st', 'nd', 'rd' or 'th'.
+        :raises AssertionError: If ``n`` is not an int.
+        """
+        assert isinstance(n, int)
+        if self._n_abbr_dict is None:
+            # Built lazily and cached; keyed by the last digit, everything else falls back to 'th'.
+            self._n_abbr_dict = defaultdict(lambda: 'th', {1: 'st', 2: 'nd', 3: 'rd'})
+        magnitude = abs(n)
+        # 11, 12 and 13 (also 111, 212, ...) are irregular and always take 'th'.
+        if magnitude % 100 in (11, 12, 13):
+            return 'th'
+        # Look up the last digit only, so that 21 -> 'st', 22 -> 'nd', 23 -> 'rd' and the cache stays small.
+        return self._n_abbr_dict[magnitude % 10]
+

    @property
    def agent_actions(self):
@ -145,11 +154,18 @@ class FactoryConfigParser(object):
            observations.extend(x for x in self.agents[name]['Observations'] if x != c.DEFAULTS)
            positions = [ast.literal_eval(x) for x in self.agents[name].get('Positions', [])]
            other_kwargs = {k: v for k, v in self.agents[name].items() if k not in
-                            ['Actions', 'Observations', 'Positions']}
+                            ['Actions', 'Observations', 'Positions', 'Clones']}
            parsed_agents_conf[name] = dict(
                actions=parsed_actions, observations=observations, positions=positions, other=other_kwargs
                                            )

+            clones = self.agents[name].get('Clones', 0)
+            if clones:
+                if isinstance(clones, int):
+                    clones = [f'{name}_the_{n}{self._n_abbr(n)}' for n in range(clones)]
+                for clone in clones:
+                    parsed_agents_conf[clone] = parsed_agents_conf[name].copy()
+
        return parsed_agents_conf

    def load_env_rules(self) -> List[Rule]:
--- a/marl_factory_grid/utils/logging/envmonitor.py
+++ b/marl_factory_grid/utils/logging/envmonitor.py
@ -58,3 +58,6 @@ class EnvMonitor(Wrapper):
            pickle.dump(self._monitor_df.reset_index(), f, protocol=pickle.HIGHEST_PROTOCOL)
        if auto_plotting_keys:
            plot_single_run(filepath, column_keys=auto_plotting_keys)
+
+    def report_possible_colum_keys(self):
+        """Print the ``columns`` of ``self._monitor_df``."""
+        available_columns = self._monitor_df.columns
+        print(available_columns)
--- a/marl_factory_grid/utils/observation_builder.py
+++ b/marl_factory_grid/utils/observation_builder.py
@ -24,11 +24,7 @@ class OBSBuilder(object):
            return 0

    def __init__(self, level_shape: np.size, state: Gamestate, pomdp_r: int):
-        self._curr_env_step = None
        self.all_obs = dict()
-        self.light_blockers = defaultdict(lambda: False)
-        self.positional = defaultdict(lambda: False)
-        self.non_positional = defaultdict(lambda: False)
        self.ray_caster = dict()

        self.level_shape = level_shape
@ -37,13 +33,15 @@ class OBSBuilder(object):
        self.size = np.prod(self.obs_shape)

        self.obs_layers = dict()
-
-        self.reset_struc_obs_block(state)
        self.curr_lightmaps = dict()
+
        self._floortiles = defaultdict(list, {pos: [Floor(*pos)] for pos in state.entities.floorlist})

-    def reset_struc_obs_block(self, state):
-        self._curr_env_step = state.curr_step
+        self.reset(state)
+
+    def reset(self, state):
+        # Reset temporary information
+        self.curr_lightmaps = dict()
        # Construct an empty obs (array) for possible placeholders
        self.all_obs[c.PLACEHOLDER] = np.full(self.obs_shape, 0, dtype=float)
        # Fill the all_obs-dict with all available entities
@ -52,7 +50,8 @@ class OBSBuilder(object):

    def observation_space(self, state):
        from gymnasium.spaces import Tuple, Box
-        obsn = self.refresh_and_build_for_all(state)
+        self.reset(state)
+        obsn = self.build_for_all(state)
        if len(state[c.AGENT]) == 1:
            space = Box(low=0, high=1, shape=next(x for x in obsn.values()).shape, dtype=np.float32)
        else:
@ -60,14 +59,13 @@ class OBSBuilder(object):
        return space

    def named_observation_space(self, state):
        # Returns whatever the all-agents builder call below produces for ``state``.
-        return self.refresh_and_build_for_all(state)
+        self.reset(state)
+        return self.build_for_all(state)

-    def refresh_and_build_for_all(self, state) -> (dict, dict):
-        self.reset_struc_obs_block(state)
+    def build_for_all(self, state) -> (dict, dict):
        # Maps each agent's name to the first element of the result of ``build_for_agent``.
        # NOTE(review): the return annotation says ``(dict, dict)`` but a single dict is returned - confirm and fix.
        return {agent.name: self.build_for_agent(agent, state)[0] for agent in state[c.AGENT]}

-    def refresh_and_build_named_for_all(self, state) -> Dict[str, Dict[str, np.ndarray]]:
-        self.reset_struc_obs_block(state)
+    def build_named_for_all(self, state) -> Dict[str, Dict[str, np.ndarray]]:
        named_obs_dict = {}
        for agent in state[c.AGENT]:
            obs, names = self.build_for_agent(agent, state)
@ -85,9 +83,6 @@ class OBSBuilder(object):
        pass

    def build_for_agent(self, agent, state) -> (List[str], np.ndarray):
-        assert self._curr_env_step == state.curr_step, (
-            "The observation objekt has not been reset this state! Call 'reset_struc_obs_block(state)'"
-        )
        try:
            agent_want_obs = self.obs_layers[agent.name]
        except KeyError:
@ -166,7 +161,8 @@ class OBSBuilder(object):
                            raise ValueError(f'Max(obs.size) for {e.name}:  {obs[idx].size}, but was: {len(v)}.')
        if self.pomdp_r:
            try:
-                light_map = np.zeros(self.obs_shape)
+                light_map = self.curr_lightmaps.get(agent.name, np.zeros(self.obs_shape))
+                light_map[:] = 0.0
                visible_floor = self.ray_caster[agent.name].visible_entities(self._floortiles, reset_cache=False)

                for f in set(visible_floor):
--- a/marl_factory_grid/utils/plotting/plotting_utils.py
+++ b/marl_factory_grid/utils/plotting/plotting_utils.py
@ -49,7 +49,7 @@ def prepare_plt(df, hue, style, hue_order):
    plt.close('all')
    sns.set(rc={'text.usetex': False}, style='whitegrid')
    lineplot = sns.lineplot(data=df, x='Episode', y='Score', hue=hue, style=style,
-                            ci=95, palette=PALETTE, hue_order=hue_order, )
+                            errorbar=('ci', 95), palette=PALETTE, hue_order=hue_order, )
    plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0)
    plt.tight_layout()
    # lineplot.set_title(f'{sorted(list(df["Measurement"].unique()))}')
--- a/marl_factory_grid/utils/states.py
+++ b/marl_factory_grid/utils/states.py
@ -8,7 +8,7 @@ import numpy as np
 from marl_factory_grid.algorithms.static.utils import points_to_graph
 from marl_factory_grid.environment import constants as c
 from marl_factory_grid.environment.entity.entity import Entity
-from marl_factory_grid.environment.rules import Rule
+from marl_factory_grid.environment.rules import Rule, SpawnAgents
 from marl_factory_grid.utils.results import Result, DoneResult
 from marl_factory_grid.environment.tests import Test
 from marl_factory_grid.utils.results import Result
@ -32,18 +32,19 @@ class StepRules:
        self.rules.append(item)
        return True

-    def do_all_reset(self, state):
-        for rule in self.rules:
-            if rule_reset_printline := rule.on_reset(state):
-                state.print(rule_reset_printline)
-        return c.VALID
-
    def do_all_init(self, state, lvl_map):
        """
        Call ``on_init`` on every registered rule and print each truthy return value via ``state.print``.

        :param state: Object handed to every rule; must offer ``print``.
        :param lvl_map: Passed through unchanged to every rule's ``on_init``.
        :return: ``c.VALID``
        """
        for rule in self.rules:
            init_message = rule.on_init(state, lvl_map)
            if init_message:
                state.print(init_message)
        return c.VALID

+    def do_all_reset(self, state):
+        """
+        Call ``on_reset`` of a fresh ``SpawnAgents`` rule first, then ``on_reset`` of every registered rule.
+
+        Truthy return values of the registered rules are printed via ``state.print``; the return value of
+        the ``SpawnAgents`` call is ignored.
+
+        :param state: Object handed to every rule; must offer ``print``.
+        :return: ``c.VALID``
+        """
+        SpawnAgents().on_reset(state)
+        for rule in self.rules:
+            reset_message = rule.on_reset(state)
+            if reset_message:
+                state.print(reset_message)
+        return c.VALID
+
    def tick_step_all(self, state):
        results = list()
        for rule in self.rules:
@ -91,6 +92,10 @@ class Gamestate(object):
        self._floortile_graph = None
        self.tests = StepTests(*tests)

+    def reset(self):
+        """Set ``curr_step`` back to 0 and ``curr_actions`` back to None."""
+        self.curr_step, self.curr_actions = 0, None
+
    def __getitem__(self, item):
        # Subscripting the state is forwarded to ``self.entities``.
        entities = self.entities
        return entities[item]

@ -201,7 +206,7 @@ class Gamestate(object):
                results.extend(on_check_done_result)
        return results

-    def get_all_pos_with_collisions(self) -> List[Tuple[(int, int)]]:
+    def get_collision_positions(self) -> List[Tuple[(int, int)]]:
        """
        Returns a list positions [(x, y), ... ] on which collisions occur. This does not include agents,
        that were unable to move because their target direction was blocked, also a form of collision.