First commit for our new MARL algorithms library; contains working implementations of IAC, SNAC and SEAC

2022-01-21 15:31:07 +01:00
parent 3e19970a60
commit ffc47752a7
24 changed files with 762 additions and 847 deletions
--- a/algorithms/utils.py
+++ b/algorithms/utils.py
@ -3,14 +3,51 @@ import torch
 import numpy as np
 import yaml
 from pathlib import Path
-from salina import instantiate_class
-from salina import TAgent
-from salina.agents.gyma import (
-    AutoResetGymAgent,
-    _torch_type,
-    _format_frame,
-    _torch_cat_dict
-)
+
+
+def load_class(classname):
+    """Return the attribute named by a dotted path.
+
+    ``classname`` is split at its last ``"."``: the part before it is
+    imported as a module and the part after it is looked up on that module
+    with ``getattr``. A string without a dot raises ``ValueError``.
+    """
+    from importlib import import_module
+
+    module_path, class_name = classname.rsplit(".", 1)
+    return getattr(import_module(module_path), class_name)
+
+
+def instantiate_class(arguments):
+    """Build an object from a mapping that names its class.
+
+    The ``"classname"`` entry holds the dotted path of the callable; every
+    other entry is forwarded to it as a keyword argument. ``arguments`` is
+    copied first, so the caller's mapping is left untouched.
+    """
+    from importlib import import_module
+
+    kwargs = dict(arguments)
+    dotted_path = kwargs.pop("classname")
+    module_path, class_name = dotted_path.rsplit(".", 1)
+    factory = getattr(import_module(module_path), class_name)
+    return factory(**kwargs)
+
+
+def get_class(arguments):
+    """Resolve the class named by ``arguments`` without instantiating it.
+
+    A ``dict`` is read by key (``arguments["classname"]``); any other object
+    is read by attribute (``arguments.classname``). The dotted path is then
+    split at its last ``"."`` into a module to import and a name to look up.
+    """
+    from importlib import import_module
+
+    if isinstance(arguments, dict):
+        dotted_path = arguments["classname"]
+    else:
+        dotted_path = arguments.classname
+    module_path, class_name = dotted_path.rsplit(".", 1)
+    return getattr(import_module(module_path), class_name)
+
+
+def get_arguments(arguments):
+    """Return a ``dict`` copy of ``arguments`` without its ``"classname"`` entry.
+
+    The input is not modified. A mapping that has no ``"classname"`` key is
+    simply copied.
+    """
+    remaining = dict(arguments)
+    # A default of None makes the removal a no-op when the key is absent.
+    remaining.pop("classname", None)
+    return remaining


 def load_yaml_file(path: Path):
@ -21,90 +58,29 @@ def load_yaml_file(path: Path):

 def add_env_props(cfg):
    env = instantiate_class(cfg['env'].copy())
-    cfg['agent'].update(dict(observation_size=env.observation_space.shape,
+    cfg['agent'].update(dict(observation_size=list(env.observation_space.shape),
                             n_actions=env.action_space.n))


+class Checkpointer(object):
+    def __init__(self, experiment_name, root, config, total_steps, n_checkpoints):
+        """Create the experiment directory and write the config into it.
+
+        ``root / experiment_name`` becomes the output directory, so ``root``
+        must support the ``/`` operator (presumably a ``pathlib.Path`` --
+        confirm against callers). ``config`` is dumped to ``config.yaml`` in
+        that directory. ``n_checkpoints`` integer step indices, produced by
+        ``np.linspace`` over ``1..total_steps`` and shifted down by one, are
+        stored in ``checkpoint_indices``.
+        """
+        self.path = root / experiment_name
+        one_based_steps = np.linspace(1, total_steps, n_checkpoints, dtype=int)
+        self.checkpoint_indices = list(one_based_steps - 1)
+        self.__current_checkpoint = 0
+        self.__current_step = 0
+        self.path.mkdir(exist_ok=True, parents=True)
+        config_path = self.path / 'config.yaml'
+        with config_path.open('w') as config_file:
+            yaml.dump(config, config_file, default_flow_style=False)

+    def save_experiment(self, name: str, model):
+        """Write ``model.state_dict()`` to ``<name>.pt`` in the current checkpoint folder.
+
+        The folder is ``checkpoint_<k>`` under the experiment directory, where
+        ``k`` is the current checkpoint counter; it is created if missing.
+        """
+        checkpoint_dir = self.path / f'checkpoint_{self.__current_checkpoint}'
+        checkpoint_dir.mkdir(exist_ok=True, parents=True)
+        weights = model.state_dict()
+        torch.save(weights, checkpoint_dir / f'{name}.pt')

-AGENT_PREFIX = 'agent#'
-REWARD       =  'reward'
-CUMU_REWARD  = 'cumulated_reward'
-OBS          = 'env_obs'
-SEP          = '_'
-ACTION       = 'action'
-
-
-def access_str(agent_i, name, prefix=''):
-    return f'{prefix}{AGENT_PREFIX}{agent_i}{SEP}{name}'
-
-
-class AutoResetGymMultiAgent(AutoResetGymAgent):
-    def __init__(self, *args, **kwargs):
-        super(AutoResetGymMultiAgent, self).__init__(*args, **kwargs)
-
-    def per_agent_values(self, name, values):
-        return {access_str(agent_i, name): value
-                for agent_i, value in zip(range(self.n_agents), values)}
-
-    def _initialize_envs(self, n):
-        super()._initialize_envs(n)
-        n_agents_list = [self.envs[i].unwrapped.n_agents for i in range(n)]
-        assert all(n_agents == n_agents_list[0] for n_agents in n_agents_list), \
-            'All envs must have the same number of agents.'
-        self.n_agents = n_agents_list[0]
-
-    def _reset(self, k, save_render):
-        ret = super()._reset(k, save_render)
-        obs = ret['env_obs'].squeeze()
-        self.cumulated_reward[k] = [0.0]*self.n_agents
-        obs      = self.per_agent_values(OBS,  [_format_frame(obs[i]) for i in range(self.n_agents)])
-        cumu_rew = self.per_agent_values(CUMU_REWARD, torch.zeros(self.n_agents, 1).float().unbind())
-        rewards  = self.per_agent_values(REWARD,      torch.zeros(self.n_agents, 1).float().unbind())
-        ret.update(cumu_rew)
-        ret.update(rewards)
-        ret.update(obs)
-        for remove in ['env_obs', 'cumulated_reward', 'reward']:
-            del ret[remove]
-        return ret
-
-    def _step(self, k, action, save_render):
-        self.timestep[k] += 1
-        env = self.envs[k]
-        if len(action.size()) == 0:
-            action = action.item()
-            assert isinstance(action, int)
-        else:
-            action = np.array(action.tolist())
-        o, r, d, _ = env.step(action)
-        self.cumulated_reward[k] = [x+y for x, y in zip(r, self.cumulated_reward[k])]
-        observation = self.per_agent_values(OBS, [_format_frame(o[i]) for i in range(self.n_agents)])
-        if d:
-            self.is_running[k] = False
-        if save_render:
-            image = env.render(mode="image").unsqueeze(0)
-            observation["rendering"] = image
-        rewards           = self.per_agent_values(REWARD, torch.tensor(r).float().view(-1, 1).unbind())
-        cumulated_rewards = self.per_agent_values(CUMU_REWARD, torch.tensor(self.cumulated_reward[k]).float().view(-1, 1).unbind())
-        ret = {
-            **observation,
-            **rewards,
-            **cumulated_rewards,
-            "done": torch.tensor([d]),
-            "initial_state": torch.tensor([False]),
-            "timestep": torch.tensor([self.timestep[k]])
-        }
-        return _torch_type(ret)
-
-
-class CombineActionsAgent(TAgent):
-    def __init__(self):
-        super().__init__()
-        self.pattern = fr'^{AGENT_PREFIX}\d{SEP}{ACTION}$'
-
-    def forward(self, t, **kwargs):
-        keys = list(self.workspace.keys())
-        action_keys = sorted([k for k in keys if bool(re.match(self.pattern, k))])
-        actions = torch.cat([self.get((k, t)) for k in action_keys], 0)
-        actions = actions if len(action_keys) <= 1 else actions.unsqueeze(0)
-        self.set((f'action', t), actions)
+    def step(self, to_save):
+        """Advance the step counter, saving first if this step is a checkpoint.
+
+        ``to_save`` is iterated as ``(name, model)`` pairs, each handed to
+        ``save_experiment``. It is only consumed on steps whose index is
+        listed in ``checkpoint_indices``; on such a step the checkpoint
+        counter is incremented after all pairs have been saved.
+        """
+        checkpoint_due = self.__current_step in self.checkpoint_indices
+        if checkpoint_due:
+            print(f'Checkpointing #{self.__current_checkpoint}')
+            for model_name, model in to_save:
+                self.save_experiment(model_name, model)
+            self.__current_checkpoint += 1
+        self.__current_step += 1