From b09c461754c287bf2c1b986d2824aa4a615d1bb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20M=C3=BCller?= Date: Fri, 28 Jan 2022 11:07:25 +0100 Subject: [PATCH] added first working MAPPO implementation --- algorithms/marl/__init__.py | 8 ++-- algorithms/marl/base_ac.py | 41 +++++++++++++----- algorithms/marl/iac.py | 1 - algorithms/marl/mappo.py | 78 ++++++++++++++++++++++++++++++++++ algorithms/marl/memory.py | 31 +++++++++++++- algorithms/marl/networks.py | 27 +++++++++--- algorithms/marl/seac.py | 4 +- requirements.txt | 6 ++- studies/normalization_study.py | 9 ++-- studies/playground_file.py | 36 ++++++---------- studies/viz_policy.py | 14 +++--- 11 files changed, 194 insertions(+), 61 deletions(-) create mode 100644 algorithms/marl/mappo.py diff --git a/algorithms/marl/__init__.py b/algorithms/marl/__init__.py index f1c46d1..7c39bb0 100644 --- a/algorithms/marl/__init__.py +++ b/algorithms/marl/__init__.py @@ -1,4 +1,6 @@ from algorithms.marl.base_ac import BaseActorCritic -from algorithms.marl.iac import LoopIAC -from algorithms.marl.snac import LoopSNAC -from algorithms.marl.seac import LoopSEAC \ No newline at end of file +from algorithms.marl.iac import LoopIAC +from algorithms.marl.snac import LoopSNAC +from algorithms.marl.seac import LoopSEAC +from algorithms.marl.mappo import LoopMAPPO +from algorithms.marl.memory import MARLActorCriticMemory \ No newline at end of file diff --git a/algorithms/marl/base_ac.py b/algorithms/marl/base_ac.py index e2d2fe9..99d6591 100644 --- a/algorithms/marl/base_ac.py +++ b/algorithms/marl/base_ac.py @@ -1,5 +1,6 @@ import torch from typing import Union, List +import copy import numpy as np from torch.distributions import Categorical from algorithms.marl.memory import MARLActorCriticMemory @@ -59,7 +60,7 @@ class BaseActorCritic: actions: ListOrTensor, hidden_actor: ListOrTensor, hidden_critic: ListOrTensor - ): + ) -> dict[ListOrTensor]: pass @@ -67,8 +68,9 @@ class BaseActorCritic: def train_loop(self, checkpointer=None): env = instantiate_class(self.cfg['env']) n_steps, max_steps = [self.cfg['algorithm'][k] for k in ['n_steps', 'max_steps']] - global_steps = 0 + global_steps, episode, df_results = 0, 0, [] reward_queue = deque(maxlen=2000) + memory_queue = deque(maxlen=self.cfg['algorithm'].get('keep_n_segments', 1)) while global_steps < max_steps: tm = MARLActorCriticMemory(self.n_agents) obs = env.reset() @@ -85,7 +87,8 @@ class BaseActorCritic: next_obs = next_obs if isinstance(done, bool): done = [done] * self.n_agents - tm.add(observation=obs, action=action, reward=reward, done=done) + tm.add(observation=obs, action=action, reward=reward, done=done, + logits=out.get('logits', None), values=out.get('critic', None)) obs = next_obs last_action = action last_hiddens = dict(hidden_actor=out.get('hidden_actor', None), @@ -94,9 +97,11 @@ class BaseActorCritic: if len(tm) >= n_steps or all(done): tm.add(observation=next_obs) + memory_queue.append(copy.deepcopy(tm)) if self.__training: with torch.inference_mode(False): - self.learn(tm) + tm_ = tm if memory_queue.maxlen <= 1 else list(memory_queue) + self.learn(tm_) tm.reset() tm.add(action=last_action, **last_hiddens) global_steps += 1 @@ -110,7 +115,13 @@ class BaseActorCritic: ]) if global_steps >= max_steps: break - print(f'reward at step: {global_steps} = {rew_log}') + print(f'reward at step: {episode} = {rew_log}') + episode += 1 + df_results.append([global_steps, rew_log]) + df_results = pd.DataFrame(df_results, columns=['steps', 'reward']) + if checkpointer is not None: + 
df_results.to_csv(checkpointer.path / 'results.csv', index=False) + return df_results @torch.inference_mode(True) def eval_loop(self, n_episodes, render=False): @@ -143,10 +154,21 @@ class BaseActorCritic: return results @staticmethod - def compute_advantages(critic, reward, done, gamma): - return (reward + gamma * (1.0 - done) * critic[:, 1:].detach()) - critic[:, :-1] + def compute_advantages(critic, reward, done, gamma, gae_coef=0.0): + tds = (reward + gamma * (1.0 - done) * critic[:, 1:].detach()) - critic[:, :-1] - def actor_critic(self, tm, network, gamma, entropy_coef, vf_coef, **kwargs): + if gae_coef <= 0: + return tds + + gae = torch.zeros_like(tds[:, -1]) + gaes = [] + for t in range(tds.shape[1]-1, -1, -1): + gae = tds[:, t] + gamma * gae_coef * (1.0 - done[:, t]) * gae + gaes.insert(0, gae) + gaes = torch.stack(gaes, dim=1) + return gaes + + def actor_critic(self, tm, network, gamma, entropy_coef, vf_coef, gae_coef=0.0, **kwargs): obs, actions, done, reward = tm.observation, tm.action, tm.done, tm.reward out = network(obs, actions, tm.hidden_actor, tm.hidden_critic) @@ -154,7 +176,7 @@ class BaseActorCritic: critic = out['critic'] entropy_loss = Categorical(logits=logits).entropy().mean(-1) - advantages = self.compute_advantages(critic, reward, done, gamma) + advantages = self.compute_advantages(critic, reward, done, gamma, gae_coef) value_loss = advantages.pow(2).mean(-1) # n_agent # policy loss @@ -163,7 +185,6 @@ class BaseActorCritic: a2c_loss = -(advantages.detach() * log_ap).mean(-1) # weighted loss loss = a2c_loss + vf_coef*value_loss - entropy_coef * entropy_loss - return loss.mean() def learn(self, tm: MARLActorCriticMemory, **kwargs): diff --git a/algorithms/marl/iac.py b/algorithms/marl/iac.py index a04668f..7d0c640 100644 --- a/algorithms/marl/iac.py +++ b/algorithms/marl/iac.py @@ -21,7 +21,6 @@ class LoopIAC(BaseActorCritic): def load_state_dict(self, path: Path): paths = natsorted(list(path.glob('*.pt'))) - print(list(paths)) for path, net in zip(paths, self.net): net.load_state_dict(torch.load(path)) diff --git a/algorithms/marl/mappo.py b/algorithms/marl/mappo.py new file mode 100644 index 0000000..6719c47 --- /dev/null +++ b/algorithms/marl/mappo.py @@ -0,0 +1,78 @@ +from algorithms.marl import LoopSNAC +from algorithms.marl.memory import MARLActorCriticMemory +from typing import List +import random +import torch +from torch.distributions import Categorical + + +class LoopMAPPO(LoopSNAC): + def __init__(self, *args, **kwargs): + super(LoopMAPPO, self).__init__(*args, **kwargs) + + def build_batch(self, tm: List[MARLActorCriticMemory]): + sample = random.choices(tm, k=self.cfg['algorithm']['batch_size']-1) + sample.append(tm[-1]) # always use latest segment in batch + + obs = torch.cat([s.observation for s in sample], 0) + actions = torch.cat([s.action for s in sample], 0) + hidden_actor = torch.cat([s.hidden_actor for s in sample], 0) + hidden_critic = torch.cat([s.hidden_critic for s in sample], 0) + logits = torch.cat([s.logits for s in sample], 0) + values = torch.cat([s.values for s in sample], 0) + reward = torch.cat([s.reward for s in sample], 0) + done = torch.cat([s.done for s in sample], 0) + + + log_props = torch.log_softmax(logits, -1) + log_props = torch.gather(log_props, index=actions[:, 1:].unsqueeze(-1), dim=-1).squeeze() + + return obs, actions, hidden_actor, hidden_critic, log_props, values, reward, done + + def learn(self, tm: List[MARLActorCriticMemory], **kwargs): + if len(tm) >= self.cfg['algorithm']['keep_n_segments']: + # only learn 
when buffer is full + for batch_i in range(self.cfg['algorithm']['n_updates']): + loss = self.actor_critic(tm, self.net, **self.cfg['algorithm'], **kwargs) + self.optimizer.zero_grad() + loss.backward() + torch.nn.utils.clip_grad_norm_(self.net.parameters(), 0.5) + self.optimizer.step() + + def monte_carlo_returns(self, rewards, done, gamma): + rewards_ = [] + discounted_reward = torch.zeros_like(rewards[:, -1]) + for t in range(rewards.shape[1]-1, -1, -1): + discounted_reward = rewards[:, t] + (gamma * (1.0 - done[:, t]) * discounted_reward) + rewards_.insert(0, discounted_reward) + rewards_ = torch.stack(rewards_, dim=1) + return rewards_ + + def actor_critic(self, tm, network, gamma, entropy_coef, vf_coef, clip_range, gae_coef=0.0, **kwargs): + obs, actions, hidden_actor, hidden_critic, old_log_probs, old_critic, reward, done = self.build_batch(tm) + + out = network(obs, actions, hidden_actor, hidden_critic) + logits = out['logits'][:, :-1] # last one only needed for v_{t+1} + critic = out['critic'] + + # monte carlo returns + mc_returns = self.monte_carlo_returns(reward, done, gamma) + # monte_carlo_returns = (mc_returns - mc_returns.mean()) / (mc_returns.std() + 1e-7) todo: norm across agents? + advantages = mc_returns - critic[:, :-1] + + # policy loss + log_ap = torch.log_softmax(logits, -1) + log_ap = torch.gather(log_ap, dim=-1, index=actions[:, 1:].unsqueeze(-1)).squeeze() + ratio = (log_ap - old_log_probs).exp() + surr1 = ratio * advantages.detach() + surr2 = torch.clamp(ratio, 1 - clip_range, 1 + clip_range) * advantages.detach() + policy_loss = -torch.min(surr1, surr2).mean(-1) + + # entropy & value loss + entropy_loss = Categorical(logits=logits).entropy().mean(-1) + value_loss = advantages.pow(2).mean(-1) # n_agent + + # weighted loss + loss = policy_loss + vf_coef*value_loss - entropy_coef * entropy_loss + + return loss.mean() diff --git a/algorithms/marl/memory.py b/algorithms/marl/memory.py index 2b15e07..71dcad7 100644 --- a/algorithms/marl/memory.py +++ b/algorithms/marl/memory.py @@ -13,14 +13,16 @@ class ActorCriticMemory(object): self.__actions = [] self.__rewards = [] self.__dones = [] - self.__hiddens_actor = [] + self.__hiddens_actor = [] self.__hiddens_critic = [] + self.__logits = [] + self.__values = [] def __len__(self): return len(self.__states) @property - def observation(self): + def observation(self): # add time dimension through stacking return torch.stack(self.__states, 0).unsqueeze(0) # 1 x timesteps x hidden dim @property @@ -47,6 +49,14 @@ class ActorCriticMemory(object): def done(self): return torch.tensor(self.__dones).float().unsqueeze(0) # 1 x timesteps + @property + def logits(self): # assumes a trailing 1 for time dimension - common when using output from NN + return torch.cat(self.__logits, 0).unsqueeze(0) # 1 x timesteps x actions + + @property + def values(self): + return torch.cat(self.__values, 0).unsqueeze(0) # 1 x timesteps x actions + def add_observation(self, state: Union[Tensor, np.ndarray]): self.__states.append(state if isinstance(state, Tensor) else torch.from_numpy(state)) @@ -69,6 +79,12 @@ class ActorCriticMemory(object): def add_done(self, done: bool): self.__dones.append(done) + def add_logits(self, logits: Tensor): + self.__logits.append(logits) + + def add_values(self, logits: Tensor): + self.__values.append(logits) + def add(self, **kwargs): for k, v in kwargs.items(): func = getattr(ActorCriticMemory, f'add_{k}') @@ -129,3 +145,14 @@ class MARLActorCriticMemory(object): all_hc = [mem.hidden_critic for mem in self.memories] 
return torch.cat(all_hc, 0) # agents x layers x timesteps x hidden dim + @property + def logits(self): + all_lgts = [mem.logits for mem in self.memories] + return torch.cat(all_lgts, 0) # agents x layers x timesteps x hidden dim + + @property + def values(self): + all_vals = [mem.values for mem in self.memories] + return torch.cat(all_vals, 0) # agents x layers x timesteps x hidden dim + + diff --git a/algorithms/marl/networks.py b/algorithms/marl/networks.py index a60f9c4..c4fdb72 100644 --- a/algorithms/marl/networks.py +++ b/algorithms/marl/networks.py @@ -12,6 +12,7 @@ class RecurrentAC(nn.Module): super(RecurrentAC, self).__init__() observation_size = np.prod(observation_size) self.n_layers = 1 + self.n_actions = n_actions self.use_agent_embedding = use_agent_embedding self.hidden_size_actor = hidden_size_actor self.hidden_size_critic = hidden_size_critic @@ -25,13 +26,14 @@ class RecurrentAC(nn.Module): nn.Tanh(), nn.Linear(obs_emb_size, obs_emb_size) ) - self.gru_actor = nn.GRU(obs_emb_size, hidden_size_actor, batch_first=True, num_layers=self.n_layers) + self.gru_actor = nn.GRU(obs_emb_size, hidden_size_actor, batch_first=True, num_layers=self.n_layers) self.gru_critic = nn.GRU(obs_emb_size, hidden_size_critic, batch_first=True, num_layers=self.n_layers) self.action_head = nn.Sequential( - spectral_norm(nn.Linear(hidden_size_actor, hidden_size_actor)), + nn.Linear(hidden_size_actor, hidden_size_actor), nn.Tanh(), nn.Linear(hidden_size_actor, n_actions) ) + # spectral_norm(nn.Linear(hidden_size_actor, hidden_size_actor)), self.critic_head = nn.Sequential( nn.Linear(hidden_size_critic, hidden_size_critic), nn.Tanh(), @@ -50,12 +52,14 @@ class RecurrentAC(nn.Module): n_agents, t, *_ = observations.shape obs_emb = self.obs_proj(observations.view(n_agents, t, -1).float()) action_emb = self.action_emb(actions+1) # shift by one due to padding idx - agent_emb = self.agent_emb( - torch.cat([torch.arange(0, n_agents, 1).view(-1, 1)]*t, 1) - ) - x_t = torch.cat((obs_emb, action_emb), -1) \ - if not self.use_agent_embedding else torch.cat((obs_emb, agent_emb, action_emb), -1) + if not self.use_agent_embedding: + x_t = torch.cat((obs_emb, action_emb), -1) + else: + agent_emb = self.agent_emb( + torch.cat([torch.arange(0, n_agents, 1).view(-1, 1)] * t, 1) + ) + x_t = torch.cat((obs_emb, agent_emb, action_emb), -1) mixed_x_t = self.mix(x_t) output_p, _ = self.gru_actor(input=mixed_x_t, hx=hidden_actor.swapaxes(1, 0)) @@ -66,6 +70,15 @@ class RecurrentAC(nn.Module): return dict(logits=logits, critic=critic, hidden_actor=output_p, hidden_critic=output_c) +class RecurrentACL2(RecurrentAC): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.action_head = nn.Sequential( + nn.Linear(self.hidden_size_actor, self.hidden_size_actor), + nn.Tanh(), + NormalizedLinear(self.hidden_size_actor, self.n_actions, trainable_magnitude=True) + ) + class NormalizedLinear(nn.Linear): def __init__(self, in_features: int, out_features: int, diff --git a/algorithms/marl/seac.py b/algorithms/marl/seac.py index 5ed9a24..5b33b0a 100644 --- a/algorithms/marl/seac.py +++ b/algorithms/marl/seac.py @@ -8,7 +8,7 @@ class LoopSEAC(LoopIAC): def __init__(self, cfg): super(LoopSEAC, self).__init__(cfg) - def actor_critic(self, tm, networks, gamma, entropy_coef, vf_coef, **kwargs): + def actor_critic(self, tm, networks, gamma, entropy_coef, vf_coef, gae_coef=0.0, **kwargs): obs, actions, done, reward = tm.observation, tm.action, tm.done, tm.reward outputs = [net(obs, actions, tm.hidden_actor, 
tm.hidden_critic) for net in networks] @@ -26,7 +26,7 @@ class LoopSEAC(LoopIAC): critic = out['critic'] entropy_loss = Categorical(logits=logits[ag_i]).entropy().mean() - advantages = self.compute_advantages(critic, reward, done, gamma) + advantages = self.compute_advantages(critic, reward, done, gamma, gae_coef) # policy loss log_ap = torch.log_softmax(logits, -1) diff --git a/requirements.txt b/requirements.txt index 24d52cf..fe4eb9a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,12 +1,14 @@ numpy scipy tqdm +pandas seaborn>=0.11.1 -matplotlib>=3.4.1 +matplotlib>=3.3.4 stable-baselines3>=1.0 pygame>=2.1.0 gym>=0.18.0 networkx>=2.6.3 simplejson>=3.17.5 PyYAML>=6.0 -einops \ No newline at end of file +einops +natsort \ No newline at end of file diff --git a/studies/normalization_study.py b/studies/normalization_study.py index ccfb67c..e8e4d14 100644 --- a/studies/normalization_study.py +++ b/studies/normalization_study.py @@ -1,14 +1,13 @@ from algorithms.utils import Checkpointer from pathlib import Path from algorithms.utils import load_yaml_file, add_env_props, instantiate_class, load_class -from algorithms.marl import LoopSNAC, LoopIAC, LoopSEAC +#from algorithms.marl import LoopSNAC, LoopIAC, LoopSEAC -#study_root = Path(__file__).parent / 'curious_study' -study_root = Path('/Users/romue/PycharmProjects/EDYS/algorithms/marl') for i in range(0, 5): - for name in ['example_config']: + for name in ['mappo']:#['seac', 'iac', 'snac']: + study_root = Path(__file__).parent / name cfg = load_yaml_file(study_root / f'{name}.yaml') add_env_props(cfg) @@ -17,7 +16,7 @@ for i in range(0, 5): max_steps = cfg['algorithm']['max_steps'] n_steps = cfg['algorithm']['n_steps'] - checkpointer = Checkpointer(f'{name}#{i}', study_root, cfg, max_steps, 250) + checkpointer = Checkpointer(f'{name}#{i}', study_root, cfg, max_steps, 50) loop = load_class(cfg['method'])(cfg) df = loop.train_loop(checkpointer) diff --git a/studies/playground_file.py b/studies/playground_file.py index e32f60a..b58c82b 100644 --- a/studies/playground_file.py +++ b/studies/playground_file.py @@ -1,32 +1,22 @@ -import numpy as np import pandas as pd from pathlib import Path import matplotlib.pyplot as plt import seaborn as sns -study_root = Path(__file__).parent / 'entropy_study' -names_all = ['basic_gru', 'layernorm_gru', 'spectralnorm_gru', 'nonorm_gru'] -names_only_1 = ['L2OnlyAh_gru', 'L2OnlyChAh_gru', 'L2OnlyMix_gru', 'basic_gru'] -names_only_2 = ['L2NoCh_gru', 'L2NoAh_gru', 'nomix_gru', 'basic_gru'] -names = names_only_2 -#names = ['nonorm_gru'] -# /Users/romue/PycharmProjects/EDYS/studies/normalization_study/basic_gru#3 -csvs = [] -for name in ['basic_gru', 'nonorm_gru', 'spectralnorm_gru']: - for run in range(0, 1): +dfs = [] +for name in ['l2snac', 'iac', 'snac', 'seac']: + for c in range(5): try: - df = pd.read_csv(study_root / f'{name}#{run}' / 'results.csv') - df = df[df.agent == 'sum'] - df = df.groupby(['checkpoint', 'run']).mean().reset_index() - df['method'] = name - df['run_'] = run - - df.reward = df.reward.rolling(15).mean() - csvs.append(df) + study_root = Path(__file__).parent / name / f'{name}#{c}' + df = pd.read_csv(study_root / 'results.csv', index_col=False) + df.reward = df.reward.rolling(100).mean() + df['method'] = name.upper() + dfs.append(df) except Exception as e: - print(f'skipped {run}\t {name}') + pass -csvs = pd.concat(csvs).rename(columns={"checkpoint": "steps*2e3", "B": "c"}) -sns.lineplot(data=csvs, x='steps*2e3', y='reward', hue='method', palette='husl', ci='sd', linewidth=1.8) 
-plt.savefig('entropy.png') \ No newline at end of file +df = pd.concat(dfs).reset_index() +sns.lineplot(data=df, x='episode', y='reward', hue='method', palette='husl', ci='sd', linewidth=1.5) +plt.savefig('study.png') +print('saved image') \ No newline at end of file diff --git a/studies/viz_policy.py b/studies/viz_policy.py index b8ffd78..a3efd3e 100644 --- a/studies/viz_policy.py +++ b/studies/viz_policy.py @@ -3,19 +3,21 @@ from algorithms.marl import LoopSNAC, LoopIAC, LoopSEAC from pathlib import Path from algorithms.utils import load_yaml_file from tqdm import trange -study = 'curious_study' -study_root = Path(__file__).parent / study +study = 'example_config#0' +#study_root = Path(__file__).parent / study +study_root = Path('/Users/romue/PycharmProjects/EDYS/algorithms/marl/') #['L2NoAh_gru', 'L2NoCh_gru', 'nomix_gru']: render = True eval_eps = 3 for run in range(0, 5): - for name in ['basic_gru']:#['L2OnlyAh_gru', 'L2OnlyChAh_gru', 'L2OnlyMix_gru']: #['layernorm_gru', 'basic_gru', 'nonorm_gru', 'spectralnorm_gru']: - cfg = load_yaml_file(Path(__file__).parent / study / f'{name}.yaml') - p_root = Path(study_root / f'{name}#{run}') + for name in ['example_config']:#['L2OnlyAh_gru', 'L2OnlyChAh_gru', 'L2OnlyMix_gru']: #['layernorm_gru', 'basic_gru', 'nonorm_gru', 'spectralnorm_gru']: + cfg = load_yaml_file(study_root / study / 'config.yaml') + #p_root = Path(study_root / study / f'{name}#{run}') dfs = [] for i in trange(500): - path = p_root / f'checkpoint_{i}' + path = study_root / study / f'checkpoint_{161}' + print(path) snac = LoopSEAC(cfg) snac.load_state_dict(path)
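
The update rule this patch introduces in algorithms/marl/mappo.py combines Monte Carlo returns, a PPO-style clipped surrogate on the ratio between current and stored log-probabilities, a squared-error value loss, and an entropy bonus, all computed per agent and then averaged. The standalone sketch below reproduces that computation outside the repository as a reading aid; tensor shapes (n_agents x timesteps x ...) follow the comments in memory.py, the function and variable names are illustrative only, and the default coefficients are placeholders rather than values taken from any config shipped with this patch.

# Minimal sketch of the MAPPO loss used by LoopMAPPO.actor_critic (illustrative,
# not the repository's API). Only torch is required.
import torch
from torch.distributions import Categorical


def monte_carlo_returns(rewards, done, gamma):
    # discount backwards through time, resetting at episode boundaries
    returns, discounted = [], torch.zeros_like(rewards[:, -1])
    for t in range(rewards.shape[1] - 1, -1, -1):
        discounted = rewards[:, t] + gamma * (1.0 - done[:, t]) * discounted
        returns.insert(0, discounted)
    return torch.stack(returns, dim=1)                        # n_agents x T


def mappo_loss(logits, old_log_probs, values, actions, rewards, done,
               gamma=0.99, clip_range=0.2, vf_coef=0.5, entropy_coef=0.01):
    # logits/values come from the current network; old_log_probs were recorded
    # at collection time (cf. the logits/values fields added to the memory)
    returns = monte_carlo_returns(rewards, done, gamma)
    advantages = returns - values                             # n_agents x T

    log_probs = torch.log_softmax(logits, -1)
    log_probs = torch.gather(log_probs, -1, actions.unsqueeze(-1)).squeeze(-1)

    ratio = (log_probs - old_log_probs).exp()
    surr1 = ratio * advantages.detach()
    surr2 = torch.clamp(ratio, 1 - clip_range, 1 + clip_range) * advantages.detach()
    policy_loss = -torch.min(surr1, surr2).mean(-1)           # one value per agent

    value_loss = advantages.pow(2).mean(-1)
    entropy = Categorical(logits=logits).entropy().mean(-1)

    return (policy_loss + vf_coef * value_loss - entropy_coef * entropy).mean()


if __name__ == '__main__':
    n_agents, t, n_actions = 2, 10, 5
    logits = torch.randn(n_agents, t, n_actions, requires_grad=True)
    values = torch.randn(n_agents, t, requires_grad=True)
    actions = torch.randint(0, n_actions, (n_agents, t))
    old_log_probs = torch.log_softmax(torch.randn(n_agents, t, n_actions), -1) \
                         .gather(-1, actions.unsqueeze(-1)).squeeze(-1)
    rewards, done = torch.rand(n_agents, t), torch.zeros(n_agents, t)
    mappo_loss(logits, old_log_probs, values, actions, rewards, done).backward()

The logits and values stored in the extended memory are what make the ratio well-defined: they record the behaviour policy at collection time, so the network can be updated n_updates times on the same replayed segments while the clipping keeps each step bounded.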