Resolved some warnings and style issues

2026-07-15 23:31:52 +02:00 · 2023-11-10 09:29:54 +01:00
parent a9462a8b6f
commit 6711a0976b
64 changed files with 331 additions and 361 deletions
@@ -36,7 +36,7 @@ class LoopMAPPO(LoopSNAC):
        rewards_ = torch.stack(rewards_, dim=1)
        return rewards_

-    def mappo(self, batch, network, gamma, entropy_coef, vf_coef, clip_range, **kwargs):
+    def mappo(self, batch, network, gamma, entropy_coef, vf_coef, clip_range, **__):
        out = network(batch[nms.OBSERVATION], batch[nms.ACTION], batch[nms.HIDDEN_ACTOR], batch[nms.HIDDEN_CRITIC])
        logits = out[nms.LOGITS][:, :-1]  # last one only needed for v_{t+1}

@@ -45,7 +45,7 @@ class LoopMAPPO(LoopSNAC):

        # monte carlo returns
        mc_returns = self.monte_carlo_returns(batch[nms.REWARD], batch[nms.DONE], gamma)
-        mc_returns = (mc_returns - mc_returns.mean()) / (mc_returns.std() + 1e-8) #todo: norm across agent ok?
+        mc_returns = (mc_returns - mc_returns.mean()) / (mc_returns.std() + 1e-8)  # todo: norm across agent ok?
        advantages =  mc_returns - out[nms.CRITIC][:, :-1]

        # policy loss