mirror of
https://github.com/illiumst/marl-factory-grid.git
synced 2025-06-21 11:21:35 +02:00
added running marl a2c
This commit is contained in:
@ -13,16 +13,18 @@ from algorithms.utils import (
|
||||
add_env_props,
|
||||
load_yaml_file,
|
||||
CombineActionsAgent,
|
||||
AutoResetGymMultiAgent
|
||||
AutoResetGymMultiAgent,
|
||||
access_str,
|
||||
AGENT_PREFIX, REWARD, CUMU_REWARD, OBS, SEP
|
||||
)
|
||||
|
||||
|
||||
class A2CAgent(TAgent):
|
||||
def __init__(self, observation_size, hidden_size, n_actions, agent_id=-1, marl=False):
|
||||
def __init__(self, observation_size, hidden_size, n_actions, agent_id):
|
||||
super().__init__()
|
||||
observation_size = np.prod(observation_size)
|
||||
print(observation_size)
|
||||
self.agent_id = agent_id
|
||||
self.marl = marl
|
||||
self.model = nn.Sequential(
|
||||
nn.Flatten(),
|
||||
nn.Linear(observation_size, hidden_size),
|
||||
@ -36,10 +38,7 @@ class A2CAgent(TAgent):
|
||||
self.critic_head = nn.Linear(hidden_size, 1)
|
||||
|
||||
def get_obs(self, t):
|
||||
observation = self.get(("env/env_obs", t))
|
||||
print(observation.shape)
|
||||
if self.marl:
|
||||
observation = observation[self.agent_id]
|
||||
observation = self.get((f'env/{access_str(self.agent_id, OBS)}', t))
|
||||
return observation
|
||||
|
||||
def forward(self, t, stochastic, **kwargs):
|
||||
@ -52,10 +51,9 @@ class A2CAgent(TAgent):
|
||||
action = torch.distributions.Categorical(probs).sample()
|
||||
else:
|
||||
action = probs.argmax(1)
|
||||
agent_str = f'agent{self.agent_id}_'
|
||||
self.set((f'{agent_str}action', t), action)
|
||||
self.set((f'{agent_str}action_probs', t), probs)
|
||||
self.set((f'{agent_str}critic', t), critic)
|
||||
self.set((f'{access_str(self.agent_id, "action")}', t), action)
|
||||
self.set((f'{access_str(self.agent_id, "action_probs")}', t), probs)
|
||||
self.set((f'{access_str(self.agent_id, "critic")}', t), critic)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
@ -73,13 +71,11 @@ if __name__ == '__main__':
|
||||
env_agent = AutoResetGymMultiAgent(
|
||||
get_class(cfg['env']),
|
||||
get_arguments(cfg['env']),
|
||||
n_envs=1,
|
||||
n_agents=n_agents
|
||||
n_envs=1
|
||||
)
|
||||
|
||||
a2c_agents = [instantiate_class({**cfg['agent'],
|
||||
'agent_id': agent_id,
|
||||
'marl': n_agents > 1})
|
||||
'agent_id': agent_id})
|
||||
for agent_id in range(n_agents)]
|
||||
|
||||
# combine agents
|
||||
@ -105,11 +101,12 @@ if __name__ == '__main__':
|
||||
|
||||
for agent_id in range(n_agents):
|
||||
critic, done, action_probs, reward, action = workspace[
|
||||
f"agent{agent_id}_critic", "env/done",
|
||||
f'agent{agent_id}_action_probs', "env/reward",
|
||||
f"agent{agent_id}_action"
|
||||
access_str(agent_id, 'critic'),
|
||||
"env/done",
|
||||
access_str(agent_id, 'action_probs'),
|
||||
access_str(agent_id, 'reward', 'env/'),
|
||||
access_str(agent_id, 'action')
|
||||
]
|
||||
reward = reward[agent_id]
|
||||
td = gae(critic, reward, done, 0.98, 0.25)
|
||||
td_error = td ** 2
|
||||
critic_loss = td_error.mean()
|
||||
@ -129,13 +126,14 @@ if __name__ == '__main__':
|
||||
optimizer.step()
|
||||
|
||||
# Compute the cumulated reward on final_state
|
||||
creward = workspace["env/cumulated_reward"]#[agent_id].unsqueeze(-1)
|
||||
print(creward.shape, done.shape)
|
||||
creward = creward[done]
|
||||
if creward.size()[0] > 0:
|
||||
cum_r = creward.mean().item()
|
||||
if cum_r > best:
|
||||
# torch.save(a2c_agent.state_dict(), Path(__file__).parent / f'agent_{uid}.pt')
|
||||
best = cum_r
|
||||
pbar.set_description(f"Cum. r: {cum_r:.2f}, Best r. so far: {best:.2f}", refresh=True)
|
||||
rews = ''
|
||||
for agent_i in range(n_agents):
|
||||
creward = workspace['env/'+access_str(agent_i, CUMU_REWARD)]
|
||||
creward = creward[done]
|
||||
if creward.size()[0] > 0:
|
||||
rews += f'{AGENT_PREFIX}{agent_i}: {creward.mean().item():.2f} | '
|
||||
"""if cum_r > best:
|
||||
torch.save(a2c_agent.state_dict(), Path(__file__).parent / f'agent_{uid}.pt')
|
||||
best = cum_r"""
|
||||
pbar.set_description(rews, refresh=True)
|
||||
|
||||
|
Reference in New Issue
Block a user