Mirror of https://github.com/illiumst/marl-factory-grid.git, synced 2025-06-21 11:21:35 +02:00
First commit for our new MARL algorithms library; contains working implementations of IAC, SNAC and SEAC.
24 studies/normalization_study.py Normal file
@@ -0,0 +1,24 @@
from algorithms.utils import Checkpointer
from pathlib import Path
from algorithms.utils import load_yaml_file, add_env_props, instantiate_class, load_class
from algorithms.marl import LoopSNAC, LoopIAC, LoopSEAC


#study_root = Path(__file__).parent / 'curious_study'
study_root = Path('/Users/romue/PycharmProjects/EDYS/algorithms/marl')

for i in range(0, 5):
    for name in ['example_config']:
        cfg = load_yaml_file(study_root / f'{name}.yaml')
        add_env_props(cfg)

        env = instantiate_class(cfg['env'])
        net = instantiate_class(cfg['agent'])
        max_steps = cfg['algorithm']['max_steps']
        n_steps = cfg['algorithm']['n_steps']

        checkpointer = Checkpointer(f'{name}#{i}', study_root, cfg, max_steps, 250)

        loop = load_class(cfg['method'])(cfg)
        df = loop.train_loop(checkpointer)
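For orientation, a minimal sketch of the dict that load_yaml_file(study_root / 'example_config.yaml') would have to return for the script above to run. Only the top-level keys (method, env, agent, and algorithm with max_steps and n_steps) are grounded in the script itself; the concrete classnames and numbers below are illustrative assumptions, not values from the repository.

# Hypothetical shape of the loaded config; only the key layout is inferred from the script above.
example_cfg = {
    'method': 'algorithms.marl.LoopSEAC',                   # consumed by load_class(cfg['method'])(cfg)
    'env': {'classname': 'environments.factory.make'},      # passed to instantiate_class(cfg['env'])
    'agent': {'classname': 'algorithms.marl.RecurrentAC'},  # assumed agent class, passed to instantiate_class(cfg['agent'])
    'algorithm': {
        'max_steps': 260_000,  # read as cfg['algorithm']['max_steps'] and forwarded to Checkpointer
        'n_steps': 5,          # read as cfg['algorithm']['n_steps']
    },
}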
32 studies/playground_file.py Normal file
@@ -0,0 +1,32 @@
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns

study_root = Path(__file__).parent / 'entropy_study'
names_all = ['basic_gru', 'layernorm_gru', 'spectralnorm_gru', 'nonorm_gru']
names_only_1 = ['L2OnlyAh_gru', 'L2OnlyChAh_gru', 'L2OnlyMix_gru', 'basic_gru']
names_only_2 = ['L2NoCh_gru', 'L2NoAh_gru', 'nomix_gru', 'basic_gru']

names = names_only_2
#names = ['nonorm_gru']
# /Users/romue/PycharmProjects/EDYS/studies/normalization_study/basic_gru#3
csvs = []
for name in ['basic_gru', 'nonorm_gru', 'spectralnorm_gru']:
    for run in range(0, 1):
        try:
            df = pd.read_csv(study_root / f'{name}#{run}' / 'results.csv')
            df = df[df.agent == 'sum']
            df = df.groupby(['checkpoint', 'run']).mean().reset_index()
            df['method'] = name
            df['run_'] = run

            df.reward = df.reward.rolling(15).mean()
            csvs.append(df)
        except Exception as e:
            print(f'skipped {run}\t {name}')

csvs = pd.concat(csvs).rename(columns={"checkpoint": "steps*2e3", "B": "c"})
sns.lineplot(data=csvs, x='steps*2e3', y='reward', hue='method', palette='husl', ci='sd', linewidth=1.8)
plt.savefig('entropy.png')
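This plot expects one results.csv per '{name}#{run}' directory with at least the columns agent, checkpoint, run and reward (inferred from the filtering, groupby and lineplot calls above); studies/viz_policy.py further down is what actually writes those files. As a hedged, self-contained sketch, the snippet below writes a synthetic results.csv in that layout so the plotting script can be smoke-tested without trained runs; every value in it is made up.

# Hypothetical smoke test: create a fake results.csv in the directory layout the script above reads.
import numpy as np
import pandas as pd
from pathlib import Path

out_dir = Path(__file__).parent / 'entropy_study' / 'basic_gru#0'
out_dir.mkdir(parents=True, exist_ok=True)

rng = np.random.default_rng(0)
fake = pd.DataFrame({
    'agent': ['sum'] * 500,                    # only rows with agent == 'sum' are kept
    'checkpoint': np.arange(500),              # renamed to 'steps*2e3' before plotting
    'run': 0,
    'reward': rng.normal(size=500).cumsum(),   # arbitrary synthetic reward curve
})
fake.to_csv(out_dir / 'results.csv', index=False)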
@@ -1,139 +0,0 @@
from salina.agents.gyma import AutoResetGymAgent
from salina.agents import Agents, TemporalAgent
from salina.rl.functional import _index, gae
import torch
import torch.nn as nn
from torch.distributions import Categorical
from salina import TAgent, Workspace, get_arguments, get_class, instantiate_class
from pathlib import Path
import numpy as np
from tqdm import tqdm
import time
from algorithms.utils import (
    add_env_props,
    load_yaml_file,
    CombineActionsAgent,
    AutoResetGymMultiAgent,
    access_str,
    AGENT_PREFIX, REWARD, CUMU_REWARD, OBS, SEP
)


class A2CAgent(TAgent):
    def __init__(self, observation_size, hidden_size, n_actions, agent_id):
        super().__init__()
        observation_size = np.prod(observation_size)
        print(observation_size)
        self.agent_id = agent_id
        self.model = nn.Sequential(
            nn.Flatten(),
            nn.Linear(observation_size, hidden_size),
            nn.ELU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ELU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ELU()
        )
        self.action_head = nn.Linear(hidden_size, n_actions)
        self.critic_head = nn.Linear(hidden_size, 1)

    def get_obs(self, t):
        observation = self.get((f'env/{access_str(self.agent_id, OBS)}', t))
        return observation

    def forward(self, t, stochastic, **kwargs):
        observation = self.get_obs(t)
        features = self.model(observation)
        scores = self.action_head(features)
        probs = torch.softmax(scores, dim=-1)
        critic = self.critic_head(features).squeeze(-1)
        if stochastic:
            action = torch.distributions.Categorical(probs).sample()
        else:
            action = probs.argmax(1)
        self.set((f'{access_str(self.agent_id, "action")}', t), action)
        self.set((f'{access_str(self.agent_id, "action_probs")}', t), probs)
        self.set((f'{access_str(self.agent_id, "critic")}', t), critic)


if __name__ == '__main__':
    # Setup workspace
    uid = time.time()
    workspace = Workspace()
    n_agents = 2

    # load config
    cfg = load_yaml_file(Path(__file__).parent / 'sat_mad.yaml')
    add_env_props(cfg)
    cfg['env'].update({'n_agents': n_agents})

    # instantiate agent and env
    env_agent = AutoResetGymMultiAgent(
        get_class(cfg['env']),
        get_arguments(cfg['env']),
        n_envs=1
    )

    a2c_agents = [instantiate_class({**cfg['agent'],
                                     'agent_id': agent_id})
                  for agent_id in range(n_agents)]

    # combine agents
    acquisition_agent = TemporalAgent(Agents(env_agent, *a2c_agents, CombineActionsAgent()))
    acquisition_agent.seed(69)

    # optimizers & other parameters
    cfg_optim = cfg['algorithm']['optimizer']
    optimizers = [get_class(cfg_optim)(a2c_agent.parameters(), **get_arguments(cfg_optim))
                  for a2c_agent in a2c_agents]
    n_timesteps = cfg['algorithm']['n_timesteps']

    # Decision making loop
    best = -float('inf')
    with tqdm(range(int(cfg['algorithm']['max_epochs'] / n_timesteps))) as pbar:
        for epoch in pbar:
            workspace.zero_grad()
            if epoch > 0:
                workspace.copy_n_last_steps(1)
                acquisition_agent(workspace, t=1, n_steps=n_timesteps-1, stochastic=True)
            else:
                acquisition_agent(workspace, t=0, n_steps=n_timesteps, stochastic=True)

            for agent_id in range(n_agents):
                critic, done, action_probs, reward, action = workspace[
                    access_str(agent_id, 'critic'),
                    "env/done",
                    access_str(agent_id, 'action_probs'),
                    access_str(agent_id, 'reward', 'env/'),
                    access_str(agent_id, 'action')
                ]
                td = gae(critic, reward, done, 0.98, 0.25)
                td_error = td ** 2
                critic_loss = td_error.mean()
                entropy_loss = Categorical(action_probs).entropy().mean()
                action_logp = _index(action_probs, action).log()
                a2c_loss = action_logp[:-1] * td.detach()
                a2c_loss = a2c_loss.mean()
                loss = (
                    -0.001 * entropy_loss
                    + 1.0 * critic_loss
                    - 0.1 * a2c_loss
                )
                optimizer = optimizers[agent_id]
                optimizer.zero_grad()
                loss.backward()
                #torch.nn.utils.clip_grad_norm_(a2c_agents[agent_id].parameters(), .5)
                optimizer.step()

            # Compute the cumulated reward on final_state
            rews = ''
            for agent_i in range(n_agents):
                creward = workspace['env/'+access_str(agent_i, CUMU_REWARD)]
                creward = creward[done]
                if creward.size()[0] > 0:
                    rews += f'{AGENT_PREFIX}{agent_i}: {creward.mean().item():.2f} | '
                """if cum_r > best:
                    torch.save(a2c_agent.state_dict(), Path(__file__).parent / f'agent_{uid}.pt')
                    best = cum_r"""
            pbar.set_description(rews, refresh=True)
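The removed script above combines three loss terms with fixed weights (entropy -0.001, critic 1.0, policy -0.1). Below is a tiny self-contained sketch of just that weighting on dummy tensors in plain torch; the salina helpers gae and _index are not re-implemented here, and the advantages are stand-in random values rather than real GAE estimates.

# Hypothetical illustration of the weighted A2C loss from the deleted script, on dummy data.
import torch
from torch.distributions import Categorical

T, n_actions = 10, 10
probs = torch.softmax(torch.randn(T, n_actions), dim=-1)   # per-step action distributions
action = probs.argmax(-1)                                   # dummy greedy actions
td = torch.randn(T)                                         # stand-in for the GAE advantages

critic_loss = (td ** 2).mean()                                          # squared advantage/TD term
entropy_loss = Categorical(probs).entropy().mean()                      # mean policy entropy
action_logp = probs.gather(-1, action.unsqueeze(-1)).squeeze(-1).log()  # log-prob of taken actions
a2c_loss = (action_logp[:-1] * td[:-1].detach()).mean()                 # policy-gradient term

loss = -0.001 * entropy_loss + 1.0 * critic_loss - 0.1 * a2c_loss       # same weighting as above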
@@ -1,27 +0,0 @@
agent:
  classname: studies.sat_mad.A2CAgent
  observation_size: 4*5*5
  hidden_size: 128
  n_actions: 10

env:
  classname: environments.factory.make
  env_name: "DirtyFactory-v0"
  n_agents: 1
  pomdp_r: 2
  max_steps: 400
  stack_n_frames: 3
  individual_rewards: True

algorithm:
  max_epochs: 1000000
  n_envs: 1
  n_timesteps: 10
  discount_factor: 0.99
  entropy_coef: 0.01
  critic_coef: 1.0
  gae: 0.25
  optimizer:
    classname: torch.optim.Adam
    lr: 0.0003
    weight_decay: 0.0
34 studies/viz_policy.py Normal file
@@ -0,0 +1,34 @@
import pandas as pd
from algorithms.marl import LoopSNAC, LoopIAC, LoopSEAC
from pathlib import Path
from algorithms.utils import load_yaml_file
from tqdm import trange
study = 'curious_study'
study_root = Path(__file__).parent / study

#['L2NoAh_gru', 'L2NoCh_gru', 'nomix_gru']:
render = True
eval_eps = 3
for run in range(0, 5):
    for name in ['basic_gru']:  #['L2OnlyAh_gru', 'L2OnlyChAh_gru', 'L2OnlyMix_gru']: #['layernorm_gru', 'basic_gru', 'nonorm_gru', 'spectralnorm_gru']:
        cfg = load_yaml_file(Path(__file__).parent / study / f'{name}.yaml')
        p_root = Path(study_root / f'{name}#{run}')
        dfs = []
        for i in trange(500):
            path = p_root / f'checkpoint_{i}'

            snac = LoopSEAC(cfg)
            snac.load_state_dict(path)
            snac.eval()

            df = snac.eval_loop(render=render, n_episodes=eval_eps)
            df['checkpoint'] = i
            dfs.append(df)

        results = pd.concat(dfs)
        results['run'] = run
        results.to_csv(p_root / 'results.csv', index=False)

        #sns.lineplot(data=results, x='checkpoint', y='reward', hue='agent', palette='husl')

        #plt.savefig(f'{experiment_name}.png')
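The evaluation loop above assumes exactly 500 checkpoints per run. Below is a hedged alternative sketch that enumerates whichever checkpoint_* directories actually exist under a '{name}#{run}' folder instead; only the directory naming is taken from the script above, and the concrete study and run used here are assumptions.

# Hypothetical: discover existing checkpoint directories instead of hard-coding trange(500).
from pathlib import Path

p_root = Path(__file__).parent / 'curious_study' / 'basic_gru#0'
checkpoints = sorted(
    (d for d in p_root.glob('checkpoint_*') if d.is_dir()),
    key=lambda d: int(d.name.split('_')[-1]),  # numeric sort: checkpoint_2 before checkpoint_10
)
for path in checkpoints:
    print(path)  # each path could be passed to snac.load_state_dict(path) as in the loop above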
@@ -1,39 +0,0 @@
from salina.agents import Agents, TemporalAgent
import torch
from salina import Workspace, get_arguments, get_class, instantiate_class
from pathlib import Path
from salina.agents.gyma import GymAgent
import time
from algorithms.utils import load_yaml_file, add_env_props


if __name__ == '__main__':
    # Setup workspace
    uid = time.time()
    workspace = Workspace()
    weights = Path('/Users/romue/PycharmProjects/EDYS/studies/agent_1636994369.145843.pt')

    cfg = load_yaml_file(Path(__file__).parent / 'sat_mad.yaml')
    add_env_props(cfg)
    cfg['env'].update({'n_agents': 2})

    # instantiate agent and env
    env_agent = GymAgent(
        get_class(cfg['env']),
        get_arguments(cfg['env']),
        n_envs=1
    )

    agents = []
    for _ in range(2):
        a2c_agent = instantiate_class(cfg['agent'])
        if weights:
            a2c_agent.load_state_dict(torch.load(weights))
        agents.append(a2c_agent)

    # combine agents
    acquisition_agent = TemporalAgent(Agents(env_agent, *agents))
    acquisition_agent.seed(42)

    acquisition_agent(workspace, t=0, n_steps=400, stochastic=False, save_render=True)