Merge remote-tracking branch 'origin/main'

This commit is contained in:
Steffen Illium
2022-04-11 16:15:55 +02:00
25 changed files with 974 additions and 848 deletions

View File

@@ -0,0 +1,23 @@

from pathlib import Path

from algorithms.utils import Checkpointer, load_yaml_file, add_env_props, instantiate_class, load_class
# from algorithms.marl import LoopSNAC, LoopIAC, LoopSEAC

for i in range(0, 5):
    for name in ['snac', 'mappo', 'iac', 'seac']:
        study_root = Path(__file__).parent / name
        cfg = load_yaml_file(study_root / f'{name}.yaml')
        add_env_props(cfg)
        env = instantiate_class(cfg['env'])
        net = instantiate_class(cfg['agent'])
        max_steps = cfg['algorithm']['max_steps']
        n_steps = cfg['algorithm']['n_steps']
        checkpointer = Checkpointer(f'{name}#{i}', study_root, cfg, max_steps, 50)
        loop = load_class(cfg['method'])(cfg)
        df = loop.train_loop(checkpointer)

View File

@@ -0,0 +1,22 @@

import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns

dfs = []
for name in ['mappo']:
    for c in range(5):
        try:
            study_root = Path(__file__).parent / name / f'{name}#{c}'
            print(study_root)
            df = pd.read_csv(study_root / 'results.csv', index_col=False)
            # Smooth the reward curve with a 100-step rolling mean.
            df.reward = df.reward.rolling(100).mean()
            df['method'] = name.upper()
            dfs.append(df)
        except FileNotFoundError:
            pass  # skip runs that have not written a results.csv yet

df = pd.concat(dfs).reset_index()
sns.lineplot(data=df, x='steps', y='reward', hue='method', palette='husl', ci='sd', linewidth=1.5, err_style='bars')
plt.savefig('study.png')
print('saved image')
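
Note on the lineplot call above: at the time of this commit, ci='sd' was the current seaborn API; seaborn 0.12 and later deprecate ci in favor of errorbar. A version-tolerant sketch (the version switch is an addition for newer installs, not part of the original script):

import seaborn as sns
from packaging.version import Version

plot_kwargs = dict(data=df, x='steps', y='reward', hue='method',
                   palette='husl', linewidth=1.5, err_style='bars')
if Version(sns.__version__) >= Version('0.12'):
    sns.lineplot(errorbar='sd', **plot_kwargs)  # newer API
else:
    sns.lineplot(ci='sd', **plot_kwargs)        # API used in the script above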

View File

@@ -1,139 +0,0 @@

from salina.agents.gyma import AutoResetGymAgent
from salina.agents import Agents, TemporalAgent
from salina.rl.functional import _index, gae
import torch
import torch.nn as nn
from torch.distributions import Categorical
from salina import TAgent, Workspace, get_arguments, get_class, instantiate_class
from pathlib import Path
import numpy as np
from tqdm import tqdm
import time
from algorithms.utils import (
    add_env_props,
    load_yaml_file,
    CombineActionsAgent,
    AutoResetGymMultiAgent,
    access_str,
    AGENT_PREFIX, REWARD, CUMU_REWARD, OBS, SEP
)


class A2CAgent(TAgent):
    def __init__(self, observation_size, hidden_size, n_actions, agent_id):
        super().__init__()
        observation_size = np.prod(observation_size)
        print(observation_size)
        self.agent_id = agent_id
        self.model = nn.Sequential(
            nn.Flatten(),
            nn.Linear(observation_size, hidden_size),
            nn.ELU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ELU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ELU()
        )
        self.action_head = nn.Linear(hidden_size, n_actions)
        self.critic_head = nn.Linear(hidden_size, 1)

    def get_obs(self, t):
        observation = self.get((f'env/{access_str(self.agent_id, OBS)}', t))
        return observation

    def forward(self, t, stochastic, **kwargs):
        observation = self.get_obs(t)
        features = self.model(observation)
        scores = self.action_head(features)
        probs = torch.softmax(scores, dim=-1)
        critic = self.critic_head(features).squeeze(-1)
        if stochastic:
            action = torch.distributions.Categorical(probs).sample()
        else:
            action = probs.argmax(1)
        self.set((f'{access_str(self.agent_id, "action")}', t), action)
        self.set((f'{access_str(self.agent_id, "action_probs")}', t), probs)
        self.set((f'{access_str(self.agent_id, "critic")}', t), critic)


if __name__ == '__main__':
    # Setup workspace
    uid = time.time()
    workspace = Workspace()
    n_agents = 2

    # load config
    cfg = load_yaml_file(Path(__file__).parent / 'sat_mad.yaml')
    add_env_props(cfg)
    cfg['env'].update({'n_agents': n_agents})

    # instantiate agent and env
    env_agent = AutoResetGymMultiAgent(
        get_class(cfg['env']),
        get_arguments(cfg['env']),
        n_envs=1
    )
    a2c_agents = [instantiate_class({**cfg['agent'], 'agent_id': agent_id})
                  for agent_id in range(n_agents)]

    # combine agents
    acquisition_agent = TemporalAgent(Agents(env_agent, *a2c_agents, CombineActionsAgent()))
    acquisition_agent.seed(69)

    # optimizers & other parameters
    cfg_optim = cfg['algorithm']['optimizer']
    optimizers = [get_class(cfg_optim)(a2c_agent.parameters(), **get_arguments(cfg_optim))
                  for a2c_agent in a2c_agents]
    n_timesteps = cfg['algorithm']['n_timesteps']

    # Decision making loop
    best = -float('inf')
    with tqdm(range(int(cfg['algorithm']['max_epochs'] / n_timesteps))) as pbar:
        for epoch in pbar:
            workspace.zero_grad()
            if epoch > 0:
                workspace.copy_n_last_steps(1)
                acquisition_agent(workspace, t=1, n_steps=n_timesteps-1, stochastic=True)
            else:
                acquisition_agent(workspace, t=0, n_steps=n_timesteps, stochastic=True)

            for agent_id in range(n_agents):
                critic, done, action_probs, reward, action = workspace[
                    access_str(agent_id, 'critic'),
                    "env/done",
                    access_str(agent_id, 'action_probs'),
                    access_str(agent_id, 'reward', 'env/'),
                    access_str(agent_id, 'action')
                ]
                td = gae(critic, reward, done, 0.98, 0.25)
                td_error = td ** 2
                critic_loss = td_error.mean()
                entropy_loss = Categorical(action_probs).entropy().mean()
                action_logp = _index(action_probs, action).log()
                a2c_loss = action_logp[:-1] * td.detach()
                a2c_loss = a2c_loss.mean()
                loss = (
                    -0.001 * entropy_loss
                    + 1.0 * critic_loss
                    - 0.1 * a2c_loss
                )
                optimizer = optimizers[agent_id]
                optimizer.zero_grad()
                loss.backward()
                #torch.nn.utils.clip_grad_norm_(a2c_agents[agent_id].parameters(), .5)
                optimizer.step()

            # Compute the cumulated reward on final_state
            rews = ''
            for agent_i in range(n_agents):
                creward = workspace['env/' + access_str(agent_i, CUMU_REWARD)]
                creward = creward[done]
                if creward.size()[0] > 0:
                    rews += f'{AGENT_PREFIX}{agent_i}: {creward.mean().item():.2f} | '
            """if cum_r > best:
                torch.save(a2c_agent.state_dict(), Path(__file__).parent / f'agent_{uid}.pt')
                best = cum_r"""
            pbar.set_description(rews, refresh=True)
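
For orientation, the per-agent loss in the deleted script above combines three standard actor-critic terms: a squared temporal-difference (critic) loss, an entropy bonus, and the policy-gradient term weighted by the detached advantage. A minimal self-contained sketch with random stand-in tensors (the shapes and the gather-based indexing are assumptions; salina's _index and gae are replaced here by plain PyTorch):

import torch
from torch.distributions import Categorical

T, B, n_actions = 10, 4, 10                         # time steps, envs, actions
probs = torch.softmax(torch.randn(T, B, n_actions), dim=-1)
action = torch.randint(n_actions, (T, B))
td = torch.randn(T - 1, B)                          # stands in for the gae(...) output

critic_loss = (td ** 2).mean()                      # fit the value estimate
entropy_loss = Categorical(probs).entropy().mean()  # exploration bonus
action_logp = probs.gather(-1, action.unsqueeze(-1)).squeeze(-1).log()
a2c_loss = (action_logp[:-1] * td.detach()).mean()  # policy-gradient term

# Same weighting as the training loop: maximize entropy and the policy
# term, minimize the critic error.
loss = -0.001 * entropy_loss + 1.0 * critic_loss - 0.1 * a2c_loss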

View File

@@ -1,27 +0,0 @@

agent:
  classname: studies.sat_mad.A2CAgent
  observation_size: 4*5*5
  hidden_size: 128
  n_actions: 10

env:
  classname: environments.factory.make
  env_name: "DirtyFactory-v0"
  n_agents: 1
  pomdp_r: 2
  max_steps: 400
  stack_n_frames: 3
  individual_rewards: True

algorithm:
  max_epochs: 1000000
  n_envs: 1
  n_timesteps: 10
  discount_factor: 0.99
  entropy_coef: 0.01
  critic_coef: 1.0
  gae: 0.25
  optimizer:
    classname: torch.optim.Adam
    lr: 0.0003
    weight_decay: 0.0
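
A note on observation_size: YAML has no arithmetic, so 4*5*5 is parsed as the string '4*5*5'. Since A2CAgent immediately calls np.prod(observation_size), the value is presumably overwritten with the env's observation-space shape by add_env_props before the agent is instantiated, which would make the YAML entry a human-readable placeholder. A sketch of that assumed substitution (the shape tuple is illustrative):

import numpy as np

cfg_agent = {'observation_size': '4*5*5'}   # placeholder as parsed from YAML
env_obs_shape = (4, 5, 5)                   # assumed shape injected by add_env_props
cfg_agent['observation_size'] = env_obs_shape

flat_size = int(np.prod(cfg_agent['observation_size']))  # 100, the MLP input width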

studies/viz_policy.py (new file, 36 lines)
View File

@@ -0,0 +1,36 @@

import pandas as pd
from algorithms.marl import LoopSNAC, LoopIAC, LoopSEAC
from pathlib import Path
from algorithms.utils import load_yaml_file
from tqdm import trange

study = 'example_config#0'
#study_root = Path(__file__).parent / study
study_root = Path('/Users/romue/PycharmProjects/EDYS/algorithms/marl/')
#['L2NoAh_gru', 'L2NoCh_gru', 'nomix_gru']:
render = True
eval_eps = 3

for run in range(0, 5):
    for name in ['example_config']:  #['L2OnlyAh_gru', 'L2OnlyChAh_gru', 'L2OnlyMix_gru'] #['layernorm_gru', 'basic_gru', 'nonorm_gru', 'spectralnorm_gru']
        cfg = load_yaml_file(study_root / study / 'config.yaml')
        p_root = Path(study_root / study / f'{name}#{run}')  # output directory for results.csv
        dfs = []
        for i in trange(500):
            path = study_root / study / f'checkpoint_{i}'  # evaluate each saved checkpoint in turn
            print(path)
            snac = LoopSEAC(cfg)
            snac.load_state_dict(path)
            snac.eval()
            df = snac.eval_loop(render=render, n_episodes=eval_eps)
            df['checkpoint'] = i
            dfs.append(df)
        results = pd.concat(dfs)
        results['run'] = run
        results.to_csv(p_root / 'results.csv', index=False)
        #sns.lineplot(data=results, x='checkpoint', y='reward', hue='agent', palette='husl')
        #plt.savefig(f'{experiment_name}.png')

View File

@@ -1,39 +0,0 @@

from salina.agents import Agents, TemporalAgent
import torch
from salina import Workspace, get_arguments, get_class, instantiate_class
from pathlib import Path
from salina.agents.gyma import GymAgent
import time
from algorithms.utils import load_yaml_file, add_env_props

if __name__ == '__main__':
    # Setup workspace
    uid = time.time()
    workspace = Workspace()
    weights = Path('/Users/romue/PycharmProjects/EDYS/studies/agent_1636994369.145843.pt')

    cfg = load_yaml_file(Path(__file__).parent / 'sat_mad.yaml')
    add_env_props(cfg)
    cfg['env'].update({'n_agents': 2})

    # instantiate agent and env
    env_agent = GymAgent(
        get_class(cfg['env']),
        get_arguments(cfg['env']),
        n_envs=1
    )

    agents = []
    for _ in range(2):
        a2c_agent = instantiate_class(cfg['agent'])
        if weights:
            a2c_agent.load_state_dict(torch.load(weights))
        agents.append(a2c_agent)

    # combine agents
    acquisition_agent = TemporalAgent(Agents(env_agent, *agents))
    acquisition_agent.seed(42)
    acquisition_agent(workspace, t=0, n_steps=400, stochastic=False, save_render=True)