Mirror of https://github.com/illiumst/marl-factory-grid.git (synced 2025-12-14 03:00:37 +01:00)

Commit: Code cleaning part 2
@@ -1,34 +0,0 @@
-agent:
-  classname: marl_factory_grid.algorithms.marl.networks.RecurrentAC
-  n_agents: 2
-  obs_emb_size: 96
-  action_emb_size: 16
-  hidden_size_actor: 64
-  hidden_size_critic: 64
-  use_agent_embedding: False
-env:
-  classname: marl_factory_grid.environment.configs.marl_eval
-  env_name: "marl_eval/dirt_quadrant_eval_config"
-  n_agents: 2
-  max_steps: 250
-  pomdp_r: 2
-  stack_n_frames: 0
-  individual_rewards: True
-  train_render: False
-  eval_render: True
-  save_and_log: True
-  record: False
-method: marl_factory_grid.algorithms.marl.LoopSEAC
-algorithm:
-  gamma: 0.99
-  entropy_coef: 0.01
-  vf_coef: 0.05
-  n_steps: 0 # How much experience should be sampled at most (n-TD) until the next value and policy update is performed. Default 0: MC
-  max_steps: 200000
-  advantage: "Advantage-AC" # Options: "Advantage-AC", "TD-Advantage-AC", "Reinforce"
-  pile-order: "dynamic" # Use "dynamic" to see emergent phenomenon and "smart" to prevent it
-  pile-observability: "single" # Options: "single", "all"
-  pile_all_done: "shared" # Options: "single", "all" ("single" for training, "all" for eval), "shared"
-  auxiliary_piles: False # Option that is only considered when pile-order = "agents"
-  chunk-episode: 20000 # Chunk size. (0 = update networks with full episode at once)
-
@@ -1,35 +0,0 @@
-agent:
-  classname: marl_factory_grid.algorithms.marl.networks.RecurrentAC
-  n_agents: 2
-  obs_emb_size: 96
-  action_emb_size: 16
-  hidden_size_actor: 64
-  hidden_size_critic: 64
-  use_agent_embedding: False
-env:
-  classname: marl_factory_grid.environment.configs.marl_eval
-  env_name: "marl_eval/two_rooms_eval_config"
-  n_agents: 2
-  max_steps: 250
-  pomdp_r: 2
-  stack_n_frames: 0
-  individual_rewards: True
-  train_render: False
-  eval_render: True
-  save_and_log: True
-  record: False
-method: marl_factory_grid.algorithms.marl.LoopSEAC
-algorithm:
-  gamma: 0.99
-  entropy_coef: 0.01
-  vf_coef: 0.05
-  n_steps: 0 # How much experience should be sampled at most (n-TD) until the next value and policy update is performed. Default 0: MC
-  max_steps: 260000
-  advantage: "Advantage-AC" # Options: "Advantage-AC", "TD-Advantage-AC", "Reinforce"
-  pile-order: "agents" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
-  pile-observability: "single" # Options: "single", "all"
-  pile_all_done: "distributed" # Options: "single", "all" ("single" for training, "all" and "distributed" for eval)
-  auxiliary_piles: True # Use True to see emergent phenomenon and False to prevent it
-  chunk-episode: 20000 # Chunk size. (0 = update networks with full episode at once)
-
-
@@ -1,44 +1,23 @@
 import copy
 import os
 import random

-import imageio # requires ffmpeg install on operating system and imageio-ffmpeg package for python
-from scipy import signal
 import matplotlib.pyplot as plt
 import torch
-from typing import Union, List, Dict
+from typing import Union, List
 import numpy as np
-from torch.distributions import Categorical
-
-from marl_factory_grid.algorithms.marl.base_a2c import PolicyGradient, cumulate_discount
-from marl_factory_grid.algorithms.utils import add_env_props, instantiate_class
-from pathlib import Path
-from collections import deque
-
-from marl_factory_grid.environment.actions import Noop
-from marl_factory_grid.modules import Clean, DoorUse
+from marl_factory_grid.algorithms.rl.base_a2c import PolicyGradient, cumulate_discount
+from marl_factory_grid.algorithms.utils import add_env_props
 from marl_factory_grid.utils.plotting.plot_single_runs import plot_action_maps


 class Names:
-    REWARD = 'reward'
-    DONE = 'done'
-    ACTION = 'action'
-    OBSERVATION = 'observation'
-    LOGITS = 'logits'
-    HIDDEN_ACTOR = 'hidden_actor'
-    HIDDEN_CRITIC = 'hidden_critic'
-    AGENT = 'agent'
     ENV = 'env'
     ENV_NAME = 'env_name'
     N_AGENTS = 'n_agents'
     ALGORITHM = 'algorithm'
     MAX_STEPS = 'max_steps'
     N_STEPS = 'n_steps'
-    BUFFER_SIZE = 'buffer_size'
-    CRITIC = 'critic'
-    BATCH_SIZE = 'bnatch_size'
-    N_ACTIONS = 'n_actions'
     TRAIN_RENDER = 'train_render'
     EVAL_RENDER = 'eval_render'
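The loop imported above turns reward sequences into discounted returns via cumulate_discount from base_a2c; with n_steps: 0 in the configs this amounts to full Monte-Carlo returns. A minimal sketch of such a helper in plain NumPy, assuming only a 1-D reward array (the repository's own implementation may differ):

import numpy as np

def cumulate_discount(rewards, gamma=0.99):
    # Backward pass over the reward sequence: G_t = r_t + gamma * G_{t+1}
    returns = np.zeros_like(rewards, dtype=float)
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        returns[t] = running
    return returns

# Example with three rewards and gamma = 0.99 -> 0.9801, 0.99, 1.0
print(cumulate_discount(np.array([0.0, 0.0, 1.0])))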
@@ -55,7 +34,7 @@ class A2C:
         self.train_cfg = train_cfg
         self.eval_cfg = eval_cfg
         self.cfg = train_cfg
-        self.n_agents = train_cfg[nms.AGENT][nms.N_AGENTS]
+        self.n_agents = train_cfg[nms.ENV][nms.N_AGENTS]
         self.setup()
         self.reward_development = []
         self.action_probabilities = {agent_idx:[] for agent_idx in range(self.n_agents)}
@@ -80,8 +59,6 @@ class A2C:
             os.mkdir(self.results_path)
             # Save settings in results folder
             self.save_configs()
-            if self.cfg[nms.ENV]["record"]:
-                self.recorder = imageio.get_writer(f'{self.results_path}/pygame_recording.mp4', fps=5)

     def set_cfg(self, eval=False):
         if eval:
@@ -610,8 +587,6 @@ class A2C:
             obs = env.reset()
             self.set_agent_spawnpoint(env)
             if self.cfg[nms.ENV][nms.EVAL_RENDER]:
-                if self.cfg[nms.ENV]["save_and_log"] and self.cfg[nms.ENV]["record"]:
-                    env.set_recorder(self.recorder)
                 if self.cfg[nms.ALGORITHM]["auxiliary_piles"]:
                     # Don't render auxiliary piles
                     auxiliary_piles = [pile for idx, pile in enumerate(env.state.entities['DirtPiles']) if idx % 2 == 0]
@@ -664,10 +639,6 @@ class A2C:

             episode += 1

-        # Properly finalize the video file
-        if self.cfg[nms.ENV]["save_and_log"] and self.cfg[nms.ENV]["record"]:
-            self.recorder.close()
-
     def plot_reward_development(self):
         smoothed_data = np.convolve(self.reward_development, np.ones(10) / 10, mode='valid')
         plt.plot(smoothed_data)
@@ -689,16 +660,14 @@ class A2C:

     def save_agent_models(self):
         for idx, agent in enumerate(self.agents):
-            agent_name = list(self.factory.state.agents_conf.keys())[idx]
-            agent.pi.save_model_parameters(self.results_path, agent_name)
-            agent.vf.save_model_parameters(self.results_path, agent_name)
+            agent.pi.save_model_parameters(self.results_path)
+            agent.vf.save_model_parameters(self.results_path)

     def load_agents(self, runs_list):
         for idx, run in enumerate(runs_list):
             run_path = f"../study_out/{run}"
-            agent_name = list(self.eval_factory.state.agents_conf.keys())[idx]
-            self.agents[idx].pi.load_model_parameters(f"{run_path}/{agent_name}_PolicyNet_model_parameters.pth")
-            self.agents[idx].vf.load_model_parameters(f"{run_path}/{agent_name}_ValueNet_model_parameters.pth")
+            self.agents[idx].pi.load_model_parameters(f"{run_path}/PolicyNet_model_parameters.pth")
+            self.agents[idx].vf.load_model_parameters(f"{run_path}/ValueNet_model_parameters.pth")

     def create_info_maps(self, env, used_actions):
         # Create value map
@@ -19,11 +19,11 @@ class Net(th.nn.Module):
             if module.bias is not None:
                 nn.init.uniform_(module.bias, a=-0.1, b=0.1)

-    def save_model(self, path, agent_name):
-        th.save(self.net, f"{path}/{agent_name}_{self.__class__.__name__}_model.pth")
+    def save_model(self, path):
+        th.save(self.net, f"{path}/{self.__class__.__name__}_model.pth")

-    def save_model_parameters(self, path, agent_name):
-        th.save(self.net.state_dict(), f"{path}/{agent_name}_{self.__class__.__name__}_model_parameters.pth")
+    def save_model_parameters(self, path):
+        th.save(self.net.state_dict(), f"{path}/{self.__class__.__name__}_model_parameters.pth")

     def load_model_parameters(self, path):
         self.net.load_state_dict(th.load(path))
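A minimal, self-contained sketch of the checkpoint convention implied by the new signatures above: one file per network class, named <ClassName>_model_parameters.pth inside a results folder. The PolicyNet below is an illustrative stand-in, not the repository's actual RecurrentAC architecture:

import torch as th
import torch.nn as nn

class PolicyNet(nn.Module):
    # Stand-in architecture; the real networks live in the repository's networks module.
    def __init__(self, obs_size=4, n_actions=2):
        super().__init__()
        self.net = nn.Sequential(nn.Linear(obs_size, 64), nn.ReLU(), nn.Linear(64, n_actions))

    def save_model_parameters(self, path):
        # One file per network class, mirroring the diff above.
        th.save(self.net.state_dict(), f"{path}/{self.__class__.__name__}_model_parameters.pth")

    def load_model_parameters(self, path):
        self.net.load_state_dict(th.load(path))

model = PolicyNet()
model.save_model_parameters(".")  # writes ./PolicyNet_model_parameters.pth
model.load_model_parameters("./PolicyNet_model_parameters.pth")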
@@ -0,0 +1,11 @@
+env:
+  classname: marl_factory_grid.environment.configs.marl_eval
+  env_name: "marl_eval/dirt_quadrant_eval_config"
+  n_agents: 2
+  eval_render: True
+  save_and_log: False
+algorithm:
+  pile-order: "smart" # Use "dynamic" to see emergent phenomenon and "smart" to prevent it
+  pile-observability: "single" # Options: "single", "all"
+  pile_all_done: "shared" # Options: "single", "all" ("single" for training, "all" for eval), "shared"
+  auxiliary_piles: False # Dirt quadrant does not use this option
@@ -0,0 +1,11 @@
+env:
+  classname: marl_factory_grid.environment.configs.marl_eval
+  env_name: "marl_eval/dirt_quadrant_eval_config"
+  n_agents: 2
+  eval_render: True
+  save_and_log: False
+algorithm:
+  pile-order: "dynamic" # Use "dynamic" to see emergent phenomenon and "smart" to prevent it
+  pile-observability: "single" # Options: "single", "all"
+  pile_all_done: "shared" # Options: "single", "all" ("single" for training, "all" for eval), "shared"
+  auxiliary_piles: False # Dirt quadrant does not use this option
@@ -0,0 +1,13 @@
+env:
+  classname: marl_factory_grid.environment.configs.marl_eval
+  env_name: "marl_eval/two_rooms_eval_config"
+  n_agents: 2
+  eval_render: True
+  save_and_log: False
+algorithm:
+  pile-order: "agents" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
+  pile-observability: "single" # Options: "single", "all"
+  pile_all_done: "distributed" # Options: "single", "all" ("single" for training, "all" and "distributed" for eval)
+  auxiliary_piles: True # Use True to see emergent phenomenon and False to prevent it
+
+
@@ -0,0 +1,13 @@
+env:
+  classname: marl_factory_grid.environment.configs.marl_eval
+  env_name: "marl_eval/two_rooms_eval_config_emergent"
+  n_agents: 2
+  eval_render: True
+  save_and_log: False
+algorithm:
+  pile-order: "agents" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
+  pile-observability: "single" # Options: "single", "all"
+  pile_all_done: "distributed" # Options: "single", "all" ("single" for training, "all" and "distributed" for eval)
+  auxiliary_piles: False # Use True to see emergent phenomenon and False to prevent it
+
+
@@ -0,0 +1,12 @@
+env:
+  classname: marl_factory_grid.environment.configs.rl
+  env_name: "rl/dirt_quadrant_agent1_eval_config"
+  n_agents: 1
+  eval_render: True
+  save_and_log: False
+algorithm:
+  pile-order: "fixed" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
+  pile-observability: "single" # Options: "single", "all"
+  pile_all_done: "all" #
+  auxiliary_piles: False # Dirt quadrant does not use this option
+
@@ -1,34 +1,17 @@
-agent:
-  classname: marl_factory_grid.algorithms.marl.networks.RecurrentAC
-  n_agents: 1
-  obs_emb_size: 96
-  action_emb_size: 16
-  hidden_size_actor: 64
-  hidden_size_critic: 64
-  use_agent_embedding: False
 env:
   classname: marl_factory_grid.environment.configs.rl
-  env_name: "rl/dirt_quadrant_train_config"
+  env_name: "rl/dirt_quadrant_agent1_train_config"
   n_agents: 1
-  max_steps: 250
-  pomdp_r: 2
-  stack_n_frames: 0
-  individual_rewards: True
   train_render: False
-  eval_render: True
   save_and_log: True
-  record: False
-method: marl_factory_grid.algorithms.marl.LoopSEAC
 algorithm:
   gamma: 0.99
-  entropy_coef: 0.01
-  vf_coef: 0.05
   n_steps: 0 # How much experience should be sampled at most (n-TD) until the next value and policy update is performed. Default 0: MC
-  max_steps: 240000
+  chunk-episode: 20000 # Chunk size. (0 = update networks with full episode at once)
+  max_steps: 140000
   advantage: "Advantage-AC" # Options: "Advantage-AC", "TD-Advantage-AC", "Reinforce"
   pile-order: "fixed" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
   pile-observability: "single" # Options: "single", "all"
   pile_all_done: "single" # Options: "single", "all" ("single" for training, "all" for eval)
-  auxiliary_piles: False # Option that is only considered when pile-order = "agents"
-  chunk-episode: 20000 # Chunk size. (0 = update networks with full episode at once)
+  auxiliary_piles: False # Dirt quadrant does not use this option
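chunk-episode caps how many stored transitions go into a single network update, with 0 meaning the full episode is used at once. A small illustrative helper, assuming transitions are kept in a flat list (not taken from the repository):

def iter_chunks(transitions, chunk_size):
    # Yield consecutive slices of at most chunk_size transitions; the whole list if chunk_size == 0.
    if chunk_size <= 0:
        yield transitions
        return
    for start in range(0, len(transitions), chunk_size):
        yield transitions[start:start + chunk_size]

# Example: a 50000-step episode with chunk size 20000 -> slices of 20000, 20000, 10000
episode = list(range(50000))
print([len(chunk) for chunk in iter_chunks(episode, 20000)])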
@@ -0,0 +1,13 @@
+env:
+  classname: marl_factory_grid.environment.configs.rl
+  env_name: "rl/two_rooms_eval_config"
+  n_agents: 1
+  eval_render: True
+  save_and_log: False
+algorithm:
+  pile-order: "fixed" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
+  pile-observability: "single" # Options: "single", "all"
+  pile_all_done: "all" # Options: "single", "all" ("single" for training, "all" for eval)
+  auxiliary_piles: False # Auxiliary piles are only used during marl eval
+
+
@@ -1,35 +1,17 @@
-agent:
-  classname: marl_factory_grid.algorithms.marl.networks.RecurrentAC
-  n_agents: 1
-  obs_emb_size: 96
-  action_emb_size: 16
-  hidden_size_actor: 64
-  hidden_size_critic: 64
-  use_agent_embedding: False
 env:
   classname: marl_factory_grid.environment.configs.rl
-  env_name: "rl/two_rooms_train_config"
   n_agents: 1
-  max_steps: 250
-  pomdp_r: 2
-  stack_n_frames: 0
-  individual_rewards: True
   train_render: False
-  eval_render: True
-  save_and_log: False
-  record: False
-method: marl_factory_grid.algorithms.marl.LoopSEAC
+  save_and_log: True
 algorithm:
   gamma: 0.99
-  entropy_coef: 0.01
-  vf_coef: 0.05
   n_steps: 0 # How much experience should be sampled at most (n-TD) until the next value and policy update is performed. Default 0: MC
+  chunk-episode: 20000 # Chunk size. (0 = update networks with full episode at once)
   max_steps: 260000
   advantage: "Advantage-AC" # Options: "Advantage-AC", "TD-Advantage-AC", "Reinforce"
   pile-order: "fixed" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
   pile-observability: "single" # Options: "single", "all"
   pile_all_done: "single" # Options: "single", "all" ("single" for training, "all" for eval)
-  auxiliary_piles: False # Option that is only considered when pile-order = "agents"
-  chunk-episode: 20000 # Chunk size. (0 = update networks with full episode at once)
+  auxiliary_piles: False # Auxiliary piles are only used during marl eval
@@ -6,7 +6,7 @@ from networkx.algorithms.approximation import traveling_salesman as tsp
 import time
 import copy

-from marl_factory_grid.algorithms.static.utils import points_to_graph
+from marl_factory_grid.algorithms.tsp.utils import points_to_graph
 from marl_factory_grid.modules.doors import constants as do
 from marl_factory_grid.environment import constants as c
 from marl_factory_grid.utils.helpers import MOVEMAP
@@ -1,4 +1,4 @@
-from marl_factory_grid.algorithms.static.TSP_base_agent import TSPBaseAgent
+from marl_factory_grid.algorithms.tsp.TSP_base_agent import TSPBaseAgent

 from marl_factory_grid.modules.clean_up import constants as di
 from marl_factory_grid.environment import constants as c
@@ -1,4 +1,4 @@
-from marl_factory_grid.algorithms.static.TSP_base_agent import TSPBaseAgent
+from marl_factory_grid.algorithms.tsp.TSP_base_agent import TSPBaseAgent

 from marl_factory_grid.modules.destinations import constants as d
 from marl_factory_grid.modules.doors import constants as do
@@ -64,13 +64,6 @@ def add_env_props(cfg):
     factory = Factory(env_path)
     _ = factory.reset()

-    # Agent Init
-    if len(factory.state.moving_entites) == 1: # Single agent setting
-        observation_size = list(factory.observation_space.shape)
-    else: # Multi-agent setting
-        observation_size = list(factory.observation_space[0].shape)
-    cfg['agent'].update(dict(observation_size=observation_size, n_actions=factory.action_space[0].n))
-
     return factory

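The block removed above read the observation shape differently for single- and multi-agent factories: a single agent exposes one observation space, while several agents expose one space per agent, indexed first. A small sketch of that distinction, using gymnasium-style Box spaces as hypothetical stand-ins for Factory.observation_space:

from gymnasium import spaces

# Hypothetical observation spaces; in the repository they come from Factory.observation_space.
single_agent_space = spaces.Box(low=0.0, high=1.0, shape=(4, 5, 5))
multi_agent_spaces = (spaces.Box(low=0.0, high=1.0, shape=(4, 5, 5)),
                      spaces.Box(low=0.0, high=1.0, shape=(4, 5, 5)))

def observation_size(space, n_agents):
    # Mirror the removed branch: index into the per-agent tuple only in the multi-agent case.
    return list(space.shape) if n_agents == 1 else list(space[0].shape)

print(observation_size(single_agent_space, 1))   # [4, 5, 5]
print(observation_size(multi_agent_spaces, 2))   # [4, 5, 5]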
@@ -5,18 +5,17 @@ General:
   individual_rewards: true
   # The level.txt file to load from marl_factory_grid/levels
   level_name: quadrant
-  # Radius of Partially observable Markov decision process
-  pomdp_r: 0 # default 3
+  # View Radius
+  pomdp_r: 0 # 0 = full observability
   # Print all messages and events
   verbose: false
   # Run tests
   tests: false

-# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
-# other agents aim to clean dirt piles.
+# Define Agents, their actions, observations and spawnpoints
 Agents:
   # The clean agents
-  Sigmund:
+  Agent1:
     Actions:
       - Move4
       - Noop
@@ -25,7 +24,7 @@ Agents:
       - Self
     Positions:
       - (9,1)
-  Wolfgang:
+  Agent2:
     Actions:
       - Move4
       - Noop
@@ -37,8 +36,8 @@ Agents:

 Entities:
   DirtPiles:
-    coords_or_quantity: (9,9), (7,9), (4,7), (2,4), (1, 1) # (4,7), (2,4), (1, 1) # (1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
-    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
+    coords_or_quantity: (9,9), (7,9), (4,7), (2,4), (1, 1)
+    initial_amount: 0.5
     clean_amount: 1
     dirt_spawn_r_var: 0
     max_global_amount: 12
@@ -46,7 +45,6 @@ Entities:

 # Rules section specifies the rules governing the dynamics of the environment.
 Rules:
-
   # Utilities
   # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
   # Can be omitted/ignored if you do not want to take care of collisions at all.
@@ -57,5 +55,3 @@ Rules:
   # Define the conditions for the environment to stop. Either success or a fail conditions.
   # The environment stops when all dirt is cleaned
   DoneOnAllDirtCleaned:
-  #DoneAtMaxStepsReached:
-    #max_steps: 200
@@ -1,20 +1,20 @@
 General:
+  # RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
   env_seed: 69
   # Individual vs global rewards
   individual_rewards: true
   # The level.txt file to load from marl_factory_grid/levels
   level_name: two_rooms
-  # View Radius; 0 = full observatbility
-  pomdp_r: 0
+  # View Radius
+  pomdp_r: 0 # 0 = full observability
   # Print all messages and events
   verbose: false
   # Run tests
   tests: false

-# In "two rooms one door" scenario 2 agents spawn in 2 different rooms that are connected by a single door. Their aim
-# is to reach the destination in the room they didn't spawn in leading to a conflict at the door.
+# Define Agents, their actions, observations and spawnpoints
 Agents:
-  Sigmund:
+  Agent1:
     Actions:
       - Move4
       - DoorUse
@@ -24,7 +24,7 @@ Agents:
       - Self
     Positions:
       - (3,1)
-  Wolfgang:
+  Agent2:
     Actions:
       - Move4
       - DoorUse
@@ -36,9 +36,10 @@ Agents:
       - (3,13)

 Entities:
+  # For RL-agent we model the flags as dirt piles to be more flexible
   DirtPiles:
     coords_or_quantity: (2,1), (3,12), (2,13), (3,2) # Static form: auxiliary pile, primary pile, auxiliary pile, ...
-    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
+    initial_amount: 0.5
     clean_amount: 1
     dirt_spawn_r_var: 0
     max_global_amount: 12
@@ -47,16 +48,13 @@ Entities:
   Doors: { }

 Rules:
-  # Environment Dynamics
-  #DoorAutoClose:
-    #close_frequency: 10
-
   # Utilities
   # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
   WatchCollisions:
     done_at_collisions: false

   # Done Conditions
-  #DoneOnAllDirtCleaned:
+  # Define the conditions for the environment to stop. Either success or a fail conditions.
+  # Environment execution stops after 30 steps
   DoneAtMaxStepsReached:
-    max_steps: 50
+    max_steps: 30
@@ -1,20 +1,20 @@
 General:
+  # RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
   env_seed: 69
   # Individual vs global rewards
   individual_rewards: true
   # The level.txt file to load from marl_factory_grid/levels
   level_name: two_rooms
-  # View Radius; 0 = full observatbility
-  pomdp_r: 0
+  # View Radius
+  pomdp_r: 0 # 0 = full observability
   # Print all messages and events
   verbose: false
   # Run tests
   tests: false

-# In "two rooms one door" scenario 2 agents spawn in 2 different rooms that are connected by a single door. Their aim
-# is to reach the destination in the room they didn't spawn in leading to a conflict at the door.
+# Define Agents, their actions, observations and spawnpoints
 Agents:
-  Sigmund:
+  Agent1:
     Actions:
       - Move4
       - DoorUse
@@ -24,7 +24,7 @@ Agents:
       - Self
     Positions:
       - (3,1)
-  Wolfgang:
+  Agent2:
     Actions:
       - Move4
       - DoorUse
@@ -36,9 +36,10 @@ Agents:
       - (3,13)

 Entities:
+  # For RL-agent we model the flags as dirt piles to be more flexible
   DirtPiles:
-    coords_or_quantity: (3,12), (3,2) # Static form: auxiliary pile, primary pile, auxiliary pile, ...
-    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
+    coords_or_quantity: (3,12), (3,2) # Locations of flags
+    initial_amount: 0.5
     clean_amount: 1
     dirt_spawn_r_var: 0
     max_global_amount: 12
@@ -47,16 +48,13 @@ Entities:
   Doors: { }

 Rules:
-  # Environment Dynamics
-  #DoorAutoClose:
-    #close_frequency: 10
-
   # Utilities
   # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
   WatchCollisions:
     done_at_collisions: false

   # Done Conditions
-  #DoneOnAllDirtCleaned:
+  # Define the conditions for the environment to stop. Either success or a fail conditions
+  # Environment execution stops after 30 steps
   DoneAtMaxStepsReached:
     max_steps: 30
@@ -0,0 +1,48 @@
+General:
+  # RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
+  env_seed: 69
+  # Individual vs global rewards
+  individual_rewards: true
+  # The level.txt file to load from marl_factory_grid/levels
+  level_name: quadrant
+  # View Radius
+  pomdp_r: 0 # 0 = full observability
+  # Print all messages and events
+  verbose: false
+  # Run tests
+  tests: false
+
+# Define Agents, their actions, observations and spawnpoints
+Agents:
+  # The clean agents
+  Agent1:
+    Actions:
+      - Move4
+      - Noop
+    Observations:
+      - DirtPiles
+      - Self
+    Positions:
+      - (9,1)
+
+Entities:
+  DirtPiles:
+    coords_or_quantity: (1, 1), (2,4), (4,7), (7,9), (9,9) # Locations of dirt piles
+    initial_amount: 0.5
+    clean_amount: 1
+    dirt_spawn_r_var: 0
+    max_global_amount: 12
+    max_local_amount: 1
+
+# Rules section specifies the rules governing the dynamics of the environment.
+Rules:
+  # Utilities
+  # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
+  # Can be omitted/ignored if you do not want to take care of collisions at all.
+  WatchCollisions:
+    done_at_collisions: false
+
+  # Done Conditions
+  # Define the conditions for the environment to stop. Either success or a fail conditions.
+  # The environment stops when all dirt is cleaned
+  DoneOnAllDirtCleaned:
@@ -5,61 +5,38 @@ General:
   individual_rewards: true
   # The level.txt file to load from marl_factory_grid/levels
   level_name: quadrant
-  # Radius of Partially observable Markov decision process
-  pomdp_r: 0 # default 3
+  # View Radius
+  pomdp_r: 0 # 0 = full observability
   # Print all messages and events
   verbose: false
   # Run tests
   tests: false

-# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
-# other agents aim to clean dirt piles.
+# Define Agents, their actions, observations and spawnpoints
 Agents:
   # The clean agents
-  #Sigmund:
-    #Actions:
-      #- Move4
-    #Observations:
-      #- DirtPiles
-      #- Self
-    #Positions:
-      #- (9,1)
-      #- (1,1)
-      #- (2,4)
-      #- (4,7)
-      #- (6,8)
-      #- (7,9)
-      #- (2,4)
-      #- (4,7)
-      #- (6,8)
-      #- (7,9)
-      #- (9,9)
-      #- (9,1)
-  Wolfgang:
+  Agent1:
     Actions:
       - Move4
     Observations:
      - DirtPiles
      - Self
    Positions:
-      - (9,5)
+      - (9,1)
      - (1,1)
      - (2,4)
      - (4,7)
-      - (6,8)
      - (7,9)
      - (2,4)
      - (4,7)
-      - (6,8)
      - (7,9)
      - (9,9)
-      - (9,5)
+      - (9,1)

 Entities:
   DirtPiles:
-    coords_or_quantity: (1, 1), (2,4), (4,7), (6,8), (7,9), (9,9) # (4,7), (2,4), (1, 1) #(1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
-    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
+    coords_or_quantity: (1, 1), (2,4), (4,7), (7,9), (9,9) # Locations of dirt piles
+    initial_amount: 0.5
     clean_amount: 1
     dirt_spawn_r_var: 0
     max_global_amount: 12
@@ -67,7 +44,6 @@ Entities:

 # Rules section specifies the rules governing the dynamics of the environment.
 Rules:
-
   # Utilities
   # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
   # Can be omitted/ignored if you do not want to take care of collisions at all.
@@ -78,8 +54,6 @@ Rules:
   # Define the conditions for the environment to stop. Either success or a fail conditions.
   # The environment stops when all dirt is cleaned
   DoneOnAllDirtCleaned:
-  #DoneAtMaxStepsReached: # An episode should last for at most max_steps steps
-    #max_steps: 1000

   # Define how agents spawn.
   # Options: "random" (Spawn agent at a random position from the list of defined positions)
@@ -1,78 +0,0 @@
-General:
-  # RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
-  env_seed: 69
-  # Individual vs global rewards
-  individual_rewards: true
-  # The level.txt file to load from marl_factory_grid/levels
-  level_name: quadrant
-  # Radius of Partially observable Markov decision process
-  pomdp_r: 0 # default 3
-  # Print all messages and events
-  verbose: false
-  # Run tests
-  tests: false
-
-# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
-# other agents aim to clean dirt piles.
-Agents:
-  # The clean agents
-  #Sigmund:
-    #Actions:
-      #- Move4
-      #- Noop
-    #Observations:
-      #- DirtPiles
-      #- Self
-    #Positions:
-      #- (9,1)
-      #- (1,1)
-      #- (2,4)
-      #- (4,7)
-      #- (7,9)
-      #- (2,4)
-      #- (4,7)
-      #- (7,9)
-      #- (9,9)
-      #- (9,1)
-  Wolfgang:
-    Actions:
-      - Move4
-    Observations:
-      - DirtPiles
-      - Self
-    Positions:
-      - (9,5)
-      #- (1,1)
-      #- (2,4)
-      #- (4,7)
-      #- (7,9)
-      #- (2,4)
-      #- (4,7)
-      #- (7,9)
-      #- (9,9)
-      #- (9,5)
-
-Entities:
-  DirtPiles:
-    coords_or_quantity: (1, 1), (2,4), (4,7), (7,9), (9,9) #(9,9), (7,9), (4,7), (2,4), (1, 1) #(1, 1), (2,4), (4,7), (7,9), (9,9) # (4,7), (2,4), (1, 1) # (1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
-    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
-    clean_amount: 1
-    dirt_spawn_r_var: 0
-    max_global_amount: 12
-    max_local_amount: 1
-
-# Rules section specifies the rules governing the dynamics of the environment.
-Rules:
-
-  # Utilities
-  # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
-  # Can be omitted/ignored if you do not want to take care of collisions at all.
-  WatchCollisions:
-    done_at_collisions: false
-
-  # Done Conditions
-  # Define the conditions for the environment to stop. Either success or a fail conditions.
-  # The environment stops when all dirt is cleaned
-  DoneOnAllDirtCleaned:
-  #DoneAtMaxStepsReached:
-    #max_steps: 200
@@ -0,0 +1,50 @@
+General:
+  # RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
+  env_seed: 69
+  # Individual vs global rewards
+  individual_rewards: true
+  # The level.txt file to load from marl_factory_grid/levels
+  level_name: two_rooms
+  # View Radius
+  pomdp_r: 0 # 0 = full observability
+  # Print all messages and events
+  verbose: false
+  # Run tests
+  tests: false
+
+# Define Agents, their actions, observations and spawnpoints
+Agents:
+  Agent1:
+    Actions:
+      - Move4
+      - DoorUse
+    Observations:
+      - DirtPiles
+      - Self
+    Positions:
+      - (3,1)
+      - (2,1)
+
+Entities:
+  DirtPiles:
+    coords_or_quantity: (2,1), (3,12) # Locations of dirt piles
+    initial_amount: 0.5
+    clean_amount: 1
+    dirt_spawn_r_var: 0
+    max_global_amount: 12
+    max_local_amount: 1
+
+  Doors: { }
+
+# Rules section specifies the rules governing the dynamics of the environment.
+Rules:
+  # Utilities
+  # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
+  WatchCollisions:
+    done_at_collisions: false
+
+  # Done Conditions
+  # Define the conditions for the environment to stop. Either success or a fail conditions
+  # Environment execution stops after 30 steps
+  DoneAtMaxStepsReached:
+    max_steps: 30
@@ -0,0 +1,60 @@
+General:
+  # RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
+  env_seed: 69
+  # Individual vs global rewards
+  individual_rewards: true
+  # The level.txt file to load from marl_factory_grid/levels
+  level_name: two_rooms
+  # View Radius
+  pomdp_r: 0 # 0 = full observability
+  # Print all messages and events
+  verbose: false
+  # Run tests
+  tests: false
+
+# Define Agents, their actions, observations and spawnpoints
+Agents:
+  Agent1:
+    Actions:
+      - Move4
+    Observations:
+      - DirtPiles
+      - Self
+    Positions:
+      - (3,1)
+      - (1,1)
+      - (3,1)
+      - (5,1)
+      - (3,1)
+      - (1,8)
+      - (3,1)
+      - (5,8)
+
+Entities:
+  DirtPiles:
+    coords_or_quantity: (2,1), (3,12) # Locations of dirt piles
+    initial_amount: 0.5
+    clean_amount: 1
+    dirt_spawn_r_var: 0
+    max_global_amount: 12
+    max_local_amount: 1
+
+  #Doors: { } # We leave out the door during training
+
+Rules:
+  # Utilities
+  # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
+  WatchCollisions:
+    done_at_collisions: false
+
+  # Done Conditions
+  # Define the conditions for the environment to stop. Either success or a fail conditions
+  # The environment stops when all dirt is cleaned
+  DoneOnAllDirtCleaned:
+
+  # Define how agents spawn.
+  # Options: "random" (Spawn agent at a random position from the list of defined positions)
+  #          "first" (Always spawn agent at first position regardless of the other provided positions)
+  #          "order" (Loop through agent positions)
+  AgentSpawnRule:
+    spawn_rule: "order"
@@ -1,30 +1,20 @@
 General:
+  # RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
   env_seed: 69
   # Individual vs global rewards
   individual_rewards: true
   # The level.txt file to load from marl_factory_grid/levels
   level_name: two_rooms
-  # View Radius; 0 = full observatbility
-  pomdp_r: 0
+  # View Radius
+  pomdp_r: 0 # 0 = full observability
   # Print all messages and events
   verbose: false
   # Run tests
   tests: false

-# In "two rooms one door" scenario 2 agents spawn in 2 different rooms that are connected by a single door. Their aim
-# is to reach the destination in the room they didn't spawn in leading to a conflict at the door.
+# Define Agents, their actions, observations and spawnpoints
 Agents:
-  #Sigmund:
-    #Actions:
-      #- Move4
-      #- DoorUse
-    #Observations:
-      #- DirtPiles
-      #- Self
-    #Positions:
-      #- (3,1)
-      #- (2,1)
-  Wolfgang:
+  Agent2:
     Actions:
       - Move4
       - DoorUse
@@ -37,8 +27,8 @@ Agents:

 Entities:
   DirtPiles:
-    coords_or_quantity: (2,13), (3,2) # (2,1), (3,12)
-    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
+    coords_or_quantity: (2,13), (3,2) # Locations of dirt piles
+    initial_amount: 0.5
     clean_amount: 1
     dirt_spawn_r_var: 0
     max_global_amount: 12
@@ -46,17 +36,15 @@ Entities:

   Doors: { }

+# Rules section specifies the rules governing the dynamics of the environment.
 Rules:
-  # Environment Dynamics
-  #DoorAutoClose:
-    #close_frequency: 10
-
   # Utilities
   # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
   WatchCollisions:
     done_at_collisions: false

   # Done Conditions
-  #DoneOnAllDirtCleaned:
+  # Define the conditions for the environment to stop. Either success or a fail conditions
+  # Environment execution stops after 30 steps
   DoneAtMaxStepsReached:
-    max_steps: 50
+    max_steps: 30
@@ -1,35 +1,20 @@
 General:
+  # RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
   env_seed: 69
   # Individual vs global rewards
   individual_rewards: true
   # The level.txt file to load from marl_factory_grid/levels
   level_name: two_rooms
-  # View Radius; 0 = full observatbility
-  pomdp_r: 0
+  # View Radius
+  pomdp_r: 0 # 0 = full observability
   # Print all messages and events
   verbose: false
   # Run tests
   tests: false

-# In "two rooms one door" scenario 2 agents spawn in 2 different rooms that are connected by a single door. Their aim
-# is to reach the destination in the room they didn't spawn in leading to a conflict at the door.
+# Define Agents, their actions, observations and spawnpoints
 Agents:
-  #Sigmund:
-    #Actions:
-      #- Move4
-    #Observations:
-      #- DirtPiles
-      #- Self
-    #Positions:
-      #- (3,1)
-      #- (1,1)
-      #- (3,1)
-      #- (5,1)
-      #- (3,1)
-      #- (1,8)
-      #- (3,1)
-      #- (5,8)
-  Wolfgang:
+  Agent2:
     Actions:
       - Move4
     Observations:
@@ -47,29 +32,30 @@ Agents:

 Entities:
   DirtPiles:
-    coords_or_quantity: (2,13), (3,2) # (2,1), (3,12)
-    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
+    coords_or_quantity: (2,13), (3,2) # Locations of dirt piles
+    initial_amount: 0.5
     clean_amount: 1
     dirt_spawn_r_var: 0
     max_global_amount: 12
     max_local_amount: 1

-  #Doors: { }
+  #Doors: { } # We leave out the door during training

+# Rules section specifies the rules governing the dynamics of the environment.
 Rules:
-  # Environment Dynamics
-  #DoorAutoClose:
-    #close_frequency: 10
-
   # Utilities
   # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
   WatchCollisions:
     done_at_collisions: false

   # Done Conditions
+  # Define the conditions for the environment to stop. Either success or a fail conditions
+  # The environment stops when all dirt is cleaned
   DoneOnAllDirtCleaned:
-  #DoneAtMaxStepsReached:
-    #max_steps: 100

+  # Defines how agents spawn.
+  # Options: "random" (Spawn agent at a random position from the list of defined positions)
+  #          "first" (Always spawn agent at first position regardless of the other provided positions)
+  #          "order" (Loop through agent positions)
   AgentSpawnRule:
     spawn_rule: "order"
@@ -5,37 +5,34 @@ General:
   individual_rewards: true
   # The level.txt file to load from marl_factory_grid/levels
   level_name: quadrant
-  # Radius of Partially observable Markov decision process
-  pomdp_r: 0 # default 3
+  # View Radius
+  pomdp_r: 0 # 0 = full observability
   # Print all messages and events
   verbose: false
   # Run tests
   tests: false

-# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
-# other agents aim to clean dirt piles.
+# Define Agents, their actions, observations and spawnpoints
 Agents:
   # The clean agents
-  Wolfgang:
+  Agent1:
     Actions:
       - Move4
       - Clean
       - Noop
     Observations:
       - Walls
-      - Other
       - DirtPiles
       - Self
     Positions:
       - (9,1)
-  Reiner:
+  Agent2:
     Actions:
       - Move4
       - Clean
       - Noop
     Observations:
       - Walls
-      - Other
       - DirtPiles
       - Self
     Positions:
@@ -44,7 +41,7 @@ Agents:
 Entities:
   DirtPiles:
     coords_or_quantity: (1, 1), (2,4), (4,7), (7,9), (9,9)
-    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
+    initial_amount: 0.5
     clean_amount: 1
     dirt_spawn_r_var: 0
     max_global_amount: 12
@@ -63,5 +60,3 @@ Rules:
   # Define the conditions for the environment to stop. Either success or a fail conditions.
   # The environment stops when all dirt is cleaned
   DoneOnAllDirtCleaned:
-  DoneAtMaxStepsReached:
-    max_steps: 200
@@ -1,40 +1,38 @@
 General:
+  # RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
   env_seed: 69
   # Individual vs global rewards
   individual_rewards: true
   # The level.txt file to load from marl_factory_grid/levels
   level_name: two_rooms
-  # View Radius; 0 = full observatbility
-  pomdp_r: 0
+  # View Radius
+  pomdp_r: 0 # 0 = full observability
   # Print all messages and events
   verbose: false
   # Run tests
   tests: false

-# In "two rooms one door" scenario 2 agents spawn in 2 different rooms that are connected by a single door. Their aim
-# is to reach the destination in the room they didn't spawn in leading to a conflict at the door.
+# Define Agents, their actions, observations and spawnpoints
 Agents:
-  Wolfgang:
+  Agent1:
     Actions:
       - Move4
       - Noop
-      - DestAction
+      - DestAction # Action that is performed when the destination is reached
      - DoorUse
    Observations:
      - Walls
-      - Other
      - Doors
      - Destination
    Positions:
-      - (3,1) # Agent spawnpoint
+      - (3,1)
-  Sigmund:
+  Agent2:
    Actions:
      - Move4
      - Noop
      - DestAction
      - DoorUse
    Observations:
-      - Other
      - Walls
      - Destination
      - Doors
@@ -45,10 +43,11 @@ Entities:
   Destinations:
     spawnrule:
       SpawnDestinationsPerAgent:
+        # Target coordinates
         coords_or_quantity:
-          Wolfgang:
+          Agent1:
-            - (3,12) # Target coordinates
+            - (3,12)
-          Sigmund:
+          Agent2:
             - (3,2)

   Doors: { }
@@ -68,10 +67,12 @@ Rules:
   AssignGlobalPositions: { }

   DoneAtDestinationReach:
-    reward_at_done: 1
+    reward_at_done: 50
     # We want to give rewards only, when all targets have been reached.
     condition: "all"

   # Done Conditions
+  # Define the conditions for the environment to stop. Either success or a fail conditions
+  # Environment execution stops after 30 steps
   DoneAtMaxStepsReached:
-    max_steps: 50
+    max_steps: 30
@@ -293,9 +293,6 @@ class Factory(gym.Env):
                 render_entity.aux = self.obs_builder.curr_lightmaps[render_entity.real_name]
         return self._renderer.render(render_entities)

-    def set_recorder(self, recorder):
-        self._recorder = recorder
-
     def summarize_header(self):
         header = {'rec_step': self.state.curr_step}
         for entity_group in (x for x in self.state if x.name in ['Walls', 'DropOffLocations', 'ChargePods']):
@@ -3,7 +3,7 @@ from typing import List, Tuple
 
 import numpy as np
 
-from marl_factory_grid.algorithms.static.utils import points_to_graph
+from marl_factory_grid.algorithms.tsp.utils import points_to_graph
 from marl_factory_grid.environment import constants as c
 from marl_factory_grid.environment.entity.entity import Entity
 from marl_factory_grid.environment.rules import Rule, SpawnAgents
@@ -1,93 +0,0 @@
-import copy
-
-from pathlib import Path
-
-from marl_factory_grid.algorithms.marl.a2c_dirt import A2C
-from marl_factory_grid.algorithms.utils import load_yaml_file
-
-
-def single_agent_training(config_name):
-    cfg_path = Path(f'../marl_factory_grid/algorithms/marl/single_agent_configs/{config_name}_config.yaml')
-
-    train_cfg = load_yaml_file(cfg_path)
-    # Use environment config with fixed spawnpoints for eval
-    eval_cfg = copy.deepcopy(train_cfg)
-    eval_cfg["env"]["env_name"] = f"rl/{config_name}_eval_config"
-
-    print("Training phase")
-    agent = A2C(train_cfg, eval_cfg)
-    agent.train_loop()
-    print("Evaluation phase")
-    # Have consecutive episode for eval in single agent case
-    train_cfg["algorithm"]["pile_all_done"] = "all"
-    agent.eval_loop(10)
-
-
-def single_agent_eval(config_name, run):
-    cfg_path = Path(f'../marl_factory_grid/algorithms/marl/single_agent_configs/{config_name}_config.yaml')
-
-    train_cfg = load_yaml_file(cfg_path)
-    # Use environment config with fixed spawnpoints for eval
-    eval_cfg = copy.deepcopy(train_cfg)
-    eval_cfg["env"]["env_name"] = f"rl/{config_name}_eval_config"
-    agent = A2C(train_cfg, eval_cfg)
-    print("Evaluation phase")
-    agent.load_agents(run)
-    agent.eval_loop(1)
-
-
-def multi_agent_eval(config_name, runs, emergent_phenomenon=False):
-    cfg_path = Path(f'../marl_factory_grid/algorithms/marl/multi_agent_configs/{config_name}_config.yaml')
-
-    eval_cfg = load_yaml_file(cfg_path)
-    # Sanity setting of required attributes and configs
-    if config_name == "two_rooms":
-        if emergent_phenomenon:
-            eval_cfg["env"]["env_name"] = f"marl_eval/{config_name}_eval_config_emergent"
-            eval_cfg["algorithm"]["auxiliary_piles"] = False
-        else:
-            eval_cfg["algorithm"]["auxiliary_piles"] = True
-    elif config_name == "dirt_quadrant":
-        if emergent_phenomenon:
-            eval_cfg["algorithm"]["pile-order"] = "dynamic"
-        else:
-            eval_cfg["algorithm"]["pile-order"] = "smart"
-    agent = A2C(train_cfg=eval_cfg, eval_cfg=eval_cfg)
-    print("Evaluation phase")
-    agent.load_agents(runs)
-    agent.eval_loop(1)
-
-
-def dirt_quadrant_single_agent_training():
-    single_agent_training("dirt_quadrant")
-
-
-def two_rooms_one_door_modified_single_agent_training():
-    single_agent_training("two_rooms")
-
-
-def dirt_quadrant_single_agent_eval(agent_name):
-    if agent_name == "Sigmund":
-        run = "run0"
-    elif agent_name == "Wolfgang":
-        run = "run1"
-    single_agent_eval("dirt_quadrant", [run])
-
-
-def two_rooms_one_door_modified_single_agent_eval(agent_name):
-    if agent_name == "Sigmund":
-        run = "run2"
-    elif agent_name == "Wolfgang":
-        run = "run3"
-    single_agent_eval("two_rooms", [run])
-
-
-def dirt_quadrant_5_multi_agent_eval(emergent_phenomenon):
-    multi_agent_eval("dirt_quadrant", ["run4", "run5"], emergent_phenomenon)
-
-def dirt_quadrant_5_multi_agent_ctde_eval(emergent_phenomenon):  # run7 == run4
-    multi_agent_eval("dirt_quadrant", ["run4", "run7"], emergent_phenomenon)
-
-def two_rooms_one_door_modified_multi_agent_eval(emergent_phenomenon):
-    multi_agent_eval("two_rooms", ["run2", "run3"], emergent_phenomenon)
-
-
-if __name__ == '__main__':
-    two_rooms_one_door_modified_multi_agent_eval(False)
 75  studies/rl_runs.py  Normal file
@@ -0,0 +1,75 @@
+from pathlib import Path
+
+from marl_factory_grid.algorithms.rl.a2c_dirt import A2C
+from marl_factory_grid.algorithms.utils import load_yaml_file
+
+
+def dirt_quadrant_agent1_training():
+    train_cfg_path = Path(f'../marl_factory_grid/algorithms/rl/single_agent_configs/dirt_quadrant_train_config.yaml')
+    eval_cfg_path = Path(f'../marl_factory_grid/algorithms/rl/single_agent_configs/dirt_quadrant_eval_config.yaml')
+    train_cfg = load_yaml_file(train_cfg_path)
+    eval_cfg = load_yaml_file(eval_cfg_path)
+
+    print("Training phase")
+    agent = A2C(train_cfg, eval_cfg)
+    agent.train_loop()
+    print("Evaluation phase")
+    agent.eval_loop(n_episodes=1)
+
+
+def two_rooms_training(max_steps, agent_name):
+    train_cfg_path = Path(f'../marl_factory_grid/algorithms/rl/single_agent_configs/two_rooms_train_config.yaml')
+    eval_cfg_path = Path(f'../marl_factory_grid/algorithms/rl/single_agent_configs/two_rooms_eval_config.yaml')
+    train_cfg = load_yaml_file(train_cfg_path)
+    eval_cfg = load_yaml_file(eval_cfg_path)
+
+    train_cfg["algorithm"]["max_steps"] = max_steps
+    train_cfg["env"]["env_name"] = f"rl/two_rooms_{agent_name}_train_config"
+    eval_cfg["env"]["env_name"] = f"rl/two_rooms_{agent_name}_eval_config"
+    print("Training phase")
+    agent = A2C(train_cfg, eval_cfg)
+    agent.train_loop()
+    print("Evaluation phase")
+    agent.eval_loop(n_episodes=1)
+
+
+def two_rooms_agent1_training():
+    two_rooms_training(max_steps=190000, agent_name="agent1")
+
+
+def two_rooms_agent2_training():
+    two_rooms_training(max_steps=260000, agent_name="agent2")
+
+
+def single_agent_eval(config_name, run_folder_name):
+    eval_cfg_path = Path(f'../marl_factory_grid/algorithms/rl/single_agent_configs/{config_name}_eval_config.yaml')
+    train_cfg = eval_cfg = load_yaml_file(eval_cfg_path)
+
+    # A value for train_cfg is required, but the train environment won't be used
+    agent = A2C(train_cfg=train_cfg, eval_cfg=eval_cfg)
+    print("Evaluation phase")
+    agent.load_agents([run_folder_name])
+    agent.eval_loop(1)
+
+
+def multi_agent_eval(config_name, runs, emergent_phenomenon=False):
+    eval_cfg_path = Path(f'../marl_factory_grid/algorithms/rl/multi_agent_configs/{config_name}' +
+                         f'_eval_config{"_emergent" if emergent_phenomenon else ""}.yaml')
+    eval_cfg = load_yaml_file(eval_cfg_path)
+
+    # A value for train_cfg is required, but the train environment won't be used
+    agent = A2C(train_cfg=eval_cfg, eval_cfg=eval_cfg)
+    print("Evaluation phase")
+    agent.load_agents(runs)
+    agent.eval_loop(1)
+
+
+def dirt_quadrant_multi_agent_ctde_eval(emergent_phenomenon):
+    multi_agent_eval("dirt_quadrant", ["run0", "run0"], emergent_phenomenon)
+
+
+def two_rooms_multi_agent_eval(emergent_phenomenon):
+    multi_agent_eval("two_rooms", ["run1", "run2"], emergent_phenomenon)
+
+
+if __name__ == '__main__':
+    dirt_quadrant_agent1_training()
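For orientation, a minimal sketch of how these study entry points might be chained from another script. The bare import path and the assumption that the run folders ("run1", "run2") already exist are mine, not part of this commit:

    # Hypothetical driver; assumes studies/ is on the import path and that the
    # referenced run folders were produced by earlier training runs.
    from rl_runs import (two_rooms_agent1_training, two_rooms_agent2_training,
                         two_rooms_multi_agent_eval)

    two_rooms_agent1_training()                              # single-agent training, 190000 steps
    two_rooms_agent2_training()                              # single-agent training, 260000 steps
    two_rooms_multi_agent_eval(emergent_phenomenon=False)    # joint eval from run1 + run2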
@@ -4,10 +4,11 @@ from pathlib import Path
 
 from tqdm import trange
 
-from marl_factory_grid.algorithms.static.TSP_dirt_agent import TSPDirtAgent
-from marl_factory_grid.algorithms.static.TSP_target_agent import TSPTargetAgent
+from marl_factory_grid.algorithms.tsp.TSP_dirt_agent import TSPDirtAgent
+from marl_factory_grid.algorithms.tsp.TSP_target_agent import TSPTargetAgent
 from marl_factory_grid.environment.factory import Factory
 
+
 def get_dirt_quadrant_tsp_agents(emergent_phenomenon, factory):
     agents = [TSPDirtAgent(factory, 0), TSPDirtAgent(factory, 1)]
     if not emergent_phenomenon:
@@ -31,13 +32,11 @@ def get_dirt_quadrant_tsp_agents(emergent_phenomenon, factory):
             for u, v, weight in agent._position_graph.edges(data='weight'):
                 agent._position_graph[u][v]['weight'] = edge_costs[f"{u}-{v}"]
 
-            """for u, v, weight in agent._position_graph.edges(data='weight'):
-                print(f"Edge ({u}-{v}) has weight: {weight}")"""
 
     return agents
 
 
-def get_two_rooms_one_door_modified_tsp_agents(emergent_phenomenon, factory):
+def get_two_rooms_tsp_agents(emergent_phenomenon, factory):
     agents = [TSPTargetAgent(factory, 0), TSPTargetAgent(factory, 1)]
     if not emergent_phenomenon:
         print(emergent_phenomenon)
@@ -45,6 +44,7 @@ def get_two_rooms_one_door_modified_tsp_agents(emergent_phenomenon, factory):
             agent._position_graph[(3, 1)][(3, 2)]['weight'] = 4
     return agents
 
+
 def run_tsp_setting(config_name, emergent_phenomenon):
     # Render at each step?
     render = True
@@ -74,7 +74,7 @@ def run_tsp_setting(config_name, emergent_phenomenon):
         if config_name == "dirt_quadrant":
             agents = get_dirt_quadrant_tsp_agents(emergent_phenomenon, factory)
         elif config_name == "two_rooms":
-            agents = get_two_rooms_one_door_modified_tsp_agents(emergent_phenomenon, factory)
+            agents = get_two_rooms_tsp_agents(emergent_phenomenon, factory)
         else:
             print("Config name does not exist. Abort...")
             break
@@ -95,7 +95,7 @@ def dirt_quadrant_multi_agent_tsp(emergent_phenomenon):
     run_tsp_setting("dirt_quadrant", emergent_phenomenon)
 
 
-def two_rooms_one_door_modified_multi_agent_tsp(emergent_phenomenon):
+def two_rooms_multi_agent_tsp(emergent_phenomenon):
     run_tsp_setting("two_rooms", emergent_phenomenon)
 
 
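The weight overrides shown in the TSP hunks above (the edge_costs lookup and the fixed weight of 4 on the (3,1)-(3,2) edge) are plain networkx edge-attribute writes. A minimal, self-contained sketch of the pattern, using a toy graph and made-up costs rather than the study's actual position graph:

    import networkx as nx

    # Toy position graph standing in for agent._position_graph.
    g = nx.Graph()
    g.add_edge((3, 1), (3, 2), weight=1)
    g.add_edge((3, 1), (4, 1), weight=1)
    g.add_edge((4, 1), (3, 2), weight=1)

    # Raising an edge weight steers shortest-path / TSP routing away from that edge.
    g[(3, 1)][(3, 2)]['weight'] = 4
    print(nx.shortest_path(g, (3, 1), (3, 2), weight='weight'))  # now routes via (4, 1)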