Code cleaning part 2

Julian Schönberger
2024-05-24 23:56:00 +02:00
parent 6e6ce9dc5d
commit 81f0f6e209
36 changed files with 421 additions and 495 deletions

View File

@ -1,34 +0,0 @@
agent:
classname: marl_factory_grid.algorithms.marl.networks.RecurrentAC
n_agents: 2
obs_emb_size: 96
action_emb_size: 16
hidden_size_actor: 64
hidden_size_critic: 64
use_agent_embedding: False
env:
classname: marl_factory_grid.environment.configs.marl_eval
env_name: "marl_eval/dirt_quadrant_eval_config"
n_agents: 2
max_steps: 250
pomdp_r: 2
stack_n_frames: 0
individual_rewards: True
train_render: False
eval_render: True
save_and_log: True
record: False
method: marl_factory_grid.algorithms.marl.LoopSEAC
algorithm:
gamma: 0.99
entropy_coef: 0.01
vf_coef: 0.05
n_steps: 0 # Maximum number of steps (n-TD) sampled before the next value and policy update. Default 0: Monte Carlo (full episode)
max_steps: 200000
advantage: "Advantage-AC" # Options: "Advantage-AC", "TD-Advantage-AC", "Reinforce"
pile-order: "dynamic" # Use "dynamic" to see emergent phenomenon and "smart" to prevent it
pile-observability: "single" # Options: "single", "all"
pile_all_done: "shared" # Options: "single", "all" ("single" for training, "all" for eval), "shared"
auxiliary_piles: False # Option that is only considered when pile-order = "agents"
chunk-episode: 20000 # Chunk size. (0 = update networks with full episode at once)
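The chunk-episode setting above caps how many transitions feed a single network update; a minimal sketch of that splitting, assuming a flat per-episode list of transitions (the helper name is not part of the code base):

def chunk_episode(transitions, chunk_size):
    # chunk_size == 0: update the networks with the full episode at once
    if chunk_size == 0:
        return [transitions]
    # otherwise split the episode into consecutive chunks of at most chunk_size steps
    return [transitions[i:i + chunk_size]
            for i in range(0, len(transitions), chunk_size)]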

View File

@ -1,35 +0,0 @@
agent:
classname: marl_factory_grid.algorithms.marl.networks.RecurrentAC
n_agents: 2
obs_emb_size: 96
action_emb_size: 16
hidden_size_actor: 64
hidden_size_critic: 64
use_agent_embedding: False
env:
classname: marl_factory_grid.environment.configs.marl_eval
env_name: "marl_eval/two_rooms_eval_config"
n_agents: 2
max_steps: 250
pomdp_r: 2
stack_n_frames: 0
individual_rewards: True
train_render: False
eval_render: True
save_and_log: True
record: False
method: marl_factory_grid.algorithms.marl.LoopSEAC
algorithm:
gamma: 0.99
entropy_coef: 0.01
vf_coef: 0.05
n_steps: 0 # Maximum number of steps (n-TD) sampled before the next value and policy update. Default 0: Monte Carlo (full episode)
max_steps: 260000
advantage: "Advantage-AC" # Options: "Advantage-AC", "TD-Advantage-AC", "Reinforce"
pile-order: "agents" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
pile-observability: "single" # Options: "single", "all"
pile_all_done: "distributed" # Options: "single", "all" ("single" for training, "all" and "distributed" for eval)
auxiliary_piles: True # Use True to see the emergent phenomenon and False to prevent it
chunk-episode: 20000 # Chunk size. (0 = update networks with full episode at once)

View File

@ -1,44 +1,23 @@
import copy
import os
import random
import imageio # requires an ffmpeg installation on the operating system and the imageio-ffmpeg package for Python
from scipy import signal
import matplotlib.pyplot as plt
import torch
from typing import Union, List, Dict
from typing import Union, List
import numpy as np
from torch.distributions import Categorical
from marl_factory_grid.algorithms.marl.base_a2c import PolicyGradient, cumulate_discount
from marl_factory_grid.algorithms.utils import add_env_props, instantiate_class
from pathlib import Path
from collections import deque
from marl_factory_grid.environment.actions import Noop
from marl_factory_grid.modules import Clean, DoorUse
from marl_factory_grid.algorithms.rl.base_a2c import PolicyGradient, cumulate_discount
from marl_factory_grid.algorithms.utils import add_env_props
from marl_factory_grid.utils.plotting.plot_single_runs import plot_action_maps
class Names:
REWARD = 'reward'
DONE = 'done'
ACTION = 'action'
OBSERVATION = 'observation'
LOGITS = 'logits'
HIDDEN_ACTOR = 'hidden_actor'
HIDDEN_CRITIC = 'hidden_critic'
AGENT = 'agent'
ENV = 'env'
ENV_NAME = 'env_name'
N_AGENTS = 'n_agents'
ALGORITHM = 'algorithm'
MAX_STEPS = 'max_steps'
N_STEPS = 'n_steps'
BUFFER_SIZE = 'buffer_size'
CRITIC = 'critic'
BATCH_SIZE = 'batch_size'
N_ACTIONS = 'n_actions'
TRAIN_RENDER = 'train_render'
EVAL_RENDER = 'eval_render'
@ -55,7 +34,7 @@ class A2C:
self.train_cfg = train_cfg
self.eval_cfg = eval_cfg
self.cfg = train_cfg
self.n_agents = train_cfg[nms.AGENT][nms.N_AGENTS]
self.n_agents = train_cfg[nms.ENV][nms.N_AGENTS]
self.setup()
self.reward_development = []
self.action_probabilities = {agent_idx:[] for agent_idx in range(self.n_agents)}
@ -80,8 +59,6 @@ class A2C:
os.mkdir(self.results_path)
# Save settings in results folder
self.save_configs()
if self.cfg[nms.ENV]["record"]:
self.recorder = imageio.get_writer(f'{self.results_path}/pygame_recording.mp4', fps=5)
def set_cfg(self, eval=False):
if eval:
@ -610,8 +587,6 @@ class A2C:
obs = env.reset()
self.set_agent_spawnpoint(env)
if self.cfg[nms.ENV][nms.EVAL_RENDER]:
if self.cfg[nms.ENV]["save_and_log"] and self.cfg[nms.ENV]["record"]:
env.set_recorder(self.recorder)
if self.cfg[nms.ALGORITHM]["auxiliary_piles"]:
# Don't render auxiliary piles
auxiliary_piles = [pile for idx, pile in enumerate(env.state.entities['DirtPiles']) if idx % 2 == 0]
@ -664,10 +639,6 @@ class A2C:
episode += 1
# Properly finalize the video file
if self.cfg[nms.ENV]["save_and_log"] and self.cfg[nms.ENV]["record"]:
self.recorder.close()
def plot_reward_development(self):
smoothed_data = np.convolve(self.reward_development, np.ones(10) / 10, mode='valid')
plt.plot(smoothed_data)
@ -689,16 +660,14 @@ class A2C:
def save_agent_models(self):
for idx, agent in enumerate(self.agents):
agent_name = list(self.factory.state.agents_conf.keys())[idx]
agent.pi.save_model_parameters(self.results_path, agent_name)
agent.vf.save_model_parameters(self.results_path, agent_name)
agent.pi.save_model_parameters(self.results_path)
agent.vf.save_model_parameters(self.results_path)
def load_agents(self, runs_list):
for idx, run in enumerate(runs_list):
run_path = f"../study_out/{run}"
agent_name = list(self.eval_factory.state.agents_conf.keys())[idx]
self.agents[idx].pi.load_model_parameters(f"{run_path}/{agent_name}_PolicyNet_model_parameters.pth")
self.agents[idx].vf.load_model_parameters(f"{run_path}/{agent_name}_ValueNet_model_parameters.pth")
self.agents[idx].pi.load_model_parameters(f"{run_path}/PolicyNet_model_parameters.pth")
self.agents[idx].vf.load_model_parameters(f"{run_path}/ValueNet_model_parameters.pth")
def create_info_maps(self, env, used_actions):
# Create value map

View File

@ -19,11 +19,11 @@ class Net(th.nn.Module):
if module.bias is not None:
nn.init.uniform_(module.bias, a=-0.1, b=0.1)
def save_model(self, path, agent_name):
th.save(self.net, f"{path}/{agent_name}_{self.__class__.__name__}_model.pth")
def save_model(self, path):
th.save(self.net, f"{path}/{self.__class__.__name__}_model.pth")
def save_model_parameters(self, path, agent_name):
th.save(self.net.state_dict(), f"{path}/{agent_name}_{self.__class__.__name__}_model_parameters.pth")
def save_model_parameters(self, path):
th.save(self.net.state_dict(), f"{path}/{self.__class__.__name__}_model_parameters.pth")
def load_model_parameters(self, path):
self.net.load_state_dict(th.load(path))
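With the agent_name prefix dropped from the checkpoint file names, each run folder is expected to hold exactly one agent's networks, matching the load paths used in A2C.load_agents above. A hypothetical round trip under that assumption, where policy_net stands for an instance of the PolicyNet subclass of Net and the run folder path is assumed:

# Hypothetical usage; one agent's networks per run folder is an assumption
policy_net.save_model_parameters("../study_out/run0")
#   -> writes ../study_out/run0/PolicyNet_model_parameters.pth
policy_net.load_model_parameters("../study_out/run0/PolicyNet_model_parameters.pth")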

View File

@ -0,0 +1,11 @@
env:
classname: marl_factory_grid.environment.configs.marl_eval
env_name: "marl_eval/dirt_quadrant_eval_config"
n_agents: 2
eval_render: True
save_and_log: False
algorithm:
pile-order: "smart" # Use "dynamic" to see emergent phenomenon and "smart" to prevent it
pile-observability: "single" # Options: "single", "all"
pile_all_done: "shared" # Options: "single", "all" ("single" for training, "all" for eval), "shared"
auxiliary_piles: False # Dirt quadrant does not use this option

View File

@ -0,0 +1,11 @@
env:
classname: marl_factory_grid.environment.configs.marl_eval
env_name: "marl_eval/dirt_quadrant_eval_config"
n_agents: 2
eval_render: True
save_and_log: False
algorithm:
pile-order: "dynamic" # Use "dynamic" to see emergent phenomenon and "smart" to prevent it
pile-observability: "single" # Options: "single", "all"
pile_all_done: "shared" # Options: "single", "all" ("single" for training, "all" for eval), "shared"
auxiliary_piles: False # Dirt quadrant does not use this option

View File

@ -0,0 +1,13 @@
env:
classname: marl_factory_grid.environment.configs.marl_eval
env_name: "marl_eval/two_rooms_eval_config"
n_agents: 2
eval_render: True
save_and_log: False
algorithm:
pile-order: "agents" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
pile-observability: "single" # Options: "single", "all"
pile_all_done: "distributed" # Options: "single", "all" ("single" for training, "all" and "distributed" for eval)
auxiliary_piles: True # Use True to see the emergent phenomenon and False to prevent it

View File

@ -0,0 +1,13 @@
env:
classname: marl_factory_grid.environment.configs.marl_eval
env_name: "marl_eval/two_rooms_eval_config_emergent"
n_agents: 2
eval_render: True
save_and_log: False
algorithm:
pile-order: "agents" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
pile-observability: "single" # Options: "single", "all"
pile_all_done: "distributed" # Options: "single", "all" ("single" for training, "all" and "distributed" for eval)
auxiliary_piles: False # Use True to see the emergent phenomenon and False to prevent it

View File

@ -0,0 +1,12 @@
env:
classname: marl_factory_grid.environment.configs.rl
env_name: "rl/dirt_quadrant_agent1_eval_config"
n_agents: 1
eval_render: True
save_and_log: False
algorithm:
pile-order: "fixed" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
pile-observability: "single" # Options: "single", "all"
pile_all_done: "all" #
auxiliary_piles: False # Dirt quadrant does not use this option

View File

@ -1,34 +1,17 @@
agent:
classname: marl_factory_grid.algorithms.marl.networks.RecurrentAC
n_agents: 1
obs_emb_size: 96
action_emb_size: 16
hidden_size_actor: 64
hidden_size_critic: 64
use_agent_embedding: False
env:
classname: marl_factory_grid.environment.configs.rl
env_name: "rl/dirt_quadrant_train_config"
env_name: "rl/dirt_quadrant_agent1_train_config"
n_agents: 1
max_steps: 250
pomdp_r: 2
stack_n_frames: 0
individual_rewards: True
train_render: False
eval_render: True
save_and_log: True
record: False
method: marl_factory_grid.algorithms.marl.LoopSEAC
algorithm:
gamma: 0.99
entropy_coef: 0.01
vf_coef: 0.05
n_steps: 0 # Maximum number of steps (n-TD) sampled before the next value and policy update. Default 0: Monte Carlo (full episode)
max_steps: 240000
chunk-episode: 20000 # Chunk size. (0 = update networks with full episode at once)
max_steps: 140000
advantage: "Advantage-AC" # Options: "Advantage-AC", "TD-Advantage-AC", "Reinforce"
pile-order: "fixed" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
pile-observability: "single" # Options: "single", "all"
pile_all_done: "single" # Options: "single", "all" ("single" for training, "all" for eval)
auxiliary_piles: False # Option that is only considered when pile-order = "agents"
chunk-episode: 20000 # Chunk size. (0 = update networks with full episode at once)
auxiliary_piles: False # Dirt quadrant does not use this option
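The advantage option in the config above selects how the policy-gradient weight is formed from returns and value estimates. A rough sketch of the three listed variants, not the repository's implementation; array names and shapes are assumptions:

import numpy as np

def advantage_estimate(rewards, values, gamma, kind="Advantage-AC"):
    # rewards, values: per-step arrays for one episode (shapes assumed)
    rewards = np.asarray(rewards, dtype=float)
    values = np.asarray(values, dtype=float)
    # discounted reward-to-go G_t for every step t
    returns = np.array([sum(gamma ** k * r for k, r in enumerate(rewards[t:]))
                        for t in range(len(rewards))])
    if kind == "Reinforce":
        return returns                          # plain Monte Carlo return G_t
    if kind == "Advantage-AC":
        return returns - values                 # A_t = G_t - V(s_t)
    # "TD-Advantage-AC": one-step TD error r_t + gamma * V(s_{t+1}) - V(s_t)
    next_values = np.append(values[1:], 0.0)
    return rewards + gamma * next_values - values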

View File

@ -0,0 +1,13 @@
env:
classname: marl_factory_grid.environment.configs.rl
env_name: "rl/two_rooms_eval_config"
n_agents: 1
eval_render: True
save_and_log: False
algorithm:
pile-order: "fixed" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
pile-observability: "single" # Options: "single", "all"
pile_all_done: "all" # Options: "single", "all" ("single" for training, "all" for eval)
auxiliary_piles: False # Auxiliary piles are only used during marl eval

View File

@ -1,35 +1,17 @@
agent:
classname: marl_factory_grid.algorithms.marl.networks.RecurrentAC
n_agents: 1
obs_emb_size: 96
action_emb_size: 16
hidden_size_actor: 64
hidden_size_critic: 64
use_agent_embedding: False
env:
classname: marl_factory_grid.environment.configs.rl
env_name: "rl/two_rooms_train_config"
n_agents: 1
max_steps: 250
pomdp_r: 2
stack_n_frames: 0
individual_rewards: True
train_render: False
eval_render: True
save_and_log: False
record: False
method: marl_factory_grid.algorithms.marl.LoopSEAC
save_and_log: True
algorithm:
gamma: 0.99
entropy_coef: 0.01
vf_coef: 0.05
n_steps: 0 # Maximum number of steps (n-TD) sampled before the next value and policy update. Default 0: Monte Carlo (full episode)
chunk-episode: 20000 # Chunk size. (0 = update networks with full episode at once)
max_steps: 260000
advantage: "Advantage-AC" # Options: "Advantage-AC", "TD-Advantage-AC", "Reinforce"
pile-order: "fixed" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
pile-observability: "single" # Options: "single", "all"
pile_all_done: "single" # Options: "single", "all" ("single" for training, "all" for eval)
auxiliary_piles: False # Option that is only considered when pile-order = "agents"
chunk-episode: 20000 # Chunk size. (0 = update networks with full episode at once)
auxiliary_piles: False # Auxiliary piles are only used during marl eval

View File

@ -6,7 +6,7 @@ from networkx.algorithms.approximation import traveling_salesman as tsp
import time
import copy
from marl_factory_grid.algorithms.static.utils import points_to_graph
from marl_factory_grid.algorithms.tsp.utils import points_to_graph
from marl_factory_grid.modules.doors import constants as do
from marl_factory_grid.environment import constants as c
from marl_factory_grid.utils.helpers import MOVEMAP

View File

@ -1,4 +1,4 @@
from marl_factory_grid.algorithms.static.TSP_base_agent import TSPBaseAgent
from marl_factory_grid.algorithms.tsp.TSP_base_agent import TSPBaseAgent
from marl_factory_grid.modules.clean_up import constants as di
from marl_factory_grid.environment import constants as c

View File

@ -1,4 +1,4 @@
from marl_factory_grid.algorithms.static.TSP_base_agent import TSPBaseAgent
from marl_factory_grid.algorithms.tsp.TSP_base_agent import TSPBaseAgent
from marl_factory_grid.modules.destinations import constants as d
from marl_factory_grid.modules.doors import constants as do

View File

@ -64,13 +64,6 @@ def add_env_props(cfg):
factory = Factory(env_path)
_ = factory.reset()
# Agent Init
if len(factory.state.moving_entites) == 1: # Single agent setting
observation_size = list(factory.observation_space.shape)
else: # Multi-agent setting
observation_size = list(factory.observation_space[0].shape)
cfg['agent'].update(dict(observation_size=observation_size, n_actions=factory.action_space[0].n))
return factory

View File

@ -5,18 +5,17 @@ General:
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: quadrant
# Radius of Partially observable Markov decision process
pomdp_r: 0 # default 3
# View Radius
pomdp_r: 0 # 0 = full observability
# Print all messages and events
verbose: false
# Run tests
tests: false
# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
# other agents aim to clean dirt piles.
# Define Agents, their actions, observations and spawnpoints
Agents:
# The clean agents
Sigmund:
Agent1:
Actions:
- Move4
- Noop
@ -25,7 +24,7 @@ Agents:
- Self
Positions:
- (9,1)
Wolfgang:
Agent2:
Actions:
- Move4
- Noop
@ -37,8 +36,8 @@ Agents:
Entities:
DirtPiles:
coords_or_quantity: (9,9), (7,9), (4,7), (2,4), (1, 1) # (4,7), (2,4), (1, 1) # (1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
initial_amount: 0.5 # <1 to ensure that the robot that first attempts to clean this field can remove the dirt in one action
coords_or_quantity: (9,9), (7,9), (4,7), (2,4), (1, 1)
initial_amount: 0.5
clean_amount: 1
dirt_spawn_r_var: 0
max_global_amount: 12
@ -46,7 +45,6 @@ Entities:
# Rules section specifies the rules governing the dynamics of the environment.
Rules:
# Utilities
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
# Can be omitted/ignored if you do not want to take care of collisions at all.
@ -57,5 +55,3 @@ Rules:
# Define the conditions for the environment to stop. Either a success or a fail condition.
# The environment stops when all dirt is cleaned
DoneOnAllDirtCleaned:
#DoneAtMaxStepsReached:
#max_steps: 200

View File

@ -1,20 +1,20 @@
General:
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
env_seed: 69
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: two_rooms
# View Radius; 0 = full observability
pomdp_r: 0
# View Radius
pomdp_r: 0 # 0 = full observability
# Print all messages and events
verbose: false
# Run tests
tests: false
# In "two rooms one door" scenario 2 agents spawn in 2 different rooms that are connected by a single door. Their aim
# is to reach the destination in the room they didn't spawn in, leading to a conflict at the door.
# Define Agents, their actions, observations and spawnpoints
Agents:
Sigmund:
Agent1:
Actions:
- Move4
- DoorUse
@ -24,7 +24,7 @@ Agents:
- Self
Positions:
- (3,1)
Wolfgang:
Agent2:
Actions:
- Move4
- DoorUse
@ -36,9 +36,10 @@ Agents:
- (3,13)
Entities:
# For RL-agent we model the flags as dirt piles to be more flexible
DirtPiles:
coords_or_quantity: (2,1), (3,12), (2,13), (3,2) # Static form: auxiliary pile, primary pile, auxiliary pile, ...
initial_amount: 0.5 # <1 to ensure that the robot that first attempts to clean this field can remove the dirt in one action
initial_amount: 0.5
clean_amount: 1
dirt_spawn_r_var: 0
max_global_amount: 12
@ -47,16 +48,13 @@ Entities:
Doors: { }
Rules:
# Environment Dynamics
#DoorAutoClose:
#close_frequency: 10
# Utilities
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
WatchCollisions:
done_at_collisions: false
# Done Conditions
#DoneOnAllDirtCleaned:
# Define the conditions for the environment to stop. Either a success or a fail condition.
# Environment execution stops after 30 steps
DoneAtMaxStepsReached:
max_steps: 50
max_steps: 30

View File

@ -1,20 +1,20 @@
General:
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
env_seed: 69
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: two_rooms
# View Radius; 0 = full observability
pomdp_r: 0
# View Radius
pomdp_r: 0 # 0 = full observability
# Print all messages and events
verbose: false
# Run tests
tests: false
# In "two rooms one door" scenario 2 agents spawn in 2 different rooms that are connected by a single door. Their aim
# is to reach the destination in the room they didn't spawn in, leading to a conflict at the door.
# Define Agents, their actions, observations and spawnpoints
Agents:
Sigmund:
Agent1:
Actions:
- Move4
- DoorUse
@ -24,7 +24,7 @@ Agents:
- Self
Positions:
- (3,1)
Wolfgang:
Agent2:
Actions:
- Move4
- DoorUse
@ -36,9 +36,10 @@ Agents:
- (3,13)
Entities:
# For RL-agent we model the flags as dirt piles to be more flexible
DirtPiles:
coords_or_quantity: (3,12), (3,2) # Static form: auxiliary pile, primary pile, auxiliary pile, ...
initial_amount: 0.5 # <1 to ensure that the robot that first attempts to clean this field can remove the dirt in one action
coords_or_quantity: (3,12), (3,2) # Locations of flags
initial_amount: 0.5
clean_amount: 1
dirt_spawn_r_var: 0
max_global_amount: 12
@ -47,16 +48,13 @@ Entities:
Doors: { }
Rules:
# Environment Dynamics
#DoorAutoClose:
#close_frequency: 10
# Utilities
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
WatchCollisions:
done_at_collisions: false
# Done Conditions
#DoneOnAllDirtCleaned:
# Define the conditions for the environment to stop. Either a success or a fail condition.
# Environment execution stops after 30 steps
DoneAtMaxStepsReached:
max_steps: 30

View File

@ -0,0 +1,48 @@
General:
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
env_seed: 69
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: quadrant
# View Radius
pomdp_r: 0 # 0 = full observability
# Print all messages and events
verbose: false
# Run tests
tests: false
# Define Agents, their actions, observations and spawnpoints
Agents:
# The clean agents
Agent1:
Actions:
- Move4
- Noop
Observations:
- DirtPiles
- Self
Positions:
- (9,1)
Entities:
DirtPiles:
coords_or_quantity: (1, 1), (2,4), (4,7), (7,9), (9,9) # Locations of dirt piles
initial_amount: 0.5
clean_amount: 1
dirt_spawn_r_var: 0
max_global_amount: 12
max_local_amount: 1
# Rules section specifies the rules governing the dynamics of the environment.
Rules:
# Utilities
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
# Can be omitted/ignored if you do not want to take care of collisions at all.
WatchCollisions:
done_at_collisions: false
# Done Conditions
# Define the conditions for the environment to stop. Either a success or a fail condition.
# The environment stops when all dirt is cleaned
DoneOnAllDirtCleaned:

View File

@ -5,61 +5,38 @@ General:
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: quadrant
# Radius of Partially observable Markov decision process
pomdp_r: 0 # default 3
# View Radius
pomdp_r: 0 # 0 = full observability
# Print all messages and events
verbose: false
# Run tests
tests: false
# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
# other agents aim to clean dirt piles.
# Define Agents, their actions, observations and spawnpoints
Agents:
# The clean agents
#Sigmund:
#Actions:
#- Move4
#Observations:
#- DirtPiles
#- Self
#Positions:
#- (9,1)
#- (1,1)
#- (2,4)
#- (4,7)
#- (6,8)
#- (7,9)
#- (2,4)
#- (4,7)
#- (6,8)
#- (7,9)
#- (9,9)
#- (9,1)
Wolfgang:
Agent1:
Actions:
- Move4
Observations:
- DirtPiles
- Self
Positions:
- (9,5)
- (9,1)
- (1,1)
- (2,4)
- (4,7)
- (6,8)
- (7,9)
- (2,4)
- (4,7)
- (6,8)
- (7,9)
- (9,9)
- (9,5)
- (9,1)
Entities:
DirtPiles:
coords_or_quantity: (1, 1), (2,4), (4,7), (6,8), (7,9), (9,9) # (4,7), (2,4), (1, 1) #(1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
initial_amount: 0.5 # <1 to ensure that the robot that first attempts to clean this field can remove the dirt in one action
coords_or_quantity: (1, 1), (2,4), (4,7), (7,9), (9,9) # Locations of dirt piles
initial_amount: 0.5
clean_amount: 1
dirt_spawn_r_var: 0
max_global_amount: 12
@ -67,7 +44,6 @@ Entities:
# Rules section specifies the rules governing the dynamics of the environment.
Rules:
# Utilities
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
# Can be omitted/ignored if you do not want to take care of collisions at all.
@ -78,8 +54,6 @@ Rules:
# Define the conditions for the environment to stop. Either a success or a fail condition.
# The environment stops when all dirt is cleaned
DoneOnAllDirtCleaned:
#DoneAtMaxStepsReached: # An episode should last for at most max_steps steps
#max_steps: 1000
# Define how agents spawn.
# Options: "random" (Spawn agent at a random position from the list of defined positions)

View File

@ -1,78 +0,0 @@
General:
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
env_seed: 69
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: quadrant
# Radius of Partially observable Markov decision process
pomdp_r: 0 # default 3
# Print all messages and events
verbose: false
# Run tests
tests: false
# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
# other agents aim to clean dirt piles.
Agents:
# The clean agents
#Sigmund:
#Actions:
#- Move4
#- Noop
#Observations:
#- DirtPiles
#- Self
#Positions:
#- (9,1)
#- (1,1)
#- (2,4)
#- (4,7)
#- (7,9)
#- (2,4)
#- (4,7)
#- (7,9)
#- (9,9)
#- (9,1)
Wolfgang:
Actions:
- Move4
Observations:
- DirtPiles
- Self
Positions:
- (9,5)
#- (1,1)
#- (2,4)
#- (4,7)
#- (7,9)
#- (2,4)
#- (4,7)
#- (7,9)
#- (9,9)
#- (9,5)
Entities:
DirtPiles:
coords_or_quantity: (1, 1), (2,4), (4,7), (7,9), (9,9) #(9,9), (7,9), (4,7), (2,4), (1, 1) #(1, 1), (2,4), (4,7), (7,9), (9,9) # (4,7), (2,4), (1, 1) # (1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
initial_amount: 0.5 # <1 to ensure that the robot that first attempts to clean this field can remove the dirt in one action
clean_amount: 1
dirt_spawn_r_var: 0
max_global_amount: 12
max_local_amount: 1
# Rules section specifies the rules governing the dynamics of the environment.
Rules:
# Utilities
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
# Can be omitted/ignored if you do not want to take care of collisions at all.
WatchCollisions:
done_at_collisions: false
# Done Conditions
# Define the conditions for the environment to stop. Either a success or a fail condition.
# The environment stops when all dirt is cleaned
DoneOnAllDirtCleaned:
#DoneAtMaxStepsReached:
#max_steps: 200

View File

@ -0,0 +1,50 @@
General:
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
env_seed: 69
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: two_rooms
# View Radius
pomdp_r: 0 # 0 = full observability
# Print all messages and events
verbose: false
# Run tests
tests: false
# Define Agents, their actions, observations and spawnpoints
Agents:
Agent1:
Actions:
- Move4
- DoorUse
Observations:
- DirtPiles
- Self
Positions:
- (3,1)
- (2,1)
Entities:
DirtPiles:
coords_or_quantity: (2,1), (3,12) # Locations of dirt piles
initial_amount: 0.5
clean_amount: 1
dirt_spawn_r_var: 0
max_global_amount: 12
max_local_amount: 1
Doors: { }
# Rules section specifies the rules governing the dynamics of the environment.
Rules:
# Utilities
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
WatchCollisions:
done_at_collisions: false
# Done Conditions
# Define the conditions for the environment to stop. Either a success or a fail condition.
# Environment execution stops after 30 steps
DoneAtMaxStepsReached:
max_steps: 30

View File

@ -0,0 +1,60 @@
General:
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
env_seed: 69
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: two_rooms
# View Radius
pomdp_r: 0 # 0 = full observability
# Print all messages and events
verbose: false
# Run tests
tests: false
# Define Agents, their actions, observations and spawnpoints
Agents:
Agent1:
Actions:
- Move4
Observations:
- DirtPiles
- Self
Positions:
- (3,1)
- (1,1)
- (3,1)
- (5,1)
- (3,1)
- (1,8)
- (3,1)
- (5,8)
Entities:
DirtPiles:
coords_or_quantity: (2,1), (3,12) # Locations of dirt piles
initial_amount: 0.5
clean_amount: 1
dirt_spawn_r_var: 0
max_global_amount: 12
max_local_amount: 1
#Doors: { } # We leave out the door during training
Rules:
# Utilities
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
WatchCollisions:
done_at_collisions: false
# Done Conditions
# Define the conditions for the environment to stop. Either a success or a fail condition.
# The environment stops when all dirt is cleaned
DoneOnAllDirtCleaned:
# Define how agents spawn.
# Options: "random" (Spawn agent at a random position from the list of defined positions)
# "first" (Always spawn agent at first position regardless of the other provided positions)
# "order" (Loop through agent positions)
AgentSpawnRule:
spawn_rule: "order"

View File

@ -1,30 +1,20 @@
General:
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
env_seed: 69
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: two_rooms
# View Radius; 0 = full observability
pomdp_r: 0
# View Radius
pomdp_r: 0 # 0 = full observability
# Print all messages and events
verbose: false
# Run tests
tests: false
# In "two rooms one door" scenario 2 agents spawn in 2 different rooms that are connected by a single door. Their aim
# is to reach the destination in the room they didn't spawn in, leading to a conflict at the door.
# Define Agents, their actions, observations and spawnpoints
Agents:
#Sigmund:
#Actions:
#- Move4
#- DoorUse
#Observations:
#- DirtPiles
#- Self
#Positions:
#- (3,1)
#- (2,1)
Wolfgang:
Agent2:
Actions:
- Move4
- DoorUse
@ -37,8 +27,8 @@ Agents:
Entities:
DirtPiles:
coords_or_quantity: (2,13), (3,2) # (2,1), (3,12)
initial_amount: 0.5 # <1 to ensure that the robot that first attempts to clean this field can remove the dirt in one action
coords_or_quantity: (2,13), (3,2) # Locations of dirt piles
initial_amount: 0.5
clean_amount: 1
dirt_spawn_r_var: 0
max_global_amount: 12
@ -46,17 +36,15 @@ Entities:
Doors: { }
# Rules section specifies the rules governing the dynamics of the environment.
Rules:
# Environment Dynamics
#DoorAutoClose:
#close_frequency: 10
# Utilities
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
WatchCollisions:
done_at_collisions: false
# Done Conditions
#DoneOnAllDirtCleaned:
# Define the conditions for the environment to stop. Either a success or a fail condition.
# Environment execution stops after 30 steps
DoneAtMaxStepsReached:
max_steps: 50
max_steps: 30

View File

@ -1,35 +1,20 @@
General:
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
env_seed: 69
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: two_rooms
# View Radius; 0 = full observability
pomdp_r: 0
# View Radius
pomdp_r: 0 # 0 = full observability
# Print all messages and events
verbose: false
# Run tests
tests: false
# In "two rooms one door" scenario 2 agents spawn in 2 different rooms that are connected by a single door. Their aim
# is to reach the destination in the room they didn't spawn in, leading to a conflict at the door.
# Define Agents, their actions, observations and spawnpoints
Agents:
#Sigmund:
#Actions:
#- Move4
#Observations:
#- DirtPiles
#- Self
#Positions:
#- (3,1)
#- (1,1)
#- (3,1)
#- (5,1)
#- (3,1)
#- (1,8)
#- (3,1)
#- (5,8)
Wolfgang:
Agent2:
Actions:
- Move4
Observations:
@ -47,29 +32,30 @@ Agents:
Entities:
DirtPiles:
coords_or_quantity: (2,13), (3,2) # (2,1), (3,12)
initial_amount: 0.5 # <1 to ensure that the robot that first attempts to clean this field can remove the dirt in one action
coords_or_quantity: (2,13), (3,2) # Locations of dirt piles
initial_amount: 0.5
clean_amount: 1
dirt_spawn_r_var: 0
max_global_amount: 12
max_local_amount: 1
#Doors: { }
#Doors: { } # We leave out the door during training
# Rules section specifies the rules governing the dynamics of the environment.
Rules:
# Environment Dynamics
#DoorAutoClose:
#close_frequency: 10
# Utilities
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
WatchCollisions:
done_at_collisions: false
# Done Conditions
# Define the conditions for the environment to stop. Either a success or a fail condition.
# The environment stops when all dirt is cleaned
DoneOnAllDirtCleaned:
#DoneAtMaxStepsReached:
#max_steps: 100
# Defines how agents spawn.
# Options: "random" (Spawn agent at a random position from the list of defined positions)
# "first" (Always spawn agent at first position regardless of the other provided positions)
# "order" (Loop through agent positions)
AgentSpawnRule:
spawn_rule: "order"

View File

@ -5,37 +5,34 @@ General:
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: quadrant
# Radius of Partially observable Markov decision process
pomdp_r: 0 # default 3
# View Radius
pomdp_r: 0 # 0 = full observability
# Print all messages and events
verbose: false
# Run tests
tests: false
# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
# other agents aim to clean dirt piles.
# Define Agents, their actions, observations and spawnpoints
Agents:
# The clean agents
Wolfgang:
Agent1:
Actions:
- Move4
- Clean
- Noop
Observations:
- Walls
- Other
- DirtPiles
- Self
Positions:
- (9,1)
Reiner:
Agent2:
Actions:
- Move4
- Clean
- Noop
Observations:
- Walls
- Other
- DirtPiles
- Self
Positions:
@ -44,7 +41,7 @@ Agents:
Entities:
DirtPiles:
coords_or_quantity: (1, 1), (2,4), (4,7), (7,9), (9,9)
initial_amount: 0.5 # <1 to ensure that the robot that first attempts to clean this field can remove the dirt in one action
initial_amount: 0.5
clean_amount: 1
dirt_spawn_r_var: 0
max_global_amount: 12
@ -63,5 +60,3 @@ Rules:
# Define the conditions for the environment to stop. Either a success or a fail condition.
# The environment stops when all dirt is cleaned
DoneOnAllDirtCleaned:
DoneAtMaxStepsReached:
max_steps: 200

View File

@ -1,40 +1,38 @@
General:
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
env_seed: 69
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: two_rooms
# View Radius; 0 = full observability
pomdp_r: 0
# View Radius
pomdp_r: 0 # 0 = full observability
# Print all messages and events
verbose: false
# Run tests
tests: false
# In "two rooms one door" scenario 2 agents spawn in 2 different rooms that are connected by a single door. Their aim
# is to reach the destination in the room they didn't spawn in, leading to a conflict at the door.
# Define Agents, their actions, observations and spawnpoints
Agents:
Wolfgang:
Agent1:
Actions:
- Move4
- Noop
- DestAction
- DestAction # Action that is performed when the destination is reached
- DoorUse
Observations:
- Walls
- Other
- Doors
- Destination
Positions:
- (3,1) # Agent spawnpoint
Sigmund:
- (3,1)
Agent2:
Actions:
- Move4
- Noop
- DestAction
- DoorUse
Observations:
- Other
- Walls
- Destination
- Doors
@ -45,10 +43,11 @@ Entities:
Destinations:
spawnrule:
SpawnDestinationsPerAgent:
# Target coordinates
coords_or_quantity:
Wolfgang:
- (3,12) # Target coordinates
Sigmund:
Agent1:
- (3,12)
Agent2:
- (3,2)
Doors: { }
@ -68,10 +67,12 @@ Rules:
AssignGlobalPositions: { }
DoneAtDestinationReach:
reward_at_done: 1
reward_at_done: 50
# We want to give rewards only when all targets have been reached.
condition: "all"
# Done Conditions
# Define the conditions for the environment to stop. Either a success or a fail condition.
# Environment execution stops after 30 steps
DoneAtMaxStepsReached:
max_steps: 50
max_steps: 30
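With condition: "all", DoneAtDestinationReach should only end the episode and pay reward_at_done once every agent has reached its target. A toy version of that check (the flag representation and return values are assumptions, not the rule's actual API):

def destination_done(reached_flags, condition="all", reward_at_done=50):
    # reached_flags: one bool per agent destination
    done = all(reached_flags) if condition == "all" else any(reached_flags)
    return done, (reward_at_done if done else 0)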

View File

@ -293,9 +293,6 @@ class Factory(gym.Env):
render_entity.aux = self.obs_builder.curr_lightmaps[render_entity.real_name]
return self._renderer.render(render_entities)
def set_recorder(self, recorder):
self._recorder = recorder
def summarize_header(self):
header = {'rec_step': self.state.curr_step}
for entity_group in (x for x in self.state if x.name in ['Walls', 'DropOffLocations', 'ChargePods']):

View File

@ -3,7 +3,7 @@ from typing import List, Tuple
import numpy as np
from marl_factory_grid.algorithms.static.utils import points_to_graph
from marl_factory_grid.algorithms.tsp.utils import points_to_graph
from marl_factory_grid.environment import constants as c
from marl_factory_grid.environment.entity.entity import Entity
from marl_factory_grid.environment.rules import Rule, SpawnAgents

View File

@ -1,93 +0,0 @@
import copy
from pathlib import Path
from marl_factory_grid.algorithms.marl.a2c_dirt import A2C
from marl_factory_grid.algorithms.utils import load_yaml_file
def single_agent_training(config_name):
cfg_path = Path(f'../marl_factory_grid/algorithms/marl/single_agent_configs/{config_name}_config.yaml')
train_cfg = load_yaml_file(cfg_path)
# Use environment config with fixed spawnpoints for eval
eval_cfg = copy.deepcopy(train_cfg)
eval_cfg["env"]["env_name"] = f"rl/{config_name}_eval_config"
print("Training phase")
agent = A2C(train_cfg, eval_cfg)
agent.train_loop()
print("Evaluation phase")
# Have consecutive episode for eval in single agent case
train_cfg["algorithm"]["pile_all_done"] = "all"
agent.eval_loop(10)
def single_agent_eval(config_name, run):
cfg_path = Path(f'../marl_factory_grid/algorithms/marl/single_agent_configs/{config_name}_config.yaml')
train_cfg = load_yaml_file(cfg_path)
# Use environment config with fixed spawnpoints for eval
eval_cfg = copy.deepcopy(train_cfg)
eval_cfg["env"]["env_name"] = f"rl/{config_name}_eval_config"
agent = A2C(train_cfg, eval_cfg)
print("Evaluation phase")
agent.load_agents(run)
agent.eval_loop(1)
def multi_agent_eval(config_name, runs, emergent_phenomenon=False):
cfg_path = Path(f'../marl_factory_grid/algorithms/marl/multi_agent_configs/{config_name}_config.yaml')
eval_cfg = load_yaml_file(cfg_path)
# Sanity setting of required attributes and configs
if config_name == "two_rooms":
if emergent_phenomenon:
eval_cfg["env"]["env_name"] = f"marl_eval/{config_name}_eval_config_emergent"
eval_cfg["algorithm"]["auxiliary_piles"] = False
else:
eval_cfg["algorithm"]["auxiliary_piles"] = True
elif config_name == "dirt_quadrant":
if emergent_phenomenon:
eval_cfg["algorithm"]["pile-order"] = "dynamic"
else:
eval_cfg["algorithm"]["pile-order"] = "smart"
agent = A2C(train_cfg=eval_cfg, eval_cfg=eval_cfg)
print("Evaluation phase")
agent.load_agents(runs)
agent.eval_loop(1)
def dirt_quadrant_single_agent_training():
single_agent_training("dirt_quadrant")
def two_rooms_one_door_modified_single_agent_training():
single_agent_training("two_rooms")
def dirt_quadrant_single_agent_eval(agent_name):
if agent_name == "Sigmund":
run = "run0"
elif agent_name == "Wolfgang":
run = "run1"
single_agent_eval("dirt_quadrant", [run])
def two_rooms_one_door_modified_single_agent_eval(agent_name):
if agent_name == "Sigmund":
run = "run2"
elif agent_name == "Wolfgang":
run = "run3"
single_agent_eval("two_rooms", [run])
def dirt_quadrant_5_multi_agent_eval(emergent_phenomenon):
multi_agent_eval("dirt_quadrant", ["run4", "run5"], emergent_phenomenon)
def dirt_quadrant_5_multi_agent_ctde_eval(emergent_phenomenon): # run7 == run4
multi_agent_eval("dirt_quadrant", ["run4", "run7"], emergent_phenomenon)
def two_rooms_one_door_modified_multi_agent_eval(emergent_phenomenon):
multi_agent_eval("two_rooms", ["run2", "run3"], emergent_phenomenon)
if __name__ == '__main__':
two_rooms_one_door_modified_multi_agent_eval(False)

studies/rl_runs.py (new file, 75 lines)
View File

@ -0,0 +1,75 @@
from pathlib import Path
from marl_factory_grid.algorithms.rl.a2c_dirt import A2C
from marl_factory_grid.algorithms.utils import load_yaml_file
def dirt_quadrant_agent1_training():
train_cfg_path = Path(f'../marl_factory_grid/algorithms/rl/single_agent_configs/dirt_quadrant_train_config.yaml')
eval_cfg_path = Path(f'../marl_factory_grid/algorithms/rl/single_agent_configs/dirt_quadrant_eval_config.yaml')
train_cfg = load_yaml_file(train_cfg_path)
eval_cfg = load_yaml_file(eval_cfg_path)
print("Training phase")
agent = A2C(train_cfg, eval_cfg)
agent.train_loop()
print("Evaluation phase")
agent.eval_loop(n_episodes=1)
def two_rooms_training(max_steps, agent_name):
train_cfg_path = Path(f'../marl_factory_grid/algorithms/rl/single_agent_configs/two_rooms_train_config.yaml')
eval_cfg_path = Path(f'../marl_factory_grid/algorithms/rl/single_agent_configs/two_rooms_eval_config.yaml')
train_cfg = load_yaml_file(train_cfg_path)
eval_cfg = load_yaml_file(eval_cfg_path)
train_cfg["algorithm"]["max_steps"] = max_steps
train_cfg["env"]["env_name"] = f"rl/two_rooms_{agent_name}_train_config"
eval_cfg["env"]["env_name"] = f"rl/two_rooms_{agent_name}_eval_config"
print("Training phase")
agent = A2C(train_cfg, eval_cfg)
agent.train_loop()
print("Evaluation phase")
agent.eval_loop(n_episodes=1)
def two_rooms_agent1_training():
two_rooms_training(max_steps=190000, agent_name="agent1")
def two_rooms_agent2_training():
two_rooms_training(max_steps=260000, agent_name="agent2")
def single_agent_eval(config_name, run_folder_name):
eval_cfg_path = Path(f'../marl_factory_grid/algorithms/rl/single_agent_configs/{config_name}_eval_config.yaml')
train_cfg = eval_cfg = load_yaml_file(eval_cfg_path)
# A value for train_cfg is required, but the train environment won't be used
agent = A2C(train_cfg=train_cfg, eval_cfg=eval_cfg)
print("Evaluation phase")
agent.load_agents([run_folder_name])
agent.eval_loop(1)
def multi_agent_eval(config_name, runs, emergent_phenomenon=False):
eval_cfg_path = Path(f'../marl_factory_grid/algorithms/rl/multi_agent_configs/{config_name}' +
f'_eval_config{"_emergent" if emergent_phenomenon else ""}.yaml')
eval_cfg = load_yaml_file(eval_cfg_path)
# A value for train_cfg is required, but the train environment won't be used
agent = A2C(train_cfg=eval_cfg, eval_cfg=eval_cfg)
print("Evaluation phase")
agent.load_agents(runs)
agent.eval_loop(1)
def dirt_quadrant_multi_agent_ctde_eval(emergent_phenomenon):
multi_agent_eval("dirt_quadrant", ["run0", "run0"], emergent_phenomenon)
def two_rooms_multi_agent_eval(emergent_phenomenon):
multi_agent_eval("two_rooms", ["run1", "run2"], emergent_phenomenon)
if __name__ == '__main__':
dirt_quadrant_agent1_training()
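The __main__ block above only starts the dirt_quadrant training; the remaining functions in this file chain together in the same spirit. A hypothetical full sequence for the two_rooms setting (assuming the two training runs land in run1 and run2, the folders referenced by two_rooms_multi_agent_eval):

# Hypothetical end-to-end sequence; run folder names are an assumption
two_rooms_agent1_training()                              # single-agent training, agent 1
two_rooms_agent2_training()                              # single-agent training, agent 2
two_rooms_multi_agent_eval(emergent_phenomenon=False)    # joint evaluation of run1 and run2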

View File

@ -4,10 +4,11 @@ from pathlib import Path
from tqdm import trange
from marl_factory_grid.algorithms.static.TSP_dirt_agent import TSPDirtAgent
from marl_factory_grid.algorithms.static.TSP_target_agent import TSPTargetAgent
from marl_factory_grid.algorithms.tsp.TSP_dirt_agent import TSPDirtAgent
from marl_factory_grid.algorithms.tsp.TSP_target_agent import TSPTargetAgent
from marl_factory_grid.environment.factory import Factory
def get_dirt_quadrant_tsp_agents(emergent_phenomenon, factory):
agents = [TSPDirtAgent(factory, 0), TSPDirtAgent(factory, 1)]
if not emergent_phenomenon:
@ -31,13 +32,11 @@ def get_dirt_quadrant_tsp_agents(emergent_phenomenon, factory):
for u, v, weight in agent._position_graph.edges(data='weight'):
agent._position_graph[u][v]['weight'] = edge_costs[f"{u}-{v}"]
"""for u, v, weight in agent._position_graph.edges(data='weight'):
print(f"Edge ({u}-{v}) has weight: {weight}")"""
return agents
def get_two_rooms_one_door_modified_tsp_agents(emergent_phenomenon, factory):
def get_two_rooms_tsp_agents(emergent_phenomenon, factory):
agents = [TSPTargetAgent(factory, 0), TSPTargetAgent(factory, 1)]
if not emergent_phenomenon:
print(emergent_phenomenon)
@ -45,6 +44,7 @@ def get_two_rooms_one_door_modified_tsp_agents(emergent_phenomenon, factory):
agent._position_graph[(3, 1)][(3, 2)]['weight'] = 4
return agents
def run_tsp_setting(config_name, emergent_phenomenon):
# Render at each step?
render = True
@ -74,7 +74,7 @@ def run_tsp_setting(config_name, emergent_phenomenon):
if config_name == "dirt_quadrant":
agents = get_dirt_quadrant_tsp_agents(emergent_phenomenon, factory)
elif config_name == "two_rooms":
agents = get_two_rooms_one_door_modified_tsp_agents(emergent_phenomenon, factory)
agents = get_two_rooms_tsp_agents(emergent_phenomenon, factory)
else:
print("Config name does not exist. Abort...")
break
@ -95,7 +95,7 @@ def dirt_quadrant_multi_agent_tsp(emergent_phenomenon):
run_tsp_setting("dirt_quadrant", emergent_phenomenon)
def two_rooms_one_door_modified_multi_agent_tsp(emergent_phenomenon):
def two_rooms_multi_agent_tsp(emergent_phenomenon):
run_tsp_setting("two_rooms", emergent_phenomenon)