Mirror of https://github.com/illiumst/marl-factory-grid.git (synced 2025-07-08 02:21:36 +02:00)
Commit: Code cleaning part 2
@ -1,34 +0,0 @@
agent:
  classname: marl_factory_grid.algorithms.marl.networks.RecurrentAC
  n_agents: 2
  obs_emb_size: 96
  action_emb_size: 16
  hidden_size_actor: 64
  hidden_size_critic: 64
  use_agent_embedding: False
env:
  classname: marl_factory_grid.environment.configs.marl_eval
  env_name: "marl_eval/dirt_quadrant_eval_config"
  n_agents: 2
  max_steps: 250
  pomdp_r: 2
  stack_n_frames: 0
  individual_rewards: True
  train_render: False
  eval_render: True
  save_and_log: True
  record: False
method: marl_factory_grid.algorithms.marl.LoopSEAC
algorithm:
  gamma: 0.99
  entropy_coef: 0.01
  vf_coef: 0.05
  n_steps: 0 # How much experience should be sampled at most (n-TD) until the next value and policy update is performed. Default 0: MC
  max_steps: 200000
  advantage: "Advantage-AC" # Options: "Advantage-AC", "TD-Advantage-AC", "Reinforce"
  pile-order: "dynamic" # Use "dynamic" to see emergent phenomenon and "smart" to prevent it
  pile-observability: "single" # Options: "single", "all"
  pile_all_done: "shared" # Options: "single", "all" ("single" for training, "all" for eval), "shared"
  auxiliary_piles: False # Option that is only considered when pile-order = "agents"
  chunk-episode: 20000 # Chunk size. (0 = update networks with full episode at once)
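The n_steps option switches between full-episode Monte-Carlo returns (the default, 0) and bootstrapped n-step TD targets. A minimal sketch of the two update targets, assuming plain NumPy arrays for rewards and critic values (the repo's cumulate_discount helper is not shown here):

# Illustrative sketch only; names and shapes are assumptions, not the repo's exact API.
import numpy as np

def discounted_returns(rewards, gamma=0.99):
    """Monte-Carlo returns, i.e. what n_steps: 0 falls back to."""
    returns = np.zeros(len(rewards))
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        returns[t] = running
    return returns

def n_step_targets(rewards, values, gamma=0.99, n=5):
    """n-step TD targets used when n_steps > 0 (bootstraps from the critic)."""
    targets = np.zeros(len(rewards))
    for t in range(len(rewards)):
        end = min(t + n, len(rewards))
        g = sum(gamma ** (k - t) * rewards[k] for k in range(t, end))
        if end < len(rewards):
            g += gamma ** (end - t) * values[end]
        targets[t] = g
    return targets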
@ -1,35 +0,0 @@
agent:
  classname: marl_factory_grid.algorithms.marl.networks.RecurrentAC
  n_agents: 2
  obs_emb_size: 96
  action_emb_size: 16
  hidden_size_actor: 64
  hidden_size_critic: 64
  use_agent_embedding: False
env:
  classname: marl_factory_grid.environment.configs.marl_eval
  env_name: "marl_eval/two_rooms_eval_config"
  n_agents: 2
  max_steps: 250
  pomdp_r: 2
  stack_n_frames: 0
  individual_rewards: True
  train_render: False
  eval_render: True
  save_and_log: True
  record: False
method: marl_factory_grid.algorithms.marl.LoopSEAC
algorithm:
  gamma: 0.99
  entropy_coef: 0.01
  vf_coef: 0.05
  n_steps: 0 # How much experience should be sampled at most (n-TD) until the next value and policy update is performed. Default 0: MC
  max_steps: 260000
  advantage: "Advantage-AC" # Options: "Advantage-AC", "TD-Advantage-AC", "Reinforce"
  pile-order: "agents" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
  pile-observability: "single" # Options: "single", "all"
  pile_all_done: "distributed" # Options: "single", "all" ("single" for training, "all" and "distributed" for eval)
  auxiliary_piles: True # Use True to see emergent phenomenon and False to prevent it
  chunk-episode: 20000 # Chunk size. (0 = update networks with full episode at once)
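The advantage option selects how the policy-gradient weight is computed. A rough sketch of the three variants listed in the comment, with array names assumed for illustration:

# Illustrative sketch only; variable names are assumptions, not the repo's exact code.
import numpy as np

def policy_weight(advantage_kind, returns, values, rewards, gamma=0.99):
    if advantage_kind == "Reinforce":
        return returns                                  # weight log-probs by the raw discounted return
    if advantage_kind == "Advantage-AC":
        return returns - values                         # Monte-Carlo advantage: return minus critic value
    if advantage_kind == "TD-Advantage-AC":
        next_values = np.append(values[1:], 0.0)
        return rewards + gamma * next_values - values   # one-step TD error
    raise ValueError(advantage_kind)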
@ -1,44 +1,23 @@
import copy
import os
import random

import imageio  # requires ffmpeg install on operating system and imageio-ffmpeg package for python
from scipy import signal
import matplotlib.pyplot as plt
import torch
from typing import Union, List, Dict
from typing import Union, List
import numpy as np
from torch.distributions import Categorical

from marl_factory_grid.algorithms.marl.base_a2c import PolicyGradient, cumulate_discount
from marl_factory_grid.algorithms.utils import add_env_props, instantiate_class
from pathlib import Path
from collections import deque

from marl_factory_grid.environment.actions import Noop
from marl_factory_grid.modules import Clean, DoorUse
from marl_factory_grid.algorithms.rl.base_a2c import PolicyGradient, cumulate_discount
from marl_factory_grid.algorithms.utils import add_env_props
from marl_factory_grid.utils.plotting.plot_single_runs import plot_action_maps


class Names:
REWARD = 'reward'
DONE = 'done'
ACTION = 'action'
OBSERVATION = 'observation'
LOGITS = 'logits'
HIDDEN_ACTOR = 'hidden_actor'
HIDDEN_CRITIC = 'hidden_critic'
AGENT = 'agent'
ENV = 'env'
ENV_NAME = 'env_name'
N_AGENTS = 'n_agents'
ALGORITHM = 'algorithm'
MAX_STEPS = 'max_steps'
N_STEPS = 'n_steps'
BUFFER_SIZE = 'buffer_size'
CRITIC = 'critic'
BATCH_SIZE = 'bnatch_size'
N_ACTIONS = 'n_actions'
TRAIN_RENDER = 'train_render'
EVAL_RENDER = 'eval_render'

@ -55,7 +34,7 @@ class A2C:
self.train_cfg = train_cfg
self.eval_cfg = eval_cfg
self.cfg = train_cfg
self.n_agents = train_cfg[nms.AGENT][nms.N_AGENTS]
self.n_agents = train_cfg[nms.ENV][nms.N_AGENTS]
self.setup()
self.reward_development = []
self.action_probabilities = {agent_idx:[] for agent_idx in range(self.n_agents)}

@ -80,8 +59,6 @@ class A2C:
os.mkdir(self.results_path)
# Save settings in results folder
self.save_configs()
if self.cfg[nms.ENV]["record"]:
self.recorder = imageio.get_writer(f'{self.results_path}/pygame_recording.mp4', fps=5)

def set_cfg(self, eval=False):
if eval:

@ -610,8 +587,6 @@ class A2C:
obs = env.reset()
self.set_agent_spawnpoint(env)
if self.cfg[nms.ENV][nms.EVAL_RENDER]:
if self.cfg[nms.ENV]["save_and_log"] and self.cfg[nms.ENV]["record"]:
env.set_recorder(self.recorder)
if self.cfg[nms.ALGORITHM]["auxiliary_piles"]:
# Don't render auxiliary piles
auxiliary_piles = [pile for idx, pile in enumerate(env.state.entities['DirtPiles']) if idx % 2 == 0]

@ -664,10 +639,6 @@ class A2C:

episode += 1

# Properly finalize the video file
if self.cfg[nms.ENV]["save_and_log"] and self.cfg[nms.ENV]["record"]:
self.recorder.close()

def plot_reward_development(self):
smoothed_data = np.convolve(self.reward_development, np.ones(10) / 10, mode='valid')
plt.plot(smoothed_data)

@ -689,16 +660,14 @@ class A2C:

def save_agent_models(self):
for idx, agent in enumerate(self.agents):
agent_name = list(self.factory.state.agents_conf.keys())[idx]
agent.pi.save_model_parameters(self.results_path, agent_name)
agent.vf.save_model_parameters(self.results_path, agent_name)
agent.pi.save_model_parameters(self.results_path)
agent.vf.save_model_parameters(self.results_path)

def load_agents(self, runs_list):
for idx, run in enumerate(runs_list):
run_path = f"../study_out/{run}"
agent_name = list(self.eval_factory.state.agents_conf.keys())[idx]
self.agents[idx].pi.load_model_parameters(f"{run_path}/{agent_name}_PolicyNet_model_parameters.pth")
self.agents[idx].vf.load_model_parameters(f"{run_path}/{agent_name}_ValueNet_model_parameters.pth")
self.agents[idx].pi.load_model_parameters(f"{run_path}/PolicyNet_model_parameters.pth")
self.agents[idx].vf.load_model_parameters(f"{run_path}/ValueNet_model_parameters.pth")

def create_info_maps(self, env, used_actions):
# Create value map
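The reward curve above is smoothed with a simple moving average before plotting. A minimal stand-alone sketch of that step, assuming a plain array of episode rewards:

# Sketch of the moving-average smoothing used for the reward plot (window size 10).
import numpy as np
import matplotlib.pyplot as plt

rewards = np.random.rand(100)       # placeholder for self.reward_development
window = np.ones(10) / 10           # uniform 10-episode window
smoothed = np.convolve(rewards, window, mode='valid')
plt.plot(smoothed)
plt.xlabel("Episode")
plt.ylabel("Smoothed reward")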
@ -19,11 +19,11 @@ class Net(th.nn.Module):
if module.bias is not None:
nn.init.uniform_(module.bias, a=-0.1, b=0.1)

def save_model(self, path, agent_name):
th.save(self.net, f"{path}/{agent_name}_{self.__class__.__name__}_model.pth")
def save_model(self, path):
th.save(self.net, f"{path}/{self.__class__.__name__}_model.pth")

def save_model_parameters(self, path, agent_name):
th.save(self.net.state_dict(), f"{path}/{agent_name}_{self.__class__.__name__}_model_parameters.pth")
def save_model_parameters(self, path):
th.save(self.net.state_dict(), f"{path}/{self.__class__.__name__}_model_parameters.pth")

def load_model_parameters(self, path):
self.net.load_state_dict(th.load(path))
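For reference, a hedged round trip of the renamed save/load helpers above; the run folder and the stand-in network are placeholders, not the repo's classes:

# Minimal round trip of the parameter save/load shown above.
import os
import torch as th
import torch.nn as nn

path = "../study_out/run0"                  # placeholder results folder
os.makedirs(path, exist_ok=True)
net = nn.Linear(4, 2)                       # stand-in for the actor/critic network
th.save(net.state_dict(), f"{path}/PolicyNet_model_parameters.pth")
net.load_state_dict(th.load(f"{path}/PolicyNet_model_parameters.pth"))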
@ -0,0 +1,11 @@
env:
  classname: marl_factory_grid.environment.configs.marl_eval
  env_name: "marl_eval/dirt_quadrant_eval_config"
  n_agents: 2
  eval_render: True
  save_and_log: False
algorithm:
  pile-order: "smart" # Use "dynamic" to see emergent phenomenon and "smart" to prevent it
  pile-observability: "single" # Options: "single", "all"
  pile_all_done: "shared" # Options: "single", "all" ("single" for training, "all" for eval), "shared"
  auxiliary_piles: False # Dirt quadrant does not use this option
@ -0,0 +1,11 @@
env:
  classname: marl_factory_grid.environment.configs.marl_eval
  env_name: "marl_eval/dirt_quadrant_eval_config"
  n_agents: 2
  eval_render: True
  save_and_log: False
algorithm:
  pile-order: "dynamic" # Use "dynamic" to see emergent phenomenon and "smart" to prevent it
  pile-observability: "single" # Options: "single", "all"
  pile_all_done: "shared" # Options: "single", "all" ("single" for training, "all" for eval), "shared"
  auxiliary_piles: False # Dirt quadrant does not use this option
@ -0,0 +1,13 @@
env:
  classname: marl_factory_grid.environment.configs.marl_eval
  env_name: "marl_eval/two_rooms_eval_config"
  n_agents: 2
  eval_render: True
  save_and_log: False
algorithm:
  pile-order: "agents" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
  pile-observability: "single" # Options: "single", "all"
  pile_all_done: "distributed" # Options: "single", "all" ("single" for training, "all" and "distributed" for eval)
  auxiliary_piles: True # Use True to see emergent phenomenon and False to prevent it
@ -0,0 +1,13 @@
env:
  classname: marl_factory_grid.environment.configs.marl_eval
  env_name: "marl_eval/two_rooms_eval_config_emergent"
  n_agents: 2
  eval_render: True
  save_and_log: False
algorithm:
  pile-order: "agents" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
  pile-observability: "single" # Options: "single", "all"
  pile_all_done: "distributed" # Options: "single", "all" ("single" for training, "all" and "distributed" for eval)
  auxiliary_piles: False # Use True to see emergent phenomenon and False to prevent it
@ -0,0 +1,12 @@
env:
  classname: marl_factory_grid.environment.configs.rl
  env_name: "rl/dirt_quadrant_agent1_eval_config"
  n_agents: 1
  eval_render: True
  save_and_log: False
algorithm:
  pile-order: "fixed" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
  pile-observability: "single" # Options: "single", "all"
  pile_all_done: "all"
  auxiliary_piles: False # Dirt quadrant does not use this option
@ -1,34 +1,17 @@
agent:
  classname: marl_factory_grid.algorithms.marl.networks.RecurrentAC
  n_agents: 1
  obs_emb_size: 96
  action_emb_size: 16
  hidden_size_actor: 64
  hidden_size_critic: 64
  use_agent_embedding: False
env:
  classname: marl_factory_grid.environment.configs.rl
  env_name: "rl/dirt_quadrant_train_config"
  env_name: "rl/dirt_quadrant_agent1_train_config"
  n_agents: 1
  max_steps: 250
  pomdp_r: 2
  stack_n_frames: 0
  individual_rewards: True
  train_render: False
  eval_render: True
  save_and_log: True
  record: False
method: marl_factory_grid.algorithms.marl.LoopSEAC
algorithm:
  gamma: 0.99
  entropy_coef: 0.01
  vf_coef: 0.05
  n_steps: 0 # How much experience should be sampled at most (n-TD) until the next value and policy update is performed. Default 0: MC
  max_steps: 240000
  chunk-episode: 20000 # Chunk size. (0 = update networks with full episode at once)
  max_steps: 140000
  advantage: "Advantage-AC" # Options: "Advantage-AC", "TD-Advantage-AC", "Reinforce"
  pile-order: "fixed" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
  pile-observability: "single" # Options: "single", "all"
  pile_all_done: "single" # Options: "single", "all" ("single" for training, "all" for eval)
  auxiliary_piles: False # Option that is only considered when pile-order = "agents"
  chunk-episode: 20000 # Chunk size. (0 = update networks with full episode at once)
  auxiliary_piles: False # Dirt quadrant does not use this option
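chunk-episode caps how many transitions are pushed through the networks per update; long episodes are split into fixed-size chunks instead of one full-episode pass. A rough sketch of that splitting, with a flat transition buffer assumed for illustration:

# Illustrative sketch; the buffer layout and update call are assumptions, not the repo's code.
def iter_chunks(transitions, chunk_size=20000):
    if chunk_size <= 0:                      # 0 = update networks with the full episode at once
        yield transitions
        return
    for start in range(0, len(transitions), chunk_size):
        yield transitions[start:start + chunk_size]

# usage: for chunk in iter_chunks(episode_buffer): update_networks(chunk)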
@ -0,0 +1,13 @@
env:
  classname: marl_factory_grid.environment.configs.rl
  env_name: "rl/two_rooms_eval_config"
  n_agents: 1
  eval_render: True
  save_and_log: False
algorithm:
  pile-order: "fixed" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
  pile-observability: "single" # Options: "single", "all"
  pile_all_done: "all" # Options: "single", "all" ("single" for training, "all" for eval)
  auxiliary_piles: False # Auxiliary piles are only used during marl eval
@ -1,35 +1,17 @@
agent:
  classname: marl_factory_grid.algorithms.marl.networks.RecurrentAC
  n_agents: 1
  obs_emb_size: 96
  action_emb_size: 16
  hidden_size_actor: 64
  hidden_size_critic: 64
  use_agent_embedding: False
env:
  classname: marl_factory_grid.environment.configs.rl
  env_name: "rl/two_rooms_train_config"
  n_agents: 1
  max_steps: 250
  pomdp_r: 2
  stack_n_frames: 0
  individual_rewards: True
  train_render: False
  eval_render: True
  save_and_log: False
  record: False
method: marl_factory_grid.algorithms.marl.LoopSEAC
  save_and_log: True
algorithm:
  gamma: 0.99
  entropy_coef: 0.01
  vf_coef: 0.05
  n_steps: 0 # How much experience should be sampled at most (n-TD) until the next value and policy update is performed. Default 0: MC
  chunk-episode: 20000 # Chunk size. (0 = update networks with full episode at once)
  max_steps: 260000
  advantage: "Advantage-AC" # Options: "Advantage-AC", "TD-Advantage-AC", "Reinforce"
  pile-order: "fixed" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
  pile-observability: "single" # Options: "single", "all"
  pile_all_done: "single" # Options: "single", "all" ("single" for training, "all" for eval)
  auxiliary_piles: False # Option that is only considered when pile-order = "agents"
  chunk-episode: 20000 # Chunk size. (0 = update networks with full episode at once)
  auxiliary_piles: False # Auxiliary piles are only used during marl eval
@ -6,7 +6,7 @@ from networkx.algorithms.approximation import traveling_salesman as tsp
import time
import copy

from marl_factory_grid.algorithms.static.utils import points_to_graph
from marl_factory_grid.algorithms.tsp.utils import points_to_graph
from marl_factory_grid.modules.doors import constants as do
from marl_factory_grid.environment import constants as c
from marl_factory_grid.utils.helpers import MOVEMAP
@ -1,4 +1,4 @@
from marl_factory_grid.algorithms.static.TSP_base_agent import TSPBaseAgent
from marl_factory_grid.algorithms.tsp.TSP_base_agent import TSPBaseAgent

from marl_factory_grid.modules.clean_up import constants as di
from marl_factory_grid.environment import constants as c
@ -1,4 +1,4 @@
from marl_factory_grid.algorithms.static.TSP_base_agent import TSPBaseAgent
from marl_factory_grid.algorithms.tsp.TSP_base_agent import TSPBaseAgent

from marl_factory_grid.modules.destinations import constants as d
from marl_factory_grid.modules.doors import constants as do
@ -64,13 +64,6 @@ def add_env_props(cfg):
factory = Factory(env_path)
_ = factory.reset()

# Agent Init
if len(factory.state.moving_entites) == 1:  # Single agent setting
observation_size = list(factory.observation_space.shape)
else:  # Multi-agent setting
observation_size = list(factory.observation_space[0].shape)
cfg['agent'].update(dict(observation_size=observation_size, n_actions=factory.action_space[0].n))

return factory
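The removed block distinguished single- and multi-agent observation spaces: with one moving entity the env exposes a single space, with several it exposes one space per agent and only the first is read. A standalone sketch of that distinction, assuming gym-style spaces purely for illustration:

# Toy stand-in, assuming gym-style spaces (the repo indexes observation_space[0] directly).
from gym import spaces

single = spaces.Box(low=0.0, high=1.0, shape=(4, 5, 5))
multi = spaces.Tuple([spaces.Box(low=0.0, high=1.0, shape=(4, 5, 5)) for _ in range(2)])

obs_size_single = list(single.shape)           # single-agent setting
obs_size_multi = list(multi.spaces[0].shape)   # multi-agent setting: take the first agent's space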
@ -5,18 +5,17 @@ General:
  individual_rewards: true
  # The level.txt file to load from marl_factory_grid/levels
  level_name: quadrant
  # Radius of Partially observable Markov decision process
  pomdp_r: 0 # default 3
  # View Radius
  pomdp_r: 0 # 0 = full observability
  # Print all messages and events
  verbose: false
  # Run tests
  tests: false

# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
# other agents aim to clean dirt piles.
# Define Agents, their actions, observations and spawnpoints
Agents:
  # The clean agents
  Sigmund:
  Agent1:
    Actions:
      - Move4
      - Noop

@ -25,7 +24,7 @@ Agents:
      - Self
    Positions:
      - (9,1)
  Wolfgang:
  Agent2:
    Actions:
      - Move4
      - Noop

@ -37,8 +36,8 @@ Agents:

Entities:
  DirtPiles:
    coords_or_quantity: (9,9), (7,9), (4,7), (2,4), (1, 1) # (4,7), (2,4), (1, 1) # (1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
    coords_or_quantity: (9,9), (7,9), (4,7), (2,4), (1, 1)
    initial_amount: 0.5
    clean_amount: 1
    dirt_spawn_r_var: 0
    max_global_amount: 12

@ -46,7 +45,6 @@ Entities:

# Rules section specifies the rules governing the dynamics of the environment.
Rules:

  # Utilities
  # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
  # Can be omitted/ignored if you do not want to take care of collisions at all.

@ -57,5 +55,3 @@ Rules:
  # Define the conditions for the environment to stop. Either success or a fail conditions.
  # The environment stops when all dirt is cleaned
  DoneOnAllDirtCleaned:
  #DoneAtMaxStepsReached:
  #max_steps: 200
@ -1,20 +1,20 @@
General:
  # RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
  env_seed: 69
  # Individual vs global rewards
  individual_rewards: true
  # The level.txt file to load from marl_factory_grid/levels
  level_name: two_rooms
  # View Radius; 0 = full observatbility
  pomdp_r: 0
  # View Radius
  pomdp_r: 0 # 0 = full observability
  # Print all messages and events
  verbose: false
  # Run tests
  tests: false

# In "two rooms one door" scenario 2 agents spawn in 2 different rooms that are connected by a single door. Their aim
# is to reach the destination in the room they didn't spawn in leading to a conflict at the door.
# Define Agents, their actions, observations and spawnpoints
Agents:
  Sigmund:
  Agent1:
    Actions:
      - Move4
      - DoorUse

@ -24,7 +24,7 @@ Agents:
      - Self
    Positions:
      - (3,1)
  Wolfgang:
  Agent2:
    Actions:
      - Move4
      - DoorUse

@ -36,9 +36,10 @@ Agents:
      - (3,13)

Entities:
  # For RL-agent we model the flags as dirt piles to be more flexible
  DirtPiles:
    coords_or_quantity: (2,1), (3,12), (2,13), (3,2) # Static form: auxiliary pile, primary pile, auxiliary pile, ...
    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
    initial_amount: 0.5
    clean_amount: 1
    dirt_spawn_r_var: 0
    max_global_amount: 12

@ -47,16 +48,13 @@ Entities:
  Doors: { }

Rules:
  # Environment Dynamics
  #DoorAutoClose:
  #close_frequency: 10

  # Utilities
  # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
  WatchCollisions:
    done_at_collisions: false

  # Done Conditions
  #DoneOnAllDirtCleaned:
  # Define the conditions for the environment to stop. Either success or a fail conditions.
  # Environment execution stops after 30 steps
  DoneAtMaxStepsReached:
    max_steps: 50
    max_steps: 30
@ -1,20 +1,20 @@
General:
  # RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
  env_seed: 69
  # Individual vs global rewards
  individual_rewards: true
  # The level.txt file to load from marl_factory_grid/levels
  level_name: two_rooms
  # View Radius; 0 = full observatbility
  pomdp_r: 0
  # View Radius
  pomdp_r: 0 # 0 = full observability
  # Print all messages and events
  verbose: false
  # Run tests
  tests: false

# In "two rooms one door" scenario 2 agents spawn in 2 different rooms that are connected by a single door. Their aim
# is to reach the destination in the room they didn't spawn in leading to a conflict at the door.
# Define Agents, their actions, observations and spawnpoints
Agents:
  Sigmund:
  Agent1:
    Actions:
      - Move4
      - DoorUse

@ -24,7 +24,7 @@ Agents:
      - Self
    Positions:
      - (3,1)
  Wolfgang:
  Agent2:
    Actions:
      - Move4
      - DoorUse

@ -36,9 +36,10 @@ Agents:
      - (3,13)

Entities:
  # For RL-agent we model the flags as dirt piles to be more flexible
  DirtPiles:
    coords_or_quantity: (3,12), (3,2) # Static form: auxiliary pile, primary pile, auxiliary pile, ...
    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
    coords_or_quantity: (3,12), (3,2) # Locations of flags
    initial_amount: 0.5
    clean_amount: 1
    dirt_spawn_r_var: 0
    max_global_amount: 12

@ -47,16 +48,13 @@ Entities:
  Doors: { }

Rules:
  # Environment Dynamics
  #DoorAutoClose:
  #close_frequency: 10

  # Utilities
  # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
  WatchCollisions:
    done_at_collisions: false

  # Done Conditions
  #DoneOnAllDirtCleaned:
  # Define the conditions for the environment to stop. Either success or a fail conditions
  # Environment execution stops after 30 steps
  DoneAtMaxStepsReached:
    max_steps: 30
@ -0,0 +1,48 @@
General:
  # RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
  env_seed: 69
  # Individual vs global rewards
  individual_rewards: true
  # The level.txt file to load from marl_factory_grid/levels
  level_name: quadrant
  # View Radius
  pomdp_r: 0 # 0 = full observability
  # Print all messages and events
  verbose: false
  # Run tests
  tests: false

# Define Agents, their actions, observations and spawnpoints
Agents:
  # The clean agents
  Agent1:
    Actions:
      - Move4
      - Noop
    Observations:
      - DirtPiles
      - Self
    Positions:
      - (9,1)

Entities:
  DirtPiles:
    coords_or_quantity: (1, 1), (2,4), (4,7), (7,9), (9,9) # Locations of dirt piles
    initial_amount: 0.5
    clean_amount: 1
    dirt_spawn_r_var: 0
    max_global_amount: 12
    max_local_amount: 1

# Rules section specifies the rules governing the dynamics of the environment.
Rules:
  # Utilities
  # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
  # Can be omitted/ignored if you do not want to take care of collisions at all.
  WatchCollisions:
    done_at_collisions: false

  # Done Conditions
  # Define the conditions for the environment to stop. Either success or a fail conditions.
  # The environment stops when all dirt is cleaned
  DoneOnAllDirtCleaned:
@ -5,61 +5,38 @@ General:
  individual_rewards: true
  # The level.txt file to load from marl_factory_grid/levels
  level_name: quadrant
  # Radius of Partially observable Markov decision process
  pomdp_r: 0 # default 3
  # View Radius
  pomdp_r: 0 # 0 = full observability
  # Print all messages and events
  verbose: false
  # Run tests
  tests: false

# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
# other agents aim to clean dirt piles.
# Define Agents, their actions, observations and spawnpoints
Agents:
  # The clean agents
  #Sigmund:
    #Actions:
      #- Move4
    #Observations:
      #- DirtPiles
      #- Self
    #Positions:
      #- (9,1)
      #- (1,1)
      #- (2,4)
      #- (4,7)
      #- (6,8)
      #- (7,9)
      #- (2,4)
      #- (4,7)
      #- (6,8)
      #- (7,9)
      #- (9,9)
      #- (9,1)
  Wolfgang:
  Agent1:
    Actions:
      - Move4
    Observations:
      - DirtPiles
      - Self
    Positions:
      - (9,5)
      - (9,1)
      - (1,1)
      - (2,4)
      - (4,7)
      - (6,8)
      - (7,9)
      - (2,4)
      - (4,7)
      - (6,8)
      - (7,9)
      - (9,9)
      - (9,5)

      - (9,1)

Entities:
  DirtPiles:
    coords_or_quantity: (1, 1), (2,4), (4,7), (6,8), (7,9), (9,9) # (4,7), (2,4), (1, 1) #(1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
    coords_or_quantity: (1, 1), (2,4), (4,7), (7,9), (9,9) # Locations of dirt piles
    initial_amount: 0.5
    clean_amount: 1
    dirt_spawn_r_var: 0
    max_global_amount: 12

@ -67,7 +44,6 @@ Entities:

# Rules section specifies the rules governing the dynamics of the environment.
Rules:

  # Utilities
  # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
  # Can be omitted/ignored if you do not want to take care of collisions at all.

@ -78,8 +54,6 @@ Rules:
  # Define the conditions for the environment to stop. Either success or a fail conditions.
  # The environment stops when all dirt is cleaned
  DoneOnAllDirtCleaned:
  #DoneAtMaxStepsReached: # An episode should last for at most max_steps steps
  #max_steps: 1000

  # Define how agents spawn.
  # Options: "random" (Spawn agent at a random position from the list of defined positions)
@ -1,78 +0,0 @@
General:
  # RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
  env_seed: 69
  # Individual vs global rewards
  individual_rewards: true
  # The level.txt file to load from marl_factory_grid/levels
  level_name: quadrant
  # Radius of Partially observable Markov decision process
  pomdp_r: 0 # default 3
  # Print all messages and events
  verbose: false
  # Run tests
  tests: false

# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
# other agents aim to clean dirt piles.
Agents:
  # The clean agents
  #Sigmund:
    #Actions:
      #- Move4
      #- Noop
    #Observations:
      #- DirtPiles
      #- Self
    #Positions:
      #- (9,1)
      #- (1,1)
      #- (2,4)
      #- (4,7)
      #- (7,9)
      #- (2,4)
      #- (4,7)
      #- (7,9)
      #- (9,9)
      #- (9,1)
  Wolfgang:
    Actions:
      - Move4
    Observations:
      - DirtPiles
      - Self
    Positions:
      - (9,5)
      #- (1,1)
      #- (2,4)
      #- (4,7)
      #- (7,9)
      #- (2,4)
      #- (4,7)
      #- (7,9)
      #- (9,9)
      #- (9,5)

Entities:
  DirtPiles:
    coords_or_quantity: (1, 1), (2,4), (4,7), (7,9), (9,9) #(9,9), (7,9), (4,7), (2,4), (1, 1) #(1, 1), (2,4), (4,7), (7,9), (9,9) # (4,7), (2,4), (1, 1) # (1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
    clean_amount: 1
    dirt_spawn_r_var: 0
    max_global_amount: 12
    max_local_amount: 1

# Rules section specifies the rules governing the dynamics of the environment.
Rules:

  # Utilities
  # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
  # Can be omitted/ignored if you do not want to take care of collisions at all.
  WatchCollisions:
    done_at_collisions: false

  # Done Conditions
  # Define the conditions for the environment to stop. Either success or a fail conditions.
  # The environment stops when all dirt is cleaned
  DoneOnAllDirtCleaned:
  #DoneAtMaxStepsReached:
  #max_steps: 200
@ -0,0 +1,50 @@
General:
  # RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
  env_seed: 69
  # Individual vs global rewards
  individual_rewards: true
  # The level.txt file to load from marl_factory_grid/levels
  level_name: two_rooms
  # View Radius
  pomdp_r: 0 # 0 = full observability
  # Print all messages and events
  verbose: false
  # Run tests
  tests: false

# Define Agents, their actions, observations and spawnpoints
Agents:
  Agent1:
    Actions:
      - Move4
      - DoorUse
    Observations:
      - DirtPiles
      - Self
    Positions:
      - (3,1)
      - (2,1)

Entities:
  DirtPiles:
    coords_or_quantity: (2,1), (3,12) # Locations of dirt piles
    initial_amount: 0.5
    clean_amount: 1
    dirt_spawn_r_var: 0
    max_global_amount: 12
    max_local_amount: 1

  Doors: { }

# Rules section specifies the rules governing the dynamics of the environment.
Rules:
  # Utilities
  # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
  WatchCollisions:
    done_at_collisions: false

  # Done Conditions
  # Define the conditions for the environment to stop. Either success or a fail conditions
  # Environment execution stops after 30 steps
  DoneAtMaxStepsReached:
    max_steps: 30
@ -0,0 +1,60 @@
General:
  # RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
  env_seed: 69
  # Individual vs global rewards
  individual_rewards: true
  # The level.txt file to load from marl_factory_grid/levels
  level_name: two_rooms
  # View Radius
  pomdp_r: 0 # 0 = full observability
  # Print all messages and events
  verbose: false
  # Run tests
  tests: false

# Define Agents, their actions, observations and spawnpoints
Agents:
  Agent1:
    Actions:
      - Move4
    Observations:
      - DirtPiles
      - Self
    Positions:
      - (3,1)
      - (1,1)
      - (3,1)
      - (5,1)
      - (3,1)
      - (1,8)
      - (3,1)
      - (5,8)

Entities:
  DirtPiles:
    coords_or_quantity: (2,1), (3,12) # Locations of dirt piles
    initial_amount: 0.5
    clean_amount: 1
    dirt_spawn_r_var: 0
    max_global_amount: 12
    max_local_amount: 1

  #Doors: { } # We leave out the door during training

Rules:
  # Utilities
  # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
  WatchCollisions:
    done_at_collisions: false

  # Done Conditions
  # Define the conditions for the environment to stop. Either success or a fail conditions
  # The environment stops when all dirt is cleaned
  DoneOnAllDirtCleaned:

  # Define how agents spawn.
  # Options: "random" (Spawn agent at a random position from the list of defined positions)
  #          "first" (Always spawn agent at first position regardless of the other provided positions)
  #          "order" (Loop through agent positions)
  AgentSpawnRule:
    spawn_rule: "order"
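The spawn options above either fix or cycle the agent's start position between episodes. A toy re-implementation of the three choices, for illustration only (not the repo's AgentSpawnRule):

# Illustrative sketch of the spawn_rule options; position handling is assumed.
import random

def pick_spawn(positions, rule, episode):
    if rule == "first":
        return positions[0]                          # always the first listed position
    if rule == "random":
        return random.choice(positions)              # any of the listed positions
    if rule == "order":
        return positions[episode % len(positions)]   # loop through the list episode by episode
    raise ValueError(rule)

# e.g. pick_spawn([(3, 1), (1, 1), (3, 1), (5, 1)], "order", episode=2) -> (3, 1)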
@ -1,30 +1,20 @@
General:
  # RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
  env_seed: 69
  # Individual vs global rewards
  individual_rewards: true
  # The level.txt file to load from marl_factory_grid/levels
  level_name: two_rooms
  # View Radius; 0 = full observatbility
  pomdp_r: 0
  # View Radius
  pomdp_r: 0 # 0 = full observability
  # Print all messages and events
  verbose: false
  # Run tests
  tests: false

# In "two rooms one door" scenario 2 agents spawn in 2 different rooms that are connected by a single door. Their aim
# is to reach the destination in the room they didn't spawn in leading to a conflict at the door.
# Define Agents, their actions, observations and spawnpoints
Agents:
  #Sigmund:
    #Actions:
      #- Move4
      #- DoorUse
    #Observations:
      #- DirtPiles
      #- Self
    #Positions:
      #- (3,1)
      #- (2,1)
  Wolfgang:
  Agent2:
    Actions:
      - Move4
      - DoorUse

@ -37,8 +27,8 @@ Agents:

Entities:
  DirtPiles:
    coords_or_quantity: (2,13), (3,2) # (2,1), (3,12)
    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
    coords_or_quantity: (2,13), (3,2) # Locations of dirt piles
    initial_amount: 0.5
    clean_amount: 1
    dirt_spawn_r_var: 0
    max_global_amount: 12

@ -46,17 +36,15 @@ Entities:

  Doors: { }

# Rules section specifies the rules governing the dynamics of the environment.
Rules:
  # Environment Dynamics
  #DoorAutoClose:
  #close_frequency: 10

  # Utilities
  # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
  WatchCollisions:
    done_at_collisions: false

  # Done Conditions
  #DoneOnAllDirtCleaned:
  # Define the conditions for the environment to stop. Either success or a fail conditions
  # Environment execution stops after 30 steps
  DoneAtMaxStepsReached:
    max_steps: 50
    max_steps: 30
@ -1,35 +1,20 @@
General:
  # RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
  env_seed: 69
  # Individual vs global rewards
  individual_rewards: true
  # The level.txt file to load from marl_factory_grid/levels
  level_name: two_rooms
  # View Radius; 0 = full observatbility
  pomdp_r: 0
  # View Radius
  pomdp_r: 0 # 0 = full observability
  # Print all messages and events
  verbose: false
  # Run tests
  tests: false

# In "two rooms one door" scenario 2 agents spawn in 2 different rooms that are connected by a single door. Their aim
# is to reach the destination in the room they didn't spawn in leading to a conflict at the door.
# Define Agents, their actions, observations and spawnpoints
Agents:
  #Sigmund:
    #Actions:
      #- Move4
    #Observations:
      #- DirtPiles
      #- Self
    #Positions:
      #- (3,1)
      #- (1,1)
      #- (3,1)
      #- (5,1)
      #- (3,1)
      #- (1,8)
      #- (3,1)
      #- (5,8)
  Wolfgang:
  Agent2:
    Actions:
      - Move4
    Observations:

@ -47,29 +32,30 @@ Agents:

Entities:
  DirtPiles:
    coords_or_quantity: (2,13), (3,2) # (2,1), (3,12)
    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
    coords_or_quantity: (2,13), (3,2) # Locations of dirt piles
    initial_amount: 0.5
    clean_amount: 1
    dirt_spawn_r_var: 0
    max_global_amount: 12
    max_local_amount: 1

  #Doors: { }
  #Doors: { } # We leave out the door during training

# Rules section specifies the rules governing the dynamics of the environment.
Rules:
  # Environment Dynamics
  #DoorAutoClose:
  #close_frequency: 10

  # Utilities
  # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
  WatchCollisions:
    done_at_collisions: false

  # Done Conditions
  # Define the conditions for the environment to stop. Either success or a fail conditions
  # The environment stops when all dirt is cleaned
  DoneOnAllDirtCleaned:
  #DoneAtMaxStepsReached:
  #max_steps: 100

  # Defines how agents spawn.
  # Options: "random" (Spawn agent at a random position from the list of defined positions)
  #          "first" (Always spawn agent at first position regardless of the other provided positions)
  #          "order" (Loop through agent positions)
  AgentSpawnRule:
    spawn_rule: "order"
@ -5,37 +5,34 @@ General:
  individual_rewards: true
  # The level.txt file to load from marl_factory_grid/levels
  level_name: quadrant
  # Radius of Partially observable Markov decision process
  pomdp_r: 0 # default 3
  # View Radius
  pomdp_r: 0 # 0 = full observability
  # Print all messages and events
  verbose: false
  # Run tests
  tests: false

# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
# other agents aim to clean dirt piles.
# Define Agents, their actions, observations and spawnpoints
Agents:
  # The clean agents
  Wolfgang:
  Agent1:
    Actions:
      - Move4
      - Clean
      - Noop
    Observations:
      - Walls
      - Other
      - DirtPiles
      - Self
    Positions:
      - (9,1)
  Reiner:
  Agent2:
    Actions:
      - Move4
      - Clean
      - Noop
    Observations:
      - Walls
      - Other
      - DirtPiles
      - Self
    Positions:

@ -44,7 +41,7 @@ Agents:
Entities:
  DirtPiles:
    coords_or_quantity: (1, 1), (2,4), (4,7), (7,9), (9,9)
    initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
    initial_amount: 0.5
    clean_amount: 1
    dirt_spawn_r_var: 0
    max_global_amount: 12

@ -63,5 +60,3 @@ Rules:
  # Define the conditions for the environment to stop. Either success or a fail conditions.
  # The environment stops when all dirt is cleaned
  DoneOnAllDirtCleaned:
  DoneAtMaxStepsReached:
    max_steps: 200
@ -1,40 +1,38 @@
General:
  # RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
  env_seed: 69
  # Individual vs global rewards
  individual_rewards: true
  # The level.txt file to load from marl_factory_grid/levels
  level_name: two_rooms
  # View Radius; 0 = full observatbility
  pomdp_r: 0
  # View Radius
  pomdp_r: 0 # 0 = full observability
  # Print all messages and events
  verbose: false
  # Run tests
  tests: false

# In "two rooms one door" scenario 2 agents spawn in 2 different rooms that are connected by a single door. Their aim
# is to reach the destination in the room they didn't spawn in leading to a conflict at the door.
# Define Agents, their actions, observations and spawnpoints
Agents:
  Wolfgang:
  Agent1:
    Actions:
      - Move4
      - Noop
      - DestAction
      - DestAction # Action that is performed when the destination is reached
      - DoorUse
    Observations:
      - Walls
      - Other
      - Doors
      - Destination
    Positions:
      - (3,1) # Agent spawnpoint
  Sigmund:
      - (3,1)
  Agent2:
    Actions:
      - Move4
      - Noop
      - DestAction
      - DoorUse
    Observations:
      - Other
      - Walls
      - Destination
      - Doors

@ -45,10 +43,11 @@ Entities:
  Destinations:
    spawnrule:
      SpawnDestinationsPerAgent:
        # Target coordinates
        coords_or_quantity:
          Wolfgang:
            - (3,12) # Target coordinates
          Sigmund:
          Agent1:
            - (3,12)
          Agent2:
            - (3,2)

  Doors: { }

@ -68,10 +67,12 @@ Rules:
  AssignGlobalPositions: { }

  DoneAtDestinationReach:
    reward_at_done: 1
    reward_at_done: 50
    # We want to give rewards only, when all targets have been reached.
    condition: "all"

  # Done Conditions
  # Define the conditions for the environment to stop. Either success or a fail conditions
  # Environment execution stops after 30 steps
  DoneAtMaxStepsReached:
    max_steps: 50
    max_steps: 30
@ -293,9 +293,6 @@ class Factory(gym.Env):
render_entity.aux = self.obs_builder.curr_lightmaps[render_entity.real_name]
return self._renderer.render(render_entities)

def set_recorder(self, recorder):
self._recorder = recorder

def summarize_header(self):
header = {'rec_step': self.state.curr_step}
for entity_group in (x for x in self.state if x.name in ['Walls', 'DropOffLocations', 'ChargePods']):
@ -3,7 +3,7 @@ from typing import List, Tuple

import numpy as np

from marl_factory_grid.algorithms.static.utils import points_to_graph
from marl_factory_grid.algorithms.tsp.utils import points_to_graph
from marl_factory_grid.environment import constants as c
from marl_factory_grid.environment.entity.entity import Entity
from marl_factory_grid.environment.rules import Rule, SpawnAgents
@ -1,93 +0,0 @@
import copy
from pathlib import Path
from marl_factory_grid.algorithms.marl.a2c_dirt import A2C
from marl_factory_grid.algorithms.utils import load_yaml_file

def single_agent_training(config_name):
cfg_path = Path(f'../marl_factory_grid/algorithms/marl/single_agent_configs/{config_name}_config.yaml')

train_cfg = load_yaml_file(cfg_path)
# Use environment config with fixed spawnpoints for eval
eval_cfg = copy.deepcopy(train_cfg)
eval_cfg["env"]["env_name"] = f"rl/{config_name}_eval_config"

print("Training phase")
agent = A2C(train_cfg, eval_cfg)
agent.train_loop()
print("Evaluation phase")
# Have consecutive episode for eval in single agent case
train_cfg["algorithm"]["pile_all_done"] = "all"
agent.eval_loop(10)


def single_agent_eval(config_name, run):
cfg_path = Path(f'../marl_factory_grid/algorithms/marl/single_agent_configs/{config_name}_config.yaml')

train_cfg = load_yaml_file(cfg_path)
# Use environment config with fixed spawnpoints for eval
eval_cfg = copy.deepcopy(train_cfg)
eval_cfg["env"]["env_name"] = f"rl/{config_name}_eval_config"
agent = A2C(train_cfg, eval_cfg)
print("Evaluation phase")
agent.load_agents(run)
agent.eval_loop(1)


def multi_agent_eval(config_name, runs, emergent_phenomenon=False):
cfg_path = Path(f'../marl_factory_grid/algorithms/marl/multi_agent_configs/{config_name}_config.yaml')

eval_cfg = load_yaml_file(cfg_path)
# Sanity setting of required attributes and configs
if config_name == "two_rooms":
if emergent_phenomenon:
eval_cfg["env"]["env_name"] = f"marl_eval/{config_name}_eval_config_emergent"
eval_cfg["algorithm"]["auxiliary_piles"] = False
else:
eval_cfg["algorithm"]["auxiliary_piles"] = True
elif config_name == "dirt_quadrant":
if emergent_phenomenon:
eval_cfg["algorithm"]["pile-order"] = "dynamic"
else:
eval_cfg["algorithm"]["pile-order"] = "smart"
agent = A2C(train_cfg=eval_cfg, eval_cfg=eval_cfg)
print("Evaluation phase")
agent.load_agents(runs)
agent.eval_loop(1)


def dirt_quadrant_single_agent_training():
single_agent_training("dirt_quadrant")


def two_rooms_one_door_modified_single_agent_training():
single_agent_training("two_rooms")


def dirt_quadrant_single_agent_eval(agent_name):
if agent_name == "Sigmund":
run = "run0"
elif agent_name == "Wolfgang":
run = "run1"
single_agent_eval("dirt_quadrant", [run])


def two_rooms_one_door_modified_single_agent_eval(agent_name):
if agent_name == "Sigmund":
run = "run2"
elif agent_name == "Wolfgang":
run = "run3"
single_agent_eval("two_rooms", [run])


def dirt_quadrant_5_multi_agent_eval(emergent_phenomenon):
multi_agent_eval("dirt_quadrant", ["run4", "run5"], emergent_phenomenon)

def dirt_quadrant_5_multi_agent_ctde_eval(emergent_phenomenon): # run7 == run4
multi_agent_eval("dirt_quadrant", ["run4", "run7"], emergent_phenomenon)

def two_rooms_one_door_modified_multi_agent_eval(emergent_phenomenon):
multi_agent_eval("two_rooms", ["run2", "run3"], emergent_phenomenon)


if __name__ == '__main__':
two_rooms_one_door_modified_multi_agent_eval(False)
studies/rl_runs.py  (new file, 75 lines)
@ -0,0 +1,75 @@
from pathlib import Path
from marl_factory_grid.algorithms.rl.a2c_dirt import A2C
from marl_factory_grid.algorithms.utils import load_yaml_file


def dirt_quadrant_agent1_training():
train_cfg_path = Path(f'../marl_factory_grid/algorithms/rl/single_agent_configs/dirt_quadrant_train_config.yaml')
eval_cfg_path = Path(f'../marl_factory_grid/algorithms/rl/single_agent_configs/dirt_quadrant_eval_config.yaml')
train_cfg = load_yaml_file(train_cfg_path)
eval_cfg = load_yaml_file(eval_cfg_path)

print("Training phase")
agent = A2C(train_cfg, eval_cfg)
agent.train_loop()
print("Evaluation phase")
agent.eval_loop(n_episodes=1)


def two_rooms_training(max_steps, agent_name):
train_cfg_path = Path(f'../marl_factory_grid/algorithms/rl/single_agent_configs/two_rooms_train_config.yaml')
eval_cfg_path = Path(f'../marl_factory_grid/algorithms/rl/single_agent_configs/two_rooms_eval_config.yaml')
train_cfg = load_yaml_file(train_cfg_path)
eval_cfg = load_yaml_file(eval_cfg_path)

train_cfg["algorithm"]["max_steps"] = max_steps
train_cfg["env"]["env_name"] = f"rl/two_rooms_{agent_name}_train_config"
eval_cfg["env"]["env_name"] = f"rl/two_rooms_{agent_name}_eval_config"
print("Training phase")
agent = A2C(train_cfg, eval_cfg)
agent.train_loop()
print("Evaluation phase")
agent.eval_loop(n_episodes=1)


def two_rooms_agent1_training():
two_rooms_training(max_steps=190000, agent_name="agent1")


def two_rooms_agent2_training():
two_rooms_training(max_steps=260000, agent_name="agent2")


def single_agent_eval(config_name, run_folder_name):
eval_cfg_path = Path(f'../marl_factory_grid/algorithms/rl/single_agent_configs/{config_name}_eval_config.yaml')
train_cfg = eval_cfg = load_yaml_file(eval_cfg_path)

# A value for train_cfg is required, but the train environment won't be used
agent = A2C(train_cfg=train_cfg, eval_cfg=eval_cfg)
print("Evaluation phase")
agent.load_agents([run_folder_name])
agent.eval_loop(1)


def multi_agent_eval(config_name, runs, emergent_phenomenon=False):
eval_cfg_path = Path(f'../marl_factory_grid/algorithms/rl/multi_agent_configs/{config_name}' +
f'_eval_config{"_emergent" if emergent_phenomenon else ""}.yaml')
eval_cfg = load_yaml_file(eval_cfg_path)

# A value for train_cfg is required, but the train environment won't be used
agent = A2C(train_cfg=eval_cfg, eval_cfg=eval_cfg)
print("Evaluation phase")
agent.load_agents(runs)
agent.eval_loop(1)


def dirt_quadrant_multi_agent_ctde_eval(emergent_phenomenon):
multi_agent_eval("dirt_quadrant", ["run0", "run0"], emergent_phenomenon)


def two_rooms_multi_agent_eval(emergent_phenomenon):
multi_agent_eval("two_rooms", ["run1", "run2"], emergent_phenomenon)


if __name__ == '__main__':
dirt_quadrant_agent1_training()
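For orientation, a hedged sketch of how these entry points are typically driven; load_yaml_file is assumed here to be a thin YAML loader, and the run folder names are placeholders:

# Assumed shape of the config loader; the repo's actual helper may differ.
import yaml
from pathlib import Path

def load_yaml_file(path: Path):
    with path.open() as stream:
        return yaml.safe_load(stream)

# Typical sequence: train a single agent, then evaluate stored run folders.
# dirt_quadrant_agent1_training()
# single_agent_eval("dirt_quadrant", "run0")                    # "run0" is a placeholder folder
# dirt_quadrant_multi_agent_ctde_eval(emergent_phenomenon=False)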
@ -4,10 +4,11 @@ from pathlib import Path

from tqdm import trange

from marl_factory_grid.algorithms.static.TSP_dirt_agent import TSPDirtAgent
from marl_factory_grid.algorithms.static.TSP_target_agent import TSPTargetAgent
from marl_factory_grid.algorithms.tsp.TSP_dirt_agent import TSPDirtAgent
from marl_factory_grid.algorithms.tsp.TSP_target_agent import TSPTargetAgent
from marl_factory_grid.environment.factory import Factory


def get_dirt_quadrant_tsp_agents(emergent_phenomenon, factory):
agents = [TSPDirtAgent(factory, 0), TSPDirtAgent(factory, 1)]
if not emergent_phenomenon:

@ -31,13 +32,11 @@ def get_dirt_quadrant_tsp_agents(emergent_phenomenon, factory):
for u, v, weight in agent._position_graph.edges(data='weight'):
agent._position_graph[u][v]['weight'] = edge_costs[f"{u}-{v}"]

"""for u, v, weight in agent._position_graph.edges(data='weight'):
print(f"Edge ({u}-{v}) has weight: {weight}")"""

return agents


def get_two_rooms_one_door_modified_tsp_agents(emergent_phenomenon, factory):
def get_two_rooms_tsp_agents(emergent_phenomenon, factory):
agents = [TSPTargetAgent(factory, 0), TSPTargetAgent(factory, 1)]
if not emergent_phenomenon:
print(emergent_phenomenon)

@ -45,6 +44,7 @@ def get_two_rooms_one_door_modified_tsp_agents(emergent_phenomenon, factory):
agent._position_graph[(3, 1)][(3, 2)]['weight'] = 4
return agents


def run_tsp_setting(config_name, emergent_phenomenon):
# Render at each step?
render = True

@ -74,7 +74,7 @@ def run_tsp_setting(config_name, emergent_phenomenon):
if config_name == "dirt_quadrant":
agents = get_dirt_quadrant_tsp_agents(emergent_phenomenon, factory)
elif config_name == "two_rooms":
agents = get_two_rooms_one_door_modified_tsp_agents(emergent_phenomenon, factory)
agents = get_two_rooms_tsp_agents(emergent_phenomenon, factory)
else:
print("Config name does not exist. Abort...")
break

@ -95,7 +95,7 @@ def dirt_quadrant_multi_agent_tsp(emergent_phenomenon):
run_tsp_setting("dirt_quadrant", emergent_phenomenon)


def two_rooms_one_door_modified_multi_agent_tsp(emergent_phenomenon):
def two_rooms_multi_agent_tsp(emergent_phenomenon):
run_tsp_setting("two_rooms", emergent_phenomenon)