Code cleaning part 2

Julian Schönberger
2024-05-24 23:56:00 +02:00
parent 6e6ce9dc5d
commit 81f0f6e209
36 changed files with 421 additions and 495 deletions

View File

@@ -1,34 +0,0 @@
agent:
classname: marl_factory_grid.algorithms.marl.networks.RecurrentAC
n_agents: 2
obs_emb_size: 96
action_emb_size: 16
hidden_size_actor: 64
hidden_size_critic: 64
use_agent_embedding: False
env:
classname: marl_factory_grid.environment.configs.marl_eval
env_name: "marl_eval/dirt_quadrant_eval_config"
n_agents: 2
max_steps: 250
pomdp_r: 2
stack_n_frames: 0
individual_rewards: True
train_render: False
eval_render: True
save_and_log: True
record: False
method: marl_factory_grid.algorithms.marl.LoopSEAC
algorithm:
gamma: 0.99
entropy_coef: 0.01
vf_coef: 0.05
n_steps: 0 # Maximum number of steps (n-TD) sampled before the next value and policy update. Default 0: MC (full episode)
max_steps: 200000
advantage: "Advantage-AC" # Options: "Advantage-AC", "TD-Advantage-AC", "Reinforce"
pile-order: "dynamic" # Use "dynamic" to see emergent phenomenon and "smart" to prevent it
pile-observability: "single" # Options: "single", "all"
pile_all_done: "shared" # Options: "single", "all" ("single" for training, "all" for eval), "shared"
auxiliary_piles: False # Option that is only considered when pile-order = "agents"
chunk-episode: 20000 # Chunk size. (0 = update networks with full episode at once)
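
The advantage option in this config chooses how the per-step weight of the policy-gradient update is computed. As a rough orientation, the sketch below implements the three named variants; the function and variable names are illustrative and not taken from the repository.

import numpy as np

def policy_gradient_weights(rewards, values, variant="Advantage-AC", gamma=0.99):
    """Illustrative per-step weights for the three 'advantage' options."""
    rewards = np.asarray(rewards, dtype=float)
    values = np.asarray(values, dtype=float)
    # Discounted returns G_t, computed backwards over the episode
    returns = np.zeros_like(rewards)
    g = 0.0
    for t in reversed(range(len(rewards))):
        g = rewards[t] + gamma * g
        returns[t] = g
    if variant == "Reinforce":
        return returns                                  # plain Monte Carlo returns
    if variant == "Advantage-AC":
        return returns - values                         # A_t = G_t - V(s_t)
    if variant == "TD-Advantage-AC":
        next_values = np.append(values[1:], 0.0)
        return rewards + gamma * next_values - values   # one-step TD error
    raise ValueError(f"Unknown advantage variant: {variant}")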

View File

@@ -1,35 +0,0 @@
agent:
classname: marl_factory_grid.algorithms.marl.networks.RecurrentAC
n_agents: 2
obs_emb_size: 96
action_emb_size: 16
hidden_size_actor: 64
hidden_size_critic: 64
use_agent_embedding: False
env:
classname: marl_factory_grid.environment.configs.marl_eval
env_name: "marl_eval/two_rooms_eval_config"
n_agents: 2
max_steps: 250
pomdp_r: 2
stack_n_frames: 0
individual_rewards: True
train_render: False
eval_render: True
save_and_log: True
record: False
method: marl_factory_grid.algorithms.marl.LoopSEAC
algorithm:
gamma: 0.99
entropy_coef: 0.01
vf_coef: 0.05
n_steps: 0 # Maximum number of steps (n-TD) sampled before the next value and policy update. Default 0: MC (full episode)
max_steps: 260000
advantage: "Advantage-AC" # Options: "Advantage-AC", "TD-Advantage-AC", "Reinforce"
pile-order: "agents" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
pile-observability: "single" # Options: "single", "all"
pile_all_done: "distributed" # Options: "single", "all" ("single" for training, "all" and "distributed" for eval)
auxiliary_piles: True # Use True to see the emergent phenomenon and False to prevent it
chunk-episode: 20000 # Chunk size. (0 = update networks with full episode at once)
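
The chunk-episode value controls how a long recorded episode is split for network updates: with 20000, updates are computed over slices of at most 20000 transitions, while 0 means the whole episode is used at once. A small sketch of that chunking logic, with hypothetical names:

def iter_episode_chunks(transitions, chunk_size):
    """Yield the episode in slices of at most chunk_size transitions.

    chunk_size == 0 is treated as 'use the full episode at once',
    mirroring the chunk-episode: 0 semantics described in the config comment.
    """
    if chunk_size <= 0:
        yield transitions
        return
    for start in range(0, len(transitions), chunk_size):
        yield transitions[start:start + chunk_size]

# e.g. for chunk in iter_episode_chunks(episode, 20000): update_networks(chunk)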

View File

@@ -1,44 +1,23 @@
import copy
import os
import random
import imageio # requires an ffmpeg installation on the operating system and the imageio-ffmpeg package for Python
from scipy import signal
import matplotlib.pyplot as plt
import torch
from typing import Union, List, Dict
from typing import Union, List
import numpy as np
from torch.distributions import Categorical
from marl_factory_grid.algorithms.marl.base_a2c import PolicyGradient, cumulate_discount
from marl_factory_grid.algorithms.utils import add_env_props, instantiate_class
from pathlib import Path
from collections import deque
from marl_factory_grid.environment.actions import Noop
from marl_factory_grid.modules import Clean, DoorUse
from marl_factory_grid.algorithms.rl.base_a2c import PolicyGradient, cumulate_discount
from marl_factory_grid.algorithms.utils import add_env_props
from marl_factory_grid.utils.plotting.plot_single_runs import plot_action_maps
class Names:
REWARD = 'reward'
DONE = 'done'
ACTION = 'action'
OBSERVATION = 'observation'
LOGITS = 'logits'
HIDDEN_ACTOR = 'hidden_actor'
HIDDEN_CRITIC = 'hidden_critic'
AGENT = 'agent'
ENV = 'env'
ENV_NAME = 'env_name'
N_AGENTS = 'n_agents'
ALGORITHM = 'algorithm'
MAX_STEPS = 'max_steps'
N_STEPS = 'n_steps'
BUFFER_SIZE = 'buffer_size'
CRITIC = 'critic'
BATCH_SIZE = 'batch_size'
N_ACTIONS = 'n_actions'
TRAIN_RENDER = 'train_render'
EVAL_RENDER = 'eval_render'
@@ -55,7 +34,7 @@ class A2C:
self.train_cfg = train_cfg
self.eval_cfg = eval_cfg
self.cfg = train_cfg
self.n_agents = train_cfg[nms.AGENT][nms.N_AGENTS]
self.n_agents = train_cfg[nms.ENV][nms.N_AGENTS]
self.setup()
self.reward_development = []
self.action_probabilities = {agent_idx:[] for agent_idx in range(self.n_agents)}
@@ -80,8 +59,6 @@ class A2C:
os.mkdir(self.results_path)
# Save settings in results folder
self.save_configs()
if self.cfg[nms.ENV]["record"]:
self.recorder = imageio.get_writer(f'{self.results_path}/pygame_recording.mp4', fps=5)
def set_cfg(self, eval=False):
if eval:
@@ -610,8 +587,6 @@ class A2C:
obs = env.reset()
self.set_agent_spawnpoint(env)
if self.cfg[nms.ENV][nms.EVAL_RENDER]:
if self.cfg[nms.ENV]["save_and_log"] and self.cfg[nms.ENV]["record"]:
env.set_recorder(self.recorder)
if self.cfg[nms.ALGORITHM]["auxiliary_piles"]:
# Don't render auxiliary piles
auxiliary_piles = [pile for idx, pile in enumerate(env.state.entities['DirtPiles']) if idx % 2 == 0]
@@ -664,10 +639,6 @@ class A2C:
episode += 1
# Properly finalize the video file
if self.cfg[nms.ENV]["save_and_log"] and self.cfg[nms.ENV]["record"]:
self.recorder.close()
def plot_reward_development(self):
smoothed_data = np.convolve(self.reward_development, np.ones(10) / 10, mode='valid')
plt.plot(smoothed_data)
@@ -689,16 +660,14 @@ class A2C:
def save_agent_models(self):
for idx, agent in enumerate(self.agents):
agent_name = list(self.factory.state.agents_conf.keys())[idx]
agent.pi.save_model_parameters(self.results_path, agent_name)
agent.vf.save_model_parameters(self.results_path, agent_name)
agent.pi.save_model_parameters(self.results_path)
agent.vf.save_model_parameters(self.results_path)
def load_agents(self, runs_list):
for idx, run in enumerate(runs_list):
run_path = f"../study_out/{run}"
agent_name = list(self.eval_factory.state.agents_conf.keys())[idx]
self.agents[idx].pi.load_model_parameters(f"{run_path}/{agent_name}_PolicyNet_model_parameters.pth")
self.agents[idx].vf.load_model_parameters(f"{run_path}/{agent_name}_ValueNet_model_parameters.pth")
self.agents[idx].pi.load_model_parameters(f"{run_path}/PolicyNet_model_parameters.pth")
self.agents[idx].vf.load_model_parameters(f"{run_path}/ValueNet_model_parameters.pth")
def create_info_maps(self, env, used_actions):
# Create value map
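
The loop imports cumulate_discount from base_a2c next to scipy.signal, and a discounted cumulative sum is the usual purpose of that pairing. The following is a generic sketch of such a helper under that assumption, not necessarily the repository's exact implementation.

import numpy as np
from scipy import signal

def cumulate_discount(x, gamma=0.99):
    """Discounted cumulative sum: y[t] = x[t] + gamma*x[t+1] + gamma^2*x[t+2] + ...

    Implemented as an IIR filter over the reversed sequence, the usual trick
    for computing returns efficiently.
    """
    return signal.lfilter([1.0], [1.0, -gamma], np.asarray(x, dtype=float)[::-1])[::-1]

# Example: rewards [1, 0, 2] with gamma=0.5 give returns [1.5, 1.0, 2.0]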

View File

@@ -19,11 +19,11 @@ class Net(th.nn.Module):
if module.bias is not None:
nn.init.uniform_(module.bias, a=-0.1, b=0.1)
def save_model(self, path, agent_name):
th.save(self.net, f"{path}/{agent_name}_{self.__class__.__name__}_model.pth")
def save_model(self, path):
th.save(self.net, f"{path}/{self.__class__.__name__}_model.pth")
def save_model_parameters(self, path, agent_name):
th.save(self.net.state_dict(), f"{path}/{agent_name}_{self.__class__.__name__}_model_parameters.pth")
def save_model_parameters(self, path):
th.save(self.net.state_dict(), f"{path}/{self.__class__.__name__}_model_parameters.pth")
def load_model_parameters(self, path):
self.net.load_state_dict(th.load(path))
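
With the agent_name prefix dropped, each run directory holds exactly one parameter file per network class, and load_model_parameters simply points at that file. A minimal, self-contained sketch of the same state_dict round trip; TinyNet is a stand-in, not a class from the repository.

import torch as th
import torch.nn as nn

class TinyNet(nn.Module):
    """Stand-in for PolicyNet/ValueNet to show the save/load round trip."""
    def __init__(self):
        super().__init__()
        self.net = nn.Linear(4, 2)

    def save_model_parameters(self, path):
        # File name is derived from the class name only, as in the diff above
        th.save(self.net.state_dict(), f"{path}/{self.__class__.__name__}_model_parameters.pth")

    def load_model_parameters(self, path):
        self.net.load_state_dict(th.load(path))

# Round trip (one parameter file per run directory, no agent name in the file name):
# TinyNet().save_model_parameters("study_out/run0")
# restored = TinyNet(); restored.load_model_parameters("study_out/run0/TinyNet_model_parameters.pth")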

View File

@@ -0,0 +1,11 @@
env:
classname: marl_factory_grid.environment.configs.marl_eval
env_name: "marl_eval/dirt_quadrant_eval_config"
n_agents: 2
eval_render: True
save_and_log: False
algorithm:
pile-order: "smart" # Use "dynamic" to see emergent phenomenon and "smart" to prevent it
pile-observability: "single" # Options: "single", "all"
pile_all_done: "shared" # Options: "single", "all" ("single" for training, "all" for eval), "shared"
auxiliary_piles: False # Dirt quadrant does not use this option
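
This new eval config only lists the keys that differ from the corresponding training setup, so it presumably gets merged over a full base config at load time. A hedged sketch of such a recursive override merge; merge_override is a hypothetical helper, not a function from the repository.

def merge_override(base: dict, override: dict) -> dict:
    """Return base updated with override, recursing into nested dicts."""
    merged = dict(base)
    for key, value in override.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = merge_override(merged[key], value)
        else:
            merged[key] = value
    return merged

# e.g. eval_cfg = merge_override(train_cfg, override_cfg_loaded_from_this_yaml)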

View File

@@ -0,0 +1,11 @@
env:
classname: marl_factory_grid.environment.configs.marl_eval
env_name: "marl_eval/dirt_quadrant_eval_config"
n_agents: 2
eval_render: True
save_and_log: False
algorithm:
pile-order: "dynamic" # Use "dynamic" to see emergent phenomenon and "smart" to prevent it
pile-observability: "single" # Options: "single", "all"
pile_all_done: "shared" # Options: "single", "all" ("single" for training, "all" for eval), "shared"
auxiliary_piles: False # Dirt quadrant does not use this option

View File

@@ -0,0 +1,13 @@
env:
classname: marl_factory_grid.environment.configs.marl_eval
env_name: "marl_eval/two_rooms_eval_config"
n_agents: 2
eval_render: True
save_and_log: False
algorithm:
pile-order: "agents" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
pile-observability: "single" # Options: "single", "all"
pile_all_done: "distributed" # Options: "single", "all" ("single" for training, "all" and "distributed" for eval)
auxiliary_piles: True # Use True to see the emergent phenomenon and False to prevent it

View File

@@ -0,0 +1,13 @@
env:
classname: marl_factory_grid.environment.configs.marl_eval
env_name: "marl_eval/two_rooms_eval_config_emergent"
n_agents: 2
eval_render: True
save_and_log: False
algorithm:
pile-order: "agents" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
pile-observability: "single" # Options: "single", "all"
pile_all_done: "distributed" # Options: "single", "all" ("single" for training, "all" and "distributed" for eval)
auxiliary_piles: False # Use True to see the emergent phenomenon and False to prevent it

View File

@@ -0,0 +1,12 @@
env:
classname: marl_factory_grid.environment.configs.rl
env_name: "rl/dirt_quadrant_agent1_eval_config"
n_agents: 1
eval_render: True
save_and_log: False
algorithm:
pile-order: "fixed" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
pile-observability: "single" # Options: "single", "all"
pile_all_done: "all" #
auxiliary_piles: False # Dirt quadrant does not use this option

View File

@@ -1,34 +1,17 @@
agent:
classname: marl_factory_grid.algorithms.marl.networks.RecurrentAC
n_agents: 1
obs_emb_size: 96
action_emb_size: 16
hidden_size_actor: 64
hidden_size_critic: 64
use_agent_embedding: False
env:
classname: marl_factory_grid.environment.configs.rl
env_name: "rl/dirt_quadrant_train_config"
env_name: "rl/dirt_quadrant_agent1_train_config"
n_agents: 1
max_steps: 250
pomdp_r: 2
stack_n_frames: 0
individual_rewards: True
train_render: False
eval_render: True
save_and_log: True
record: False
method: marl_factory_grid.algorithms.marl.LoopSEAC
algorithm:
gamma: 0.99
entropy_coef: 0.01
vf_coef: 0.05
n_steps: 0 # Maximum number of steps (n-TD) sampled before the next value and policy update. Default 0: MC (full episode)
max_steps: 240000
chunk-episode: 20000 # Chunk size. (0 = update networks with full episode at once)
max_steps: 140000
advantage: "Advantage-AC" # Options: "Advantage-AC", "TD-Advantage-AC", "Reinforce"
pile-order: "fixed" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
pile-observability: "single" # Options: "single", "all"
pile_all_done: "single" # Options: "single", "all" ("single" for training, "all" for eval)
auxiliary_piles: False # Option that is only considered when pile-order = "agents"
chunk-episode: 20000 # Chunk size. (0 = update networks with full episode at once)
auxiliary_piles: False # Dirt quadrant does not use this option

View File

@@ -0,0 +1,13 @@
env:
classname: marl_factory_grid.environment.configs.rl
env_name: "rl/two_rooms_eval_config"
n_agents: 1
eval_render: True
save_and_log: False
algorithm:
pile-order: "fixed" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
pile-observability: "single" # Options: "single", "all"
pile_all_done: "all" # Options: "single", "all" ("single" for training, "all" for eval)
auxiliary_piles: False # Auxiliary piles are only used during marl eval

View File

@@ -1,35 +1,17 @@
agent:
classname: marl_factory_grid.algorithms.marl.networks.RecurrentAC
n_agents: 1
obs_emb_size: 96
action_emb_size: 16
hidden_size_actor: 64
hidden_size_critic: 64
use_agent_embedding: False
env:
classname: marl_factory_grid.environment.configs.rl
env_name: "rl/two_rooms_train_config"
n_agents: 1
max_steps: 250
pomdp_r: 2
stack_n_frames: 0
individual_rewards: True
train_render: False
eval_render: True
save_and_log: False
record: False
method: marl_factory_grid.algorithms.marl.LoopSEAC
save_and_log: True
algorithm:
gamma: 0.99
entropy_coef: 0.01
vf_coef: 0.05
n_steps: 0 # Maximum number of steps (n-TD) sampled before the next value and policy update. Default 0: MC (full episode)
chunk-episode: 20000 # Chunk size. (0 = update networks with full episode at once)
max_steps: 260000
advantage: "Advantage-AC" # Options: "Advantage-AC", "TD-Advantage-AC", "Reinforce"
pile-order: "fixed" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
pile-observability: "single" # Options: "single", "all"
pile_all_done: "single" # Options: "single", "all" ("single" for training, "all" for eval)
auxiliary_piles: False # Option that is only considered when pile-order = "agents"
chunk-episode: 20000 # Chunk size. (0 = update networks with full episode at once)
auxiliary_piles: False # Auxiliary piles are only used during marl eval

View File

@@ -6,7 +6,7 @@ from networkx.algorithms.approximation import traveling_salesman as tsp
import time
import copy
from marl_factory_grid.algorithms.static.utils import points_to_graph
from marl_factory_grid.algorithms.tsp.utils import points_to_graph
from marl_factory_grid.modules.doors import constants as do
from marl_factory_grid.environment import constants as c
from marl_factory_grid.utils.helpers import MOVEMAP
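
The TSP agents plan over a graph built from the level's walkable cells; a utility like points_to_graph typically connects neighbouring coordinates with networkx edges so that the traveling_salesman approximation can run on it. A generic sketch under that assumption, not the repository's exact implementation:

import networkx as nx

def points_to_graph(coordinates, allow_diagonal=False):
    """Build an undirected grid graph from walkable (row, col) cells."""
    graph = nx.Graph()
    graph.add_nodes_from(coordinates)
    cells = set(coordinates)
    steps = [(0, 1), (1, 0)] + ([(1, 1), (1, -1)] if allow_diagonal else [])
    for (r, c) in cells:
        for dr, dc in steps:
            neighbour = (r + dr, c + dc)
            if neighbour in cells:
                graph.add_edge((r, c), neighbour)
    return graph

# The agent can then run networkx's traveling_salesman approximation on this graph.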

View File

@@ -1,4 +1,4 @@
from marl_factory_grid.algorithms.static.TSP_base_agent import TSPBaseAgent
from marl_factory_grid.algorithms.tsp.TSP_base_agent import TSPBaseAgent
from marl_factory_grid.modules.clean_up import constants as di
from marl_factory_grid.environment import constants as c

View File

@@ -1,4 +1,4 @@
from marl_factory_grid.algorithms.static.TSP_base_agent import TSPBaseAgent
from marl_factory_grid.algorithms.tsp.TSP_base_agent import TSPBaseAgent
from marl_factory_grid.modules.destinations import constants as d
from marl_factory_grid.modules.doors import constants as do

View File

@@ -64,13 +64,6 @@ def add_env_props(cfg):
factory = Factory(env_path)
_ = factory.reset()
# Agent Init
if len(factory.state.moving_entites) == 1: # Single agent setting
observation_size = list(factory.observation_space.shape)
else: # Multi-agent setting
observation_size = list(factory.observation_space[0].shape)
cfg['agent'].update(dict(observation_size=observation_size, n_actions=factory.action_space[0].n))
return factory

View File

@@ -5,18 +5,17 @@ General:
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: quadrant
# Radius of Partially observable Markov decision process
pomdp_r: 0 # default 3
# View Radius
pomdp_r: 0 # 0 = full observability
# Print all messages and events
verbose: false
# Run tests
tests: false
# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
# other agents aim to clean dirt piles.
# Define Agents, their actions, observations and spawnpoints
Agents:
# The clean agents
Sigmund:
Agent1:
Actions:
- Move4
- Noop
@@ -25,7 +24,7 @@ Agents:
- Self
Positions:
- (9,1)
Wolfgang:
Agent2:
Actions:
- Move4
- Noop
@@ -37,8 +36,8 @@ Agents:
Entities:
DirtPiles:
coords_or_quantity: (9,9), (7,9), (4,7), (2,4), (1, 1) # (4,7), (2,4), (1, 1) # (1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
initial_amount: 0.5 # <1 to ensure that the robot that first attempts to clean this field can remove the dirt in one action
coords_or_quantity: (9,9), (7,9), (4,7), (2,4), (1, 1)
initial_amount: 0.5
clean_amount: 1
dirt_spawn_r_var: 0
max_global_amount: 12
@@ -46,7 +45,6 @@ Entities:
# Rules section specifies the rules governing the dynamics of the environment.
Rules:
# Utilities
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
# Can be omitted/ignored if you do not want to take care of collisions at all.
@@ -57,5 +55,3 @@ Rules:
# Define the conditions for the environment to stop: either success or fail conditions.
# The environment stops when all dirt is cleaned
DoneOnAllDirtCleaned:
#DoneAtMaxStepsReached:
#max_steps: 200

View File

@@ -1,20 +1,20 @@
General:
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
env_seed: 69
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: two_rooms
# View Radius; 0 = full observatbility
pomdp_r: 0
# View Radius
pomdp_r: 0 # 0 = full observability
# Print all messages and events
verbose: false
# Run tests
tests: false
# In "two rooms one door" scenario 2 agents spawn in 2 different rooms that are connected by a single door. Their aim
# is to reach the destination in the room they didn't spawn in leading to a conflict at the door.
# Define Agents, their actions, observations and spawnpoints
Agents:
Sigmund:
Agent1:
Actions:
- Move4
- DoorUse
@@ -24,7 +24,7 @@ Agents:
- Self
Positions:
- (3,1)
Wolfgang:
Agent2:
Actions:
- Move4
- DoorUse
@@ -36,9 +36,10 @@ Agents:
- (3,13)
Entities:
# For the RL agent, we model the flags as dirt piles to be more flexible
DirtPiles:
coords_or_quantity: (2,1), (3,12), (2,13), (3,2) # Static form: auxiliary pile, primary pile, auxiliary pile, ...
initial_amount: 0.5 # <1 to ensure that the robot that first attempts to clean this field can remove the dirt in one action
initial_amount: 0.5
clean_amount: 1
dirt_spawn_r_var: 0
max_global_amount: 12
@@ -47,16 +48,13 @@ Entities:
Doors: { }
Rules:
# Environment Dynamics
#DoorAutoClose:
#close_frequency: 10
# Utilities
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
WatchCollisions:
done_at_collisions: false
# Done Conditions
#DoneOnAllDirtCleaned:
# Define the conditions for the environment to stop: either success or fail conditions.
# Environment execution stops after 30 steps
DoneAtMaxStepsReached:
max_steps: 50
max_steps: 30

View File

@@ -1,20 +1,20 @@
General:
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
env_seed: 69
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: two_rooms
# View Radius; 0 = full observatbility
pomdp_r: 0
# View Radius
pomdp_r: 0 # 0 = full observability
# Print all messages and events
verbose: false
# Run tests
tests: false
# In "two rooms one door" scenario 2 agents spawn in 2 different rooms that are connected by a single door. Their aim
# is to reach the destination in the room they didn't spawn in leading to a conflict at the door.
# Define Agents, their actions, observations and spawnpoints
Agents:
Sigmund:
Agent1:
Actions:
- Move4
- DoorUse
@@ -24,7 +24,7 @@ Agents:
- Self
Positions:
- (3,1)
Wolfgang:
Agent2:
Actions:
- Move4
- DoorUse
@@ -36,9 +36,10 @@ Agents:
- (3,13)
Entities:
# For the RL agent, we model the flags as dirt piles to be more flexible
DirtPiles:
coords_or_quantity: (3,12), (3,2) # Static form: auxiliary pile, primary pile, auxiliary pile, ...
initial_amount: 0.5 # <1 to ensure that the robot that first attempts to clean this field can remove the dirt in one action
coords_or_quantity: (3,12), (3,2) # Locations of flags
initial_amount: 0.5
clean_amount: 1
dirt_spawn_r_var: 0
max_global_amount: 12
@@ -47,16 +48,13 @@ Entities:
Doors: { }
Rules:
# Environment Dynamics
#DoorAutoClose:
#close_frequency: 10
# Utilities
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
WatchCollisions:
done_at_collisions: false
# Done Conditions
#DoneOnAllDirtCleaned:
# Define the conditions for the environment to stop: either success or fail conditions.
# Environment execution stops after 30 steps
DoneAtMaxStepsReached:
max_steps: 30

View File

@@ -0,0 +1,48 @@
General:
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
env_seed: 69
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: quadrant
# View Radius
pomdp_r: 0 # 0 = full observability
# Print all messages and events
verbose: false
# Run tests
tests: false
# Define Agents, their actions, observations and spawnpoints
Agents:
# The clean agents
Agent1:
Actions:
- Move4
- Noop
Observations:
- DirtPiles
- Self
Positions:
- (9,1)
Entities:
DirtPiles:
coords_or_quantity: (1, 1), (2,4), (4,7), (7,9), (9,9) # Locations of dirt piles
initial_amount: 0.5
clean_amount: 1
dirt_spawn_r_var: 0
max_global_amount: 12
max_local_amount: 1
# Rules section specifies the rules governing the dynamics of the environment.
Rules:
# Utilities
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
# Can be omitted/ignored if you do not want to take care of collisions at all.
WatchCollisions:
done_at_collisions: false
# Done Conditions
# Define the conditions for the environment to stop: either success or fail conditions.
# The environment stops when all dirt is cleaned
DoneOnAllDirtCleaned:

View File

@@ -5,61 +5,38 @@ General:
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: quadrant
# Radius of Partially observable Markov decision process
pomdp_r: 0 # default 3
# View Radius
pomdp_r: 0 # 0 = full observability
# Print all messages and events
verbose: false
# Run tests
tests: false
# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
# other agents aim to clean dirt piles.
# Define Agents, their actions, observations and spawnpoints
Agents:
# The clean agents
#Sigmund:
#Actions:
#- Move4
#Observations:
#- DirtPiles
#- Self
#Positions:
#- (9,1)
#- (1,1)
#- (2,4)
#- (4,7)
#- (6,8)
#- (7,9)
#- (2,4)
#- (4,7)
#- (6,8)
#- (7,9)
#- (9,9)
#- (9,1)
Wolfgang:
Agent1:
Actions:
- Move4
Observations:
- DirtPiles
- Self
Positions:
- (9,5)
- (9,1)
- (1,1)
- (2,4)
- (4,7)
- (6,8)
- (7,9)
- (2,4)
- (4,7)
- (6,8)
- (7,9)
- (9,9)
- (9,5)
- (9,1)
Entities:
DirtPiles:
coords_or_quantity: (1, 1), (2,4), (4,7), (6,8), (7,9), (9,9) # (4,7), (2,4), (1, 1) #(1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
initial_amount: 0.5 # <1 to ensure that the robot that first attempts to clean this field can remove the dirt in one action
coords_or_quantity: (1, 1), (2,4), (4,7), (7,9), (9,9) # Locations of dirt piles
initial_amount: 0.5
clean_amount: 1
dirt_spawn_r_var: 0
max_global_amount: 12
@@ -67,7 +44,6 @@ Entities:
# Rules section specifies the rules governing the dynamics of the environment.
Rules:
# Utilities
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
# Can be omitted/ignored if you do not want to take care of collisions at all.
@@ -78,8 +54,6 @@ Rules:
# Define the conditions for the environment to stop: either success or fail conditions.
# The environment stops when all dirt is cleaned
DoneOnAllDirtCleaned:
#DoneAtMaxStepsReached: # An episode should last for at most max_steps steps
#max_steps: 1000
# Define how agents spawn.
# Options: "random" (Spawn agent at a random position from the list of defined positions)

View File

@@ -1,78 +0,0 @@
General:
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
env_seed: 69
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: quadrant
# Radius of Partially observable Markov decision process
pomdp_r: 0 # default 3
# Print all messages and events
verbose: false
# Run tests
tests: false
# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
# other agents aim to clean dirt piles.
Agents:
# The clean agents
#Sigmund:
#Actions:
#- Move4
#- Noop
#Observations:
#- DirtPiles
#- Self
#Positions:
#- (9,1)
#- (1,1)
#- (2,4)
#- (4,7)
#- (7,9)
#- (2,4)
#- (4,7)
#- (7,9)
#- (9,9)
#- (9,1)
Wolfgang:
Actions:
- Move4
Observations:
- DirtPiles
- Self
Positions:
- (9,5)
#- (1,1)
#- (2,4)
#- (4,7)
#- (7,9)
#- (2,4)
#- (4,7)
#- (7,9)
#- (9,9)
#- (9,5)
Entities:
DirtPiles:
coords_or_quantity: (1, 1), (2,4), (4,7), (7,9), (9,9) #(9,9), (7,9), (4,7), (2,4), (1, 1) #(1, 1), (2,4), (4,7), (7,9), (9,9) # (4,7), (2,4), (1, 1) # (1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
initial_amount: 0.5 # <1 to ensure that the robot that first attempts to clean this field can remove the dirt in one action
clean_amount: 1
dirt_spawn_r_var: 0
max_global_amount: 12
max_local_amount: 1
# Rules section specifies the rules governing the dynamics of the environment.
Rules:
# Utilities
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
# Can be omitted/ignored if you do not want to take care of collisions at all.
WatchCollisions:
done_at_collisions: false
# Done Conditions
# Define the conditions for the environment to stop: either success or fail conditions.
# The environment stops when all dirt is cleaned
DoneOnAllDirtCleaned:
#DoneAtMaxStepsReached:
#max_steps: 200

View File

@@ -0,0 +1,50 @@
General:
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
env_seed: 69
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: two_rooms
# View Radius
pomdp_r: 0 # 0 = full observability
# Print all messages and events
verbose: false
# Run tests
tests: false
# Define Agents, their actions, observations and spawnpoints
Agents:
Agent1:
Actions:
- Move4
- DoorUse
Observations:
- DirtPiles
- Self
Positions:
- (3,1)
- (2,1)
Entities:
DirtPiles:
coords_or_quantity: (2,1), (3,12) # Locations of dirt piles
initial_amount: 0.5
clean_amount: 1
dirt_spawn_r_var: 0
max_global_amount: 12
max_local_amount: 1
Doors: { }
# Rules section specifies the rules governing the dynamics of the environment.
Rules:
# Utilities
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
WatchCollisions:
done_at_collisions: false
# Done Conditions
# Define the conditions for the environment to stop: either success or fail conditions.
# Environment execution stops after 30 steps
DoneAtMaxStepsReached:
max_steps: 30

View File

@@ -0,0 +1,60 @@
General:
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
env_seed: 69
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: two_rooms
# View Radius
pomdp_r: 0 # 0 = full observability
# Print all messages and events
verbose: false
# Run tests
tests: false
# Define Agents, their actions, observations and spawnpoints
Agents:
Agent1:
Actions:
- Move4
Observations:
- DirtPiles
- Self
Positions:
- (3,1)
- (1,1)
- (3,1)
- (5,1)
- (3,1)
- (1,8)
- (3,1)
- (5,8)
Entities:
DirtPiles:
coords_or_quantity: (2,1), (3,12) # Locations of dirt piles
initial_amount: 0.5
clean_amount: 1
dirt_spawn_r_var: 0
max_global_amount: 12
max_local_amount: 1
#Doors: { } # We leave out the door during training
Rules:
# Utilities
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
WatchCollisions:
done_at_collisions: false
# Done Conditions
# Define the conditions for the environment to stop: either success or fail conditions.
# The environment stops when all dirt is cleaned
DoneOnAllDirtCleaned:
# Define how agents spawn.
# Options: "random" (Spawn agent at a random position from the list of defined positions)
# "first" (Always spawn agent at first position regardless of the other provided positions)
# "order" (Loop through agent positions)
AgentSpawnRule:
spawn_rule: "order"

View File

@@ -1,30 +1,20 @@
General:
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
env_seed: 69
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: two_rooms
# View Radius; 0 = full observatbility
pomdp_r: 0
# View Radius
pomdp_r: 0 # 0 = full observability
# Print all messages and events
verbose: false
# Run tests
tests: false
# In "two rooms one door" scenario 2 agents spawn in 2 different rooms that are connected by a single door. Their aim
# is to reach the destination in the room they didn't spawn in leading to a conflict at the door.
# Define Agents, their actions, observations and spawnpoints
Agents:
#Sigmund:
#Actions:
#- Move4
#- DoorUse
#Observations:
#- DirtPiles
#- Self
#Positions:
#- (3,1)
#- (2,1)
Wolfgang:
Agent2:
Actions:
- Move4
- DoorUse
@@ -37,8 +27,8 @@ Agents:
Entities:
DirtPiles:
coords_or_quantity: (2,13), (3,2) # (2,1), (3,12)
initial_amount: 0.5 # <1 to ensure that the robot that first attempts to clean this field can remove the dirt in one action
coords_or_quantity: (2,13), (3,2) # Locations of dirt piles
initial_amount: 0.5
clean_amount: 1
dirt_spawn_r_var: 0
max_global_amount: 12
@@ -46,17 +36,15 @@ Entities:
Doors: { }
# Rules section specifies the rules governing the dynamics of the environment.
Rules:
# Environment Dynamics
#DoorAutoClose:
#close_frequency: 10
# Utilities
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
WatchCollisions:
done_at_collisions: false
# Done Conditions
#DoneOnAllDirtCleaned:
# Define the conditions for the environment to stop: either success or fail conditions.
# Environment execution stops after 30 steps
DoneAtMaxStepsReached:
max_steps: 50
max_steps: 30

View File

@@ -1,35 +1,20 @@
General:
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
env_seed: 69
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: two_rooms
# View Radius; 0 = full observatbility
pomdp_r: 0
# View Radius
pomdp_r: 0 # 0 = full observability
# Print all messages and events
verbose: false
# Run tests
tests: false
# In "two rooms one door" scenario 2 agents spawn in 2 different rooms that are connected by a single door. Their aim
# is to reach the destination in the room they didn't spawn in leading to a conflict at the door.
# Define Agents, their actions, observations and spawnpoints
Agents:
#Sigmund:
#Actions:
#- Move4
#Observations:
#- DirtPiles
#- Self
#Positions:
#- (3,1)
#- (1,1)
#- (3,1)
#- (5,1)
#- (3,1)
#- (1,8)
#- (3,1)
#- (5,8)
Wolfgang:
Agent2:
Actions:
- Move4
Observations:
@@ -47,29 +32,30 @@ Agents:
Entities:
DirtPiles:
coords_or_quantity: (2,13), (3,2) # (2,1), (3,12)
initial_amount: 0.5 # <1 to ensure that the robot that first attempts to clean this field can remove the dirt in one action
coords_or_quantity: (2,13), (3,2) # Locations of dirt piles
initial_amount: 0.5
clean_amount: 1
dirt_spawn_r_var: 0
max_global_amount: 12
max_local_amount: 1
#Doors: { }
#Doors: { } # We leave out the door during training
# Rules section specifies the rules governing the dynamics of the environment.
Rules:
# Environment Dynamics
#DoorAutoClose:
#close_frequency: 10
# Utilities
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
WatchCollisions:
done_at_collisions: false
# Done Conditions
# Define the conditions for the environment to stop: either success or fail conditions.
# The environment stops when all dirt is cleaned
DoneOnAllDirtCleaned:
#DoneAtMaxStepsReached:
#max_steps: 100
# Defines how agents spawn.
# Options: "random" (Spawn agent at a random position from the list of defined positions)
# "first" (Always spawn agent at first position regardless of the other provided positions)
# "order" (Loop through agent positions)
AgentSpawnRule:
spawn_rule: "order"

View File

@@ -5,37 +5,34 @@ General:
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: quadrant
# Radius of Partially observable Markov decision process
pomdp_r: 0 # default 3
# View Radius
pomdp_r: 0 # 0 = full observability
# Print all messages and events
verbose: false
# Run tests
tests: false
# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
# other agents aim to clean dirt piles.
# Define Agents, their actions, observations and spawnpoints
Agents:
# The clean agents
Wolfgang:
Agent1:
Actions:
- Move4
- Clean
- Noop
Observations:
- Walls
- Other
- DirtPiles
- Self
Positions:
- (9,1)
Reiner:
Agent2:
Actions:
- Move4
- Clean
- Noop
Observations:
- Walls
- Other
- DirtPiles
- Self
Positions:
@@ -44,7 +41,7 @@ Agents:
Entities:
DirtPiles:
coords_or_quantity: (1, 1), (2,4), (4,7), (7,9), (9,9)
initial_amount: 0.5 # <1 to ensure that the robot that first attempts to clean this field can remove the dirt in one action
initial_amount: 0.5
clean_amount: 1
dirt_spawn_r_var: 0
max_global_amount: 12
@@ -63,5 +60,3 @@ Rules:
# Define the conditions for the environment to stop: either success or fail conditions.
# The environment stops when all dirt is cleaned
DoneOnAllDirtCleaned:
DoneAtMaxStepsReached:
max_steps: 200

View File

@@ -1,40 +1,38 @@
General:
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
env_seed: 69
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: two_rooms
# View Radius; 0 = full observatbility
pomdp_r: 0
# View Radius
pomdp_r: 0 # 0 = full observability
# Print all messages and events
verbose: false
# Run tests
tests: false
# In "two rooms one door" scenario 2 agents spawn in 2 different rooms that are connected by a single door. Their aim
# is to reach the destination in the room they didn't spawn in leading to a conflict at the door.
# Define Agents, their actions, observations and spawnpoints
Agents:
Wolfgang:
Agent1:
Actions:
- Move4
- Noop
- DestAction
- DestAction # Action that is performed when the destination is reached
- DoorUse
Observations:
- Walls
- Other
- Doors
- Destination
Positions:
- (3,1) # Agent spawnpoint
Sigmund:
- (3,1)
Agent2:
Actions:
- Move4
- Noop
- DestAction
- DoorUse
Observations:
- Other
- Walls
- Destination
- Doors
@@ -45,10 +43,11 @@ Entities:
Destinations:
spawnrule:
SpawnDestinationsPerAgent:
# Target coordinates
coords_or_quantity:
Wolfgang:
- (3,12) # Target coordinates
Sigmund:
Agent1:
- (3,12)
Agent2:
- (3,2)
Doors: { }
@@ -68,10 +67,12 @@ Rules:
AssignGlobalPositions: { }
DoneAtDestinationReach:
reward_at_done: 1
reward_at_done: 50
# We want to give rewards only when all targets have been reached.
condition: "all"
# Done Conditions
# Define the conditions for the environment to stop: either success or fail conditions.
# Environment execution stops after 30 steps
DoneAtMaxStepsReached:
max_steps: 50
max_steps: 30
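
In this eval config the episode ends either when every agent has reached its destination (condition "all", paying reward_at_done: 50) or after 30 steps. An illustrative sketch of how the two done conditions combine; the function is hypothetical and not one of the environment's rule classes.

def episode_done(destinations_reached, n_destinations, step,
                 max_steps=30, reward_at_done=50, condition="all"):
    """Combine DoneAtDestinationReach and DoneAtMaxStepsReached, illustratively."""
    if condition == "all" and destinations_reached == n_destinations:
        return True, reward_at_done    # success: every target has been reached
    if step >= max_steps:
        return True, 0                 # failure: step budget exhausted
    return False, 0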

View File

@@ -293,9 +293,6 @@ class Factory(gym.Env):
render_entity.aux = self.obs_builder.curr_lightmaps[render_entity.real_name]
return self._renderer.render(render_entities)
def set_recorder(self, recorder):
self._recorder = recorder
def summarize_header(self):
header = {'rec_step': self.state.curr_step}
for entity_group in (x for x in self.state if x.name in ['Walls', 'DropOffLocations', 'ChargePods']):

View File

@@ -3,7 +3,7 @@ from typing import List, Tuple
import numpy as np
from marl_factory_grid.algorithms.static.utils import points_to_graph
from marl_factory_grid.algorithms.tsp.utils import points_to_graph
from marl_factory_grid.environment import constants as c
from marl_factory_grid.environment.entity.entity import Entity
from marl_factory_grid.environment.rules import Rule, SpawnAgents