Reset tsp route caching + renamed and moved configs + removed unnecessary files

This commit is contained in:
Julian Schönberger
2024-05-24 16:12:05 +02:00
parent 98113ea849
commit c8336e8f78
144 changed files with 86 additions and 8056 deletions

View File

@@ -1,19 +0,0 @@
# Required
version: 2
# Set the OS, Python version and other tools you might need
build:
os: ubuntu-22.04
tools:
python: "3.12"
# Optional but recommended, declare the Python requirements required
# to build your documentation
# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
python:
install:
- requirements: docs/requirements.txt
# Build documentation in the "docs/" directory with Sphinx
sphinx:
configuration: docs/source/conf.py

View File

@@ -56,7 +56,7 @@ Refer to [quickstart](_quickstart) for specific scenarios.
The majority of environment objects, including entities, rules, and assets, can be loaded automatically.
Simply specify the requirements of your environment in a [
*yaml*-config file](marl_factory_grid/configs/default_config.yaml).
*yaml*-config file](marl_factory_grid/environment/configs/default_config.yaml).
If you only plan on using the environment without making any modifications, use ``quickstart_use``.
This creates a default config-file and another one that lists all possible options of the environment.
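As a quick, hedged illustration (not part of the package itself), the generated config can be inspected with plain PyYAML to see which sections and entities are available; the path below is the one referenced above:

```python
from pathlib import Path
import yaml  # PyYAML; assumed to be available in your environment

# Path taken from the link above; adjust it to your checkout if needed.
config_path = Path("marl_factory_grid/environment/configs/default_config.yaml")
config = yaml.safe_load(config_path.read_text())

# Top-level sections, e.g. General, Entities, Agents, Rules, Assets
print(list(config.keys()))
# Entities that can be switched on for a scenario
print(list(config.get("Entities", {}).keys()))
```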

View File

@@ -1,112 +0,0 @@
---
General:
level_name: large
env_seed: 69
verbose: !!bool False
pomdp_r: 3
individual_rewards: !!bool True
Entities:
Defaults: {}
DirtPiles:
initial_dirt_ratio: 0.01 # On init, dirt spawns on at most this fraction of tiles.
dirt_spawn_r_var: 0.5 # How much does the dirt spawn amount vary?
initial_amount: 1
max_local_amount: 3 # Max dirt amount per tile.
max_global_amount: 30 # Max dirt amount in the whole environment.
Doors:
closed_on_init: True
auto_close_interval: 10
indicate_area: False
Batteries: {}
ChargePods: {}
Destinations: {}
ReachedDestinations: {}
Items: {}
Inventories: {}
DropOffLocations: {}
Agents:
Wolfgang:
Actions:
- Noop
- Noop
- Noop
- CleanUp
Observations:
- Self
- Placeholder
- Walls
- DirtPiles
- Placeholder
- Doors
- Doors
Bjoern:
Actions:
# Move4, Noop
- Move8
- DoorUse
- ItemAction
Observations:
- Defaults
- Combined:
- Other
- Walls
- Items
- Inventory
Karl-Heinz:
Actions:
- Move8
- DoorUse
Observations:
# Wall, Only Other Agents
- Defaults
- Combined:
- Other
- Self
- Walls
- Doors
- Destinations
Manfred:
Actions:
- Move8
- ItemAction
- DoorUse
- CleanUp
- DestAction
- BtryCharge
Observations:
- Defaults
- Battery
- Destinations
- DirtPiles
- Doors
- Items
- Inventory
- DropOffLocations
Rules:
Defaults: {}
Collision:
done_at_collisions: !!bool False
DirtRespawnRule:
spawn_freq: 15
DirtSmearOnMove:
smear_amount: 0.12
DoorAutoClose: {}
DirtAllCleanDone: {}
Btry: {}
BtryDoneAtDischarge: {}
DestinationReach: {}
DestinationSpawn: {}
DestinationDone: {}
ItemRules: {}
Assets:
- Defaults
- Dirt
- Door
- Machine
- Item
- Destination
- DropOffLocation
- Chargepod

View File

@@ -1,189 +0,0 @@
import sys
from pathlib import Path
##############################################
# keep this for stand alone script execution #
##############################################
from environments.factory.base.base_factory import BaseFactory
from environments.logging.recorder import EnvRecorder
try:
# noinspection PyUnboundLocalVariable
if __package__ is None:
DIR = Path(__file__).resolve().parent
sys.path.insert(0, str(DIR.parent))
__package__ = DIR.name
else:
DIR = None
except NameError:
DIR = None
pass
##############################################
##############################################
##############################################
import simplejson
from environments import helpers as h
from environments.factory.additional.combined_factories import DestBatteryFactory
from environments.factory.additional.dest.factory_dest import DestFactory
from environments.factory.additional.dirt.factory_dirt import DirtFactory
from environments.factory.additional.item.factory_item import ItemFactory
from environments.helpers import ObservationTranslator, ActionTranslator
from environments.logging.envmonitor import EnvMonitor
from environments.utility_classes import ObservationProperties, AgentRenderOptions, MovementProperties
def policy_model_kwargs():
return dict(ent_coef=0.01)
def dqn_model_kwargs():
return dict(buffer_size=50000,
learning_starts=64,
batch_size=64,
target_update_interval=5000,
exploration_fraction=0.25,
exploration_final_eps=0.025
)
def encapsule_env_factory(env_fctry, env_kwrgs):
def _init():
with env_fctry(**env_kwrgs) as init_env:
return init_env
return _init
if __name__ == '__main__':
render = False
# Define Global Env Parameters
# Define properties object parameters
factory_kwargs = dict(
max_steps=400, parse_doors=True,
level_name='rooms',
doors_have_area=True, verbose=False,
mv_prop=MovementProperties(allow_diagonal_movement=True,
allow_square_movement=True,
allow_no_op=False),
obs_prop=ObservationProperties(
frames_to_stack=3,
cast_shadows=True,
omit_agent_self=True,
render_agents=AgentRenderOptions.LEVEL,
additional_agent_placeholder=None,
)
)
# Bundle both environments with global kwargs and parameters
# Todo: find a better solution, like auto module loading
env_map = {'DirtFactory': DirtFactory,
'ItemFactory': ItemFactory,
'DestFactory': DestFactory,
'DestBatteryFactory': DestBatteryFactory
}
env_names = list(env_map.keys())
# Put all your multi-seed agents in a single folder; we do not need specific names etc.
available_models = dict()
available_envs = dict()
available_runs_kwargs = dict()
available_runs_agents = dict()
max_seed = 0
# Define this folder
combinations_path = Path('combinations')
# Those are all differently trained combinations of models, environments and parameters
for combination in (x for x in combinations_path.iterdir() if x.is_dir()):
# These are all the models for this specific combination
for model_run in (x for x in combination.iterdir() if x.is_dir()):
model_name, env_name = model_run.name.split('_')[:2]
if model_name not in available_models:
available_models[model_name] = h.MODEL_MAP[model_name]
if env_name not in available_envs:
available_envs[env_name] = env_map[env_name]
# Those are all available seeds
for seed_run in (x for x in model_run.iterdir() if x.is_dir()):
max_seed = max(int(seed_run.name.split('_')[0]), max_seed)
# Read the environment configuration from disk
with next(seed_run.glob('env_params.json')).open('r') as f:
env_kwargs = simplejson.load(f)
available_runs_kwargs[seed_run.name] = env_kwargs
# Read the trained model_path from disk
model_path = next(seed_run.glob('model.zip'))
available_runs_agents[seed_run.name] = model_path
# We start by combining all SAME MODEL CLASSES per available Seed, across ALL available ENVIRONMENTS.
for model_name, model_cls in available_models.items():
for seed in range(max_seed):
combined_env_kwargs = dict()
model_paths = list()
comparable_runs = {key: val for key, val in available_runs_kwargs.items() if (
key.startswith(str(seed)) and model_name in key and key != 'key')
}
for name, run_kwargs in comparable_runs.items():
# Select trained agent as a candidate:
model_paths.append(available_runs_agents[name])
# Sort env kwargs:
for key, val in run_kwargs.items():
if key not in combined_env_kwargs:
combined_env_kwargs[key] = val
else:
assert combined_env_kwargs[key] == val, "Check the combinations you try to make!"
# Update and combine all kwargs to account for multiple agents etc.
# We cannot capture all configuration cases!
for key, val in factory_kwargs.items():
if key not in combined_env_kwargs:
combined_env_kwargs[key] = val
else:
assert combined_env_kwargs[key] == val
combined_env_kwargs.update(n_agents=len(comparable_runs))
with type("CombinedEnv", tuple(available_envs.values()), {})(**combined_env_kwargs) as combEnv:
# EnvMonitor Init
comb = f'comb_{model_name}_{seed}'
comb_monitor_path = combinations_path / comb / f'{comb}_monitor.pick'
comb_recorder_path = combinations_path / comb / f'{comb}_recorder.json'
comb_monitor_path.parent.mkdir(parents=True, exist_ok=True)
monitoredCombEnv = EnvMonitor(combEnv, filepath=comb_monitor_path)
monitoredCombEnv = EnvRecorder(monitoredCombEnv, filepath=comb_recorder_path, freq=1)
# Evaluation starts here #####################################################
# Load all models
loaded_models = [available_models[model_name].load(model_path) for model_path in model_paths]
obs_translators = ObservationTranslator(
monitoredCombEnv.named_observation_space,
*[agent.named_observation_space for agent in loaded_models],
placeholder_fill_value='n')
act_translators = ActionTranslator(
monitoredCombEnv.named_action_space,
*(agent.named_action_space for agent in loaded_models)
)
for episode in range(1):
obs = monitoredCombEnv.reset()
if render: monitoredCombEnv.render()
rew, done_bool = 0, False
while not done_bool:
actions = []
for i, model in enumerate(loaded_models):
pred = model.predict(obs_translators.translate_observation(i, obs[i]))[0]
actions.append(act_translators.translate_action(i, pred))
obs, step_r, done_bool, info_obj = monitoredCombEnv.step(actions)
rew += step_r
if render: monitoredCombEnv.render()
if done_bool:
break
print(f'Factory run {episode} done, reward is:\n {rew}')
# Eval monitor outputs are automatically stored by the monitor object
# TODO: Plotting
monitoredCombEnv.save_records()
monitoredCombEnv.save_run()
pass

View File

@@ -1,203 +0,0 @@
import sys
import time
from pathlib import Path
import simplejson
import stable_baselines3 as sb3
# This is needed, when you put this file in a subfolder.
try:
# noinspection PyUnboundLocalVariable
if __package__ is None:
DIR = Path(__file__).resolve().parent
sys.path.insert(0, str(DIR.parent))
__package__ = DIR.name
else:
DIR = None
except NameError:
DIR = None
pass
from environments import helpers as h
from environments.factory.additional.dest.dest_util import DestModeOptions, DestProperties
from environments.factory.additional.btry.btry_util import BatteryProperties
from environments.logging.envmonitor import EnvMonitor
from environments.logging.recorder import EnvRecorder
from environments.factory.additional.combined_factories import DestBatteryFactory
from environments.utility_classes import MovementProperties, ObservationProperties, AgentRenderOptions
from plotting.compare_runs import compare_seed_runs
"""
Welcome to this quick start file. Here we will see how to:
0. Setup I/O Paths
1. Setup parameters for the environments (dest-battery-factory).
2. Setup parameters for the agent training (SB3: PPO) and save metrics.
Run the training.
3. Save environment and agent for later analysis.
4. Load the agent from drive
5. Rendering the environment with a run of the trained agent.
6. Plot metrics
"""
if __name__ == '__main__':
#########################################################
# 0. Setup I/O Paths
# Define some general parameters
train_steps = 1e6
n_seeds = 3
model_class = sb3.PPO
env_class = DestBatteryFactory
env_params_json = 'env_params.json'
# Define a global study save path
start_time = int(time.time())
study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}'
# Create an _identifier, which is unique for every combination and easy to read in filesystem
identifier = f'{model_class.__name__}_{env_class.__name__}_{start_time}'
exp_path = study_root_path / identifier
#########################################################
# 1. Setup parameters for the environments (dest-battery-factory).
# Define property object parameters.
# 'ObservationProperties' are for specifying how the agent sees the environment.
obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT, # Agents won't be shown in the obs at all
omit_agent_self=True, # This is default
additional_agent_placeholder=None, # We will not account for future agents
frames_to_stack=3, # To give the agent a notion of time
pomdp_r=2 # the agent's view radius
)
# 'MovementProperties' are for specifying how the agent is allowed to move in the environment.
move_props = MovementProperties(allow_diagonal_movement=True, # Euclidean style (vertices)
allow_square_movement=True, # Manhattan (edges)
allow_no_op=False) # Pause movement (do nothing)
# 'DestProperties' controls if and how destinations are spawned
# TODO: Comments
dest_props = DestProperties(
n_dests = 2, # How many destinations are there
dwell_time = 0, # How long does the agent need to "wait" on a destination
spawn_frequency = 0,
spawn_in_other_zone = True, #
spawn_mode = DestModeOptions.DONE,
)
btry_props = BatteryProperties(
initial_charge = 0.9, #
charge_rate = 0.4, #
charge_locations = 3, #
per_action_costs = 0.01,
done_when_discharged = True,
multi_charge = False,
)
# These are the EnvKwargs for initializing the environment class, holding all former parameter-classes
# TODO: Comments
factory_kwargs = dict(n_agents=1,
max_steps=400,
parse_doors=True,
level_name='rooms',
doors_have_area=True, #
verbose=False,
mv_prop=move_props, # See Above
obs_prop=obs_props, # See Above
done_at_collision=True,
dest_prop=dest_props,
btry_prop=btry_props
)
#########################################################
# 2. Setup parameters for the agent training (SB3: PPO) and save metrics.
agent_kwargs = dict()
#########################################################
# Run the Training
for seed in range(n_seeds):
# Make a copy if you want to alter things in the training loop; like the seed.
env_kwargs = factory_kwargs.copy()
env_kwargs.update(env_seed=seed)
# Output folder
seed_path = exp_path / f'{str(seed)}_{identifier}'
seed_path.mkdir(parents=True, exist_ok=True)
# Parameter Storage
param_path = seed_path / env_params_json
# Observation (measures) Storage
monitor_path = seed_path / 'monitor.pick'
recorder_path = seed_path / 'recorder.json'
# Model save Path for the trained model
model_save_path = seed_path / f'model.zip'
# Env Init & Model kwargs definition
with env_class(**env_kwargs) as env_factory:
# EnvMonitor Init
env_monitor_callback = EnvMonitor(env_factory)
# EnvRecorder Init
env_recorder_callback = EnvRecorder(env_factory, freq=int(train_steps / 400 / 10))
# Model Init
model = model_class("MlpPolicy", env_factory, verbose=1, seed=seed, device='cpu')
# Model train
model.learn(total_timesteps=int(train_steps), callback=[env_monitor_callback, env_recorder_callback])
#########################################################
# 3. Save environment and agent for later analysis.
# Save the trained Model, the monitor (environment measures) and the environment parameters
model.named_observation_space = env_factory.named_observation_space
model.named_action_space = env_factory.named_action_space
model.save(model_save_path)
env_factory.save_params(param_path)
env_monitor_callback.save_run(monitor_path)
env_recorder_callback.save_records(recorder_path, save_occupation_map=False)
# Compare performance runs, for each seed within a model
try:
compare_seed_runs(exp_path, use_tex=False)
except ValueError:
pass
# Train ends here ############################################################
# Evaluation starts here #####################################################
# First Iterate over every model and monitor "as trained"
print('Start Measurement Tracking')
# For trained policy in study_root_path / _identifier
for policy_path in [x for x in exp_path.iterdir() if x.is_dir()]:
# retrieve model class
model_cls = next(val for key, val in h.MODEL_MAP.items() if key in policy_path.parent.name)
# Load the agent
model = model_cls.load(policy_path / 'model.zip', device='cpu')
# Load old environment kwargs
with next(policy_path.glob(env_params_json)).open('r') as f:
env_kwargs = simplejson.load(f)
# Make the environment stop at collisions
# (you only want a single collision per episode for clean statistics)
env_kwargs.update(done_at_collision=True)
# Init Env
with env_class(**env_kwargs) as env_factory:
monitored_env_factory = EnvMonitor(env_factory)
# Evaluation Loop for i in range(n Episodes)
for episode in range(100):
# noinspection PyRedeclaration
env_state = monitored_env_factory.reset()
rew, done_bool = 0, False
while not done_bool:
action = model.predict(env_state, deterministic=True)[0]
env_state, step_r, done_bool, info_obj = monitored_env_factory.step(action)
rew += step_r
if done_bool:
break
print(f'Factory run {episode} done, reward is:\n {rew}')
monitored_env_factory.save_run(filepath=policy_path / 'eval_run_monitor.pick')
print('Measurements Done')

View File

@@ -1,193 +0,0 @@
import sys
import time
from pathlib import Path
import simplejson
import stable_baselines3 as sb3
# This is needed, when you put this file in a subfolder.
try:
# noinspection PyUnboundLocalVariable
if __package__ is None:
DIR = Path(__file__).resolve().parent
sys.path.insert(0, str(DIR.parent))
__package__ = DIR.name
else:
DIR = None
except NameError:
DIR = None
pass
from environments import helpers as h
from environments.factory.additional.dest.dest_util import DestModeOptions, DestProperties
from environments.logging.envmonitor import EnvMonitor
from environments.logging.recorder import EnvRecorder
from environments.factory.additional.dest.factory_dest import DestFactory
from environments.utility_classes import MovementProperties, ObservationProperties, AgentRenderOptions
from plotting.compare_runs import compare_seed_runs
"""
Welcome to this quick start file. Here we will see how to:
0. Setup I/O Paths
1. Setup parameters for the environments (dest-factory).
2. Setup parameters for the agent training (SB3: PPO) and save metrics.
Run the training.
3. Save environment and agent for later analysis.
4. Load the agent from drive
5. Rendering the environment with a run of the trained agent.
6. Plot metrics
"""
if __name__ == '__main__':
#########################################################
# 0. Setup I/O Paths
# Define some general parameters
train_steps = 1e6
n_seeds = 3
model_class = sb3.PPO
env_class = DestFactory
env_params_json = 'env_params.json'
# Define a global study save path
start_time = int(time.time())
study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}'
# Create an _identifier, which is unique for every combination and easy to read in filesystem
identifier = f'{model_class.__name__}_{env_class.__name__}_{start_time}'
exp_path = study_root_path / identifier
#########################################################
# 1. Setup parameters for the environments (dest-factory).
# Define property object parameters.
# 'ObservationProperties' are for specifying how the agent sees the environment.
obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT, # Agents won't be shown in the obs at all
omit_agent_self=True, # This is default
additional_agent_placeholder=None, # We will not account for future agents
frames_to_stack=3, # To give the agent a notion of time
pomdp_r=2 # the agent's view radius
)
# 'MovementProperties' are for specifying how the agent is allowed to move in the environment.
move_props = MovementProperties(allow_diagonal_movement=True, # Euclidean style (vertices)
allow_square_movement=True, # Manhattan (edges)
allow_no_op=False) # Pause movement (do nothing)
# 'DestProperties' controls if and how destinations are spawned
# TODO: Comments
dest_props = DestProperties(
n_dests = 2, # How many destinations are there
dwell_time = 0, # How long does the agent need to "wait" on a destination
spawn_frequency = 0,
spawn_in_other_zone = True, #
spawn_mode = DestModeOptions.DONE,
)
# These are the EnvKwargs for initializing the environment class, holding all former parameter-classes
# TODO: Comments
factory_kwargs = dict(n_agents=1,
max_steps=400,
parse_doors=True,
level_name='rooms',
doors_have_area=True, #
verbose=False,
mv_prop=move_props, # See Above
obs_prop=obs_props, # See Above
done_at_collision=True,
dest_prop=dest_props
)
#########################################################
# 2. Setup parameters for the agent training (SB3: PPO) and save metrics.
agent_kwargs = dict()
#########################################################
# Run the Training
for seed in range(n_seeds):
# Make a copy if you want to alter things in the training loop; like the seed.
env_kwargs = factory_kwargs.copy()
env_kwargs.update(env_seed=seed)
# Output folder
seed_path = exp_path / f'{str(seed)}_{identifier}'
seed_path.mkdir(parents=True, exist_ok=True)
# Parameter Storage
param_path = seed_path / env_params_json
# Observation (measures) Storage
monitor_path = seed_path / 'monitor.pick'
recorder_path = seed_path / 'recorder.json'
# Model save Path for the trained model
model_save_path = seed_path / f'model.zip'
# Env Init & Model kwargs definition
with env_class(**env_kwargs) as env_factory:
# EnvMonitor Init
env_monitor_callback = EnvMonitor(env_factory)
# EnvRecorder Init
env_recorder_callback = EnvRecorder(env_factory, freq=int(train_steps / 400 / 10))
# Model Init
model = model_class("MlpPolicy", env_factory,verbose=1, seed=seed, device='cpu')
# Model train
model.learn(total_timesteps=int(train_steps), callback=[env_monitor_callback, env_recorder_callback])
#########################################################
# 3. Save environment and agent for later analysis.
# Save the trained Model, the monitor (environment measures) and the environment parameters
model.named_observation_space = env_factory.named_observation_space
model.named_action_space = env_factory.named_action_space
model.save(model_save_path)
env_factory.save_params(param_path)
env_monitor_callback.save_run(monitor_path)
env_recorder_callback.save_records(recorder_path, save_occupation_map=False)
# Compare performance runs, for each seed within a model
try:
compare_seed_runs(exp_path, use_tex=False)
except ValueError:
pass
# Train ends here ############################################################
# Evaluation starts here #####################################################
# First Iterate over every model and monitor "as trained"
print('Start Measurement Tracking')
# For trained policy in study_root_path / _identifier
for policy_path in [x for x in exp_path.iterdir() if x.is_dir()]:
# retrieve model class
model_cls = next(val for key, val in h.MODEL_MAP.items() if key in policy_path.parent.name)
# Load the agent
model = model_cls.load(policy_path / 'model.zip', device='cpu')
# Load old environment kwargs
with next(policy_path.glob(env_params_json)).open('r') as f:
env_kwargs = simplejson.load(f)
# Make the environment stop at collisions
# (you only want a single collision per episode for clean statistics)
env_kwargs.update(done_at_collision=True)
# Init Env
with env_class(**env_kwargs) as env_factory:
monitored_env_factory = EnvMonitor(env_factory)
# Evaluation Loop for i in range(n Episodes)
for episode in range(100):
# noinspection PyRedeclaration
env_state = monitored_env_factory.reset()
rew, done_bool = 0, False
while not done_bool:
action = model.predict(env_state, deterministic=True)[0]
env_state, step_r, done_bool, info_obj = monitored_env_factory.step(action)
rew += step_r
if done_bool:
break
print(f'Factory run {episode} done, reward is:\n {rew}')
monitored_env_factory.save_run(filepath=policy_path / 'eval_run_monitor.pick')
print('Measurements Done')

View File

@@ -1,195 +0,0 @@
import sys
import time
from pathlib import Path
import simplejson
import stable_baselines3 as sb3
# This is needed, when you put this file in a subfolder.
try:
# noinspection PyUnboundLocalVariable
if __package__ is None:
DIR = Path(__file__).resolve().parent
sys.path.insert(0, str(DIR.parent))
__package__ = DIR.name
else:
DIR = None
except NameError:
DIR = None
pass
from environments import helpers as h
from environments.logging.envmonitor import EnvMonitor
from environments.logging.recorder import EnvRecorder
from environments.factory.additional.dirt.dirt_util import DirtProperties
from environments.factory.additional.dirt.factory_dirt import DirtFactory
from environments.utility_classes import MovementProperties, ObservationProperties, AgentRenderOptions
from plotting.compare_runs import compare_seed_runs
"""
Welcome to this quick start file. Here we will see how to:
0. Setup I/O Paths
1. Setup parameters for the environments (dirt-factory).
2. Setup parameters for the agent training (SB3: PPO) and save metrics.
Run the training.
3. Save environment and agent for later analysis.
4. Load the agent from drive
5. Rendering the environment with a run of the trained agent.
6. Plot metrics
"""
if __name__ == '__main__':
#########################################################
# 0. Setup I/O Paths
# Define some general parameters
train_steps = 1e6
n_seeds = 3
model_class = sb3.PPO
env_class = DirtFactory
env_params_json = 'env_params.json'
# Define a global study save path
start_time = int(time.time())
study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}'
# Create an _identifier, which is unique for every combination and easy to read in filesystem
identifier = f'{model_class.__name__}_{env_class.__name__}_{start_time}'
exp_path = study_root_path / identifier
#########################################################
# 1. Setup parameters for the environments (dirt-factory).
# Define property object parameters.
# 'ObservationProperties' are for specifying how the agent sees the environment.
obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT, # Agents won't be shown in the obs at all
omit_agent_self=True, # This is default
additional_agent_placeholder=None, # We will not account for future agents
frames_to_stack=3, # To give the agent a notion of time
pomdp_r=2 # the agent's view radius
)
# 'MovementProperties' are for specifying how the agent is allowed to move in the environment.
move_props = MovementProperties(allow_diagonal_movement=True, # Euclidean style (vertices)
allow_square_movement=True, # Manhattan (edges)
allow_no_op=False) # Pause movement (do nothing)
# 'DirtProperties' controls if and how dirt is spawned
# TODO: Comments
dirt_props = DirtProperties(initial_dirt_ratio=0.35,
initial_dirt_spawn_r_var=0.1,
clean_amount=0.34,
max_spawn_amount=0.1,
max_global_amount=20,
max_local_amount=1,
spawn_frequency=0,
max_spawn_ratio=0.05,
dirt_smear_amount=0.0)
# These are the EnvKwargs for initializing the environment class, holding all former parameter-classes
# TODO: Comments
factory_kwargs = dict(n_agents=1,
max_steps=400,
parse_doors=True,
level_name='rooms',
doors_have_area=True, #
verbose=False,
mv_prop=move_props, # See Above
obs_prop=obs_props, # See Above
done_at_collision=True,
dirt_prop=dirt_props
)
#########################################################
# 2. Setup parameters for the agent training (SB3: PPO) and save metrics.
agent_kwargs = dict()
#########################################################
# Run the Training
for seed in range(n_seeds):
# Make a copy if you want to alter things in the training loop; like the seed.
env_kwargs = factory_kwargs.copy()
env_kwargs.update(env_seed=seed)
# Output folder
seed_path = exp_path / f'{str(seed)}_{identifier}'
seed_path.mkdir(parents=True, exist_ok=True)
# Parameter Storage
param_path = seed_path / env_params_json
# Observation (measures) Storage
monitor_path = seed_path / 'monitor.pick'
recorder_path = seed_path / 'recorder.json'
# Model save Path for the trained model
model_save_path = seed_path / f'model.zip'
# Env Init & Model kwargs definition
with env_class(**env_kwargs) as env_factory:
# EnvMonitor Init
env_monitor_callback = EnvMonitor(env_factory)
# EnvRecorder Init
env_recorder_callback = EnvRecorder(env_factory, freq=int(train_steps / 400 / 10))
# Model Init
model = model_class("MlpPolicy", env_factory, verbose=1, seed=seed, device='cpu')
# Model train
model.learn(total_timesteps=int(train_steps), callback=[env_monitor_callback, env_recorder_callback])
#########################################################
# 3. Save environment and agent for later analysis.
# Save the trained Model, the monitor (environment measures) and the environment parameters
model.named_observation_space = env_factory.named_observation_space
model.named_action_space = env_factory.named_action_space
model.save(model_save_path)
env_factory.save_params(param_path)
env_monitor_callback.save_run(monitor_path)
env_recorder_callback.save_records(recorder_path, save_occupation_map=False)
# Compare performance runs, for each seed within a model
try:
compare_seed_runs(exp_path, use_tex=False)
except ValueError:
pass
# Train ends here ############################################################
# Evaluation starts here #####################################################
# First Iterate over every model and monitor "as trained"
print('Start Measurement Tracking')
# For trained policy in study_root_path / _identifier
for policy_path in [x for x in exp_path.iterdir() if x.is_dir()]:
# retrieve model class
model_cls = next(val for key, val in h.MODEL_MAP.items() if key in policy_path.parent.name)
# Load the agent
model = model_cls.load(policy_path / 'model.zip', device='cpu')
# Load old environment kwargs
with next(policy_path.glob(env_params_json)).open('r') as f:
env_kwargs = simplejson.load(f)
# Make the environment stop at collisions
# (you only want a single collision per episode for clean statistics)
env_kwargs.update(done_at_collision=True)
# Init Env
with env_class(**env_kwargs) as env_factory:
monitored_env_factory = EnvMonitor(env_factory)
# Evaluation Loop for i in range(n Episodes)
for episode in range(100):
# noinspection PyRedeclaration
env_state = monitored_env_factory.reset()
rew, done_bool = 0, False
while not done_bool:
action = model.predict(env_state, deterministic=True)[0]
env_state, step_r, done_bool, info_obj = monitored_env_factory.step(action)
rew += step_r
if done_bool:
break
print(f'Factory run {episode} done, reward is:\n {rew}')
monitored_env_factory.save_run(filepath=policy_path / 'eval_run_monitor.pick')
print('Measurements Done')

View File

@@ -1,191 +0,0 @@
import sys
import time
from pathlib import Path
import simplejson
import stable_baselines3 as sb3
# This is needed, when you put this file in a subfolder.
try:
# noinspection PyUnboundLocalVariable
if __package__ is None:
DIR = Path(__file__).resolve().parent
sys.path.insert(0, str(DIR.parent))
__package__ = DIR.name
else:
DIR = None
except NameError:
DIR = None
pass
from environments import helpers as h
from environments.factory.additional.item.factory_item import ItemFactory
from environments.factory.additional.item.item_util import ItemProperties
from environments.logging.envmonitor import EnvMonitor
from environments.logging.recorder import EnvRecorder
from environments.utility_classes import MovementProperties, ObservationProperties, AgentRenderOptions
from plotting.compare_runs import compare_seed_runs
"""
Welcome to this quick start file. Here we will see how to:
0. Setup I/O Paths
1. Setup parameters for the environments (item-factory).
2. Setup parameters for the agent training (SB3: PPO) and save metrics.
Run the training.
3. Save environment and agent for later analysis.
4. Load the agent from drive
5. Rendering the environment with a run of the trained agent.
6. Plot metrics
"""
if __name__ == '__main__':
#########################################################
# 0. Setup I/O Paths
# Define some general parameters
train_steps = 1e6
n_seeds = 3
model_class = sb3.PPO
env_class = ItemFactory
env_params_json = 'env_params.json'
# Define a global study save path
start_time = int(time.time())
study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}'
# Create an _identifier, which is unique for every combination and easy to read in filesystem
identifier = f'{model_class.__name__}_{env_class.__name__}_{start_time}'
exp_path = study_root_path / identifier
#########################################################
# 1. Setup parameters for the environments (item-factory).
#
# Define property object parameters.
# 'ObservationProperties' are for specifying how the agent sees the environment.
obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT, # Agents won't be shown in the obs at all
omit_agent_self=True, # This is default
additional_agent_placeholder=None, # We will not account for future agents
frames_to_stack=3, # To give the agent a notion of time
pomdp_r=2 # the agent's view radius
)
# 'MovementProperties' are for specifying how the agent is allowed to move in the environment.
move_props = MovementProperties(allow_diagonal_movement=True, # Euclidean style (vertices)
allow_square_movement=True, # Manhattan (edges)
allow_no_op=False) # Pause movement (do nothing)
# 'ItemProperties' controls if and how items are spawned
# TODO: Comments
item_props = ItemProperties(
n_items = 7, # How many items are there at the same time
spawn_frequency = 50, # Spawn Frequency in Steps
n_drop_off_locations = 10, # How many DropOff locations are there at the same time
max_dropoff_storage_size = 0, # How many items are needed until the dropoff is full
max_agent_inventory_capacity = 5, # How many items are needed until the agent inventory is full
)
# These are the EnvKwargs for initializing the environment class, holding all former parameter-classes
# TODO: Comments
factory_kwargs = dict(n_agents=1,
max_steps=400,
parse_doors=True,
level_name='rooms',
doors_have_area=True, #
verbose=False,
mv_prop=move_props, # See Above
obs_prop=obs_props, # See Above
done_at_collision=True,
item_prop=item_props
)
#########################################################
# 2. Setup parameters for the agent training (SB3: PPO) and save metrics.
agent_kwargs = dict()
#########################################################
# Run the Training
for seed in range(n_seeds):
# Make a copy if you want to alter things in the training loop; like the seed.
env_kwargs = factory_kwargs.copy()
env_kwargs.update(env_seed=seed)
# Output folder
seed_path = exp_path / f'{str(seed)}_{identifier}'
seed_path.mkdir(parents=True, exist_ok=True)
# Parameter Storage
param_path = seed_path / env_params_json
# Observation (measures) Storage
monitor_path = seed_path / 'monitor.pick'
recorder_path = seed_path / 'recorder.json'
# Model save Path for the trained model
model_save_path = seed_path / f'model.zip'
# Env Init & Model kwargs definition
with ItemFactory(**env_kwargs) as env_factory:
# EnvMonitor Init
env_monitor_callback = EnvMonitor(env_factory)
# EnvRecorder Init
env_recorder_callback = EnvRecorder(env_factory, freq=int(train_steps / 400 / 10))
# Model Init
model = model_class("MlpPolicy", env_factory,verbose=1, seed=seed, device='cpu')
# Model train
model.learn(total_timesteps=int(train_steps), callback=[env_monitor_callback, env_recorder_callback])
#########################################################
# 3. Save environment and agent for later analysis.
# Save the trained Model, the monitor (environment measures) and the environment parameters
model.named_observation_space = env_factory.named_observation_space
model.named_action_space = env_factory.named_action_space
model.save(model_save_path)
env_factory.save_params(param_path)
env_monitor_callback.save_run(monitor_path)
env_recorder_callback.save_records(recorder_path, save_occupation_map=False)
# Compare performance runs, for each seed within a model
try:
compare_seed_runs(exp_path, use_tex=False)
except ValueError:
pass
# Train ends here ############################################################
# Evaluation starts here #####################################################
# First Iterate over every model and monitor "as trained"
print('Start Measurement Tracking')
# For trained policy in study_root_path / _identifier
for policy_path in [x for x in exp_path.iterdir() if x.is_dir()]:
# retrieve model class
model_cls = next(val for key, val in h.MODEL_MAP.items() if key in policy_path.parent.name)
# Load the agent
model = model_cls.load(policy_path / 'model.zip', device='cpu')
# Load old environment kwargs
with next(policy_path.glob(env_params_json)).open('r') as f:
env_kwargs = simplejson.load(f)
# Make the environment stop at collisions
# (you only want a single collision per episode for clean statistics)
env_kwargs.update(done_at_collision=True)
# Init Env
with ItemFactory(**env_kwargs) as env_factory:
monitored_env_factory = EnvMonitor(env_factory)
# Evaluation Loop for i in range(n Episodes)
for episode in range(100):
# noinspection PyRedeclaration
env_state = monitored_env_factory.reset()
rew, done_bool = 0, False
while not done_bool:
action = model.predict(env_state, deterministic=True)[0]
env_state, step_r, done_bool, info_obj = monitored_env_factory.step(action)
rew += step_r
if done_bool:
break
print(f'Factory run {episode} done, reward is:\n {rew}')
monitored_env_factory.save_run(filepath=policy_path / 'eval_run_monitor.pick')
print('Measurements Done')

View File

@@ -1,25 +0,0 @@
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = source
BUILDDIR = build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
buildapi:
sphinx-apidoc.exe -fEM -T -t _templates -o source/source ../marl_factory_grid "../**/marl", "../**/proto"
@echo "Auto-generation of 'SOURCEAPI' documentation finished. " \
"The generated files were placed in 'source/'"
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

View File

@@ -1,35 +0,0 @@
@ECHO OFF
pushd %~dp0
REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.https://www.sphinx-doc.org/
exit /b 1
)
if "%1" == "" goto help
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
:end
popd

View File

@@ -1,4 +0,0 @@
myst_parser
sphinx-pdj-theme
sphinx-mdinclude
sphinx-book-theme

View File

@@ -1,72 +0,0 @@
# Configuration file for the Sphinx documentation builder.
#
# For the full list of built-in configuration values, see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
project = 'marl-factory-grid'
copyright = '2023, Steffen Illium, Robert Mueller, Joel Friedrich'
author = 'Steffen Illium, Robert Mueller, Joel Friedrich'
release = '2.5.0'
# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
extensions = [#'myst_parser',
'sphinx.ext.todo',
'sphinx.ext.autodoc',
'sphinx.ext.intersphinx',
# 'sphinx.ext.autosummary',
'sphinx.ext.linkcode',
'sphinx_mdinclude',
]
templates_path = ['_templates']
exclude_patterns = ['marl_factory_grid.utils.proto', 'marl_factory_grid.utils.proto.fiksProto_pb2*']
autoclass_content = 'both'
autodoc_class_signature = 'separated'
autodoc_typehints = 'description'
autodoc_inherit_docstrings = True
autodoc_typehints_format = 'short'
autodoc_default_options = {
'members': True,
# 'member-order': 'bysource',
'special-members': '__init__',
'undoc-members': True,
# 'exclude-members': '__weakref__',
'show-inheritance': True,
}
autosummary_generate = True
add_module_names = False
toc_object_entries = False
modindex_common_prefix = ['marl_factory_grid.']
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here.
from pathlib import Path
import sys
sys.path.insert(0, (Path(__file__).parents[2]).resolve().as_posix())
sys.path.insert(0, (Path(__file__).parents[2] / 'marl_factory_grid').resolve().as_posix())
# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
html_theme = "sphinx_book_theme" # 'alabaster'
# html_static_path = ['_static']
# In your configuration, you need to specify a linkcode_resolve function that returns a URL based on the object.
# https://www.sphinx-doc.org/en/master/usage/extensions/linkcode.html
def linkcode_resolve(domain, info):
if domain != 'py':
return None
if not info['module']:
return None
filename = info['module'].replace('.', '/')
return "https://github.com/illiumst/marl-factory-grid/%s.py" % filename
print(sys.executable)

View File

@@ -1,99 +0,0 @@
Creating a New Scenario
=======================
Creating a new scenario in the `marl-factory-grid` environment allows you to customize the environment to fit your specific requirements. This guide provides step-by-step instructions on how to create a new scenario, including defining a configuration file, designing a level, and potentially adding new entities, rules, and assets. See the "modifications.rst" file for more information on how to modify existing entities, levels, rules, groups and assets.
Step 1: Define Configuration File
---------------------------------
1. **Create a Configuration File:** Start by creating a new configuration file (`.yaml`) for your scenario. This file will contain settings such as the number of agents, environment dimensions, and other parameters. You can use existing configuration files as templates.
2. **Specify Custom Parameters:** Modify the configuration file to include any custom parameters specific to your scenario. For example, you can set the respawn rate of entities or define specific rewards.
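For instance, a first version of such a file could be generated like this (a hedged sketch, modelled on the default config shown earlier in this commit; whether the environment accepts exactly this subset of keys is an assumption):

>>> import yaml  # PyYAML, used here only to write the file
>>> my_config = {
...     'General': {'level_name': 'rooms', 'env_seed': 0, 'verbose': False,
...                 'pomdp_r': 3, 'individual_rewards': True},
...     'Entities': {'DirtPiles': {'initial_dirt_ratio': 0.01}},
...     'Agents': {'Wolfgang': {'Actions': ['Move8', 'Noop', 'CleanUp'],
...                             'Observations': ['Self', 'Walls', 'DirtPiles']}},
...     'Rules': {'Collision': {'done_at_collisions': False}},
... }
>>> with open('my_scenario_config.yaml', 'w') as f:
...     yaml.safe_dump(my_config, f)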
Step 2: Design the Level
------------------------
1. **Create a Level File:** Design the layout of your environment by creating a new level file (`.txt`). Use symbols such as `#` for walls, `-` for walkable floors, and introduce new symbols for custom entities.
2. **Define Entity Locations:** Specify the initial locations of entities, including agents and any new entities introduced in your scenario. These spawn locations are typically provided in the conf file.
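As an illustration, a small two-room layout using the symbols above could be written out like this (illustrative only; place the file wherever your config's ``level_name`` expects it):

>>> from pathlib import Path
>>> level = (
...     "###########\n"
...     "#----#----#\n"
...     "#----D----#\n"
...     "#----#----#\n"
...     "###########\n"
... )
>>> _ = Path('my_scenario_level.txt').write_text(level)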
Step 3: Introduce New Entities
------------------------------
1. **Create New Entity Modules:** If your scenario involves introducing new entities, create new entity modules in the `marl_factory_grid/environment/entity` directory. Define their behavior, properties, and any custom actions they can perform. Check out the template module.
2. **Update Configuration:** Update the configuration file to include settings related to your new entities, such as spawn rates, initial quantities, or any specific behaviors.
Step 4: Implement Custom Rules
------------------------------
1. **Create Rule Modules:** If your scenario requires custom rules, create new rule modules in the `marl_factory_grid/environment/rules` directory. Implement the necessary logic to govern the behavior of entities in your scenario and use the provided environment hooks.
2. **Update Configuration:** If your custom rules have configurable parameters, update the configuration file to include these settings and activate the rule by adding it to the conf file.
Step 5: Add Custom Assets (Optional)
------------------------------------
1. **Include Custom Asset Files:** If your scenario introduces new assets (e.g., images for entities), include the necessary asset files in the appropriate directories, such as `marl_factory_grid/environment/assets`.
Step 6: Test and Experiment
---------------------------
1. **Run Your Scenario:** Use the provided scripts or write your own script to run the scenario with your customized configuration. Observe the behavior of agents and entities in the environment.
2. **Iterate and Experiment:** Adjust configuration parameters, level design, or introduce new elements based on your observations. Iterate through this process until your scenario meets your desired specifications.
Congratulations! You have successfully created a new scenario in the `marl-factory-grid` environment. Experiment with different configurations, levels, entities, and rules to design unique and engaging environments for your simulations. Below you find an example of how to create a new scenario.
New Example Scenario: Apple Resource Dilemma
--------------------------------------------
To provide you with an example, we'll guide you through creating the "Apple Resource Dilemma" scenario using the steps outlined in the tutorial.
In this example scenario, agents face a dilemma of collecting apples. The apples only spawn if there are already enough in the environment. If agents collect them at the beginning, they won't respawn as quickly as if they wait for more to spawn before collecting.
**Step 1: Define Configuration File**
1. **Create a Configuration File:** Start by creating a new configuration file, e.g., `apple_dilemma_config.yaml`. Use the default config file as a good starting point.
2. **Specify Custom Parameters:** Add custom parameters to control the behavior of your scenario. Also delete unused entities, actions and observations, such as dirt piles, from the default config file.
**Step 2: Design the Level**
1. Create a Level File: Design the layout of your environment by creating a new level file, e.g., apple_dilemma_level.txt.
Of course you can also just use or modify an existing level.
2. Define Entity Locations: Specify the initial locations of entities, including doors (D). Since the apples will likely be spawning randomly, it would not make sense to encode their spawn in the level file.
**Step 3: Introduce New Entities**
1. Create New Entity Modules: Create a new entity module for the apple in the `marl_factory_grid/environment/entity` directory. Use the module template or existing modules as inspiration. Instead of creating a new agent, the item agent can be used, as it is already configured to collect all items and drop them off at designated locations.
2. Update Configuration: Update the configuration file to include settings related to your new entities. Agents need to be able to interact and observe them.
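The additions could look roughly like this (a hedged sketch; the ``Apples`` section name is hypothetical and must match whatever your entity module registers, while the action and observation names are taken from the default config shown earlier):

>>> apple_additions = {
...     'Entities': {'Apples': {}},   # hypothetical new entity section
...     'Agents': {'Wolfgang': {
...         'Actions': ['Move8', 'ItemAction', 'Noop'],
...         'Observations': ['Self', 'Walls', 'Apples', 'Inventory', 'DropOffLocations'],
...     }},
... }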
**Step 4: Implement Custom Rules**
1. Create Rule Modules: You might want to create new rule modules. For example, apple_respawn_rule.py could be inspired by the dirt respawn rule:

>>> from marl_factory_grid.environment.rules.rule import Rule
>>> class AppleRespawnRule(Rule):
...     def __init__(self, apple_spawn_rate=0.1):
...         super().__init__()
...         self.apple_spawn_rate = apple_spawn_rate
...     def tick_post_step(self, state):
...         # Logic to respawn apples based on spawn rate
...         pass
2. Update Configuration: Update the configuration file to include the new rule.
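The new entry would mirror how ``DirtRespawnRule`` is configured in the default config shown earlier in this commit; whether the loader resolves the rule purely by its class name is an assumption:

>>> import yaml  # PyYAML
>>> with open('apple_dilemma_config.yaml') as f:
...     config = yaml.safe_load(f)
>>> config.setdefault('Rules', {})['AppleRespawnRule'] = {'apple_spawn_rate': 0.1}
>>> with open('apple_dilemma_config.yaml', 'w') as f:
...     yaml.safe_dump(config, f)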
**Step 5: Add Custom Assets (Optional)**
1. Include Custom Asset Files: If your scenario introduces new assets (e.g., images for entities), include the necessary files in the appropriate directories, such as `marl_factory_grid/environment/assets`.
**Step 6: Test and Experiment**

View File

@@ -1,23 +0,0 @@
.. toctree::
:maxdepth: 1
:caption: Table of Contents
:titlesonly:
installation
usage
modifications
creating a new scenario
testing
source
.. note::
This project is under active development.
.. mdinclude:: ../../README.md
Indices and tables
------------------
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

View File

@@ -1,22 +0,0 @@
Installation
============
How to install the environment
------------------------------
To use `marl-factory-grid`, first install it using pip:
.. code-block:: console
(.venv) $ pip install marl-factory-grid
Indices and tables
------------------
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

View File

@@ -1,92 +0,0 @@
Custom Modifications
====================
This section covers main aspects of working with the environment.
Modifying levels
----------------
Varying levels are created by defining Walls, Floors or Doors in *.txt*-files (see `levels`_ for examples).
Define which *level* to use in your *config file* as:
.. _levels: marl_factory_grid/levels
>>> General:
level_name: rooms # 'simple', 'narrow_corridor', 'eight_puzzle',...
... or create your own. Maybe with the help of `asciiflow.com <https://asciiflow.com/#/>`_.
Make sure to use `#` as `Walls`_ , `-` as free (walkable) floor and `D` for `Doors`_.
Other Entities (define your own) may bring their own `Symbols`.
.. _Walls: marl_factory_grid/environment/entity/wall.py
.. _Doors: modules/doors/entities.py
Modifying Entities
------------------
Entities are `Objects`_ that can additionally be assigned a position.
Abstract Entities are provided.
If you wish to introduce new entities to the environment, just create a new module that implements the entity class. If
necessary, provide additional classes such as custom actions or rewards, and load the entity into the environment using
the config file.
.. _Objects: marl_factory_grid/environment/entity/object.py
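A purely illustrative sketch of such a module (the base class and its import path are assumed from the package docs; check the existing entity modules for the real interface):

>>> from marl_factory_grid.environment.entity.entity import Entity   # base-class location is an assumption
>>> class Apple(Entity):
...     # Entity-specific state, rendering name and custom behaviour would go here;
...     # kept empty on purpose -- this is a placeholder, not a working entity.
...     pass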
Modifying Groups
----------------
`Groups`_ are entity Sets that provide administrative access to all group members.
All `Entity Collections`_ are available at runtime as a property of the env state.
If you add an entity, you probably also want a collection of that entity.
.. _Groups: marl_factory_grid/environment/groups/objects.py
.. _Entity Collections: marl_factory_grid/environment/entity/global_entities.py
Modifying Rules
---------------
`Rules <https://marl-factory-grid.readthedocs.io/en/latest/code/marl_factory_grid.environment.rules.html>`_ define how
the environment behaves on a micro scale. Each of the hooks (`on_init`, `pre_step`, `on_step`, `post_step`, `on_done`)
provides env-access to implement custom logic, calculate rewards, or gather information.
If you wish to introduce new rules to the environment, make sure they implement the Rule class and override its hooks
to implement your own rule logic.
.. image:: ../../images/Hooks_FIKS.png
:alt: Hooks Image
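A minimal, hedged sketch of such a rule (the per-step hook name ``tick_post_step`` and the import are taken from the scenario example in these docs; how results are handed back to the environment is an assumption):

>>> from marl_factory_grid.environment.rules.rule import Rule
>>> class StepCounter(Rule):
...     """Illustrative only: gather information once per environment step."""
...     def __init__(self):
...         super().__init__()
...         self.steps = 0
...     def tick_post_step(self, state):
...         self.steps += 1
...         return []   # assumption: hooks report back via a (possibly empty) list of results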
Modifying Constants and Rewards
-------------------------------
Customizing rewards and constants allows you to tailor the environment to specific requirements.
You can set custom rewards in the configuration file. If no specific rewards are defined, the environment
will utilize default rewards, which are provided in the constants file of each module.
In addition to rewards, you can also customize other constants used in the environment's rules or actions. Each module has
its dedicated constants file, while global constants are centrally located in the environment's constants file.
Be careful when making changes to constants, as they can radically impact the behavior of the environment. Only modify
constants if you have a solid understanding of their implications and are confident in the adjustments you're making.
Modifying Results
-----------------
`Results <https://marl-factory-grid.readthedocs.io/en/latest/code/marl_factory_grid.utils.results.html>`_
provide a way to return `rule` evaluations such as rewards and state reports back to the environment.
Modifying Assets
----------------
Make sure to bring your own assets for each Entity living in the Gridworld, as the `Renderer` relies on them.
PNG files (transparent background) with a square aspect ratio should do the job, in general.
.. image:: ../../marl_factory_grid/environment/assets/wall.png
:alt: Wall Image
.. image:: ../../marl_factory_grid/environment/assets/agent/agent.png
:alt: Agent Image
Indices and tables
------------------
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

View File

@@ -1,17 +0,0 @@
Source
======
.. toctree::
:maxdepth: 2
:glob:
:caption: Table of Contents
:titlesonly:
source/*
Indices and tables
------------------
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

View File

@@ -1,40 +0,0 @@
marl\_factory\_grid.environment.entity package
==============================================
.. automodule:: marl_factory_grid.environment.entity
:members:
:undoc-members:
:show-inheritance:
Submodules
----------
.. automodule:: marl_factory_grid.environment.entity.agent
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.environment.entity.entity
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.environment.entity.object
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.environment.entity.util
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.environment.entity.wall
:members:
:undoc-members:
:show-inheritance:

View File

@@ -1,52 +0,0 @@
marl\_factory\_grid.environment.groups package
==============================================
.. automodule:: marl_factory_grid.environment.groups
:members:
:undoc-members:
:show-inheritance:
Submodules
----------
.. automodule:: marl_factory_grid.environment.groups.agents
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.environment.groups.collection
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.environment.groups.global_entities
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.environment.groups.mixins
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.environment.groups.objects
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.environment.groups.utils
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.environment.groups.walls
:members:
:undoc-members:
:show-inheritance:

View File

@@ -1,49 +0,0 @@
marl\_factory\_grid.environment package
=======================================
.. automodule:: marl_factory_grid.environment
:members:
:undoc-members:
:show-inheritance:
Subpackages
-----------
.. toctree::
:maxdepth: 4
marl_factory_grid.environment.entity
marl_factory_grid.environment.groups
Submodules
----------
.. automodule:: marl_factory_grid.environment.actions
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.environment.constants
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.environment.factory
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.environment.rewards
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.environment.rules
:members:
:undoc-members:
:show-inheritance:

View File

@@ -1,7 +0,0 @@
marl\_factory\_grid.levels package
==================================
.. automodule:: marl_factory_grid.levels
:members:
:undoc-members:
:show-inheritance:

View File

@@ -1,40 +0,0 @@
marl\_factory\_grid.modules.batteries package
=============================================
.. automodule:: marl_factory_grid.modules.batteries
:members:
:undoc-members:
:show-inheritance:
Submodules
----------
.. automodule:: marl_factory_grid.modules.batteries.actions
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.modules.batteries.constants
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.modules.batteries.entitites
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.modules.batteries.groups
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.modules.batteries.rules
:members:
:undoc-members:
:show-inheritance:

View File

@@ -1,40 +0,0 @@
marl\_factory\_grid.modules.clean\_up package
=============================================
.. automodule:: marl_factory_grid.modules.clean_up
:members:
:undoc-members:
:show-inheritance:
Submodules
----------
.. automodule:: marl_factory_grid.modules.clean_up.actions
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.modules.clean_up.constants
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.modules.clean_up.entitites
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.modules.clean_up.groups
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.modules.clean_up.rules
:members:
:undoc-members:
:show-inheritance:

View File

@@ -1,40 +0,0 @@
marl\_factory\_grid.modules.destinations package
================================================
.. automodule:: marl_factory_grid.modules.destinations
:members:
:undoc-members:
:show-inheritance:
Submodules
----------
.. automodule:: marl_factory_grid.modules.destinations.actions
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.modules.destinations.constants
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.modules.destinations.entitites
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.modules.destinations.groups
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.modules.destinations.rules
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,40 +0,0 @@
marl\_factory\_grid.modules.doors package
=========================================
.. automodule:: marl_factory_grid.modules.doors
:members:
:undoc-members:
:show-inheritance:
Submodules
----------
.. automodule:: marl_factory_grid.modules.doors.actions
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.modules.doors.constants
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.modules.doors.entitites
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.modules.doors.groups
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.modules.doors.rules
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,40 +0,0 @@
marl\_factory\_grid.modules.items package
=========================================
.. automodule:: marl_factory_grid.modules.items
:members:
:undoc-members:
:show-inheritance:
Submodules
----------
.. automodule:: marl_factory_grid.modules.items.actions
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.modules.items.constants
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.modules.items.entitites
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.modules.items.groups
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.modules.items.rules
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,40 +0,0 @@
marl\_factory\_grid.modules.machines package
============================================
.. automodule:: marl_factory_grid.modules.machines
:members:
:undoc-members:
:show-inheritance:
Submodules
----------
.. automodule:: marl_factory_grid.modules.machines.actions
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.modules.machines.constants
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.modules.machines.entitites
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.modules.machines.groups
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.modules.machines.rules
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,34 +0,0 @@
marl\_factory\_grid.modules.maintenance package
===============================================
.. automodule:: marl_factory_grid.modules.maintenance
:members:
:undoc-members:
:show-inheritance:
Submodules
----------
.. automodule:: marl_factory_grid.modules.maintenance.constants
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.modules.maintenance.entities
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.modules.maintenance.groups
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.modules.maintenance.rules
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,22 +0,0 @@
marl\_factory\_grid.modules package
===================================
.. automodule:: marl_factory_grid.modules
:members:
:undoc-members:
:show-inheritance:
Subpackages
-----------
.. toctree::
:maxdepth: 4
marl_factory_grid.modules.batteries
marl_factory_grid.modules.clean_up
marl_factory_grid.modules.destinations
marl_factory_grid.modules.doors
marl_factory_grid.modules.items
marl_factory_grid.modules.machines
marl_factory_grid.modules.maintenance
marl_factory_grid.modules.zones

View File

@ -1,34 +0,0 @@
marl\_factory\_grid.modules.zones package
=========================================
.. automodule:: marl_factory_grid.modules.zones
:members:
:undoc-members:
:show-inheritance:
Submodules
----------
.. automodule:: marl_factory_grid.modules.zones.constants
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.modules.zones.entitites
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.modules.zones.groups
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.modules.zones.rules
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,28 +0,0 @@
marl\_factory\_grid package
===========================
.. automodule:: marl_factory_grid
:members:
:undoc-members:
:show-inheritance:
Subpackages
-----------
.. toctree::
:maxdepth: 4
marl_factory_grid.algorithms
marl_factory_grid.environment
marl_factory_grid.levels
marl_factory_grid.modules
marl_factory_grid.utils
Submodules
----------
.. automodule:: marl_factory_grid.quickstart
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,22 +0,0 @@
marl\_factory\_grid.utils.logging package
=========================================
.. automodule:: marl_factory_grid.utils.logging
:members:
:undoc-members:
:show-inheritance:
Submodules
----------
.. automodule:: marl_factory_grid.utils.logging.envmonitor
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.utils.logging.recorder
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,28 +0,0 @@
marl\_factory\_grid.utils.plotting package
==========================================
.. automodule:: marl_factory_grid.utils.plotting
:members:
:undoc-members:
:show-inheritance:
Submodules
----------
.. automodule:: marl_factory_grid.utils.plotting.plot_compare_runs
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.utils.plotting.plot_single_runs
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.utils.plotting.plotting_utils
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,79 +0,0 @@
marl\_factory\_grid.utils package
=================================
.. automodule:: marl_factory_grid.utils
:members:
:undoc-members:
:show-inheritance:
Subpackages
-----------
.. toctree::
:maxdepth: 4
marl_factory_grid.utils.logging
marl_factory_grid.utils.plotting
Submodules
----------
.. automodule:: marl_factory_grid.utils.config_parser
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.utils.helpers
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.utils.level_parser
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.utils.observation_builder
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.utils.ray_caster
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.utils.renderer
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.utils.results
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.utils.states
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.utils.tools
:members:
:undoc-members:
:show-inheritance:
.. automodule:: marl_factory_grid.utils.utility_classes
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,15 +0,0 @@
Testing
=======
In EDYS, tests are seamlessly integrated through environment hooks, mirroring the organization of rules, as explained in the README.md file.
Running tests
-------------
To include specific tests in your run, simply append them to the ``tests`` section of your configuration file.
If a test requires a specific entity in the environment (e.g. the clean-up test requires a TSPDirtAgent that can observe
and clean dirt in its environment), make sure to include it in the config file.
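For illustration, such a config excerpt could look roughly like the following; the exact key names (the ``tests`` flag and the ``Tests`` block) are assumptions made here and should be checked against the shipped example configs::

    General:
      tests: true             # enable test hooks for this run (assumed flag)
    Entities:
      DirtPiles:              # entity required by the clean-up test
        coords_or_quantity: 10
    Tests:
      DirtAgentTest: {}       # hypothetical test entry; see tests.py for the real names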
Writing tests
-------------
If you intend to create additional tests, refer to the tests.py file for examples.
Ensure that any new test implements the corresponding test class and makes use of its hooks.
No additional steps are required beyond including your custom tests in the config file.
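A minimal sketch of what such a test could look like is shown below. The import path, the hook name and the way the global state is queried are assumptions made for illustration; mirror the actual examples in tests.py:

>>> from marl_factory_grid.utils.tools import Test   # assumed import path; copy the import used in tests.py
class AgentPositionTest(Test):                        # hypothetical test class
    def tick_step(self, state):
        # assumption: test hooks mirror the rule hooks and receive the global state every step
        for agent in state['Agent']:
            assert agent.pos is not None, f'{agent.name} has no position'
        return []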

View File

@ -1,75 +0,0 @@
Basic Usage
===========
Environment objects, including agents, entities and rules, that are specified in a *yaml* config file are loaded automatically.
Using ``quickstart_use`` creates a default config file and another one that lists all possible options of the environment.
It also generates an initial script in which an agent is executed in the environment specified by the config file.
After initializing the environment using the specified configuration file, the script enters a reinforcement learning loop.
The loop consists of episodes, where each episode involves resetting the environment, executing actions, and receiving feedback.
Here's a breakdown of the key components in the provided script. Feel free to customize it based on your specific requirements:
1. **Initialization:**
>>> path = Path('marl_factory_grid/configs/default_config.yaml')
factory = Factory(path)
factory = EnvMonitor(factory)
factory = EnvRecorder(factory)
- The `path` variable points to the location of your configuration file. Ensure it corresponds to the correct path.
- `Factory` initializes the environment based on the provided configuration.
- `EnvMonitor` and `EnvRecorder` are optional components. They add monitoring and recording functionalities to the environment, respectively.
2. **Reinforcement Learning Loop:**
>>> for episode in trange(10):
_ = factory.reset()
done = False
if render:
factory.render()
action_spaces = factory.action_space
agents = []
- The loop iterates over a specified number of episodes (in this case, 10).
- `factory.reset()` resets the environment for a new episode.
- `factory.render()` is used for visualization if rendering is enabled.
- `action_spaces` stores the action spaces available for the agents.
- `agents` will store agent-specific information during the episode.
3. **Taking Actions:**
>>> while not done:
a = [randint(0, x.n - 1) for x in action_spaces]
obs_type, _, reward, done, info = factory.step(a)
if render:
factory.render()
- Within each episode, the loop continues until the environment signals completion (`done`).
- `a` represents a list of random actions for each agent based on their action space.
- `factory.step(a)` executes the actions, returning observation types, rewards, completion status, and additional information.
4. **Handling Episode Completion:**
>>> if done:
print(f'Episode {episode} done...')
- After each episode, a message is printed indicating its completion.
Evaluating the run
------------------
If monitoring and recording are enabled, the environment states will be traced and recorded automatically.
The EnvMonitor class acts as a wrapper for Gym environments, monitoring and logging key information during interactions,
while the EnvRecorder class records state summaries during interactions in the environment.
At the end of each run, a plot displaying the step reward is generated. The step reward is the cumulative sum of rewards obtained by all agents throughout the episode.
Furthermore, a comparative plot that shows the achieved score (step reward) over several runs with different seeds or different parameter settings can be generated using the methods provided in plotting/plot_compare_runs.py.
For a more comprehensive evaluation, we recommend using the `Weights and Biases (W&B) <https://wandb.ai/site>`_ framework with the dataframes generated by the monitor and recorder. These can be found in the run path specified in your script and logged to W&B as sketched below. W&B provides a powerful API for logging and visualizing model training metrics, enabling analysis with predefined or custom metrics.
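A rough sketch of such an evaluation is given below; the file name under the run path is an assumption, so check what your monitor and recorder actually wrote:

>>> from pathlib import Path
import pandas as pd
import wandb
run_path = Path('study_out/example_run')          # wherever your script stored the run
df = pd.read_csv(run_path / 'monitor.csv')        # hypothetical file name written by EnvMonitor
wandb.init(project='marl-factory-grid', name=run_path.name)
for _, row in df.iterrows():
    wandb.log(row.to_dict())                      # one W&B log entry per recorded row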
Indices and tables
------------------
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

Binary file not shown
Before  |  Size: 296 KiB

View File

@ -1,4 +1,3 @@
from .quickstart import init
from marl_factory_grid.environment.factory import Factory
"""
Main module of the 'marl-factory-grid'-environment.

View File

@ -1 +1 @@
from marl_factory_grid.algorithms.marl.memory import MARLActorCriticMemory

View File

@ -11,7 +11,6 @@ import numpy as np
from torch.distributions import Categorical
from marl_factory_grid.algorithms.marl.base_a2c import PolicyGradient, cumulate_discount
from marl_factory_grid.algorithms.marl.memory import MARLActorCriticMemory
from marl_factory_grid.algorithms.utils import add_env_props, instantiate_class
from pathlib import Path
from collections import deque

View File

@ -2,8 +2,6 @@ import numpy as np; import torch as th; import scipy as sp;
from collections import deque
from torch import nn
# RLLab Magic for calculating the discounted return G(t) = R(t) + gamma * R(t-1)
# cf. https://github.com/rll/rllab/blob/ba78e4c16dc492982e648f117875b22af3965579/rllab/misc/special.py#L107
cumulate_discount = lambda x, gamma: sp.signal.lfilter([1], [1, - gamma], x[::-1], axis=0)[::-1]
class Net(th.nn.Module):

View File

@ -1,242 +0,0 @@
import torch
from typing import Union, List, Dict
import numpy as np
from torch.distributions import Categorical
from marl_factory_grid.algorithms.marl.memory import MARLActorCriticMemory
from marl_factory_grid.algorithms.utils import add_env_props, instantiate_class
from pathlib import Path
import pandas as pd
from collections import deque
class Names:
REWARD = 'reward'
DONE = 'done'
ACTION = 'action'
OBSERVATION = 'observation'
LOGITS = 'logits'
HIDDEN_ACTOR = 'hidden_actor'
HIDDEN_CRITIC = 'hidden_critic'
AGENT = 'agent'
ENV = 'env'
ENV_NAME = 'env_name'
N_AGENTS = 'n_agents'
ALGORITHM = 'algorithm'
MAX_STEPS = 'max_steps'
N_STEPS = 'n_steps'
BUFFER_SIZE = 'buffer_size'
CRITIC = 'critic'
BATCH_SIZE = 'batch_size'
N_ACTIONS = 'n_actions'
TRAIN_RENDER = 'train_render'
EVAL_RENDER = 'eval_render'
nms = Names
ListOrTensor = Union[List, torch.Tensor]
class BaseActorCritic:
def __init__(self, cfg):
self.factory = add_env_props(cfg)
self.__training = True
self.cfg = cfg
self.n_agents = cfg[nms.AGENT][nms.N_AGENTS]
self.reset_memory_after_epoch = True
self.setup()
def setup(self):
self.net = instantiate_class(self.cfg[nms.AGENT])
self.optimizer = torch.optim.RMSprop(self.net.parameters(), lr=3e-4, eps=1e-5)
@classmethod
def _as_torch(cls, x):
if isinstance(x, np.ndarray):
return torch.from_numpy(x)
elif isinstance(x, List):
return torch.tensor(x)
elif isinstance(x, (int, float)):
return torch.tensor([x])
return x
def train(self):
self.__training = True
networks = [self.net] if not isinstance(self.net, List) else self.net
for net in networks:
net.train()
def eval(self):
self.__training = False
networks = [self.net] if not isinstance(self.net, List) else self.net
for net in networks:
net.eval()
def load_state_dict(self, path: Path):
pass
def get_actions(self, out) -> ListOrTensor:
actions = [Categorical(logits=logits).sample().item() for logits in out[nms.LOGITS]]
return actions
def init_hidden(self) -> Dict[str, ListOrTensor]:
pass
def forward(self,
observations: ListOrTensor,
actions: ListOrTensor,
hidden_actor: ListOrTensor,
hidden_critic: ListOrTensor
) -> Dict[str, ListOrTensor]:
pass
@torch.no_grad()
def train_loop(self, checkpointer=None):
env = self.factory
if self.cfg[nms.ENV][nms.TRAIN_RENDER]:
env.render()
n_steps, max_steps = [self.cfg[nms.ALGORITHM][k] for k in [nms.N_STEPS, nms.MAX_STEPS]]
tm = MARLActorCriticMemory(self.n_agents, self.cfg[nms.ALGORITHM].get(nms.BUFFER_SIZE, n_steps))
global_steps, episode, df_results = 0, 0, []
reward_queue = deque(maxlen=2000)
while global_steps < max_steps:
obs = env.reset()
obs = list(obs.values())
last_hiddens = self.init_hidden()
last_action, reward = [-1] * self.n_agents, [0.] * self.n_agents
done, rew_log = [False] * self.n_agents, 0
if self.reset_memory_after_epoch:
tm.reset()
tm.add(observation=obs, action=last_action,
logits=torch.zeros(self.n_agents, 1, self.cfg[nms.AGENT][nms.N_ACTIONS]),
values=torch.zeros(self.n_agents, 1), reward=reward, done=done, **last_hiddens)
while not all(done):
out = self.forward(obs, last_action, **last_hiddens)
action = self.get_actions(out)
_, next_obs, reward, done, info = env.step(action)
done = [done] * self.n_agents if isinstance(done, bool) else done
if self.cfg[nms.ENV][nms.TRAIN_RENDER]:
env.render()
last_hiddens = dict(hidden_actor=out[nms.HIDDEN_ACTOR],
hidden_critic=out[nms.HIDDEN_CRITIC])
logits = torch.stack([tensor.squeeze(0) for tensor in out.get(nms.LOGITS, None)], dim=0)
values = torch.stack([tensor.squeeze(0) for tensor in out.get(nms.CRITIC, None)], dim=0)
tm.add(observation=obs, action=action, reward=reward, done=done,
logits=logits, values=values,
**last_hiddens)
obs = next_obs
last_action = action
if (global_steps+1) % n_steps == 0 or all(done):
with torch.inference_mode(False):
self.learn(tm)
global_steps += 1
rew_log += sum(reward)
reward_queue.extend(reward)
if checkpointer is not None:
checkpointer.step([
(f'agent#{i}', agent)
for i, agent in enumerate([self.net] if not isinstance(self.net, List) else self.net)
])
if global_steps >= max_steps:
break
if global_steps%100 == 0:
print(f'reward at episode: {episode} = {rew_log}')
episode += 1
df_results.append([episode, rew_log, *reward])
df_results = pd.DataFrame(df_results,
columns=['steps', 'reward', *[f'agent#{i}' for i in range(self.n_agents)]]
)
if checkpointer is not None:
df_results.to_csv(checkpointer.path / 'results.csv', index=False)
return df_results
@torch.inference_mode(True)
def eval_loop(self, n_episodes, render=False):
env = self.factory
if self.cfg[nms.ENV][nms.EVAL_RENDER]:
env.render()
episode, results = 0, []
while episode < n_episodes:
obs = env.reset()
obs = list(obs.values())
last_hiddens = self.init_hidden()
last_action, reward = [-1] * self.n_agents, [0.] * self.n_agents
done, rew_log, eps_rew = [False] * self.n_agents, 0, torch.zeros(self.n_agents)
while not all(done):
out = self.forward(obs, last_action, **last_hiddens)
action = self.get_actions(out)
_, next_obs, reward, done, info = env.step(action)
if self.cfg[nms.ENV][nms.EVAL_RENDER]:
env.render()
if isinstance(done, bool):
done = [done] * obs[0].shape[0]
obs = next_obs
last_action = action
last_hiddens = dict(hidden_actor=out.get(nms.HIDDEN_ACTOR, None),
hidden_critic=out.get(nms.HIDDEN_CRITIC, None)
)
eps_rew += torch.tensor(reward)
results.append(eps_rew.tolist() + [sum(eps_rew).item()] + [episode])
episode += 1
agent_columns = [f'agent#{i}' for i in range(self.cfg[nms.ENV][nms.N_AGENTS])]
results = pd.DataFrame(results, columns=agent_columns + ['sum', 'episode'])
results = pd.melt(results, id_vars=['episode'], value_vars=agent_columns + ['sum'],
value_name='reward', var_name='agent')
return results
@staticmethod
def compute_advantages(critic, reward, done, gamma, gae_coef=0.0):
tds = (reward + gamma * (1.0 - done) * critic[:, 1:].detach()) - critic[:, :-1]
if gae_coef <= 0:
return tds
gae = torch.zeros_like(tds[:, -1])
gaes = []
for t in range(tds.shape[1]-1, -1, -1):
gae = tds[:, t] + gamma * gae_coef * (1.0 - done[:, t]) * gae
gaes.insert(0, gae)
gaes = torch.stack(gaes, dim=1)
return gaes
def actor_critic(self, tm, network, gamma, entropy_coef, vf_coef, gae_coef=0.0, **kwargs):
obs, actions, done, reward = tm.observation, tm.action, tm.done[:, 1:], tm.reward[:, 1:]
out = network(obs, actions, tm.hidden_actor[:, 0].squeeze(0), tm.hidden_critic[:, 0].squeeze(0))
logits = out[nms.LOGITS][:, :-1] # last one only needed for v_{t+1}
critic = out[nms.CRITIC]
entropy_loss = Categorical(logits=logits).entropy().mean(-1)
advantages = self.compute_advantages(critic, reward, done, gamma, gae_coef)
value_loss = advantages.pow(2).mean(-1) # n_agent
# policy loss
log_ap = torch.log_softmax(logits, -1)
log_ap = torch.gather(log_ap, dim=-1, index=actions[:, 1:].unsqueeze(-1)).squeeze()
a2c_loss = -(advantages.detach() * log_ap).mean(-1)
# weighted loss
loss = a2c_loss + vf_coef*value_loss - entropy_coef * entropy_loss
return loss.mean()
def learn(self, tm: MARLActorCriticMemory, **kwargs):
loss = self.actor_critic(tm, self.net, **self.cfg[nms.ALGORITHM], **kwargs)
# remove next_obs, will be added in next iter
self.optimizer.zero_grad()
loss.backward()
torch.nn.utils.clip_grad_norm_(self.net.parameters(), 0.5)
self.optimizer.step()

View File

@ -1,8 +0,0 @@
marl_factory_grid>environment>rules.py#SpawnEntity.on_reset()
marl_factory_grid>environment>rewards.py
marl_factory_grid>modules>clean_up>groups.py#DirtPiles.trigger_spawn()
marl_factory_grid>environment>rules.py#AgentSpawnRule
marl_factory_grid>utils>states.py#GameState.__init__()
marl_factory_grid>environment>factory.py>Factory#render
marl_factory_grid>environment>factory.py>Factory#set_recorder
marl_factory_grid>utils>renderer.py>Renderer#render

View File

@ -1,57 +0,0 @@
import torch
from marl_factory_grid.algorithms.marl.base_ac import BaseActorCritic, nms
from marl_factory_grid.algorithms.utils import instantiate_class
from pathlib import Path
from natsort import natsorted
from marl_factory_grid.algorithms.marl.memory import MARLActorCriticMemory
class LoopIAC(BaseActorCritic):
def __init__(self, cfg):
super(LoopIAC, self).__init__(cfg)
def setup(self):
self.net = [
instantiate_class(self.cfg[nms.AGENT]) for _ in range(self.n_agents)
]
self.optimizer = [
torch.optim.RMSprop(self.net[ag_i].parameters(), lr=3e-4, eps=1e-5) for ag_i in range(self.n_agents)
]
def load_state_dict(self, path: Path):
paths = natsorted(list(path.glob('*.pt')))
for path, net in zip(paths, self.net):
net.load_state_dict(torch.load(path))
@staticmethod
def merge_dicts(ds): # todo could be recursive for more than 1 hierarchy
d = {}
for k in ds[0].keys():
d[k] = [d[k] for d in ds]
return d
def init_hidden(self):
ha = [net.init_hidden_actor() for net in self.net]
hc = [net.init_hidden_critic() for net in self.net]
return dict(hidden_actor=ha, hidden_critic=hc)
def forward(self, observations, actions, hidden_actor, hidden_critic):
outputs = [
net(
self._as_torch(observations[ag_i]).unsqueeze(0).unsqueeze(0), # agent x time
self._as_torch(actions[ag_i]).unsqueeze(0),
hidden_actor[ag_i],
hidden_critic[ag_i]
) for ag_i, net in enumerate(self.net)
]
return self.merge_dicts(outputs)
def learn(self, tms: MARLActorCriticMemory, **kwargs):
for ag_i in range(self.n_agents):
tm, net = tms(ag_i), self.net[ag_i]
loss = self.actor_critic(tm, net, **self.cfg[nms.ALGORITHM], **kwargs)
self.optimizer[ag_i].zero_grad()
loss.backward()
torch.nn.utils.clip_grad_norm_(net.parameters(), 0.5)
self.optimizer[ag_i].step()

View File

@ -1,66 +0,0 @@
from marl_factory_grid.algorithms.marl.base_ac import Names as nms
from marl_factory_grid.algorithms.marl.snac import LoopSNAC
from marl_factory_grid.algorithms.marl.memory import MARLActorCriticMemory
import torch
from torch.distributions import Categorical
from marl_factory_grid.algorithms.utils import instantiate_class
class LoopMAPPO(LoopSNAC):
def __init__(self, *args, **kwargs):
super(LoopMAPPO, self).__init__(*args, **kwargs)
self.reset_memory_after_epoch = False
def setup(self):
self.net = instantiate_class(self.cfg[nms.AGENT])
self.optimizer = torch.optim.Adam(self.net.parameters(), lr=3e-4, eps=1e-5)
def learn(self, tm: MARLActorCriticMemory, **kwargs):
if len(tm) >= self.cfg['algorithm']['buffer_size']:
# only learn when buffer is full
for batch_i in range(self.cfg['algorithm']['n_updates']):
batch = tm.chunk_dataloader(chunk_len=self.cfg['algorithm']['n_steps'],
k=self.cfg['algorithm']['batch_size'])
loss = self.mappo(batch, self.net, **self.cfg[nms.ALGORITHM], **kwargs)
self.optimizer.zero_grad()
loss.backward()
torch.nn.utils.clip_grad_norm_(self.net.parameters(), 0.5)
self.optimizer.step()
def monte_carlo_returns(self, rewards, done, gamma):
rewards_ = []
discounted_reward = torch.zeros_like(rewards[:, -1])
for t in range(rewards.shape[1]-1, -1, -1):
discounted_reward = rewards[:, t] + (gamma * (1.0 - done[:, t]) * discounted_reward)
rewards_.insert(0, discounted_reward)
rewards_ = torch.stack(rewards_, dim=1)
return rewards_
def mappo(self, batch, network, gamma, entropy_coef, vf_coef, clip_range, **__):
out = network(batch[nms.OBSERVATION], batch[nms.ACTION], batch[nms.HIDDEN_ACTOR], batch[nms.HIDDEN_CRITIC])
logits = out[nms.LOGITS][:, :-1] # last one only needed for v_{t+1}
old_log_probs = torch.log_softmax(batch[nms.LOGITS], -1)
old_log_probs = torch.gather(old_log_probs, index=batch[nms.ACTION][:, 1:].unsqueeze(-1), dim=-1).squeeze()
# monte carlo returns
mc_returns = self.monte_carlo_returns(batch[nms.REWARD], batch[nms.DONE], gamma)
mc_returns = (mc_returns - mc_returns.mean()) / (mc_returns.std() + 1e-8) # todo: norm across agent ok?
advantages = mc_returns - out[nms.CRITIC][:, :-1]
# policy loss
log_ap = torch.log_softmax(logits, -1)
log_ap = torch.gather(log_ap, dim=-1, index=batch[nms.ACTION][:, 1:].unsqueeze(-1)).squeeze()
ratio = (log_ap - old_log_probs).exp()
surr1 = ratio * advantages.detach()
surr2 = torch.clamp(ratio, 1 - clip_range, 1 + clip_range) * advantages.detach()
policy_loss = -torch.min(surr1, surr2).mean(-1)
# entropy & value loss
entropy_loss = Categorical(logits=logits).entropy().mean(-1)
value_loss = advantages.pow(2).mean(-1) # n_agent
# weighted loss
loss = policy_loss + vf_coef*value_loss - entropy_coef * entropy_loss
return loss.mean()

View File

@ -1,221 +0,0 @@
import numpy as np
from collections import deque
import torch
from typing import Union
from torch import Tensor
from torch.utils.data import Dataset, ConcatDataset
import random
class ActorCriticMemory(object):
def __init__(self, capacity=10):
self.capacity = capacity
self.reset()
def reset(self):
self.__actions = LazyTensorFiFoQueue(maxlen=self.capacity+1)
self.__hidden_actor = LazyTensorFiFoQueue(maxlen=self.capacity+1)
self.__hidden_critic = LazyTensorFiFoQueue(maxlen=self.capacity+1)
self.__states = LazyTensorFiFoQueue(maxlen=self.capacity+1)
self.__rewards = LazyTensorFiFoQueue(maxlen=self.capacity+1)
self.__dones = LazyTensorFiFoQueue(maxlen=self.capacity+1)
self.__logits = LazyTensorFiFoQueue(maxlen=self.capacity+1)
self.__values = LazyTensorFiFoQueue(maxlen=self.capacity+1)
def __len__(self):
return len(self.__rewards) - 1
@property
def observation(self, sls=slice(0, None)): # add time dimension through stacking
return self.__states[sls].unsqueeze(0) # 1 x time x hidden dim
@property
def hidden_actor(self, sls=slice(0, None)): # 1 x n_layers x dim
return self.__hidden_actor[sls].unsqueeze(0) # 1 x time x n_layers x dim
@property
def hidden_critic(self, sls=slice(0, None)): # 1 x n_layers x dim
return self.__hidden_critic[sls].unsqueeze(0) # 1 x time x n_layers x dim
@property
def reward(self, sls=slice(0, None)):
return self.__rewards[sls].squeeze().unsqueeze(0) # 1 x time
@property
def action(self, sls=slice(0, None)):
return self.__actions[sls].long().squeeze().unsqueeze(0) # 1 x time
@property
def done(self, sls=slice(0, None)):
return self.__dones[sls].float().squeeze().unsqueeze(0) # 1 x time
@property
def logits(self, sls=slice(0, None)): # assumes a trailing 1 for time dimension - common when using output from NN
return self.__logits[sls].squeeze().unsqueeze(0) # 1 x time x actions
@property
def values(self, sls=slice(0, None)):
return self.__values[sls].squeeze().unsqueeze(0) # 1 x time x actions
def add_observation(self, state: Union[Tensor, np.ndarray]):
self.__states.append(state if isinstance(state, Tensor) else torch.from_numpy(state))
def add_hidden_actor(self, hidden: Tensor):
# layers x hidden dim
self.__hidden_actor.append(hidden)
def add_hidden_critic(self, hidden: Tensor):
# layers x hidden dim
self.__hidden_critic.append(hidden)
def add_action(self, action: Union[int, Tensor]):
if not isinstance(action, Tensor):
action = torch.tensor(action)
self.__actions.append(action)
def add_reward(self, reward: Union[float, Tensor]):
if not isinstance(reward, Tensor):
reward = torch.tensor(reward)
self.__rewards.append(reward)
def add_done(self, done: bool):
if not isinstance(done, Tensor):
done = torch.tensor(done)
self.__dones.append(done)
def add_logits(self, logits: Tensor):
self.__logits.append(logits)
def add_values(self, values: Tensor):
self.__values.append(values)
def add(self, **kwargs):
for k, v in kwargs.items():
func = getattr(ActorCriticMemory, f'add_{k}')
func(self, v)
class MARLActorCriticMemory(object):
def __init__(self, n_agents, capacity):
self.n_agents = n_agents
self.memories = [
ActorCriticMemory(capacity) for _ in range(n_agents)
]
def __call__(self, agent_i):
return self.memories[agent_i]
def __len__(self):
return len(self.memories[0]) # todo add assertion check!
def reset(self):
for mem in self.memories:
mem.reset()
def add(self, **kwargs):
for agent_i in range(self.n_agents):
for k, v in kwargs.items():
func = getattr(ActorCriticMemory, f'add_{k}')
func(self.memories[agent_i], v[agent_i])
def __getattr__(self, attr):
all_attrs = [getattr(mem, attr) for mem in self.memories]
return torch.cat(all_attrs, 0) # agent x time ...
def chunk_dataloader(self, chunk_len, k):
datasets = [ExperienceChunks(mem, chunk_len, k) for mem in self.memories]
dataset = ConcatDataset(datasets)
data = [dataset[i] for i in range(len(dataset))]
data = custom_collate_fn(data)
return data
def custom_collate_fn(batch):
elem = batch[0]
return {key: torch.cat([d[key] for d in batch], dim=0) for key in elem}
class ExperienceChunks(Dataset):
def __init__(self, memory, chunk_len, k):
assert chunk_len <= len(memory), 'chunk_len cannot be longer than the size of the memory'
self.memory = memory
self.chunk_len = chunk_len
self.k = k
@property
def whitelist(self):
whitelist = torch.ones(len(self.memory) - self.chunk_len)
for d in self.memory.done.squeeze().nonzero().flatten():
whitelist[max((0, d-self.chunk_len-1)):d+2] = 0
whitelist[0] = 0
return whitelist.tolist()
def sample(self, start=1):
cl = self.chunk_len
sample = dict(observation=self.memory.observation[:, start:start+cl+1],
action=self.memory.action[:, start-1:start+cl],
hidden_actor=self.memory.hidden_actor[:, start-1],
hidden_critic=self.memory.hidden_critic[:, start-1],
reward=self.memory.reward[:, start:start + cl],
done=self.memory.done[:, start:start + cl],
logits=self.memory.logits[:, start:start + cl],
values=self.memory.values[:, start:start + cl])
return sample
def __len__(self):
return self.k
def __getitem__(self, i):
idx = random.choices(range(0, len(self.memory) - self.chunk_len), weights=self.whitelist, k=1)
return self.sample(idx[0])
class LazyTensorFiFoQueue:
def __init__(self, maxlen):
self.maxlen = maxlen
self.reset()
def reset(self):
self.__lazy_queue = deque(maxlen=self.maxlen)
self.shape = None
self.queue = None
def shape_init(self, tensor: Tensor):
self.shape = torch.Size([self.maxlen, *tensor.shape])
def build_tensor_queue(self):
if len(self.__lazy_queue) > 0:
block = torch.stack(list(self.__lazy_queue), dim=0)
l = block.shape[0]
if self.queue is None:
self.queue = block
elif self.true_len() <= self.maxlen:
self.queue = torch.cat((self.queue, block), dim=0)
else:
self.queue = torch.cat((self.queue[l:], block), dim=0)
self.__lazy_queue.clear()
def append(self, data):
if self.shape is None:
self.shape_init(data)
self.__lazy_queue.append(data)
if len(self.__lazy_queue) >= self.maxlen:
self.build_tensor_queue()
def true_len(self):
return len(self.__lazy_queue) + (0 if self.queue is None else self.queue.shape[0])
def __len__(self):
return min((self.true_len(), self.maxlen))
def __str__(self):
return f'LazyTensorFiFoQueue\tmaxlen: {self.maxlen}, shape: {self.shape}, ' \
f'len: {len(self)}, true_len: {self.true_len()}, elements in lazy queue: {len(self.__lazy_queue)}'
def __getitem__(self, item_or_slice):
self.build_tensor_queue()
return self.queue[item_or_slice]

View File

@ -7,8 +7,8 @@ agent:
hidden_size_critic: 64
use_agent_embedding: False
env:
classname: marl_factory_grid.configs.custom
env_name: "custom/MultiAgentConfigs/dirt_quadrant_train_config"
classname: marl_factory_grid.environment.configs.marl_eval
env_name: "marl_eval/dirt_quadrant_eval_config"
n_agents: 2
max_steps: 250
pomdp_r: 2

View File

@ -7,8 +7,8 @@ agent:
hidden_size_critic: 64
use_agent_embedding: False
env:
classname: marl_factory_grid.configs.custom
env_name: "custom/two_rooms_one_door_modified_train_config"
classname: marl_factory_grid.environment.configs.marl_eval
env_name: "marl_eval/two_rooms_eval_config"
n_agents: 2
max_steps: 250
pomdp_r: 2

View File

@ -1,103 +0,0 @@
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
class RecurrentAC(nn.Module):
def __init__(self, observation_size, n_actions, obs_emb_size,
action_emb_size, hidden_size_actor, hidden_size_critic,
n_agents, use_agent_embedding=True):
super(RecurrentAC, self).__init__()
observation_size = np.prod(observation_size)
self.n_layers = 1
self.n_actions = n_actions
self.use_agent_embedding = use_agent_embedding
self.hidden_size_actor = hidden_size_actor
self.hidden_size_critic = hidden_size_critic
self.action_emb_size = action_emb_size
self.obs_proj = nn.Linear(observation_size, obs_emb_size)
self.action_emb = nn.Embedding(n_actions+1, action_emb_size, padding_idx=0)
self.agent_emb = nn.Embedding(n_agents, action_emb_size)
mix_in_size = obs_emb_size+action_emb_size if not use_agent_embedding else obs_emb_size+n_agents*action_emb_size
self.mix = nn.Sequential(nn.Tanh(),
nn.Linear(mix_in_size, obs_emb_size),
nn.Tanh(),
nn.Linear(obs_emb_size, obs_emb_size)
)
self.gru_actor = nn.GRU(obs_emb_size, hidden_size_actor, batch_first=True, num_layers=self.n_layers)
self.gru_critic = nn.GRU(obs_emb_size, hidden_size_critic, batch_first=True, num_layers=self.n_layers)
self.action_head = nn.Sequential(
nn.Linear(hidden_size_actor, hidden_size_actor),
nn.Tanh(),
nn.Linear(hidden_size_actor, n_actions)
)
# spectral_norm(nn.Linear(hidden_size_actor, hidden_size_actor)),
self.critic_head = nn.Sequential(
nn.Linear(hidden_size_critic, hidden_size_critic),
nn.Tanh(),
nn.Linear(hidden_size_critic, 1)
)
#self.action_head[-1].weight.data.uniform_(-3e-3, 3e-3)
#self.action_head[-1].bias.data.uniform_(-3e-3, 3e-3)
def init_hidden_actor(self):
return torch.zeros(1, self.n_layers, self.hidden_size_actor)
def init_hidden_critic(self):
return torch.zeros(1, self.n_layers, self.hidden_size_critic)
def forward(self, observations, actions, hidden_actor=None, hidden_critic=None):
n_agents, t, *_ = observations.shape
obs_emb = self.obs_proj(observations.view(n_agents, t, -1).float())
action_emb = self.action_emb(actions+1) # shift by one due to padding idx
if not self.use_agent_embedding:
x_t = torch.cat((obs_emb, action_emb), -1)
else:
agent_emb = self.agent_emb(
torch.cat([torch.arange(0, n_agents, 1).view(-1, 1)] * t, 1)
)
x_t = torch.cat((obs_emb, agent_emb, action_emb), -1)
mixed_x_t = self.mix(x_t)
output_p, _ = self.gru_actor(input=mixed_x_t, hx=hidden_actor.swapaxes(1, 0))
output_c, _ = self.gru_critic(input=mixed_x_t, hx=hidden_critic.swapaxes(1, 0))
logits = self.action_head(output_p)
critic = self.critic_head(output_c).squeeze(-1)
return dict(logits=logits, critic=critic, hidden_actor=output_p, hidden_critic=output_c)
class RecurrentACL2(RecurrentAC):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.action_head = nn.Sequential(
nn.Linear(self.hidden_size_actor, self.hidden_size_actor),
nn.Tanh(),
NormalizedLinear(self.hidden_size_actor, self.n_actions, trainable_magnitude=True)
)
class NormalizedLinear(nn.Linear):
def __init__(self, in_features: int, out_features: int,
device=None, dtype=None, trainable_magnitude=False):
super(NormalizedLinear, self).__init__(in_features, out_features, False, device, dtype)
self.d_sqrt = in_features**0.5
self.trainable_magnitude = trainable_magnitude
self.scale = nn.Parameter(torch.tensor([1.]), requires_grad=trainable_magnitude)
def forward(self, in_array):
normalized_input = F.normalize(in_array, dim=-1, p=2, eps=1e-5)
normalized_weight = F.normalize(self.weight, dim=-1, p=2, eps=1e-5)
return F.linear(normalized_input, normalized_weight) * self.d_sqrt * self.scale
class L2Norm(nn.Module):
def __init__(self, in_features, trainable_magnitude=False):
super(L2Norm, self).__init__()
self.d_sqrt = in_features**0.5
self.scale = nn.Parameter(torch.tensor([1.]), requires_grad=trainable_magnitude)
def forward(self, x):
return F.normalize(x, dim=-1, p=2, eps=1e-5) * self.d_sqrt * self.scale

View File

@ -1,55 +0,0 @@
import torch
from torch.distributions import Categorical
from marl_factory_grid.algorithms.marl.iac import LoopIAC
from marl_factory_grid.algorithms.marl.base_ac import nms
from marl_factory_grid.algorithms.marl.memory import MARLActorCriticMemory
class LoopSEAC(LoopIAC):
def __init__(self, cfg):
super(LoopSEAC, self).__init__(cfg)
def actor_critic(self, tm, networks, gamma, entropy_coef, vf_coef, gae_coef=0.0, **kwargs):
obs, actions, done, reward = tm.observation, tm.action, tm.done[:, 1:], tm.reward[:, 1:]
outputs = [net(obs, actions, tm.hidden_actor[:, 0], tm.hidden_critic[:, 0]) for net in networks]
with torch.inference_mode(True):
true_action_logp = torch.stack([
torch.log_softmax(out[nms.LOGITS][ag_i, :-1], -1)
.gather(index=actions[ag_i, 1:, None], dim=-1)
for ag_i, out in enumerate(outputs)
], 0).squeeze()
losses = []
for ag_i, out in enumerate(outputs):
logits = out[nms.LOGITS][:, :-1] # last one only needed for v_{t+1}
critic = out[nms.CRITIC]
entropy_loss = Categorical(logits=logits[ag_i]).entropy().mean()
advantages = self.compute_advantages(critic, reward, done, gamma, gae_coef)
# policy loss
log_ap = torch.log_softmax(logits, -1)
log_ap = torch.gather(log_ap, dim=-1, index=actions[:, 1:].unsqueeze(-1)).squeeze()
# importance weights
iw = (log_ap - true_action_logp).exp().detach() # importance_weights
a2c_loss = (-iw*log_ap * advantages.detach()).mean(-1)
value_loss = (iw*advantages.pow(2)).mean(-1) # n_agent
# weighted loss
loss = (a2c_loss + vf_coef*value_loss - entropy_coef * entropy_loss).mean()
losses.append(loss)
return losses
def learn(self, tms: MARLActorCriticMemory, **kwargs):
losses = self.actor_critic(tms, self.net, **self.cfg[nms.ALGORITHM], **kwargs)
for ag_i, loss in enumerate(losses):
self.optimizer[ag_i].zero_grad()
loss.backward()
torch.nn.utils.clip_grad_norm_(self.net[ag_i].parameters(), 0.5)
self.optimizer[ag_i].step()

View File

@ -7,8 +7,8 @@ agent:
hidden_size_critic: 64
use_agent_embedding: False
env:
classname: marl_factory_grid.configs.custom
env_name: "custom/dirt_quadrant_train_config"
classname: marl_factory_grid.environment.configs.rl
env_name: "rl/dirt_quadrant_train_config"
n_agents: 1
max_steps: 250
pomdp_r: 2

View File

@ -7,8 +7,8 @@ agent:
hidden_size_critic: 64
use_agent_embedding: False
env:
classname: marl_factory_grid.configs.custom
env_name: "custom/two_rooms_one_door_modified_train_config"
classname: marl_factory_grid.environment.configs.rl
env_name: "rl/two_rooms_train_config"
n_agents: 1
max_steps: 250
pomdp_r: 2

View File

@ -1,33 +0,0 @@
from marl_factory_grid.algorithms.marl.base_ac import BaseActorCritic
from marl_factory_grid.algorithms.marl.base_ac import nms
import torch
from torch.distributions import Categorical
from pathlib import Path
class LoopSNAC(BaseActorCritic):
def __init__(self, cfg):
super().__init__(cfg)
def load_state_dict(self, path: Path):
path2weights = list(path.glob('*.pt'))
assert len(path2weights) == 1, f'Expected a single set of weights but got {len(path2weights)}'
self.net.load_state_dict(torch.load(path2weights[0]))
def init_hidden(self):
hidden_actor = self.net.init_hidden_actor()
hidden_critic = self.net.init_hidden_critic()
return dict(hidden_actor=torch.cat([hidden_actor] * self.n_agents, 0),
hidden_critic=torch.cat([hidden_critic] * self.n_agents, 0)
)
def get_actions(self, out):
actions = Categorical(logits=out[nms.LOGITS]).sample().squeeze()
return actions
def forward(self, observations, actions, hidden_actor, hidden_critic):
out = self.net(self._as_torch(observations).unsqueeze(1),
self._as_torch(actions).unsqueeze(1),
hidden_actor, hidden_critic
)
return out

View File

@ -37,7 +37,6 @@ class TSPBaseAgent(ABC):
self._position_graph = self.generate_pos_graph()
self._static_route = None
self.cached_route = None
self.fallback_action = None
self.action_list = []
@abstractmethod
@ -50,46 +49,6 @@ class TSPBaseAgent(ABC):
"""
return 0
def calculate_tsp_route(self, target_identifier):
"""
Calculate the TSP route to reach a target.
:param target_identifier: Identifier of the target entity
:type target_identifier: str
:return: TSP route
:rtype: List[int]
"""
target_positions = [x for x in self._env.state[target_identifier].positions if x != c.VALUE_NO_POS]
# if there are cached routes, search for one matching the current and target position
if self._env.state.route_cache and (
route := self._env.state.get_cached_route(self.state.pos, target_positions)) is not None:
# print(f"Retrieved cached route: {route}")
return route
# if none are found, calculate tsp route and cache it
else:
start_time = time.time()
if self.local_optimization:
nodes = \
[self.state.pos] + \
[x for x in target_positions if max(abs(np.subtract(x, self.state.pos))) < 3]
try:
while len(nodes) < 7:
nodes += [next(x for x in target_positions if x not in nodes)]
except StopIteration:
nodes = [self.state.pos] + target_positions
else:
nodes = [self.state.pos] + target_positions
route = tsp.traveling_salesman_problem(self._position_graph,
nodes=nodes, cycle=True, method=tsp.greedy_tsp)
duration = time.time() - start_time
print("TSP calculation took {:.2f} seconds to execute".format(duration))
self._env.state.cache_route(route)
return route
def _use_door_or_move(self, door, target):
"""
Helper method to decide whether to use a door or move towards a target.
@ -108,6 +67,47 @@ class TSPBaseAgent(ABC):
action = self._predict_move(target)
return action
def calculate_tsp_route(self, target_identifier):
"""
Calculate the TSP route to reach a target.
:param target_identifier: Identifier of the target entity
:type target_identifier: str
:return: TSP route
:rtype: List[int]
"""
start_time = time.time()
if self.cached_route is not None:
#print(f" Used cached route: {self.cached_route}")
return copy.deepcopy(self.cached_route)
else:
positions = [x for x in self._env.state[target_identifier].positions if x != c.VALUE_NO_POS]
if self.local_optimization:
nodes = \
[self.state.pos] + \
[x for x in positions if max(abs(np.subtract(x, self.state.pos))) < 3]
try:
while len(nodes) < 7:
nodes += [next(x for x in positions if x not in nodes)]
except StopIteration:
nodes = [self.state.pos] + positions
else:
nodes = [self.state.pos] + positions
route = tsp.traveling_salesman_problem(self._position_graph,
nodes=nodes, cycle=True, method=tsp.greedy_tsp)
self.cached_route = copy.deepcopy(route)
#print(f"Cached route: {self.cached_route}")
end_time = time.time()
duration = end_time - start_time
#print("TSP calculation took {:.2f} seconds to execute".format(duration))
return route
def _door_is_close(self, state):
"""
Check if a door is close to the agent's position.
@ -173,11 +173,8 @@ class TSPBaseAgent(ABC):
action = next(action for action, pos_diff in MOVEMAP.items() if
np.all(diff == pos_diff) and action in allowed_directions)
except StopIteration:
print(f"No valid action found for pos diff: {diff}. Using fallback action: {self.fallback_action}.")
if self.fallback_action and any(self.fallback_action == action.name for action in self.state.actions):
action = self.fallback_action
else:
action = choice(self.state.actions).name
print(f"No valid action found for pos diff: {diff}. Using fallback action.")
action = choice(self.state.actions).name
else:
action = choice(self.state.actions).name
# noinspection PyUnboundLocalVariable

View File

@ -1,76 +0,0 @@
import numpy as np
from marl_factory_grid.algorithms.static.TSP_base_agent import TSPBaseAgent
from marl_factory_grid.modules.items import constants as i
from marl_factory_grid.environment import constants as c
future_planning = 7
inventory_size = 3
MODE_GET = 'Mode_Get'
MODE_BRING = 'Mode_Bring'
class TSPItemAgent(TSPBaseAgent):
def __init__(self, *args, mode=MODE_GET, **kwargs):
"""
Initializes a TSPItemAgent that collects items in the environment, stores them in its inventory and drops them off
at a drop-off location.
:param mode: Mode of the agent, either MODE_GET or MODE_BRING.
"""
super(TSPItemAgent, self).__init__(*args, **kwargs)
self.mode = mode
self.fallback_action = c.NOOP
def predict(self, *_, **__):
item_at_position = self._env.state[i.ITEM].by_pos(self.state.pos)
dropoff_at_position = self._env.state[i.DROP_OFF].by_pos(self.state.pos)
if item_at_position:
# Translate the action_object to an integer to have the same output as any other model
action = i.ITEM_ACTION
elif dropoff_at_position:
# Translate the action_object to an integer to have the same output as any other model
action = i.ITEM_ACTION
elif door := self._door_is_close(self._env.state):
action = self._use_door_or_move(door, i.DROP_OFF if self.mode == MODE_BRING else i.ITEM)
else:
action = self._choose()
self.action_list.append(action)
# Translate the action_object to an integer to have the same output as any other model
try:
action_obj = next(action_i for action_i, a in enumerate(self.state.actions) if a.name == action)
except (StopIteration, UnboundLocalError):
print('Will not happen')
raise EnvironmentError
# noinspection PyUnboundLocalVariable
if self.mode == MODE_BRING and len(self._env[i.INVENTORY].by_entity(self.state)):
pass
elif self.mode == MODE_BRING and not len(self._env[i.INVENTORY].by_entity(self.state)):
self.mode = MODE_GET
elif self.mode == MODE_GET and len(self._env[i.INVENTORY].by_entity(self.state)) > inventory_size:
self.mode = MODE_BRING
else:
pass
return action_obj
def _choose(self):
"""
Internal Usage. Chooses the action based on the agent's mode and the environment state.
:return: Chosen action.
:rtype: int
"""
target = i.DROP_OFF if self.mode == MODE_BRING else i.ITEM
if len(self._env.state[i.ITEM]) >= 1:
action = self._predict_move(target)
elif len(self._env[i.INVENTORY].by_entity(self.state)):
self.mode = MODE_BRING
action = self._predict_move(target)
else:
action = int(np.random.randint(self._env.action_space.n))
# noinspection PyUnboundLocalVariable
return action

View File

@ -1,27 +0,0 @@
from random import randint
from marl_factory_grid.algorithms.static.TSP_base_agent import TSPBaseAgent
future_planning = 7
class TSPRandomAgent(TSPBaseAgent):
def __init__(self, n_actions, *args, **kwargs):
"""
Initializes a TSPRandomAgent that performs random actions from within its action space.
:param n_actions: Number of possible actions.
:type n_actions: int
"""
super(TSPRandomAgent, self).__init__(*args, **kwargs)
self.n_action = n_actions
def predict(self, *_, **__):
"""
Predicts the next action randomly.
:return: Predicted action.
:rtype: int
"""
return randint(0, self.n_action - 1)

View File

@ -58,7 +58,7 @@ def load_yaml_file(path: Path):
def add_env_props(cfg):
# Path to config File
env_path = Path(f'../marl_factory_grid/configs/{cfg["env"]["env_name"]}.yaml')
env_path = Path(f'../marl_factory_grid/environment/configs/{cfg["env"]["env_name"]}.yaml')
# Env Init
factory = Factory(env_path)

View File

@ -1,66 +0,0 @@
General:
env_seed: 69
individual_rewards: true
level_name: obs_test_map
pomdp_r: 0
verbose: True
tests: false
Agents:
Wolfgang:
Actions:
- Noop
Observations:
- Walls
- Doors
- Other
- DirtPiles
Positions:
- (1, 3)
Soeren:
Actions:
- Noop
Observations:
- Walls
- Doors
- Other
- DirtPiles
Positions:
- (1, 1)
Juergen:
Actions:
- Noop
Observations:
- Walls
- Doors
- Other
- DirtPiles
Positions:
- (1, 2)
Walter:
Actions:
- Noop
Observations:
- Walls
- Doors
- Other
- DirtPiles
Positions:
- (1, 4)
Entities:
DirtPiles:
Doors:
Rules:
# Utilities
WatchCollisions:
done_at_collisions: false
# Done Conditions
DoneAtMaxStepsReached:
max_steps: 500

View File

@ -1,92 +0,0 @@
General:
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
env_seed: 69
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: rooms
# Radius of Partially observable Markov decision process
pomdp_r: 3
# Print all messages and events
verbose: false
# Run tests
tests: false
# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
# other agents aim to clean dirt piles.
Agents:
# The clean agents
Wolfgang:
Actions:
- Move8
- DoorUse
- Clean
- Noop
Observations:
- Walls
- Doors
- Other
- DirtPiles
Clones: 8
# The item agent
Juergen:
Actions:
- Move8
- DoorUse
- ItemAction
- Noop
Observations:
- Walls
- Doors
- Other
- Items
- DropOffLocations
- Inventory
Entities:
DirtPiles:
coords_or_quantity: 10
initial_amount: 2
clean_amount: 1
dirt_spawn_r_var: 0.1
max_global_amount: 20
max_local_amount: 5
Doors:
DropOffLocations:
coords_or_quantity: 1
max_dropoff_storage_size: 0
Inventories: { }
Items:
coords_or_quantity: 5
# Rules section specifies the rules governing the dynamics of the environment.
Rules:
# Environment Dynamics
# When stepping over a dirt pile, entities carry a ratio of the dirt to their next position
EntitiesSmearDirtOnMove:
smear_ratio: 0.2
# Doors automatically close after a certain number of time steps
DoorAutoClose:
close_frequency: 7
# Respawn Stuff
# Define how dirt should respawn after the initial spawn
RespawnDirt:
respawn_freq: 30
# Define how items should respawn after the initial spawn
RespawnItems:
respawn_freq: 50
# Utilities
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
# Can be omitted/ignored if you do not want to take care of collisions at all.
WatchCollisions:
done_at_collisions: false
# Done Conditions
# Define the conditions for the environment to stop. Either success or fail conditions.
# The environment stops when all dirt is cleaned
DoneOnAllDirtCleaned:
DoneAtMaxStepsReached:
max_steps: 500

View File

@ -1,73 +0,0 @@
General:
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
env_seed: 69
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: quadrant
# Radius of Partially observable Markov decision process
pomdp_r: 0 # default 3
# Print all messages and events
verbose: false
# Run tests
tests: false
# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
# other agents aim to clean dirt piles.
Agents:
# The clean agents
Sigmund:
Actions:
- Move4
#- Clean
#- Noop
Observations:
- DirtPiles
- Self
Positions:
- (9,1)
- (4,5)
- (1,1)
- (4,5)
- (9,1)
- (9,9)
Wolfgang:
Actions:
- Move4
#- Clean
#- Noop
Observations:
- DirtPiles
- Self
Positions:
- (9,5)
- (4,5)
- (1,1)
- (4,5)
- (9,5)
- (9,9)
Entities:
DirtPiles:
coords_or_quantity: (9,9), (1,1), (4,5) # (4,7), (2,4), (1, 1) #(1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
initial_amount: 0.5 # <1 to ensure that the robot that first attempts to clean this field can remove the dirt in one action
clean_amount: 1
dirt_spawn_r_var: 0
max_global_amount: 12
max_local_amount: 1
# Rules section specifies the rules governing the dynamics of the environment.
Rules:
# Utilities
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
# Can be omitted/ignored if you do not want to take care of collisions at all.
WatchCollisions:
done_at_collisions: false
# Done Conditions
# Define the conditions for the environment to stop. Either success or fail conditions.
# The environment stops when all dirt is cleaned
DoneOnAllDirtCleaned:
#DoneAtMaxStepsReached: # An episode should last for at most max_steps steps
#max_steps: 100

View File

@ -1,146 +0,0 @@
# Default Configuration File
General:
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
env_seed: 69
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: large
# View Radius; 0 = full observability
pomdp_r: 3
# Print all messages and events
verbose: false
# Run tests
tests: false
# Agents section defines the characteristics of different agents in the environment.
# An Agent requires a list of actions and observations.
# Possible actions: Noop, Charge, Clean, DestAction, DoorUse, ItemAction, MachineAction, Move8, Move4, North, NorthEast, ...
# Possible observations: All, Combined, GlobalPosition, Battery, ChargePods, DirtPiles, Destinations, Doors, Items, Inventory, DropOffLocations, Maintainers, ...
# You can use 'clone' as the agent name to have multiple instances with either a list of names or an int specifying the number of clones.
Agents:
Wolfgang:
Actions:
- Noop
- Charge
- Clean
- DestAction
- DoorUse
- ItemAction
- Move8
Observations:
- Combined:
- Other
- Walls
- GlobalPosition
- Battery
- ChargePods
- DirtPiles
- Destinations
- Doors
- Items
- Inventory
- DropOffLocations
- Maintainers
# Entities section defines the initial parameters and behaviors of different entities in the environment.
# Entities all spawn using coords_or_quantity, a number of entities or coordinates to place them.
Entities:
# Batteries: Entities representing power sources for agents.
Batteries:
initial_charge: 0.8
per_action_costs: 0.02
# ChargePods: Entities representing charging stations for Batteries.
ChargePods:
coords_or_quantity: 2
# Destinations: Entities representing target locations for agents.
# - spawn_mode: GROUPED or SINGLE. Determines how destinations are spawned.
Destinations:
coords_or_quantity: 1
spawn_mode: GROUPED
# DirtPiles: Entities representing piles of dirt.
# - initial_amount: Initial amount of dirt in each pile.
# - clean_amount: Amount of dirt cleaned in each cleaning action.
# - dirt_spawn_r_var: Random variation in dirt spawn amounts.
# - max_global_amount: Maximum total amount of dirt allowed in the environment.
# - max_local_amount: Maximum amount of dirt allowed in one position.
DirtPiles:
coords_or_quantity: 10
initial_amount: 2
clean_amount: 1
dirt_spawn_r_var: 0.1
max_global_amount: 20
max_local_amount: 5
# Doors are spawned using the level map.
Doors:
# DropOffLocations: Entities representing locations where agents can drop off items.
# - max_dropoff_storage_size: Maximum storage capacity at each drop-off location.
DropOffLocations:
coords_or_quantity: 1
max_dropoff_storage_size: 0
# GlobalPositions.
GlobalPositions: { }
# Inventories: Entities representing inventories for agents.
Inventories: { }
# Items: Entities representing items in the environment.
Items:
coords_or_quantity: 5
# Machines: Entities representing machines in the environment.
Machines:
coords_or_quantity: 2
# Maintainers: Entities representing maintainers that aim to maintain machines.
Maintainers:
coords_or_quantity: 1
# Rules section specifies the rules governing the dynamics of the environment.
Rules:
# Environment Dynamics
# When stepping over a dirt pile, entities carry a ratio of the dirt to their next position
EntitiesSmearDirtOnMove:
smear_ratio: 0.2
# Doors automatically close after a certain number of time steps
DoorAutoClose:
close_frequency: 10
# Maintainers move at every time step
MoveMaintainers:
# Respawn Stuff
# Define how dirt should respawn after the initial spawn
RespawnDirt:
respawn_freq: 15
# Define how items should respawn after the initial spawn
RespawnItems:
respawn_freq: 15
# Utilities
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
# Can be omitted/ignored if you do not want to take care of collisions at all.
WatchCollisions:
done_at_collisions: false
# Done Conditions
# Define the conditions for the environment to stop. Either success or fail conditions.
# The environment stops when an agent reaches a destination
DoneAtDestinationReach:
# The environment stops when all dirt is cleaned
DoneOnAllDirtCleaned:
# The environment stops when a battery is discharged
DoneAtBatteryDischarge:
# The environment stops when a maintainer reports a collision
DoneAtMaintainerCollision:
# The environment stops after max steps
DoneAtMaxStepsReached:
max_steps: 500

View File

@ -1,89 +0,0 @@
# General env. settings.
General:
# Just the best seed.
env_seed: 69
# Each agent receives an individual reward.
individual_rewards: true
# level file to load from .\levels\.
level_name: eight_puzzle
# Partial Observability. 0 = Full Observation.
pomdp_r: 0
# Please do not spam me.
verbose: false
# Do not touch, WIP
tests: false
# RL Surrogates
Agents:
# This defines the name of the agent. UTF-8
Wolfgang:
# Section which defines the available actions per agent.
Actions:
# Move4 adds 4 actions [`North`, `East`, `South`, `West`]
Move4:
# Reward specification which differs from the default.
# Agent does a valid move in the environment, i.e. it actually moves.
valid_reward: -0.1
# Agent wants to move, but fails.
fail_reward: 0
# NOOP aka agent does not do a thing.
Noop:
# The agent decides not to do anything, which is always valid.
valid_reward: 0
# Does not do anything, just using the same interface.
fail_reward: 0
# What the agent wants to see.
Observations:
# The agent...
# sees other agents, but not itself.
- Other
# wants to see walls
- Walls
# sees his associated Destination (singular). Use the Plural for `see all destinations`.
- Destination
# You want to have 7 clones; it is also possible to name them by giving a list of names.
Clones: 7
# Agents are blocking their grid position from being entered by others.
is_blocking_pos: true
# Apart from agents, which additional entities do you want to load?
Entities:
# Observable destinations, which can be reached by stepping on the same position. Has additional parameters...
Destinations:
# Let them spawn on closed doors and agent positions
ignore_blocking: true
# For 8-Puzzle, we need a special spawn rule...
spawnrule:
# ...which spawns a destination just underneath its associated agent.
SpawnDestinationOnAgent: {} # There are no parameters, so we state empty kwargs.
# This section defines which operations are performed beside agent action.
# Without this section nothing happens, not even Done-condition checks.
# Also, situation-based rewards are specified this way.
Rules:
## Utilities
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
# Can be omitted/ignored if you do not want to take care of collisions at all.
# This does not mean that agents cannot collide; it's just ignored.
WatchCollisions:
reward: 0
done_at_collisions: false
# In 8 Puzzle, do not randomize the start positions, rather move a random agent onto the single free position n-times.
DoRandomInitialSteps:
# How many times?
random_steps: 2
## Done Conditions
# Maximum steps per episode. There is no reward for failing.
DoneAtMaxStepsReached:
# After how many steps should the episode end?
max_steps: 200
# For 8 Puzzle we need a done condition that checks whether destinations have been reached, so...
DoneAtDestinationReach:
# On every step, should there be a reward for agents that reach their associated destination? No!
dest_reach_reward: 0 # Do not touch. This is useful in other settings!
# Reward should only be given when all destinations are reached in parallel!
condition: "simultaneous"
# Reward if this is the case. Granted to each agent when all agents are at their target position simultaneously.
reward_at_done: 1
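
A hedged illustration of the "simultaneous" condition above (not the library's implementation): the episode only ends, and reward_at_done is granted to every agent, when all agents sit on their assigned destination in the same step. The names and coordinates below are made up for the example:

def all_reached_simultaneously(agent_positions, assigned_destinations):
    # Both arguments are dicts mapping an agent name to an (x, y) position.
    return all(agent_positions[name] == dest
               for name, dest in assigned_destinations.items())


targets = {'Wolfgang': (1, 1), 'Clone_0': (2, 2)}
assert not all_reached_simultaneously({'Wolfgang': (1, 1), 'Clone_0': (3, 2)}, targets)
assert all_reached_simultaneously({'Wolfgang': (1, 1), 'Clone_0': (2, 2)}, targets)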

View File

@ -1,92 +0,0 @@
General:
# Your Seed
env_seed: 69
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: narrow_corridor
# View Radius; 0 = full observability
pomdp_r: 0
# print all messages and events
verbose: true
# Run tests
tests: false
Agents:
# Agents are identified by their name
Wolfgang:
# The available actions for this particular agent
Actions:
# Able to do nothing
- Noop
# Able to move in all 8 directions
- Move8
# Stuff the agent can observe (per 2d slice)
# use "Combined" if you want to merge multiple slices into one
Observations:
# He sees walls
- Walls
# he sees other agents; "Karl-Heinz" in this setting would be fine, too
- Other
# He can see Destinations, that are assigned to him (hence the singular)
- Destination
# Available Spawn Positions as list
Positions:
- (2, 1)
- (2, 5)
# Agents block their position from being entered by others, so they
# cannot end up on the same position
is_blocking_pos: true
# See Above....
Karl-Heinz:
Actions:
- Noop
- Move8
Observations:
- Walls
- Other
- Destination
Positions:
- (2, 1)
- (2, 5)
is_blocking_pos: true
# Other noteworthy Entities
Entities:
# The destinations or positional targets to reach
Destinations:
# Let them spawn on closed doors and agent positions
ignore_blocking: true
# We need a special spawn rule...
spawnrule:
# ...which assigns the destinations per agent
SpawnDestinationsPerAgent:
# we use this parameter
coords_or_quantity:
# to enable and assign special positions per agent
Wolfgang:
- (2, 1)
- (2, 5)
Karl-Heinz:
- (2, 1)
- (2, 5)
# Whether you want to provide a numeric Position observation.
# GlobalPositions:
# normalized: false
# Define the env. dynamics
Rules:
# Utilities
# This rule checks for collisions; it also assigns the (negative) reward
WatchCollisions:
reward: -0.1
reward_at_done: -1
done_at_collisions: false
# Done Conditions
# Load any of the rules to check for done conditions.
DoneAtDestinationReach:
reward_at_done: 1
# We want to give rewards only when all targets have been reached.
condition: "all"
DoneAtMaxStepsReached:
max_steps: 200
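
The per-agent position entries above (e.g. "- (2, 1)") are plain strings once the YAML is parsed; presumably the config loader turns them into coordinate tuples. A hedged sketch of one way to do that conversion, for illustration only and not the library's loader:

from ast import literal_eval


def parse_positions(entries):
    # '(2, 1)' -> (2, 1)
    return [tuple(literal_eval(entry)) for entry in entries]


assert parse_positions(['(2, 1)', '(2, 5)']) == [(2, 1), (2, 5)]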

View File

@ -1,70 +0,0 @@
General:
# Your Seed
env_seed: 69
# Individual vs global rewards
individual_rewards: true
level_name: simple_crossing
# View Radius; 0 = full observability
pomdp_r: 0
verbose: false
tests: false
Agents:
Agent_horizontal:
Actions:
- Noop
- Move4
Observations:
- Walls
- Other
- Destination
# Available Spawn Positions as list
Positions:
- (2,1)
# It is okay to collide with other agents, so that
# they end up on the same position
is_blocking_pos: false
Agent_vertical:
Actions:
- Noop
- Move4
Observations:
- Walls
- Other
- Destination
Positions:
- (1,2)
is_blocking_pos: false
# Other noteworthy Entities
Entities:
Destinations:
# Let them spawn on closed doors and agent positions
ignore_blocking: true
spawnrule:
SpawnDestinationsPerAgent:
coords_or_quantity:
Agent_horizontal:
- (2,3)
Agent_vertical:
- (3,2)
# Whether you want to provide a numeric Position observation.
# GlobalPositions:
# normalized: false
# Define the env. dynamics
Rules:
# Utilities
# This rule checks for collisions; it also assigns the (negative) reward
WatchCollisions:
reward: -0.1
reward_at_done: -1
done_at_collisions: false
# Done Conditions
# Load any of the rules to check for done conditions.
DoneAtDestinationReach:
reward_at_done: 1
# We want to give rewards only when all targets have been reached.
condition: "all"
DoneAtMaxStepsReached:
max_steps: 200

View File

@ -1,124 +0,0 @@
Agents:
# Clean test agent:
# Actions:
# - Noop
# - Charge
# - Clean
# - DoorUse
# - Move8
# Observations:
# - Combined:
# - Other
# - Walls
# - GlobalPosition
# - Battery
# - ChargePods
# - DirtPiles
# - Destinations
# - Doors
# - Maintainers
# Clones: 0
# Item test agent:
# Actions:
# - Noop
# - Charge
# - DestAction
# - DoorUse
# - ItemAction
# - Move8
# Observations:
# - Combined:
# - Other
# - Walls
# - GlobalPosition
# - Battery
# - ChargePods
# - Destinations
# - Doors
# - Items
# - Inventory
# - DropOffLocations
# - Maintainers
# Clones: 0
Target test agent:
Actions:
- Noop
- Charge
- DoorUse
- Move8
Observations:
- Combined:
- Other
- Walls
- GlobalPosition
- Battery
- Destinations
- Doors
- Maintainers
Clones: 1
Entities:
Batteries:
initial_charge: 0.8
per_action_costs: 0.02
ChargePods:
coords_or_quantity: 2
Destinations:
coords_or_quantity: 1
spawn_mode: GROUPED
DirtPiles:
coords_or_quantity: 10
initial_amount: 2
clean_amount: 1
dirt_spawn_r_var: 0.1
max_global_amount: 20
max_local_amount: 5
Doors:
DropOffLocations:
coords_or_quantity: 1
max_dropoff_storage_size: 0
GlobalPositions: {}
Inventories: {}
Items:
coords_or_quantity: 5
Machines:
coords_or_quantity: 2
Maintainers:
coords_or_quantity: 1
General:
env_seed: 69
individual_rewards: true
level_name: quadrant
pomdp_r: 3
verbose: false
tests: false
Rules:
# Environment Dynamics
EntitiesSmearDirtOnMove:
smear_ratio: 0.2
DoorAutoClose:
close_frequency: 10
MoveMaintainers:
# Respawn Stuff
RespawnDirt:
respawn_freq: 15
RespawnItems:
respawn_freq: 15
# Utilities
WatchCollisions:
done_at_collisions: false
# Done Conditions
DoneAtMaxStepsReached:
max_steps: 20
Tests:
# MaintainerTest: {}
# DirtAgentTest: {}
# ItemAgentTest: {}
# TargetAgentTest: {}
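
A quick sanity check on the battery numbers in this test config (hedged, assuming a flat per-action cost is deducted every step, as the BatteryDecharge rule further down in this diff does): with an initial charge of 0.8 and a cost of 0.02 per action, a battery lasts 40 steps, so the 20-step episodes used here never discharge.

initial_charge = 0.8
per_action_costs = 0.02
max_steps = 20

steps_until_discharge = initial_charge / per_action_costs  # 40 steps at the flat cost
assert steps_until_discharge > max_steps  # batteries cannot discharge within one episode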

View File

@ -1,69 +0,0 @@
General:
env_seed: 69
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: two_rooms
# View Radius; 0 = full observability
pomdp_r: 3
# Print all messages and events
verbose: false
# Run tests
tests: false
# In the "two rooms one door" scenario, 2 agents spawn in 2 different rooms that are connected by a single door. Their aim
# is to reach the destination in the room they didn't spawn in, leading to a conflict at the door.
Agents:
Wolfgang:
Actions:
- Move8
- Noop
- DestAction
- DoorUse
Observations:
- Walls
- Other
- Doors
- Destination
Sigmund:
Actions:
- Move8
- Noop
- DestAction
- DoorUse
Observations:
- Combined:
- Other
- Walls
- Destination
- Doors
Entities:
Destinations:
spawnrule:
SpawnDestinationsPerAgent:
coords_or_quantity:
Wolfgang:
- (6,12)
Sigmund:
- (6, 2)
Doors: { }
GlobalPositions: { }
Rules:
# Environment Dynamics
DoorAutoClose:
close_frequency: 10
# Utilities
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
WatchCollisions:
done_at_collisions: false
# Init
AssignGlobalPositions: { }
# Done Conditions
DoneAtMaxStepsReached:
max_steps: 10

View File

@ -3,7 +3,7 @@ General:
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: two_rooms_modified
level_name: two_rooms
# View Radius; 0 = full observability
pomdp_r: 0
# Print all messages and events

View File

@ -3,7 +3,7 @@ General:
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: two_rooms_modified
level_name: two_rooms
# View Radius; 0 = full observability
pomdp_r: 0
# Print all messages and events

View File

@ -3,7 +3,7 @@ General:
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: two_rooms_modified
level_name: two_rooms
# View Radius; 0 = full observability
pomdp_r: 0
# Print all messages and events

View File

@ -3,7 +3,7 @@ General:
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: two_rooms_modified
level_name: two_rooms
# View Radius; 0 = full observability
pomdp_r: 0
# Print all messages and events

View File

@ -3,7 +3,7 @@ General:
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: two_rooms_modified
level_name: two_rooms
# View Radius; 0 = full observability
pomdp_r: 0
# Print all messages and events

View File

@ -109,7 +109,6 @@ class Factory(gym.Env):
# expensive - don't use; unless required !
self._renderer = None
self._recorder = None
# Init entities
entities = self.map.do_init()
@ -278,7 +277,7 @@ class Factory(gym.Env):
for render_entity in render_entities:
if render_entity.name == c.AGENT:
render_entity.aux = self.obs_builder.curr_lightmaps[render_entity.real_name]
return self._renderer.render(render_entities, self._recorder)
return self._renderer.render(render_entities)
def set_recorder(self, recorder):
self._recorder = recorder
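
Since render() no longer forwards the recorder, recording is now wired in explicitly through the new set_recorder hook. A minimal, hedged sketch of that wiring; the stub class and helper below are made up, as the recorder's interface is not part of this diff:

class StubRecorder:
    """Placeholder recorder object; set_recorder only stores it on the env (see the diff above)."""


def attach_recorder(env, recorder=None):
    # Factory.set_recorder simply assigns self._recorder = recorder.
    env.set_recorder(recorder if recorder is not None else StubRecorder())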

View File

@ -1,5 +1,5 @@
MOVEMENTS_VALID: float = -1 # default: -0.001
MOVEMENTS_FAIL: float = -1 # default: -0.05
MOVEMENTS_VALID: float = -1
MOVEMENTS_FAIL: float = -1
NOOP: float = -1
COLLISION: float = -1
COLLISION_DONE: float = -1

View File

@ -1,11 +1,6 @@
import unittest
from typing import List
import marl_factory_grid.modules.maintenance.constants as M
from marl_factory_grid.environment.entity.agent import Agent
from marl_factory_grid.modules import Door, Machine, DirtPile, Item, DropOffLocation, ItemAction
from marl_factory_grid.utils.results import TickResult, DoneResult, ActionResult
import marl_factory_grid.environment.constants as c
from marl_factory_grid.utils.results import TickResult, DoneResult
class Test(unittest.TestCase):
@ -41,235 +36,3 @@ class Test(unittest.TestCase):
def on_check_done(self, state) -> List[DoneResult]:
return []
class MaintainerTest(Test):
def __init__(self):
"""
Tests whether the maintainer performs the correct actions and whether its actions register correctly in the environment.
"""
super().__init__()
self.temp_state_dict = {}
pass
def tick_step(self, state) -> List[TickResult]:
for maintainer in state.entities[M.MAINTAINERS]:
self.assertIsInstance(maintainer.state, (ActionResult, TickResult))
# print(f"state validity maintainer: {maintainer.state.validity}")
# will open doors when standing in front
if maintainer._closed_door_in_path(state):
self.assertEqual(maintainer.get_move_action(state).name, 'use_door')
# if maintainer._next and not maintainer._path:
# finds valid targets when at target location
# route = maintainer.calculate_route(maintainer._last[-1], state.floortile_graph)
# if entities_at_target_location := [entity for entity in state.entities.by_pos(route[-1])]:
# self.assertTrue(any(isinstance(e, Machine) for e in entities_at_target_location))
return []
def tick_post_step(self, state) -> List[TickResult]:
# do maintainers' actions have correct effects on environment i.e. doors open, machines heal
for maintainer in state.entities[M.MAINTAINERS]:
if maintainer._path and self.temp_state_dict != {}:
if maintainer.identifier in self.temp_state_dict:
last_action = self.temp_state_dict[maintainer.identifier]
if last_action.identifier == 'DoorUse':
if door := next((entity for entity in state.entities.get_entities_near_pos(maintainer.pos) if
isinstance(entity, Door)), None):
agents_near_door = [agent for agent in state.entities.get_entities_near_pos(door.pos) if
isinstance(agent, Agent)]
if len(agents_near_door) < 2:
self.assertTrue(door.is_open)
if last_action.identifier == 'MachineAction':
if machine := next((entity for entity in state.entities.get_entities_near_pos(maintainer.pos) if
isinstance(entity, Machine)), None):
self.assertEqual(machine.health, 100)
return []
def on_check_done(self, state) -> List[DoneResult]:
# clear dict; as the maintainer identifier increments each run, the dict would fill up over episodes
self.temp_state_dict = {}
for maintainer in state.entities[M.MAINTAINERS]:
temp_state = maintainer._status
if isinstance(temp_state, (ActionResult, TickResult)):
# print(f"maintainer {temp_state}")
self.temp_state_dict[maintainer.identifier] = temp_state
else:
self.temp_state_dict[maintainer.identifier] = None
return []
class DirtAgentTest(Test):
def __init__(self):
"""
Tests whether the dirt agent will perform the correct actions and whether the actions register correctly in the
environment.
"""
super().__init__()
self.temp_state_dict = {}
pass
def on_init(self, state, lvl_map):
return []
def on_reset(self):
return []
def tick_step(self, state) -> List[TickResult]:
for dirtagent in [a for a in state.entities[c.AGENT] if "Clean" in a.identifier]: # isinstance TSPDirtAgent
# state is usually an ActionResult, but after a crash TickResults are reported
self.assertIsInstance(dirtagent.state, (ActionResult, TickResult))
# print(f"state validity dirtagent: {dirtagent.state.validity}")
return []
def tick_post_step(self, state) -> List[TickResult]:
# do agents' actions have correct effects on environment i.e. doors open, dirt is cleaned
for dirtagent in [a for a in state.entities[c.AGENT] if "Clean" in a.identifier]: # isinstance TSPDirtAgent
if self.temp_state_dict != {}:
last_action = self.temp_state_dict[dirtagent.identifier]
if last_action.identifier == 'DoorUse':
if door := next((entity for entity in state.entities.get_entities_near_pos(dirtagent.pos) if
isinstance(entity, Door)), None):
agents_near_door = [agent for agent in state.entities.get_entities_near_pos(door.pos) if
isinstance(agent, Agent)]
if len(agents_near_door) < 2:
# self.assertTrue(door.is_open)
if door.is_closed:
print("door should be open but seems closed.")
if last_action.identifier == 'Clean':
if dirt := next((entity for entity in state.entities.get_entities_near_pos(dirtagent.pos) if
isinstance(entity, DirtPile)), None):
# print(f"dirt left on pos: {dirt.amount}")
self.assertTrue(dirt.amount < 5) # get dirt amount one step before - clean amount
return []
def on_check_done(self, state) -> List[DoneResult]:
for dirtagent in [a for a in state.entities[c.AGENT] if "Clean" in a.identifier]: # isinstance TSPDirtAgent
temp_state = dirtagent._status
if isinstance(temp_state, (ActionResult, TickResult)):
# print(f"dirtagent {temp_state}")
self.temp_state_dict[dirtagent.identifier] = temp_state
else:
self.temp_state_dict[dirtagent.identifier] = None
return []
class ItemAgentTest(Test):
def __init__(self):
"""
Tests whether the item agent will perform the correct actions and whether the actions register correctly in the
environment.
"""
super().__init__()
self.temp_state_dict = {}
pass
def on_init(self, state, lvl_map):
return []
def on_reset(self):
return []
def tick_step(self, state) -> List[TickResult]:
for itemagent in [a for a in state.entities[c.AGENT] if "Item" in a.identifier]: # isinstance TSPItemAgent
# state is usually an ActionResult, but after a crash TickResults are reported
self.assertIsInstance(itemagent.state, (ActionResult, TickResult))
# self.assertEqual(agent.state.validity, True)
# print(f"state validity itemagent: {itemagent.state.validity}")
return []
def tick_post_step(self, state) -> List[TickResult]:
# do agents' actions have correct effects on environment i.e. doors open, items are picked up and dropped off
for itemagent in [a for a in state.entities[c.AGENT] if "Item" in a.identifier]: # isinstance TSPItemAgent
if self.temp_state_dict != {}: # and
last_action = self.temp_state_dict[itemagent.identifier]
if last_action.identifier == 'DoorUse':
if door := next((entity for entity in state.entities.get_entities_near_pos(itemagent.pos) if
isinstance(entity, Door)), None):
agents_near_door = [agent for agent in state.entities.get_entities_near_pos(door.pos) if
isinstance(agent, Agent)]
if len(agents_near_door) < 2:
# self.assertTrue(door.is_open)
if door.is_closed:
print("door should be open but seems closed.")
# if last_action.identifier == 'ItemAction':
# If it was a pick-up action the item should be in the agents inventory and not in his neighboring
# positions anymore
# nearby_items = [e for e in state.entities.get_entities_near_pos(itemagent.pos) if
# isinstance(e, Item)]
# self.assertNotIn(Item, nearby_items)
# self.assertTrue(itemagent.bound_entity) # where is the inventory
#
# If it was a drop-off action the item should not be in the agents inventory anymore but instead in
# the drop-off locations inventory
#
# if nearby_drop_offs := [e for e in state.entities.get_entities_near_pos(itemagent.pos) if
# isinstance(e, DropOffLocation)]:
# dol = nearby_drop_offs[0]
# self.assertTrue(dol.bound_entity) # item in drop-off location?
# self.assertNotIn(Item, state.entities.get_entities_near_pos(itemagent.pos))
return []
def on_check_done(self, state) -> List[DoneResult]:
for itemagent in [a for a in state.entities[c.AGENT] if "Item" in a.identifier]: # isinstance TSPItemAgent
temp_state = itemagent._status
# print(f"itemagent {temp_state}")
self.temp_state_dict[itemagent.identifier] = temp_state
return []
class TargetAgentTest(Test):
def __init__(self):
"""
Tests whether the target agent will perform the correct actions and whether the actions register correctly in the
environment.
"""
super().__init__()
self.temp_state_dict = {}
pass
def on_init(self, state, lvl_map):
return []
def on_reset(self):
return []
def tick_step(self, state) -> List[TickResult]:
for targetagent in [a for a in state.entities[c.AGENT] if "Target" in a.identifier]:
# state is usually an ActionResult, but after a crash TickResults are reported
self.assertIsInstance(targetagent.state, (ActionResult, TickResult))
# print(f"state validity targetagent: {targetagent.state.validity}")
return []
def tick_post_step(self, state) -> List[TickResult]:
# do agents' actions have correct effects on environment i.e. doors open, targets are destinations
for targetagent in [a for a in state.entities[c.AGENT] if "Target" in a.identifier]:
if self.temp_state_dict != {}:
last_action = self.temp_state_dict[targetagent.identifier]
if last_action.identifier == 'DoorUse':
if door := next((entity for entity in state.entities.get_entities_near_pos(targetagent.pos) if
isinstance(entity, Door)), None):
agents_near_door = [agent for agent in state.entities.get_entities_near_pos(door.pos) if
isinstance(agent, Agent)]
if len(agents_near_door) < 2:
# self.assertTrue(door.is_open)
if door.is_closed:
print("door should be open but seems closed.")
return []
def on_check_done(self, state) -> List[DoneResult]:
for targetagent in [a for a in state.entities[c.AGENT] if "Target" in a.identifier]:
temp_state = targetagent._status
# print(f"targetagent {temp_state}")
self.temp_state_dict[targetagent.identifier] = temp_state
return []

View File

@ -1,5 +0,0 @@
#####
#---#
#---#
#---#
#####

View File

@ -1,24 +0,0 @@
##############################################################
#-----------#---#--------------------------------------------#
#-----------#---#--------------------------------------------#
#-----------#---#------##------##------##------##------##----#
#-----------#---D------##------##------##------##------##----#
#-----------D---#--------------------------------------------#
#-----------#---#--------------------------------------------#
#############---####################D####################D####
#------------------------------------------------------------#
#------------------------------------------------------------#
#------------------------------------------------------------#
####################-####################################D####
#-----------------#---#------------------------------#-------#
#-----------------#---D------------------------------#-------#
#-----------------D---#------------------------------#-------#
#-----------------#---#######D#############D##########-------#
#-----------------#---D------------------------------D-------#
###################---#------------------------------#-------#
#-----------------#---#######D#############D##########-------#
#-----------------D---#------------------------------#-------#
#-----------------#---#------------------------------#-------#
#-----------------#---#------------------------------D-------#
#-----------------#---#------------------------------#-------#
##############################################################

View File

@ -1,47 +0,0 @@
###########################################################################################################################
#-----------#---#--------------------------------------------#-----------#---#--------------------------------------------#
#-----------#---#--------------------------------------------#-----------#---#--------------------------------------------#
#-----------#---#------##------##------##------##------##----#-----------#---#------##------##------##------##------##----#
#-----------#---D------##------##------##------##------##----#-----------#---D------##------##------##------##------##----#
#-----------D---#--------------------------------------------#-----------D---#--------------------------------------------#
#-----------#---#--------------------------------------------#-----------#---#--------------------------------------------#
#############---####################D####################D################---####################D####################D####
#------------------------------------------------------------#------------------------------------------------------------#
#------------------------------------------------------------D------------------------------------------------------------#
#------------------------------------------------------------#------------------------------------------------------------#
####################-####################################D#######################-####################################D####
#-----------------#---#------------------------------#-------#-----------------#---#------------------------------#-------#
#-----------------#---D------------------------------#-------#-----------------#---D------------------------------#-------#
#-----------------D---#------------------------------#-------#-----------------D---#------------------------------#-------#
#-----------------#---#######D#############D##########-------#-----------------#---#######D#############D##########-------#
#-----------------#---D------------------------------D-------#-----------------#---D------------------------------D-------#
###################---#------------------------------#-------###################---#------------------------------#-------#
#-----------------#---#######D#############D##########-------#-----------------#---#######D#############D##########-------#
#-----------------D---#------------------------------#-------D-----------------D---#------------------------------#-------#
#-----------------#---#------------------------------#-------#-----------------#---#------------------------------#-------#
#-----------------#---#------------------------------D-------#-----------------#---#------------------------------D-------#
#-----------------#---#------------------------------#-------#-----------------#---#------------------------------#-------#
##############D############################################################D###############################################
#-----------#---#--------------------------------------------#-----------#---#--------------------------------------------#
#-----------#---#--------------------------------------------#-----------#---#--------------------------------------------#
#-----------#---#------##------##------##------##------##----#-----------#---#------##------##------##------##------##----#
#-----------#---D------##------##------##------##------##----#-----------#---D------##------##------##------##------##----#
#-----------D---#--------------------------------------------#-----------D---#--------------------------------------------#
#-----------#---#--------------------------------------------#-----------#---#--------------------------------------------#
#############---####################D####################D################---####################D####################D####
#------------------------------------------------------------#------------------------------------------------------------#
#------------------------------------------------------------D------------------------------------------------------------#
#------------------------------------------------------------#------------------------------------------------------------#
###################---###################################D######################---###################################D####
#-----------------#---#------------------------------#-------#-----------------#---#------------------------------#-------#
#-----------------#---D------------------------------#-------#-----------------#---D------------------------------#-------#
#-----------------D---#------------------------------#-------#-----------------D---#------------------------------#-------#
#-----------------#---#######D#############D##########-------#-----------------#---#######D#############D##########-------#
#-----------------#---D------------------------------D-------#-----------------#---D------------------------------D-------#
###################---#------------------------------#-------###################---#------------------------------#-------#
#-----------------#---#######D#############D##########-------#-----------------#---#######D#############D##########-------#
#-----------------D---#------------------------------#-------#-----------------D---#------------------------------#-------#
#-----------------#---#------------------------------#-------#-----------------#---#------------------------------#-------#
#-----------------#---#------------------------------D-------#-----------------#---#------------------------------D-------#
#-----------------#---#------------------------------#-------#-----------------#---#------------------------------#-------#
###########################################################################################################################

View File

@ -1,5 +0,0 @@
#######
###-###
#-----#
###-###
#######

View File

@ -1,12 +0,0 @@
############
#----------#
#-#######--#
#-#-----D--#
#-#######--#
#-D-----D--#
#-#-#-#-#-##
#----------#
#----------#
#----------#
#----------#
############

View File

@ -1,13 +0,0 @@
###############
#333x33#444444#
#333#33#444444#
#333333xx#4444#
#333333#444444#
#333333#444444#
###x#######D###
#1111##2222222#
#11111#2222#22#
#11111D2222222#
#11111#2222222#
#11111#2222222#
###############

View File

@ -1,13 +0,0 @@
############
#----------#
#--######--#
#----------#
#--######--#
#----------#
#--######--#
#----------#
#--######--#
#----------#
#--######--#
#----------#
############

View File

@ -1,12 +0,0 @@
############
#----------#
#---#------#
#--------#-#
#----------#
#--#-------#
#----------#
#----#-----#
#----------#
#-------#--#
#----------#
############

View File

@ -1,5 +0,0 @@
#####
##-##
#---#
##-##
#####

View File

@ -1,13 +1,7 @@
###############
#111111#222222#
#111111#222222#
#111111#222222#
#111111#222222#
#111111#222222#
#111111D222222#
#111111#222222#
#111111#222222#
#111111#222222#
#111111#222222#
#111111#222222#
#------#------#
#------#------#
#------D------#
#------#------#
#------#------#
###############

View File

@ -1,7 +0,0 @@
###############
#111111#222222#
#111111#222222#
#111111D222222#
#111111#222222#
#111111#222222#
###############

View File

@ -1,10 +1,6 @@
from .batteries import *
from .clean_up import *
from .destinations import *
from .doors import *
from .items import *
from .machines import *
from .maintenance import *
"""
modules

View File

@ -1,4 +0,0 @@
from .actions import Charge
from .entitites import ChargePod, Battery
from .groups import ChargePods, Batteries
from .rules import DoneAtBatteryDischarge, BatteryDecharge

View File

@ -1,31 +0,0 @@
from typing import Union
from marl_factory_grid.environment.actions import Action
from marl_factory_grid.utils.results import ActionResult
from marl_factory_grid.modules.batteries import constants as b
from marl_factory_grid.environment import constants as c
from marl_factory_grid.utils import helpers as h
class Charge(Action):
def __init__(self):
"""
Checks if a charge pod is present at the agent's position.
If found, it attempts to charge the battery using the charge pod.
"""
super().__init__(b.ACTION_CHARGE, b.REWARD_CHARGE_VALID, b.Reward_CHARGE_FAIL)
def do(self, entity, state) -> Union[None, ActionResult]:
if charge_pod := h.get_first(state[b.CHARGE_PODS].by_pos(entity.pos)):
valid = charge_pod.charge_battery(entity, state)
if valid:
state.print(f'{entity.name} just charged batteries at {charge_pod.name}.')
else:
state.print(f'{entity.name} failed to charge batteries at {charge_pod.name}.')
else:
valid = c.NOT_VALID
state.print(f'{entity.name} failed to charge batteries at {entity.pos}.')
return self.get_result(valid, entity)

Binary file not shown.

Before: 7.9 KiB

View File

@ -1,17 +0,0 @@
# Battery Env
CHARGE_PODS = 'ChargePods'
BATTERIES = 'Batteries'
BATTERY_DISCHARGED = 'DISCHARGED'
CHARGE_POD_SYMBOL = 1
ACTION_CHARGE = 'do_charge_action'
REWARD_CHARGE_VALID: float = 0.1
Reward_CHARGE_FAIL: float = -0.1
REWARD_BATTERY_DISCHARGED: float = -1.0
REWARD_DISCHARGE_DONE: float = -1.0
GROUPED = "grouped"
SINGLE = "single"
MODES = [GROUPED, SINGLE]

View File

@ -1,119 +0,0 @@
from marl_factory_grid.environment import constants as c
from marl_factory_grid.environment.entity.agent import Agent
from marl_factory_grid.environment.entity.entity import Entity
from marl_factory_grid.environment.entity.object import Object
from marl_factory_grid.modules.batteries import constants as b
from marl_factory_grid.utils.utility_classes import RenderEntity
class Battery(Object):
@property
def var_can_be_bound(self):
return True
@property
def is_discharged(self) -> bool:
"""
Indicates whether the Battery's charge level is at 0 or not.
:return: Whether this battery is empty.
"""
return self.charge_level == 0
@property
def obs_tag(self):
return self.name
@property
def encoding(self):
return self.charge_level
def __init__(self, initial_charge_level, owner, *args, **kwargs):
"""
Represents a battery entity in the environment that can be bound to an agent and charged at charge pods.
:param initial_charge_level: The initial charge level of the battery, ranging from 0 to 1.
:type initial_charge_level: float
:param owner: The entity to which the battery is bound.
:type owner: Entity
"""
super(Battery, self).__init__(*args, **kwargs)
self.charge_level = initial_charge_level
self.bind_to(owner)
def do_charge_action(self, amount) -> bool:
"""
Updates the Battery's charge level according to the passed value.
:param amount: Amount added to the Battery's charge level.
:returns: Whether the battery could be charged. If not, it was already fully charged.
"""
if self.charge_level < 1:
# noinspection PyTypeChecker
self.charge_level = min(1, amount + self.charge_level)
return c.VALID
else:
return c.NOT_VALID
def decharge(self, amount) -> bool:
"""
Decreases the charge value of a battery. Currently only triggered by the battery-decharge rule.
"""
if self.charge_level != 0:
# noinspection PyTypeChecker
self.charge_level = max(0, self.charge_level - amount)
return c.VALID
else:
return c.NOT_VALID
def summarize_state(self):
summary = super().summarize_state()
summary.update(dict(belongs_to=self._bound_entity.name, chargeLevel=self.charge_level))
return summary
class ChargePod(Entity):
@property
def encoding(self):
return b.CHARGE_POD_SYMBOL
def __init__(self, *args, charge_rate: float = 0.4, multi_charge: bool = False, **kwargs):
"""
Represents a charging pod for batteries in the environment.
:param charge_rate: The rate at which the charging pod charges batteries. Defaults to 0.4.
:type charge_rate: float
:param multi_charge: Indicates whether the charging pod supports charging multiple batteries simultaneously.
Defaults to False.
:type multi_charge: bool
"""
super(ChargePod, self).__init__(*args, **kwargs)
self.charge_rate = charge_rate
self.multi_charge = multi_charge
def charge_battery(self, entity, state) -> bool:
"""
Triggers the battery charge action if possible. Impossible if the battery is already at full charge or more than
one agent is at the charge pod's position.
:returns: whether the action was successful (valid) or not.
"""
battery = state[b.BATTERIES].by_entity(entity)
if battery.charge_level >= 1.0:
return c.NOT_VALID
if len([x for x in state[c.AGENT].by_pos(entity.pos)]) > 1:
return c.NOT_VALID
valid = battery.do_charge_action(self.charge_rate)
return valid
def render(self):
return RenderEntity(b.CHARGE_PODS, self.pos)
def summarize_state(self) -> dict:
summary = super().summarize_state()
summary.update(charge_rate=self.charge_rate)
return summary
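
A hedged sketch of the clamping behaviour implemented by do_charge_action and decharge above: charging saturates at 1 and discharging floors at 0. The standalone helper below mirrors that arithmetic without constructing the entity classes:

def clamp_charge(charge_level, delta):
    # charge_level lies in [0, 1]; positive delta charges, negative delta discharges.
    return max(0.0, min(1.0, charge_level + delta))


assert clamp_charge(0.9, 0.4) == 1.0    # charging saturates at full
assert clamp_charge(0.05, -0.1) == 0.0  # discharging floors at empty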

View File

@ -1,52 +0,0 @@
from typing import Union, List, Tuple
from marl_factory_grid.environment import constants as c
from marl_factory_grid.environment.groups.collection import Collection
from marl_factory_grid.modules.batteries.entitites import ChargePod, Battery
from marl_factory_grid.utils.results import Result
class Batteries(Collection):
_entity = Battery
@property
def var_has_position(self):
return False
@property
def var_can_be_bound(self):
return True
def __init__(self, size, initial_charge_level=1.0, *args, **kwargs):
"""
A collection of batteries that is in charge of spawning batteries. (spawned batteries are bound to agents)
:param size: The maximum allowed size of the collection. Ensures that the collection does not exceed this size.
:type size: int
:param initial_charge_level: The initial charge level of the battery.
:type initial_charge_level: float
"""
super(Batteries, self).__init__(size, *args, **kwargs)
self.initial_charge_level = initial_charge_level
def spawn(self, coords_or_quantity: Union[int, List[Tuple[(int, int)]]], *entity_args, **entity_kwargs):
batteries = [self._entity(self.initial_charge_level, agent) for _, agent in enumerate(entity_args[0])]
self.add_items(batteries)
def trigger_spawn(self, state, *entity_args, coords_or_quantity=None, **entity_kwargs):
self.spawn(0, state[c.AGENT])
return Result(identifier=f'{self.name}_spawn', validity=c.VALID, value=len(self))
class ChargePods(Collection):
_entity = ChargePod
def __init__(self, *args, **kwargs):
"""
A collection of charge pods in the environment.
"""
super(ChargePods, self).__init__(*args, **kwargs)
def __repr__(self):
return super(ChargePods, self).__repr__()

View File

@ -1,128 +0,0 @@
from typing import List, Union
from marl_factory_grid.environment import constants as c
from marl_factory_grid.environment.rules import Rule
from marl_factory_grid.modules.batteries import constants as b
from marl_factory_grid.utils.results import TickResult, DoneResult
class BatteryDecharge(Rule):
def __init__(self, initial_charge: float = 0.8, per_action_costs: Union[dict, float] = 0.02,
battery_charge_reward: float = b.REWARD_CHARGE_VALID,
battery_failed_reward: float = b.Reward_CHARGE_FAIL,
battery_discharge_reward: float = b.REWARD_BATTERY_DISCHARGED,
paralyze_agents_on_discharge: bool = False):
f"""
Enables the Battery Charge/Discharge functionality.
:type paralyze_agents_on_discharge: bool
:param paralyze_agents_on_discharge: Whether agents are still able to perform actions when discharged.
:type per_action_costs: Union[dict, float] = 0.02
:param per_action_costs: 1. dict: with an action name as key, provide a value for each
(maybe walking is less tedious than opening a door? Just saying...).
2. float: each action "costs" the same.
----
!!! Does not introduce any Env.-Done condition.
!!! Batteries can only be charged if agents possess the "Charge" Action.
!!! Batteries can only be charged if there are "Charge Pods" and they are spawned!
----
:type initial_charge: float
:param initial_charge: How much juice they have.
:type battery_discharge_reward: float
:param battery_discharge_reward: Negative reward, when agents let their batteries discharge.
Default: {b.REWARD_BATTERY_DISCHARGED}
:type battery_failed_reward: float
:param battery_failed_reward: Negative reward, when agents cannot charge, but try to (overcharge, not on a station).
Default: {b.Reward_CHARGE_FAIL}
:type battery_charge_reward: float
:param battery_charge_reward: Positive reward, when agents actually charge their battery.
Default: {b.REWARD_CHARGE_VALID}
"""
super().__init__()
self.paralyze_agents_on_discharge = paralyze_agents_on_discharge
self.battery_discharge_reward = battery_discharge_reward
self.battery_failed_reward = battery_failed_reward
self.battery_charge_reward = battery_charge_reward
self.per_action_costs = per_action_costs
self.initial_charge = initial_charge
def tick_step(self, state) -> List[TickResult]:
batteries = state[b.BATTERIES]
results = []
for agent in state[c.AGENT]:
if isinstance(self.per_action_costs, dict):
energy_consumption = self.per_action_costs[agent.state.identifier]
else:
energy_consumption = self.per_action_costs
batteries.by_entity(agent).decharge(energy_consumption)
results.append(TickResult(self.name, entity=agent, validity=c.VALID, value=energy_consumption))
return results
def tick_post_step(self, state) -> List[TickResult]:
results = []
for btry in state[b.BATTERIES]:
if btry.is_discharged:
state.print(f'Battery of {btry.bound_entity.name} is discharged!')
results.append(
TickResult(self.name, entity=btry.bound_entity, reward=self.battery_discharge_reward,
validity=c.VALID)
)
if self.paralyze_agents_on_discharge:
btry.bound_entity.paralyze(self.name)
results.append(
TickResult("Paralyzed", entity=btry.bound_entity, validity=c.VALID)
)
state.print(f'{btry.bound_entity.name} has just been paralyzed!')
if btry.bound_entity.var_is_paralyzed and not btry.is_discharged:
btry.bound_entity.de_paralyze(self.name)
results.append(
TickResult("De-Paralyzed", entity=btry.bound_entity, validity=c.VALID)
)
state.print(f'{btry.bound_entity.name} has just been de-paralyzed!')
return results
class DoneAtBatteryDischarge(BatteryDecharge):
def __init__(self, reward_discharge_done=b.REWARD_DISCHARGE_DONE, mode: str = b.SINGLE, **kwargs):
f"""
Enables the Battery Charge/Discharge functionality. Additionally introduces a Done condition that triggers when batteries are discharged.
:type mode: str
:param mode: Does this Done rule trigger when any battery is discharged, or only when all batteries are discharged?
:type per_action_costs: Union[dict, float] = 0.02
:param per_action_costs: 1. dict: with an action name as key, provide a value for each
(maybe walking is less tedious than opening a door? Just saying...).
2. float: each action "costs" the same.
:type initial_charge: float
:param initial_charge: How much juice they have.
:type reward_discharge_done: float
:param reward_discharge_done: Global negative reward, when agents let their batteries discharge.
Default: {b.REWARD_DISCHARGE_DONE}
:type battery_discharge_reward: float
:param battery_discharge_reward: Negative reward, when agents let their batteries discharge.
Default: {b.REWARD_BATTERY_DISCHARGED}
:type battery_failed_reward: float
:param battery_failed_reward: Negative reward, when agents cannot charge, but try to (overcharge, not on a station).
Default: {b.Reward_CHARGE_FAIL}
:type battery_charge_reward: float
:param battery_charge_reward: Positive reward, when agents actually charge their battery.
Default: {b.REWARD_CHARGE_VALID}
"""
super().__init__(**kwargs)
self.mode = mode
self.reward_discharge_done = reward_discharge_done
def on_check_done(self, state) -> List[DoneResult]:
any_discharged = (self.mode == b.SINGLE and any(battery.is_discharged for battery in state[b.BATTERIES]))
all_discharged = (self.mode == b.GROUPED and all(battery.is_discharged for battery in state[b.BATTERIES]))
if any_discharged or all_discharged:
return [DoneResult(self.name, validity=c.VALID, reward=self.reward_discharge_done)]
else:
return [DoneResult(self.name, validity=c.NOT_VALID)]
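
The per_action_costs parameter documented above accepts either a flat float or a dict keyed by the action identifier. A hedged sketch mirroring the lookup branch in BatteryDecharge.tick_step; the action names used in the example are assumptions:

def energy_cost(per_action_costs, action_identifier):
    # Mirrors the isinstance branch in BatteryDecharge.tick_step above.
    if isinstance(per_action_costs, dict):
        return per_action_costs[action_identifier]
    return per_action_costs


assert energy_cost(0.02, 'move') == 0.02
assert energy_cost({'move': 0.01, 'use_door': 0.05}, 'use_door') == 0.05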

View File

@ -1,11 +0,0 @@
from .actions import ItemAction
from .entitites import Item, DropOffLocation
from .groups import DropOffLocations, Items, Inventory, Inventories
"""
items
=====
Todo
"""

View File

@ -1,63 +0,0 @@
from typing import Union
from marl_factory_grid.environment.actions import Action
from marl_factory_grid.utils.results import ActionResult
from marl_factory_grid.modules.items import constants as i
from marl_factory_grid.environment import constants as c
class ItemAction(Action):
def __init__(self, failed_dropoff_reward: float | None = None, valid_dropoff_reward: float | None = None, **kwargs):
"""
Allows an entity to pick up or drop off items in the environment.
:param failed_dropoff_reward: The reward assigned when a drop-off action fails. Default is None.
:type failed_dropoff_reward: float | None
:param valid_dropoff_reward: The reward assigned when a drop-off action is successful. Default is None.
:type valid_dropoff_reward: float | None
"""
super().__init__(i.ITEM_ACTION, i.REWARD_PICK_UP_FAIL, i.REWARD_PICK_UP_VALID, **kwargs)
self.failed_drop_off_reward = failed_dropoff_reward if failed_dropoff_reward is not None else i.REWARD_DROP_OFF_FAIL
self.valid_drop_off_reward = valid_dropoff_reward if valid_dropoff_reward is not None else i.REWARD_DROP_OFF_VALID
def get_dropoff_result(self, validity, entity) -> ActionResult:
"""
Generates an ActionResult for a drop-off action based on its validity.
:param validity: Whether the drop-off action is valid.
:type validity: bool
:param entity: The entity performing the action.
:type entity: Entity
:return: ActionResult for the drop-off action.
:rtype: ActionResult
"""
reward = self.valid_drop_off_reward if validity else self.failed_drop_off_reward
return ActionResult(self.__class__.__name__, validity, reward=reward, entity=entity)
def do(self, entity, state) -> Union[None, ActionResult]:
inventory = state[i.INVENTORY].by_entity(entity)
if drop_off := state[i.DROP_OFF].by_pos(entity.pos):
if inventory:
valid = drop_off.place_item(inventory.pop())
else:
valid = c.NOT_VALID
if valid:
state.print(f'{entity.name} just dropped off an item at {drop_off.pos}.')
else:
state.print(f'{entity.name} just tried to drop off at {entity.pos}, but failed.')
return self.get_dropoff_result(valid, entity)
elif items := state[i.ITEM].by_pos(entity.pos):
item = items[0]
item.change_parent_collection(inventory)
item.set_pos(c.VALUE_NO_POS)
state.print(f'{entity.name} just picked up an item at {entity.pos}')
return self.get_result(c.VALID, entity)
else:
state.print(f'{entity.name} just tried to pick up an item at {entity.pos}, but failed.')
return self.get_result(c.NOT_VALID, entity)
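
A hedged summary of the branching in ItemAction.do above, written as a standalone function that only returns a label (not the library's implementation): a drop-off location at the agent's position takes precedence, otherwise a pick-up is attempted.

def item_action_outcome(at_drop_off, inventory_has_item, item_at_position):
    # Mirrors the order of checks in ItemAction.do: drop-off first, then pick-up.
    if at_drop_off:
        return 'drop_off' if inventory_has_item else 'drop_off_failed'
    if item_at_position:
        return 'pick_up'
    return 'pick_up_failed'


assert item_action_outcome(True, False, True) == 'drop_off_failed'  # drop-off checked first
assert item_action_outcome(False, False, True) == 'pick_up'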

Binary file not shown.

Before: 6.5 KiB

Some files were not shown because too many files have changed in this diff.