Mirror of https://github.com/illiumst/marl-factory-grid.git, synced 2025-07-08 02:21:36 +02:00

Commit: Reset TSP route caching + renamed and moved configs + removed unnecessary files
@@ -1,19 +0,0 @@
# Required
version: 2

# Set the OS, Python version and other tools you might need
build:
  os: ubuntu-22.04
  tools:
    python: "3.12"

# Optional but recommended, declare the Python requirements required
# to build your documentation
# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
python:
  install:
    - requirements: docs/requirements.txt

# Build documentation in the "docs/" directory with Sphinx
sphinx:
  configuration: docs/source/conf.py
@@ -56,7 +56,7 @@ Refer to [quickstart](_quickstart) for specific scenarios.

The majority of environment objects, including entities, rules, and assets, can be loaded automatically.
Simply specify the requirements of your environment in a [
-*yaml*-config file](marl_factory_grid/configs/default_config.yaml).
+*yaml*-config file](marl_factory_grid/environment/configs/default_config.yaml).

If you only plan on using the environment without making any modifications, use ``quickstart_use``.
This creates a default config-file and another one that lists all possible options of the environment.
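The snippet below is a minimal usage sketch for the ``quickstart_use`` helper mentioned above. The import path and the output locations are assumptions made for illustration; they are not spelled out in this diff, so adjust them to the actual package layout.

```python
# Hypothetical sketch: `quickstart_use` is assumed to be importable from the
# package root; check the package for the real entry point and signature.
from marl_factory_grid import quickstart_use

# Writes a default config file plus a companion file that lists every
# available option of the environment, ready to be edited for your scenario.
quickstart_use()
```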
@@ -1,112 +0,0 @@
---
General:
  level_name: large
  env_seed: 69
  verbose: !!bool False
  pomdp_r: 3
  individual_rewards: !!bool True

Entities:
  Defaults: {}
  DirtPiles:
    initial_dirt_ratio: 0.01  # On INIT, on max how many tiles does the dirt spawn in percent.
    dirt_spawn_r_var: 0.5     # How much does the dirt spawn amount vary?
    initial_amount: 1
    max_local_amount: 3       # Max dirt amount per tile.
    max_global_amount: 30     # Max dirt amount in the whole environment.
  Doors:
    closed_on_init: True
    auto_close_interval: 10
    indicate_area: False
  Batteries: {}
  ChargePods: {}
  Destinations: {}
  ReachedDestinations: {}
  Items: {}
  Inventories: {}
  DropOffLocations: {}

Agents:
  Wolfgang:
    Actions:
      - Noop
      - Noop
      - Noop
      - CleanUp
    Observations:
      - Self
      - Placeholder
      - Walls
      - DirtPiles
      - Placeholder
      - Doors
      - Doors
  Bjoern:
    Actions:
      # Move4, Noop
      - Move8
      - DoorUse
      - ItemAction
    Observations:
      - Defaults
      - Combined:
          - Other
          - Walls
      - Items
      - Inventory
  Karl-Heinz:
    Actions:
      - Move8
      - DoorUse
    Observations:
      # Wall, Only Other Agents
      - Defaults
      - Combined:
          - Other
          - Self
          - Walls
          - Doors
      - Destinations
  Manfred:
    Actions:
      - Move8
      - ItemAction
      - DoorUse
      - CleanUp
      - DestAction
      - BtryCharge
    Observations:
      - Defaults
      - Battery
      - Destinations
      - DirtPiles
      - Doors
      - Items
      - Inventory
      - DropOffLocations
Rules:
  Defaults: {}
  Collision:
    done_at_collisions: !!bool False
  DirtRespawnRule:
    spawn_freq: 15
  DirtSmearOnMove:
    smear_amount: 0.12
  DoorAutoClose: {}
  DirtAllCleanDone: {}
  Btry: {}
  BtryDoneAtDischarge: {}
  DestinationReach: {}
  DestinationSpawn: {}
  DestinationDone: {}
  ItemRules: {}

Assets:
  - Defaults
  - Dirt
  - Door
  - Machine
  - Item
  - Destination
  - DropOffLocation
  - Chargepod
@@ -1,189 +0,0 @@
import sys
from pathlib import Path

##############################################
# keep this for stand alone script execution #
##############################################
from environments.factory.base.base_factory import BaseFactory
from environments.logging.recorder import EnvRecorder

try:
    # noinspection PyUnboundLocalVariable
    if __package__ is None:
        DIR = Path(__file__).resolve().parent
        sys.path.insert(0, str(DIR.parent))
        __package__ = DIR.name
    else:
        DIR = None
except NameError:
    DIR = None
    pass
##############################################
##############################################
##############################################


import simplejson

from environments import helpers as h
from environments.factory.additional.combined_factories import DestBatteryFactory
from environments.factory.additional.dest.factory_dest import DestFactory
from environments.factory.additional.dirt.factory_dirt import DirtFactory
from environments.factory.additional.item.factory_item import ItemFactory
from environments.helpers import ObservationTranslator, ActionTranslator
from environments.logging.envmonitor import EnvMonitor
from environments.utility_classes import ObservationProperties, AgentRenderOptions, MovementProperties


def policy_model_kwargs():
    return dict(ent_coef=0.01)


def dqn_model_kwargs():
    return dict(buffer_size=50000,
                learning_starts=64,
                batch_size=64,
                target_update_interval=5000,
                exploration_fraction=0.25,
                exploration_final_eps=0.025
                )


def encapsule_env_factory(env_fctry, env_kwrgs):

    def _init():
        with env_fctry(**env_kwrgs) as init_env:
            return init_env

    return _init


if __name__ == '__main__':

    render = False
    # Define Global Env Parameters
    # Define properties object parameters
    factory_kwargs = dict(
        max_steps=400, parse_doors=True,
        level_name='rooms',
        doors_have_area=True, verbose=False,
        mv_prop=MovementProperties(allow_diagonal_movement=True,
                                   allow_square_movement=True,
                                   allow_no_op=False),
        obs_prop=ObservationProperties(
            frames_to_stack=3,
            cast_shadows=True,
            omit_agent_self=True,
            render_agents=AgentRenderOptions.LEVEL,
            additional_agent_placeholder=None,
        )
    )

    # Bundle both environments with global kwargs and parameters
    # Todo: find a better solution, like auto module loading
    env_map = {'DirtFactory': DirtFactory,
               'ItemFactory': ItemFactory,
               'DestFactory': DestFactory,
               'DestBatteryFactory': DestBatteryFactory
               }
    env_names = list(env_map.keys())

    # Put all your multi-seed agents in a single folder; we do not need specific names etc.
    available_models = dict()
    available_envs = dict()
    available_runs_kwargs = dict()
    available_runs_agents = dict()
    max_seed = 0
    # Define this folder
    combinations_path = Path('combinations')
    # These are all differently trained combinations of models, environments and parameters
    for combination in (x for x in combinations_path.iterdir() if x.is_dir()):
        # These are all the models for this specific combination
        for model_run in (x for x in combination.iterdir() if x.is_dir()):
            model_name, env_name = model_run.name.split('_')[:2]
            if model_name not in available_models:
                available_models[model_name] = h.MODEL_MAP[model_name]
            if env_name not in available_envs:
                available_envs[env_name] = env_map[env_name]
            # These are all available seeds
            for seed_run in (x for x in model_run.iterdir() if x.is_dir()):
                max_seed = max(int(seed_run.name.split('_')[0]), max_seed)
                # Read the environment configuration from disk
                with next(seed_run.glob('env_params.json')).open('r') as f:
                    env_kwargs = simplejson.load(f)
                available_runs_kwargs[seed_run.name] = env_kwargs
                # Read the trained model_path from disk
                model_path = next(seed_run.glob('model.zip'))
                available_runs_agents[seed_run.name] = model_path

    # We start by combining all SAME MODEL CLASSES per available seed, across ALL available ENVIRONMENTS.
    for model_name, model_cls in available_models.items():
        for seed in range(max_seed):
            combined_env_kwargs = dict()
            model_paths = list()
            comparable_runs = {key: val for key, val in available_runs_kwargs.items() if (
                    key.startswith(str(seed)) and model_name in key and key != 'key')
                               }
            for name, run_kwargs in comparable_runs.items():
                # Select trained agent as a candidate:
                model_paths.append(available_runs_agents[name])
                # Sort Env Kwargs:
                for key, val in run_kwargs.items():
                    if key not in combined_env_kwargs:
                        combined_env_kwargs.update(dict(key=val))
                    else:
                        assert combined_env_kwargs[key] == val, "Check the combinations you try to make!"

            # Update and combine all kwargs to account for multiple agents etc.
            # We cannot capture all configuration cases!
            for key, val in factory_kwargs.items():
                if key not in combined_env_kwargs:
                    combined_env_kwargs[key] = val
                else:
                    assert combined_env_kwargs[key] == val
            del combined_env_kwargs['key']
            combined_env_kwargs.update(n_agents=len(comparable_runs))
            with type("CombinedEnv", tuple(available_envs.values()), {})(**combined_env_kwargs) as combEnv:
                # EnvMonitor Init
                comb = f'comb_{model_name}_{seed}'
                comb_monitor_path = combinations_path / comb / f'{comb}_monitor.pick'
                comb_recorder_path = combinations_path / comb / f'{comb}_recorder.json'
                comb_monitor_path.parent.mkdir(parents=True, exist_ok=True)

                monitoredCombEnv = EnvMonitor(combEnv, filepath=comb_monitor_path)
                monitoredCombEnv = EnvRecorder(monitoredCombEnv, filepath=comb_recorder_path, freq=1)

                # Evaluation starts here #####################################################
                # Load all models
                loaded_models = [available_models[model_name].load(model_path) for model_path in model_paths]
                obs_translators = ObservationTranslator(
                    monitoredCombEnv.named_observation_space,
                    *[agent.named_observation_space for agent in loaded_models],
                    placeholder_fill_value='n')
                act_translators = ActionTranslator(
                    monitoredCombEnv.named_action_space,
                    *(agent.named_action_space for agent in loaded_models)
                )

                for episode in range(1):
                    obs = monitoredCombEnv.reset()
                    if render: monitoredCombEnv.render()
                    rew, done_bool = 0, False
                    while not done_bool:
                        actions = []
                        for i, model in enumerate(loaded_models):
                            pred = model.predict(obs_translators.translate_observation(i, obs[i]))[0]
                            actions.append(act_translators.translate_action(i, pred))

                        obs, step_r, done_bool, info_obj = monitoredCombEnv.step(actions)

                        rew += step_r
                        if render: monitoredCombEnv.render()
                        if done_bool:
                            break
                    print(f'Factory run {episode} done, reward is:\n    {rew}')
                # Eval monitor outputs are automatically stored by the monitor object
                # TODO: Plotting
                monitoredCombEnv.save_records()
                monitoredCombEnv.save_run()
    pass
@@ -1,203 +0,0 @@
import sys
import time

from pathlib import Path
import simplejson

import stable_baselines3 as sb3

# This is needed when you put this file in a subfolder.
try:
    # noinspection PyUnboundLocalVariable
    if __package__ is None:
        DIR = Path(__file__).resolve().parent
        sys.path.insert(0, str(DIR.parent))
        __package__ = DIR.name
    else:
        DIR = None
except NameError:
    DIR = None
    pass

from environments import helpers as h
from environments.factory.additional.dest.dest_util import DestModeOptions, DestProperties
from environments.factory.additional.btry.btry_util import BatteryProperties
from environments.logging.envmonitor import EnvMonitor
from environments.logging.recorder import EnvRecorder
from environments.factory.additional.combined_factories import DestBatteryFactory
from environments.utility_classes import MovementProperties, ObservationProperties, AgentRenderOptions

from plotting.compare_runs import compare_seed_runs

"""
Welcome to this quick start file. Here we will see how to:
    0. Setup I/O Paths
    1. Setup parameters for the environments (dest-battery factory).
    2. Setup parameters for the agent training (SB3: PPO) and save metrics.
       Run the training.
    3. Save environment and agent for later analysis.
    4. Load the agent from drive
    5. Rendering the environment with a run of the trained agent.
    6. Plot metrics
"""

if __name__ == '__main__':
    #########################################################
    # 0. Setup I/O Paths
    # Define some general parameters
    train_steps = 1e6
    n_seeds = 3
    model_class = sb3.PPO
    env_class = DestBatteryFactory

    env_params_json = 'env_params.json'

    # Define a global study save path
    start_time = int(time.time())
    study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}'
    # Create an _identifier, which is unique for every combination and easy to read in filesystem
    identifier = f'{model_class.__name__}_{env_class.__name__}_{start_time}'
    exp_path = study_root_path / identifier

    #########################################################
    # 1. Setup parameters for the environments (dest-battery factory).

    # Define property object parameters.
    # 'ObservationProperties' are for specifying how the agent sees the environment.
    obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT,  # Agents won't be shown in the obs at all
                                      omit_agent_self=True,                  # This is the default
                                      additional_agent_placeholder=None,     # We will not take care of future agents
                                      frames_to_stack=3,                     # To give the agent a notion of time
                                      pomdp_r=2                              # The agent's view radius
                                      )
    # 'MovementProperties' are for specifying how the agent is allowed to move in the environment.
    move_props = MovementProperties(allow_diagonal_movement=True,  # Euclidean style (vertices)
                                    allow_square_movement=True,    # Manhattan (edges)
                                    allow_no_op=False)             # Pause movement (do nothing)

    # 'DestProperties' control if and how destinations are spawned
    # TODO: Comments
    dest_props = DestProperties(
        n_dests=2,                     # How many destinations are there
        dwell_time=0,                  # How long does the agent need to "wait" on a destination
        spawn_frequency=0,
        spawn_in_other_zone=True,      #
        spawn_mode=DestModeOptions.DONE,
    )
    btry_props = BatteryProperties(
        initial_charge=0.9,            #
        charge_rate=0.4,               #
        charge_locations=3,            #
        per_action_costs=0.01,
        done_when_discharged=True,
        multi_charge=False,
    )

    # These are the EnvKwargs for initializing the environment class, holding all former parameter-classes
    # TODO: Comments
    factory_kwargs = dict(n_agents=1,
                          max_steps=400,
                          parse_doors=True,
                          level_name='rooms',
                          doors_have_area=True,  #
                          verbose=False,
                          mv_prop=move_props,    # See above
                          obs_prop=obs_props,    # See above
                          done_at_collision=True,
                          dest_prop=dest_props,
                          btry_prop=btry_props
                          )

    #########################################################
    # 2. Setup parameters for the agent training (SB3: PPO) and save metrics.
    agent_kwargs = dict()

    #########################################################
    # Run the Training
    for seed in range(n_seeds):
        # Make a copy if you want to alter things in the training loop; like the seed.
        env_kwargs = factory_kwargs.copy()
        env_kwargs.update(env_seed=seed)

        # Output folder
        seed_path = exp_path / f'{str(seed)}_{identifier}'
        seed_path.mkdir(parents=True, exist_ok=True)

        # Parameter Storage
        param_path = seed_path / env_params_json
        # Observation (measures) Storage
        monitor_path = seed_path / 'monitor.pick'
        recorder_path = seed_path / 'recorder.json'
        # Model save path for the trained model
        model_save_path = seed_path / f'model.zip'

        # Env Init & Model kwargs definition
        with env_class(**env_kwargs) as env_factory:

            # EnvMonitor Init
            env_monitor_callback = EnvMonitor(env_factory)

            # EnvRecorder Init
            env_recorder_callback = EnvRecorder(env_factory, freq=int(train_steps / 400 / 10))

            # Model Init
            model = model_class("MlpPolicy", env_factory, verbose=1, seed=seed, device='cpu')

            # Model train
            model.learn(total_timesteps=int(train_steps), callback=[env_monitor_callback, env_recorder_callback])

            #########################################################
            # 3. Save environment and agent for later analysis.
            #    Save the trained model, the monitor (environment measures) and the environment parameters
            model.named_observation_space = env_factory.named_observation_space
            model.named_action_space = env_factory.named_action_space
            model.save(model_save_path)
            env_factory.save_params(param_path)
            env_monitor_callback.save_run(monitor_path)
            env_recorder_callback.save_records(recorder_path, save_occupation_map=False)

    # Compare performance runs, for each seed within a model
    try:
        compare_seed_runs(exp_path, use_tex=False)
    except ValueError:
        pass

    # Train ends here ############################################################

    # Evaluation starts here #####################################################
    # First iterate over every model and monitor "as trained"
    print('Start Measurement Tracking')
    # For each trained policy in study_root_path / _identifier
    for policy_path in [x for x in exp_path.iterdir() if x.is_dir()]:

        # Retrieve model class
        model_cls = next(val for key, val in h.MODEL_MAP.items() if key in policy_path.parent.name)
        # Load the agent
        model = model_cls.load(policy_path / 'model.zip', device='cpu')
        # Load old environment kwargs
        with next(policy_path.glob(env_params_json)).open('r') as f:
            env_kwargs = simplejson.load(f)
        # Make the environment stop at collisions
        # (you only want to have a single collision per episode, hence the statistics)
        env_kwargs.update(done_at_collision=True)

        # Init Env
        with env_class(**env_kwargs) as env_factory:
            monitored_env_factory = EnvMonitor(env_factory)

            # Evaluation loop for i in range(n episodes)
            for episode in range(100):
                # noinspection PyRedeclaration
                env_state = monitored_env_factory.reset()
                rew, done_bool = 0, False
                while not done_bool:
                    action = model.predict(env_state, deterministic=True)[0]
                    env_state, step_r, done_bool, info_obj = monitored_env_factory.step(action)
                    rew += step_r
                    if done_bool:
                        break
                print(f'Factory run {episode} done, reward is:\n    {rew}')
            monitored_env_factory.save_run(filepath=policy_path / 'eval_run_monitor.pick')
    print('Measurements Done')
@@ -1,193 +0,0 @@
import sys
import time

from pathlib import Path
import simplejson

import stable_baselines3 as sb3

# This is needed when you put this file in a subfolder.
try:
    # noinspection PyUnboundLocalVariable
    if __package__ is None:
        DIR = Path(__file__).resolve().parent
        sys.path.insert(0, str(DIR.parent))
        __package__ = DIR.name
    else:
        DIR = None
except NameError:
    DIR = None
    pass

from environments import helpers as h
from environments.factory.additional.dest.dest_util import DestModeOptions, DestProperties
from environments.logging.envmonitor import EnvMonitor
from environments.logging.recorder import EnvRecorder
from environments.factory.additional.dest.factory_dest import DestFactory
from environments.utility_classes import MovementProperties, ObservationProperties, AgentRenderOptions

from plotting.compare_runs import compare_seed_runs

"""
Welcome to this quick start file. Here we will see how to:
    0. Setup I/O Paths
    1. Setup parameters for the environments (dest-factory).
    2. Setup parameters for the agent training (SB3: PPO) and save metrics.
       Run the training.
    3. Save environment and agent for later analysis.
    4. Load the agent from drive
    5. Rendering the environment with a run of the trained agent.
    6. Plot metrics
"""

if __name__ == '__main__':
    #########################################################
    # 0. Setup I/O Paths
    # Define some general parameters
    train_steps = 1e6
    n_seeds = 3
    model_class = sb3.PPO
    env_class = DestFactory

    env_params_json = 'env_params.json'

    # Define a global study save path
    start_time = int(time.time())
    study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}'
    # Create an _identifier, which is unique for every combination and easy to read in filesystem
    identifier = f'{model_class.__name__}_{env_class.__name__}_{start_time}'
    exp_path = study_root_path / identifier

    #########################################################
    # 1. Setup parameters for the environments (dest-factory).

    # Define property object parameters.
    # 'ObservationProperties' are for specifying how the agent sees the environment.
    obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT,  # Agents won't be shown in the obs at all
                                      omit_agent_self=True,                  # This is the default
                                      additional_agent_placeholder=None,     # We will not take care of future agents
                                      frames_to_stack=3,                     # To give the agent a notion of time
                                      pomdp_r=2                              # The agent's view radius
                                      )
    # 'MovementProperties' are for specifying how the agent is allowed to move in the environment.
    move_props = MovementProperties(allow_diagonal_movement=True,  # Euclidean style (vertices)
                                    allow_square_movement=True,    # Manhattan (edges)
                                    allow_no_op=False)             # Pause movement (do nothing)

    # 'DestProperties' control if and how destinations are spawned
    # TODO: Comments
    dest_props = DestProperties(
        n_dests=2,                     # How many destinations are there
        dwell_time=0,                  # How long does the agent need to "wait" on a destination
        spawn_frequency=0,
        spawn_in_other_zone=True,      #
        spawn_mode=DestModeOptions.DONE,
    )

    # These are the EnvKwargs for initializing the environment class, holding all former parameter-classes
    # TODO: Comments
    factory_kwargs = dict(n_agents=1,
                          max_steps=400,
                          parse_doors=True,
                          level_name='rooms',
                          doors_have_area=True,  #
                          verbose=False,
                          mv_prop=move_props,    # See above
                          obs_prop=obs_props,    # See above
                          done_at_collision=True,
                          dest_prop=dest_props
                          )

    #########################################################
    # 2. Setup parameters for the agent training (SB3: PPO) and save metrics.
    agent_kwargs = dict()

    #########################################################
    # Run the Training
    for seed in range(n_seeds):
        # Make a copy if you want to alter things in the training loop; like the seed.
        env_kwargs = factory_kwargs.copy()
        env_kwargs.update(env_seed=seed)

        # Output folder
        seed_path = exp_path / f'{str(seed)}_{identifier}'
        seed_path.mkdir(parents=True, exist_ok=True)

        # Parameter Storage
        param_path = seed_path / env_params_json
        # Observation (measures) Storage
        monitor_path = seed_path / 'monitor.pick'
        recorder_path = seed_path / 'recorder.json'
        # Model save path for the trained model
        model_save_path = seed_path / f'model.zip'

        # Env Init & Model kwargs definition
        with env_class(**env_kwargs) as env_factory:

            # EnvMonitor Init
            env_monitor_callback = EnvMonitor(env_factory)

            # EnvRecorder Init
            env_recorder_callback = EnvRecorder(env_factory, freq=int(train_steps / 400 / 10))

            # Model Init
            model = model_class("MlpPolicy", env_factory, verbose=1, seed=seed, device='cpu')

            # Model train
            model.learn(total_timesteps=int(train_steps), callback=[env_monitor_callback, env_recorder_callback])

            #########################################################
            # 3. Save environment and agent for later analysis.
            #    Save the trained model, the monitor (environment measures) and the environment parameters
            model.named_observation_space = env_factory.named_observation_space
            model.named_action_space = env_factory.named_action_space
            model.save(model_save_path)
            env_factory.save_params(param_path)
            env_monitor_callback.save_run(monitor_path)
            env_recorder_callback.save_records(recorder_path, save_occupation_map=False)

    # Compare performance runs, for each seed within a model
    try:
        compare_seed_runs(exp_path, use_tex=False)
    except ValueError:
        pass

    # Train ends here ############################################################

    # Evaluation starts here #####################################################
    # First iterate over every model and monitor "as trained"
    print('Start Measurement Tracking')
    # For each trained policy in study_root_path / _identifier
    for policy_path in [x for x in exp_path.iterdir() if x.is_dir()]:

        # Retrieve model class
        model_cls = next(val for key, val in h.MODEL_MAP.items() if key in policy_path.parent.name)
        # Load the agent
        model = model_cls.load(policy_path / 'model.zip', device='cpu')
        # Load old environment kwargs
        with next(policy_path.glob(env_params_json)).open('r') as f:
            env_kwargs = simplejson.load(f)
        # Make the environment stop at collisions
        # (you only want to have a single collision per episode, hence the statistics)
        env_kwargs.update(done_at_collision=True)

        # Init Env
        with env_class(**env_kwargs) as env_factory:
            monitored_env_factory = EnvMonitor(env_factory)

            # Evaluation loop for i in range(n episodes)
            for episode in range(100):
                # noinspection PyRedeclaration
                env_state = monitored_env_factory.reset()
                rew, done_bool = 0, False
                while not done_bool:
                    action = model.predict(env_state, deterministic=True)[0]
                    env_state, step_r, done_bool, info_obj = monitored_env_factory.step(action)
                    rew += step_r
                    if done_bool:
                        break
                print(f'Factory run {episode} done, reward is:\n    {rew}')
            monitored_env_factory.save_run(filepath=policy_path / 'eval_run_monitor.pick')
    print('Measurements Done')
@@ -1,195 +0,0 @@
import sys
import time

from pathlib import Path
import simplejson

import stable_baselines3 as sb3

# This is needed when you put this file in a subfolder.
try:
    # noinspection PyUnboundLocalVariable
    if __package__ is None:
        DIR = Path(__file__).resolve().parent
        sys.path.insert(0, str(DIR.parent))
        __package__ = DIR.name
    else:
        DIR = None
except NameError:
    DIR = None
    pass

from environments import helpers as h
from environments.logging.envmonitor import EnvMonitor
from environments.logging.recorder import EnvRecorder
from environments.factory.additional.dirt.dirt_util import DirtProperties
from environments.factory.additional.dirt.factory_dirt import DirtFactory
from environments.utility_classes import MovementProperties, ObservationProperties, AgentRenderOptions

from plotting.compare_runs import compare_seed_runs

"""
Welcome to this quick start file. Here we will see how to:
    0. Setup I/O Paths
    1. Setup parameters for the environments (dirt-factory).
    2. Setup parameters for the agent training (SB3: PPO) and save metrics.
       Run the training.
    3. Save environment and agent for later analysis.
    4. Load the agent from drive
    5. Rendering the environment with a run of the trained agent.
    6. Plot metrics
"""

if __name__ == '__main__':
    #########################################################
    # 0. Setup I/O Paths
    # Define some general parameters
    train_steps = 1e6
    n_seeds = 3
    model_class = sb3.PPO
    env_class = DirtFactory

    env_params_json = 'env_params.json'

    # Define a global study save path
    start_time = int(time.time())
    study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}'
    # Create an _identifier, which is unique for every combination and easy to read in filesystem
    identifier = f'{model_class.__name__}_{env_class.__name__}_{start_time}'
    exp_path = study_root_path / identifier

    #########################################################
    # 1. Setup parameters for the environments (dirt-factory).

    # Define property object parameters.
    # 'ObservationProperties' are for specifying how the agent sees the environment.
    obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT,  # Agents won't be shown in the obs at all
                                      omit_agent_self=True,                  # This is the default
                                      additional_agent_placeholder=None,     # We will not take care of future agents
                                      frames_to_stack=3,                     # To give the agent a notion of time
                                      pomdp_r=2                              # The agent's view radius
                                      )
    # 'MovementProperties' are for specifying how the agent is allowed to move in the environment.
    move_props = MovementProperties(allow_diagonal_movement=True,  # Euclidean style (vertices)
                                    allow_square_movement=True,    # Manhattan (edges)
                                    allow_no_op=False)             # Pause movement (do nothing)

    # 'DirtProperties' control if and how dirt is spawned
    # TODO: Comments
    dirt_props = DirtProperties(initial_dirt_ratio=0.35,
                                initial_dirt_spawn_r_var=0.1,
                                clean_amount=0.34,
                                max_spawn_amount=0.1,
                                max_global_amount=20,
                                max_local_amount=1,
                                spawn_frequency=0,
                                max_spawn_ratio=0.05,
                                dirt_smear_amount=0.0)

    # These are the EnvKwargs for initializing the environment class, holding all former parameter-classes
    # TODO: Comments
    factory_kwargs = dict(n_agents=1,
                          max_steps=400,
                          parse_doors=True,
                          level_name='rooms',
                          doors_have_area=True,  #
                          verbose=False,
                          mv_prop=move_props,    # See above
                          obs_prop=obs_props,    # See above
                          done_at_collision=True,
                          dirt_prop=dirt_props
                          )

    #########################################################
    # 2. Setup parameters for the agent training (SB3: PPO) and save metrics.
    agent_kwargs = dict()

    #########################################################
    # Run the Training
    for seed in range(n_seeds):
        # Make a copy if you want to alter things in the training loop; like the seed.
        env_kwargs = factory_kwargs.copy()
        env_kwargs.update(env_seed=seed)

        # Output folder
        seed_path = exp_path / f'{str(seed)}_{identifier}'
        seed_path.mkdir(parents=True, exist_ok=True)

        # Parameter Storage
        param_path = seed_path / env_params_json
        # Observation (measures) Storage
        monitor_path = seed_path / 'monitor.pick'
        recorder_path = seed_path / 'recorder.json'
        # Model save path for the trained model
        model_save_path = seed_path / f'model.zip'

        # Env Init & Model kwargs definition
        with env_class(**env_kwargs) as env_factory:

            # EnvMonitor Init
            env_monitor_callback = EnvMonitor(env_factory)

            # EnvRecorder Init
            env_recorder_callback = EnvRecorder(env_factory, freq=int(train_steps / 400 / 10))

            # Model Init
            model = model_class("MlpPolicy", env_factory, verbose=1, seed=seed, device='cpu')

            # Model train
            model.learn(total_timesteps=int(train_steps), callback=[env_monitor_callback, env_recorder_callback])

            #########################################################
            # 3. Save environment and agent for later analysis.
            #    Save the trained model, the monitor (environment measures) and the environment parameters
            model.named_observation_space = env_factory.named_observation_space
            model.named_action_space = env_factory.named_action_space
            model.save(model_save_path)
            env_factory.save_params(param_path)
            env_monitor_callback.save_run(monitor_path)
            env_recorder_callback.save_records(recorder_path, save_occupation_map=False)

    # Compare performance runs, for each seed within a model
    try:
        compare_seed_runs(exp_path, use_tex=False)
    except ValueError:
        pass

    # Train ends here ############################################################

    # Evaluation starts here #####################################################
    # First iterate over every model and monitor "as trained"
    print('Start Measurement Tracking')
    # For each trained policy in study_root_path / _identifier
    for policy_path in [x for x in exp_path.iterdir() if x.is_dir()]:

        # Retrieve model class
        model_cls = next(val for key, val in h.MODEL_MAP.items() if key in policy_path.parent.name)
        # Load the agent
        model = model_cls.load(policy_path / 'model.zip', device='cpu')
        # Load old environment kwargs
        with next(policy_path.glob(env_params_json)).open('r') as f:
            env_kwargs = simplejson.load(f)
        # Make the environment stop at collisions
        # (you only want to have a single collision per episode, hence the statistics)
        env_kwargs.update(done_at_collision=True)

        # Init Env
        with env_class(**env_kwargs) as env_factory:
            monitored_env_factory = EnvMonitor(env_factory)

            # Evaluation loop for i in range(n episodes)
            for episode in range(100):
                # noinspection PyRedeclaration
                env_state = monitored_env_factory.reset()
                rew, done_bool = 0, False
                while not done_bool:
                    action = model.predict(env_state, deterministic=True)[0]
                    env_state, step_r, done_bool, info_obj = monitored_env_factory.step(action)
                    rew += step_r
                    if done_bool:
                        break
                print(f'Factory run {episode} done, reward is:\n    {rew}')
            monitored_env_factory.save_run(filepath=policy_path / 'eval_run_monitor.pick')
    print('Measurements Done')
@@ -1,191 +0,0 @@
import sys
import time

from pathlib import Path
import simplejson

import stable_baselines3 as sb3

# This is needed when you put this file in a subfolder.
try:
    # noinspection PyUnboundLocalVariable
    if __package__ is None:
        DIR = Path(__file__).resolve().parent
        sys.path.insert(0, str(DIR.parent))
        __package__ = DIR.name
    else:
        DIR = None
except NameError:
    DIR = None
    pass

from environments import helpers as h
from environments.factory.additional.item.factory_item import ItemFactory
from environments.factory.additional.item.item_util import ItemProperties
from environments.logging.envmonitor import EnvMonitor
from environments.logging.recorder import EnvRecorder
from environments.utility_classes import MovementProperties, ObservationProperties, AgentRenderOptions

from plotting.compare_runs import compare_seed_runs

"""
Welcome to this quick start file. Here we will see how to:
    0. Setup I/O Paths
    1. Setup parameters for the environments (item-factory).
    2. Setup parameters for the agent training (SB3: PPO) and save metrics.
       Run the training.
    3. Save environment and agent for later analysis.
    4. Load the agent from drive
    5. Rendering the environment with a run of the trained agent.
    6. Plot metrics
"""

if __name__ == '__main__':
    #########################################################
    # 0. Setup I/O Paths
    # Define some general parameters
    train_steps = 1e6
    n_seeds = 3
    model_class = sb3.PPO
    env_class = ItemFactory

    env_params_json = 'env_params.json'

    # Define a global study save path
    start_time = int(time.time())
    study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}'
    # Create an _identifier, which is unique for every combination and easy to read in filesystem
    identifier = f'{model_class.__name__}_{env_class.__name__}_{start_time}'
    exp_path = study_root_path / identifier

    #########################################################
    # 1. Setup parameters for the environments (item-factory).
    #
    # Define property object parameters.
    # 'ObservationProperties' are for specifying how the agent sees the environment.
    obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT,  # Agents won't be shown in the obs at all
                                      omit_agent_self=True,                  # This is the default
                                      additional_agent_placeholder=None,     # We will not take care of future agents
                                      frames_to_stack=3,                     # To give the agent a notion of time
                                      pomdp_r=2                              # The agent's view radius
                                      )
    # 'MovementProperties' are for specifying how the agent is allowed to move in the environment.
    move_props = MovementProperties(allow_diagonal_movement=True,  # Euclidean style (vertices)
                                    allow_square_movement=True,    # Manhattan (edges)
                                    allow_no_op=False)             # Pause movement (do nothing)

    # 'ItemProperties' control if and how items are spawned
    # TODO: Comments
    item_props = ItemProperties(
        n_items=7,                          # How many items are there at the same time
        spawn_frequency=50,                 # Spawn frequency in steps
        n_drop_off_locations=10,            # How many drop-off locations are there at the same time
        max_dropoff_storage_size=0,         # How many items are needed until the drop-off is full
        max_agent_inventory_capacity=5,     # How many items are needed until the agent inventory is full
    )

    # These are the EnvKwargs for initializing the environment class, holding all former parameter-classes
    # TODO: Comments
    factory_kwargs = dict(n_agents=1,
                          max_steps=400,
                          parse_doors=True,
                          level_name='rooms',
                          doors_have_area=True,  #
                          verbose=False,
                          mv_prop=move_props,    # See above
                          obs_prop=obs_props,    # See above
                          done_at_collision=True,
                          item_prop=item_props
                          )

    #########################################################
    # 2. Setup parameters for the agent training (SB3: PPO) and save metrics.
    agent_kwargs = dict()

    #########################################################
    # Run the Training
    for seed in range(n_seeds):
        # Make a copy if you want to alter things in the training loop; like the seed.
        env_kwargs = factory_kwargs.copy()
        env_kwargs.update(env_seed=seed)

        # Output folder
        seed_path = exp_path / f'{str(seed)}_{identifier}'
        seed_path.mkdir(parents=True, exist_ok=True)

        # Parameter Storage
        param_path = seed_path / env_params_json
        # Observation (measures) Storage
        monitor_path = seed_path / 'monitor.pick'
        recorder_path = seed_path / 'recorder.json'
        # Model save path for the trained model
        model_save_path = seed_path / f'model.zip'

        # Env Init & Model kwargs definition
        with ItemFactory(**env_kwargs) as env_factory:

            # EnvMonitor Init
            env_monitor_callback = EnvMonitor(env_factory)

            # EnvRecorder Init
            env_recorder_callback = EnvRecorder(env_factory, freq=int(train_steps / 400 / 10))

            # Model Init
            model = model_class("MlpPolicy", env_factory, verbose=1, seed=seed, device='cpu')

            # Model train
            model.learn(total_timesteps=int(train_steps), callback=[env_monitor_callback, env_recorder_callback])

            #########################################################
            # 3. Save environment and agent for later analysis.
            #    Save the trained model, the monitor (environment measures) and the environment parameters
            model.named_observation_space = env_factory.named_observation_space
            model.named_action_space = env_factory.named_action_space
            model.save(model_save_path)
            env_factory.save_params(param_path)
            env_monitor_callback.save_run(monitor_path)
            env_recorder_callback.save_records(recorder_path, save_occupation_map=False)

    # Compare performance runs, for each seed within a model
    try:
        compare_seed_runs(exp_path, use_tex=False)
    except ValueError:
        pass

    # Train ends here ############################################################

    # Evaluation starts here #####################################################
    # First iterate over every model and monitor "as trained"
    print('Start Measurement Tracking')
    # For each trained policy in study_root_path / _identifier
    for policy_path in [x for x in exp_path.iterdir() if x.is_dir()]:

        # Retrieve model class
        model_cls = next(val for key, val in h.MODEL_MAP.items() if key in policy_path.parent.name)
        # Load the agent
        model = model_cls.load(policy_path / 'model.zip', device='cpu')
        # Load old environment kwargs
        with next(policy_path.glob(env_params_json)).open('r') as f:
            env_kwargs = simplejson.load(f)
        # Make the environment stop at collisions
        # (you only want to have a single collision per episode, hence the statistics)
        env_kwargs.update(done_at_collision=True)

        # Init Env
        with ItemFactory(**env_kwargs) as env_factory:
            monitored_env_factory = EnvMonitor(env_factory)

            # Evaluation loop for i in range(n episodes)
            for episode in range(100):
                # noinspection PyRedeclaration
                env_state = monitored_env_factory.reset()
                rew, done_bool = 0, False
                while not done_bool:
                    action = model.predict(env_state, deterministic=True)[0]
                    env_state, step_r, done_bool, info_obj = monitored_env_factory.step(action)
                    rew += step_r
                    if done_bool:
                        break
                print(f'Factory run {episode} done, reward is:\n    {rew}')
            monitored_env_factory.save_run(filepath=policy_path / 'eval_run_monitor.pick')
    print('Measurements Done')
@@ -1,25 +0,0 @@
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS    ?=
SPHINXBUILD   ?= sphinx-build
SOURCEDIR     = source
BUILDDIR      = build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

buildapi:
	sphinx-apidoc.exe -fEM -T -t _templates -o source/source ../marl_factory_grid "../**/marl", "../**/proto"
	@echo "Auto-generation of 'SOURCEAPI' documentation finished. " \
	      "The generated files were placed in 'source/'"

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
@@ -1,35 +0,0 @@
@ECHO OFF

pushd %~dp0

REM Command file for Sphinx documentation

if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build

%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.https://www.sphinx-doc.org/
	exit /b 1
)

if "%1" == "" goto help

%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd
@@ -1,4 +0,0 @@
myst_parser
sphinx-pdj-theme
sphinx-mdinclude
sphinx-book-theme
@@ -1,72 +0,0 @@
# Configuration file for the Sphinx documentation builder.
#
# For the full list of built-in configuration values, see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information

project = 'marl-factory-grid'
copyright = '2023, Steffen Illium, Robert Mueller, Joel Friedrich'
author = 'Steffen Illium, Robert Mueller, Joel Friedrich'
release = '2.5.0'

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

extensions = [  # 'myst_parser',
    'sphinx.ext.todo',
    'sphinx.ext.autodoc',
    'sphinx.ext.intersphinx',
    # 'sphinx.ext.autosummary',
    'sphinx.ext.linkcode',
    'sphinx_mdinclude',
]

templates_path = ['_templates']
exclude_patterns = ['marl_factory_grid.utils.proto', 'marl_factory_grid.utils.proto.fiksProto_pb2*']


autoclass_content = 'both'
autodoc_class_signature = 'separated'
autodoc_typehints = 'description'
autodoc_inherit_docstrings = True
autodoc_typehints_format = 'short'
autodoc_default_options = {
    'members': True,
    # 'member-order': 'bysource',
    'special-members': '__init__',
    'undoc-members': True,
    # 'exclude-members': '__weakref__',
    'show-inheritance': True,
}
autosummary_generate = True
add_module_names = False
toc_object_entries = False
modindex_common_prefix = ['marl_factory_grid.']

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here.
from pathlib import Path
import sys
sys.path.insert(0, (Path(__file__).parents[2]).resolve().as_posix())
sys.path.insert(0, (Path(__file__).parents[2] / 'marl_factory_grid').resolve().as_posix())

# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
html_theme = "sphinx_book_theme"  # 'alabaster'
# html_static_path = ['_static']

# In your configuration, you need to specify a linkcode_resolve function that returns a URL based on the object.
# https://www.sphinx-doc.org/en/master/usage/extensions/linkcode.html


def linkcode_resolve(domain, info):
    if domain in ['py', '__init__.py']:
        return None
    if not info['module']:
        return None
    filename = info['module'].replace('.', '/')
    return "https://github.com/illiumst/marl-factory-grid/%s.py" % filename


print(sys.executable)
@@ -1,99 +0,0 @@
Creating a New Scenario
=======================


Creating a new scenario in the `marl-factory-grid` environment allows you to customize the environment to fit your specific requirements. This guide provides step-by-step instructions on how to create a new scenario, including defining a configuration file, designing a level, and potentially adding new entities, rules, and assets. See the "modifications.rst" file for more information on how to modify existing entities, levels, rules, groups and assets.

Step 1: Define Configuration File
---------------------------------

1. **Create a Configuration File:** Start by creating a new configuration file (`.yaml`) for your scenario. This file will contain settings such as the number of agents, environment dimensions, and other parameters. You can use existing configuration files as templates.

2. **Specify Custom Parameters:** Modify the configuration file to include any custom parameters specific to your scenario. For example, you can set the respawn rate of entities or define specific rewards. A minimal sketch is shown after this list.
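The following sketch shows one way such a config file could be put together programmatically. It only assumes PyYAML and reuses the top-level sections (``General``, ``Entities``, ``Agents``, ``Rules``) from the default config; the concrete values are illustrative placeholders, not a validated scenario.

.. code-block:: python

    import yaml  # PyYAML; install with `pip install pyyaml` if needed

    # Trimmed-down scenario config; section names follow the default config,
    # the concrete entities, actions and rules are placeholders for your scenario.
    config = {
        'General': {'level_name': 'rooms', 'env_seed': 69, 'individual_rewards': True},
        'Entities': {'Defaults': {}},
        'Agents': {'Wolfgang': {'Actions': ['Move8', 'Noop'],
                                'Observations': ['Self', 'Walls']}},
        'Rules': {'Defaults': {}, 'Collision': {'done_at_collisions': False}},
    }

    with open('my_scenario_config.yaml', 'w') as f:
        yaml.safe_dump(config, f, sort_keys=False)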
Step 2: Design the Level
------------------------

1. **Create a Level File:** Design the layout of your environment by creating a new level file (`.txt`). Use symbols such as `#` for walls, `-` for walkable floor, and introduce new symbols for custom entities. A small sketch follows this list.

2. **Define Entity Locations:** Specify the initial locations of entities, including agents and any new entities introduced in your scenario. These spawn locations are typically provided in the config file.
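As a sketch, a small level file can be written directly from Python. The wall (``#``), floor (``-``) and door (``D``) symbols follow the conventions described in the modification guide; the layout itself is only an example.

.. code-block:: python

    # Two rooms connected by a door: '#' = wall, '-' = walkable floor, 'D' = door.
    rows = ["##########",
            "#----#---#",
            "#----D---#",
            "#----#---#",
            "##########"]

    with open('my_scenario_level.txt', 'w') as f:
        f.write("\n".join(rows) + "\n")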
Step 3: Introduce New Entities
------------------------------

1. **Create New Entity Modules:** If your scenario involves introducing new entities, create new entity modules in the `marl_factory_grid/environment/entity` directory. Define their behavior, properties, and any custom actions they can perform. Check out the template module; a minimal sketch follows this list.

2. **Update Configuration:** Update the configuration file to include settings related to your new entities, such as spawn rates, initial quantities, or any specific behaviors.
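A minimal entity module could look like the sketch below. The base-class import path is an assumption made for illustration (this guide only states that entity modules live under ``marl_factory_grid/environment/entity``), so compare it against the template module before use.

.. code-block:: python

    # marl_factory_grid/environment/entity/apple.py -- illustrative module name
    from marl_factory_grid.environment.entity.entity import Entity  # assumed base class


    class Apple(Entity):
        """A collectable apple; its respawn behaviour is handled by a rule (see Step 4)."""

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.collected = False  # simple custom state used by actions or rules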
Step 4: Implement Custom Rules
------------------------------

1. **Create Rule Modules:** If your scenario requires custom rules, create new rule modules in the `marl_factory_grid/environment/rules` directory. Implement the necessary logic to govern the behavior of entities in your scenario and use the provided environment hooks.

2. **Update Configuration:** If your custom rules have configurable parameters, update the configuration file to include these settings and activate the rule by adding it to the config file.

Step 5: Add Custom Assets (Optional)
------------------------------------

1. **Include Custom Asset Files:** If your scenario introduces new assets (e.g., images for entities), include the necessary asset files in the appropriate directories, such as `marl_factory_grid/environment/assets`.

Step 6: Test and Experiment
---------------------------

1. **Run Your Scenario:** Use the provided scripts or write your own script to run the scenario with your customized configuration. Observe the behavior of agents and entities in the environment.

2. **Iterate and Experiment:** Adjust configuration parameters, level design, or introduce new elements based on your observations. Iterate through this process until your scenario meets your desired specifications.


Congratulations! You have successfully created a new scenario in the `marl-factory-grid` environment. Experiment with different configurations, levels, entities, and rules to design unique and engaging environments for your simulations. Below you find an example of how to create a new scenario.


New Example Scenario: Apple Resource Dilemma
--------------------------------------------

To provide you with an example, we'll guide you through creating the "Apple Resource Dilemma" scenario using the steps outlined in the tutorial.
In this example scenario, agents face a dilemma of collecting apples. The apples only spawn if there are already enough in the environment. If agents collect them at the beginning, they won't respawn as quickly as if they wait for more to spawn before collecting.

**Step 1: Define Configuration File**

1. **Create a Configuration File:** Start by creating a new configuration file, e.g., `apple_dilemma_config.yaml`. Use the default config file as a good starting point.

2. **Specify Custom Parameters:** Add custom parameters to control the behavior of your scenario. Also delete unused entities, actions and observations from the default config file, such as dirt piles.

**Step 2: Design the Level**

1. Create a Level File: Design the layout of your environment by creating a new level file, e.g., `apple_dilemma_level.txt`.
Of course you can also just use or modify an existing level.

2. Define Entity Locations: Specify the initial locations of entities, including doors (D). Since the apples will likely be spawning randomly, it would not make sense to encode their spawn in the level file.

**Step 3: Introduce New Entities**

1. Create New Entity Modules: Create a new entity module for the apple in the `marl_factory_grid/environment/entity` directory. Use the module template or existing modules as inspiration. Instead of creating a new agent, the item agent can be used, as it is already configured to collect all items and drop them off at designated locations.

2. Update Configuration: Update the configuration file to include settings related to your new entities. Agents need to be able to interact with and observe them.

**Step 4: Implement Custom Rules**

1. Create Rule Modules: You might want to create new rule modules. For example, `apple_respawn_rule.py` could be inspired by the dirt respawn rule (a possible `tick_post_step` body is sketched after this list):

>>> from marl_factory_grid.environment.rules.rule import Rule
    class AppleRespawnRule(Rule):
        def __init__(self, apple_spawn_rate=0.1):
            super().__init__()
            self.apple_spawn_rate = apple_spawn_rate

        def tick_post_step(self, state):
            # Logic to respawn apples based on spawn rate
            pass

2. Update Configuration: Update the configuration file to include the new rule.
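The ``pass`` above is left as a placeholder. One possible body is sketched below; it assumes the environment state exposes entity collections by name and a random number generator (``state['Apples']``, ``state.rng`` and ``spawn`` are illustrative names, not taken from this guide).

.. code-block:: python

    def tick_post_step(self, state):
        # Illustrative respawn logic: with probability `apple_spawn_rate`, add one
        # apple per step, but only while apples are already present in the
        # environment -- this creates the dilemma described above.
        apples = state['Apples']    # assumed: collections are reachable by name
        if len(apples) > 0 and state.rng.random() < self.apple_spawn_rate:
            apples.spawn(1)         # assumed spawn helper on the collection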
**Step 5: Add Custom Assets (Optional)**

1. Include Custom Asset Files: If your scenario introduces new assets (e.g., images for entities), include the necessary files in the appropriate directories, such as `marl_factory_grid/environment/assets`.

**Step 6: Test and Experiment**
@@ -1,23 +0,0 @@
.. toctree::
   :maxdepth: 1
   :caption: Table of Contents
   :titlesonly:

   installation
   usage
   modifications
   creating a new scenario
   testing
   source

.. note::
   This project is under active development.

.. mdinclude:: ../../README.md

Indices and tables
------------------

* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
@ -1,22 +0,0 @@
|
||||
Installation
|
||||
============
|
||||
|
||||
|
||||
|
||||
How to install the environment
|
||||
------------------------------
|
||||
|
||||
To use `marl-factory-grid`, first install it using pip:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
(.venv) $ pip install marl-factory-grid
|
||||
|
||||
|
||||
Indices and tables
|
||||
------------------
|
||||
|
||||
* :ref:`genindex`
|
||||
* :ref:`modindex`
|
||||
* :ref:`search`
|
||||
|
@ -1,92 +0,0 @@
|
||||
Custom Modifications
|
||||
====================
|
||||
|
||||
This section covers the main aspects of working with the environment.
|
||||
|
||||
Modifying levels
|
||||
----------------
|
||||
Varying levels are created by defining Walls, Floor or Doors in *.txt*-files (see `levels`_ for examples).
|
||||
Define which *level* to use in your *config file* as:
|
||||
|
||||
.. _levels: marl_factory_grid/levels
|
||||
|
||||
>>> General:
|
||||
level_name: rooms # 'simple', 'narrow_corridor', 'eight_puzzle',...
|
||||
|
||||
... or create your own. Maybe with the help of `asciiflow.com <https://asciiflow.com/#/>`_.
|
||||
Make sure to use `#` as `Walls`_, `-` as free (walkable) floor, and `D` for `Doors`_.
|
||||
Other Entities (define your own) may bring their own `Symbols`.
|
||||
|
||||
.. _Walls: marl_factory_grid/environment/entity/wall.py
|
||||
.. _Doors: modules/doors/entities.py
|
||||
|
||||
|
||||
Modifying Entities
------------------
|
||||
Entities are `Objects`_ that can additionally be assigned a position.
|
||||
Abstract Entities are provided.
|
||||
|
||||
If you wish to introduce new entities to the environment, just create a new module that implements the entity class.
If necessary, provide additional classes such as custom actions or rewards, and load the entity into the environment
using the config file.
|
||||
|
||||
.. _Objects: marl_factory_grid/environment/entity/object.py
|
||||
|
||||
Modifying Groups
|
||||
----------------
|
||||
`Groups`_ are entity Sets that provide administrative access to all group members.
|
||||
All `Entity Collections`_ are available at runtime as a property of the env state.
|
||||
If you add an entity, you probably also want a collection of that entity.
|
||||
|
||||
.. _Groups: marl_factory_grid/environment/groups/objects.py
|
||||
.. _Entity Collections: marl_factory_grid/environment/entity/global_entities.py
|
||||
|
||||
Modifying Rules
|
||||
---------------
|
||||
`Rules <https://marl-factory-grid.readthedocs.io/en/latest/code/marl_factory_grid.environment.rules.html>`_ define how
the environment behaves on a micro scale. Each of the hooks (`on_init`, `pre_step`, `on_step`, `post_step`, `on_done`)
provides env access to implement custom logic, calculate rewards, or gather information.
|
||||
|
||||
If you wish to introduce new rules to the environment, make sure your rule implements the Rule class and overrides its
hooks with your own rule logic.
|
||||
|
||||
|
||||
.. image:: ../../images/Hooks_FIKS.png
|
||||
:alt: Hooks Image
|
||||
|
||||
|
||||
Modifying Constants and Rewards
|
||||
-------------------------------
|
||||
|
||||
Customizing rewards and constants allows you to tailor the environment to specific requirements.
|
||||
You can set custom rewards in the configuration file. If no specific rewards are defined, the environment
|
||||
will utilize default rewards, which are provided in the constants file of each module.
|
||||
|
||||
In addition to rewards, you can also customize other constants used in the environment's rules or actions. Each module has
|
||||
its dedicated constants file, while global constants are centrally located in the environment's constants file.
|
||||
Be careful when making changes to constants, as they can radically impact the behavior of the environment. Only modify
|
||||
constants if you have a solid understanding of their implications and are confident in the adjustments you're making.
|
||||
|
||||
|
||||
Modifying Results
|
||||
-----------------
|
||||
`Results <https://marl-factory-grid.readthedocs.io/en/latest/code/marl_factory_grid.utils.results.html>`_
|
||||
provide a way to return `rule` evaluations such as rewards and state reports back to the environment.
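
As a self-contained sketch of that pattern, a rule hook typically builds small result objects that carry an identifier, a validity flag and an optional reward back to the environment, which then credits the reward and logs the report. The class below is only a stand-in; the real result classes live in `marl_factory_grid/utils/results.py` and their names and fields may differ.

>>> from dataclasses import dataclass
@dataclass
class SketchResult:  # stand-in for the environment's own result classes
    identifier: str
    validity: bool
    reward: float
def report_custom_event(agent_name):
    # A rule hook would return a list of such results for the current tick.
    return [SketchResult(identifier=f'custom_event[{agent_name}]', validity=True, reward=0.5)]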
|
||||
|
||||
|
||||
Modifying Assets
|
||||
----------------
|
||||
Make sure to bring your own assets for each Entity living in the Gridworld, as the `Renderer` relies on them.
PNG files with a transparent background and a square aspect ratio should generally do the job.
|
||||
|
||||
.. image:: ../../marl_factory_grid/environment/assets/wall.png
|
||||
:alt: Wall Image
|
||||
.. image:: ../../marl_factory_grid/environment/assets/agent/agent.png
|
||||
:alt: Agent Image
|
||||
|
||||
Indices and tables
|
||||
------------------
|
||||
|
||||
* :ref:`genindex`
|
||||
* :ref:`modindex`
|
||||
* :ref:`search`
|
@ -1,17 +0,0 @@
|
||||
Source
|
||||
======
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:glob:
|
||||
:caption: Table of Contents
|
||||
:titlesonly:
|
||||
|
||||
source/*
|
||||
|
||||
Indices and tables
|
||||
------------------
|
||||
|
||||
* :ref:`genindex`
|
||||
* :ref:`modindex`
|
||||
* :ref:`search`
|
@ -1,40 +0,0 @@
|
||||
marl\_factory\_grid.environment.entity package
|
||||
==============================================
|
||||
|
||||
.. automodule:: marl_factory_grid.environment.entity
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.environment.entity.agent
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.environment.entity.entity
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.environment.entity.object
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.environment.entity.util
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.environment.entity.wall
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,52 +0,0 @@
|
||||
marl\_factory\_grid.environment.groups package
|
||||
==============================================
|
||||
|
||||
.. automodule:: marl_factory_grid.environment.groups
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.environment.groups.agents
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.environment.groups.collection
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.environment.groups.global_entities
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.environment.groups.mixins
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.environment.groups.objects
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.environment.groups.utils
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.environment.groups.walls
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,49 +0,0 @@
|
||||
marl\_factory\_grid.environment package
|
||||
=======================================
|
||||
|
||||
.. automodule:: marl_factory_grid.environment
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Subpackages
|
||||
-----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
|
||||
marl_factory_grid.environment.entity
|
||||
marl_factory_grid.environment.groups
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.environment.actions
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.environment.constants
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.environment.factory
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.environment.rewards
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.environment.rules
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,7 +0,0 @@
|
||||
marl\_factory\_grid.levels package
|
||||
==================================
|
||||
|
||||
.. automodule:: marl_factory_grid.levels
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,40 +0,0 @@
|
||||
marl\_factory\_grid.modules.batteries package
|
||||
=============================================
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.batteries
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.batteries.actions
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.batteries.constants
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.batteries.entitites
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.batteries.groups
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.batteries.rules
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,40 +0,0 @@
|
||||
marl\_factory\_grid.modules.clean\_up package
|
||||
=============================================
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.clean_up
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.clean_up.actions
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.clean_up.constants
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.clean_up.entitites
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.clean_up.groups
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.clean_up.rules
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,40 +0,0 @@
|
||||
marl\_factory\_grid.modules.destinations package
|
||||
================================================
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.destinations
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.destinations.actions
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.destinations.constants
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.destinations.entitites
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.destinations.groups
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.destinations.rules
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,40 +0,0 @@
|
||||
marl\_factory\_grid.modules.doors package
|
||||
=========================================
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.doors
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.doors.actions
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.doors.constants
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.doors.entitites
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.doors.groups
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.doors.rules
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,40 +0,0 @@
|
||||
marl\_factory\_grid.modules.items package
|
||||
=========================================
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.items
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.items.actions
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.items.constants
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.items.entitites
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.items.groups
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.items.rules
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,40 +0,0 @@
|
||||
marl\_factory\_grid.modules.machines package
|
||||
============================================
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.machines
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.machines.actions
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.machines.constants
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.machines.entitites
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.machines.groups
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.machines.rules
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,34 +0,0 @@
|
||||
marl\_factory\_grid.modules.maintenance package
|
||||
===============================================
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.maintenance
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.maintenance.constants
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.maintenance.entities
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.maintenance.groups
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.maintenance.rules
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,22 +0,0 @@
|
||||
marl\_factory\_grid.modules package
|
||||
===================================
|
||||
|
||||
.. automodule:: marl_factory_grid.modules
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Subpackages
|
||||
-----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
|
||||
marl_factory_grid.modules.batteries
|
||||
marl_factory_grid.modules.clean_up
|
||||
marl_factory_grid.modules.destinations
|
||||
marl_factory_grid.modules.doors
|
||||
marl_factory_grid.modules.items
|
||||
marl_factory_grid.modules.machines
|
||||
marl_factory_grid.modules.maintenance
|
||||
marl_factory_grid.modules.zones
|
@ -1,34 +0,0 @@
|
||||
marl\_factory\_grid.modules.zones package
|
||||
=========================================
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.zones
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.zones.constants
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.zones.entitites
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.zones.groups
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.modules.zones.rules
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,28 +0,0 @@
|
||||
marl\_factory\_grid package
|
||||
===========================
|
||||
|
||||
.. automodule:: marl_factory_grid
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Subpackages
|
||||
-----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
|
||||
marl_factory_grid.algorithms
|
||||
marl_factory_grid.environment
|
||||
marl_factory_grid.levels
|
||||
marl_factory_grid.modules
|
||||
marl_factory_grid.utils
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.quickstart
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,22 +0,0 @@
|
||||
marl\_factory\_grid.utils.logging package
|
||||
=========================================
|
||||
|
||||
.. automodule:: marl_factory_grid.utils.logging
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.utils.logging.envmonitor
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.utils.logging.recorder
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,28 +0,0 @@
|
||||
marl\_factory\_grid.utils.plotting package
|
||||
==========================================
|
||||
|
||||
.. automodule:: marl_factory_grid.utils.plotting
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.utils.plotting.plot_compare_runs
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.utils.plotting.plot_single_runs
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.utils.plotting.plotting_utils
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,79 +0,0 @@
|
||||
marl\_factory\_grid.utils package
|
||||
=================================
|
||||
|
||||
.. automodule:: marl_factory_grid.utils
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Subpackages
|
||||
-----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
|
||||
marl_factory_grid.utils.logging
|
||||
marl_factory_grid.utils.plotting
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.utils.config_parser
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.utils.helpers
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.utils.level_parser
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.utils.observation_builder
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.utils.ray_caster
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.utils.renderer
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.utils.results
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.utils.states
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.utils.tools
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
.. automodule:: marl_factory_grid.utils.utility_classes
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
@ -1,15 +0,0 @@
|
||||
Testing
|
||||
=======
|
||||
In EDYS, tests are seamlessly integrated through environment hooks, mirroring the organization of rules, as explained in the README.md file.
|
||||
|
||||
Running tests
|
||||
-------------
|
||||
To include specific tests in your run, simply append them to the "tests" section within the configuration file.
|
||||
If a test requires a specific entity in the environment (e.g., the clean-up test requires a TSPDirtAgent that can observe
and clean dirt in its environment), make sure to include that entity in the config file.
|
||||
|
||||
Writing tests
|
||||
-------------
|
||||
If you intend to create additional tests, refer to the tests.py file for examples.
|
||||
Ensure that any new tests implement the corresponding test class and make use of its hooks.
|
||||
There are no additional steps required, except for the inclusion of your custom tests in the config file.
|
@ -1,75 +0,0 @@
|
||||
Basic Usage
|
||||
===========
|
||||
|
||||
Environment objects, including agents, entities, and rules, that are specified in a *yaml*-config file will be loaded automatically.
|
||||
Using ``quickstart_use`` creates a default config-file and another one that lists all possible options of the environment.
|
||||
Also, it generates an initial script where an agent is executed in the environment specified by the config-file.
|
||||
|
||||
After initializing the environment using the specified configuration file, the script enters a reinforcement learning loop.
|
||||
The loop consists of episodes, where each episode involves resetting the environment, executing actions, and receiving feedback.
|
||||
|
||||
Here's a breakdown of the key components in the provided script. Feel free to customize it based on your specific requirements:
|
||||
|
||||
1. **Initialization:**
|
||||
|
||||
>>> from pathlib import Path
from marl_factory_grid.environment.factory import Factory
from marl_factory_grid.utils.logging.envmonitor import EnvMonitor
from marl_factory_grid.utils.logging.recorder import EnvRecorder
path = Path('marl_factory_grid/configs/default_config.yaml')
factory = Factory(path)
factory = EnvMonitor(factory)
factory = EnvRecorder(factory)
|
||||
|
||||
- The `path` variable points to the location of your configuration file. Ensure it corresponds to the correct path.
|
||||
- `Factory` initializes the environment based on the provided configuration.
|
||||
- `EnvMonitor` and `EnvRecorder` are optional components. They add monitoring and recording functionalities to the environment, respectively; the import paths shown above assume that both classes live in the `marl_factory_grid.utils.logging` subpackage.
|
||||
|
||||
2. **Reinforcement Learning Loop:**
|
||||
|
||||
>>> for episode in trange(10):
|
||||
_ = factory.reset()
|
||||
done = False
|
||||
if render:
|
||||
factory.render()
|
||||
action_spaces = factory.action_space
|
||||
agents = []
|
||||
|
||||
- The loop iterates over a specified number of episodes (in this case, 10).
|
||||
- `factory.reset()` resets the environment for a new episode.
|
||||
- `factory.render()` is used for visualization if rendering is enabled.
|
||||
- `action_spaces` stores the action spaces available for the agents.
|
||||
- `agents` will store agent-specific information during the episode.
|
||||
|
||||
3. **Taking Actions:**
|
||||
|
||||
>>> while not done:
|
||||
a = [randint(0, x.n - 1) for x in action_spaces]
|
||||
obs_type, _, reward, done, info = factory.step(a)
|
||||
if render:
|
||||
factory.render()
|
||||
|
||||
- Within each episode, the loop continues until the environment signals completion (`done`).
|
||||
- `a` represents a list of random actions for each agent based on their action space.
|
||||
- `factory.step(a)` executes the actions, returning observation types, rewards, completion status, and additional information.
|
||||
|
||||
4. **Handling Episode Completion:**
|
||||
|
||||
>>> if done:
|
||||
print(f'Episode {episode} done...')
|
||||
|
||||
- After each episode, a message is printed indicating its completion.
|
||||
|
||||
|
||||
Evaluating the run
|
||||
------------------
|
||||
|
||||
If monitoring and recording are enabled, the environment states will be traced and recorded automatically.
|
||||
The EnvMonitor class acts as a wrapper for Gym environments, monitoring and logging key information during interactions,
|
||||
while the EnvRecorder class records state summaries during interactions in the environment.
|
||||
At the end of each run a plot displaying the step reward is generated. The step reward represents the cumulative sum of rewards obtained by all agents throughout the episode.
|
||||
Furthermore, a comparative plot that shows the achieved score (step reward) over several runs with different seeds or different parameter settings can be generated using the methods provided in plotting/plot_compare_runs.py.
|
||||
For a more comprehensive evaluation, we recommend using the `Weights and Biases (W&B) <https://wandb.ai/site>`_ framework, with the dataframes generated by the monitor and recorder. These can be found in the run path specified in your script. W&B provides a powerful API for logging and visualizing model training metrics, enabling analysis using predefined or custom metrics.
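
A minimal sketch of pushing such a dataframe to W&B is shown below. It assumes the monitor output has been exported to CSV; the file name and run path are placeholders, and W&B's own table logging may suit your workflow better than per-row logging.

>>> import pandas as pd
import wandb
df = pd.read_csv('study_out/run0/monitor.csv')  # placeholder path to the monitor output
run = wandb.init(project='marl-factory-grid', name='example-run')
for _, row in df.iterrows():
    run.log(row.to_dict())
run.finish()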
|
||||
|
||||
Indices and tables
|
||||
------------------
|
||||
|
||||
* :ref:`genindex`
|
||||
* :ref:`modindex`
|
||||
* :ref:`search`
|
Binary file not shown.
Before Width: | Height: | Size: 296 KiB |
@ -1,4 +1,3 @@
|
||||
from .quickstart import init
|
||||
from marl_factory_grid.environment.factory import Factory
|
||||
"""
|
||||
Main module of the 'marl-factory-grid'-environment.
|
||||
|
@ -1 +1 @@
|
||||
from marl_factory_grid.algorithms.marl.memory import MARLActorCriticMemory
|
||||
|
||||
|
@ -11,7 +11,6 @@ import numpy as np
|
||||
from torch.distributions import Categorical
|
||||
|
||||
from marl_factory_grid.algorithms.marl.base_a2c import PolicyGradient, cumulate_discount
|
||||
from marl_factory_grid.algorithms.marl.memory import MARLActorCriticMemory
|
||||
from marl_factory_grid.algorithms.utils import add_env_props, instantiate_class
|
||||
from pathlib import Path
|
||||
from collections import deque
|
||||
|
@ -2,8 +2,6 @@ import numpy as np; import torch as th; import scipy as sp;
|
||||
from collections import deque
|
||||
from torch import nn
|
||||
|
||||
# RLLab Magic for calculating the discounted return G(t) = R(t) + gamma * R(t-1)
|
||||
# cf. https://github.com/rll/rllab/blob/ba78e4c16dc492982e648f117875b22af3965579/rllab/misc/special.py#L107
|
||||
cumulate_discount = lambda x, gamma: sp.signal.lfilter([1], [1, - gamma], x[::-1], axis=0)[::-1]
|
||||
|
||||
class Net(th.nn.Module):
|
||||
|
@ -1,242 +0,0 @@
|
||||
import torch
|
||||
from typing import Union, List, Dict
|
||||
import numpy as np
|
||||
from torch.distributions import Categorical
|
||||
from marl_factory_grid.algorithms.marl.memory import MARLActorCriticMemory
|
||||
from marl_factory_grid.algorithms.utils import add_env_props, instantiate_class
|
||||
from pathlib import Path
|
||||
import pandas as pd
|
||||
from collections import deque
|
||||
|
||||
|
||||
class Names:
|
||||
REWARD = 'reward'
|
||||
DONE = 'done'
|
||||
ACTION = 'action'
|
||||
OBSERVATION = 'observation'
|
||||
LOGITS = 'logits'
|
||||
HIDDEN_ACTOR = 'hidden_actor'
|
||||
HIDDEN_CRITIC = 'hidden_critic'
|
||||
AGENT = 'agent'
|
||||
ENV = 'env'
|
||||
ENV_NAME = 'env_name'
|
||||
N_AGENTS = 'n_agents'
|
||||
ALGORITHM = 'algorithm'
|
||||
MAX_STEPS = 'max_steps'
|
||||
N_STEPS = 'n_steps'
|
||||
BUFFER_SIZE = 'buffer_size'
|
||||
CRITIC = 'critic'
|
||||
BATCH_SIZE = 'bnatch_size'
|
||||
N_ACTIONS = 'n_actions'
|
||||
TRAIN_RENDER = 'train_render'
|
||||
EVAL_RENDER = 'eval_render'
|
||||
|
||||
|
||||
nms = Names
|
||||
ListOrTensor = Union[List, torch.Tensor]
|
||||
|
||||
|
||||
class BaseActorCritic:
|
||||
def __init__(self, cfg):
|
||||
self.factory = add_env_props(cfg)
|
||||
self.__training = True
|
||||
self.cfg = cfg
|
||||
self.n_agents = cfg[nms.AGENT][nms.N_AGENTS]
|
||||
self.reset_memory_after_epoch = True
|
||||
self.setup()
|
||||
|
||||
def setup(self):
|
||||
self.net = instantiate_class(self.cfg[nms.AGENT])
|
||||
self.optimizer = torch.optim.RMSprop(self.net.parameters(), lr=3e-4, eps=1e-5)
|
||||
|
||||
@classmethod
|
||||
def _as_torch(cls, x):
|
||||
if isinstance(x, np.ndarray):
|
||||
return torch.from_numpy(x)
|
||||
elif isinstance(x, List):
|
||||
return torch.tensor(x)
|
||||
elif isinstance(x, (int, float)):
|
||||
return torch.tensor([x])
|
||||
return x
|
||||
|
||||
def train(self):
|
||||
self.__training = False
|
||||
networks = [self.net] if not isinstance(self.net, List) else self.net
|
||||
for net in networks:
|
||||
net.train()
|
||||
|
||||
def eval(self):
|
||||
self.__training = False
|
||||
networks = [self.net] if not isinstance(self.net, List) else self.net
|
||||
for net in networks:
|
||||
net.eval()
|
||||
|
||||
def load_state_dict(self, path: Path):
|
||||
pass
|
||||
|
||||
def get_actions(self, out) -> ListOrTensor:
|
||||
actions = [Categorical(logits=logits).sample().item() for logits in out[nms.LOGITS]]
|
||||
return actions
|
||||
|
||||
def init_hidden(self) -> Dict[str, ListOrTensor]:
|
||||
pass
|
||||
|
||||
def forward(self,
|
||||
observations: ListOrTensor,
|
||||
actions: ListOrTensor,
|
||||
hidden_actor: ListOrTensor,
|
||||
hidden_critic: ListOrTensor
|
||||
) -> Dict[str, ListOrTensor]:
|
||||
pass
|
||||
|
||||
@torch.no_grad()
|
||||
def train_loop(self, checkpointer=None):
|
||||
env = self.factory
|
||||
if self.cfg[nms.ENV][nms.TRAIN_RENDER]:
|
||||
env.render()
|
||||
n_steps, max_steps = [self.cfg[nms.ALGORITHM][k] for k in [nms.N_STEPS, nms.MAX_STEPS]]
|
||||
tm = MARLActorCriticMemory(self.n_agents, self.cfg[nms.ALGORITHM].get(nms.BUFFER_SIZE, n_steps))
|
||||
global_steps, episode, df_results = 0, 0, []
|
||||
reward_queue = deque(maxlen=2000)
|
||||
|
||||
while global_steps < max_steps:
|
||||
obs = env.reset()
|
||||
obs = list(obs.values())
|
||||
last_hiddens = self.init_hidden()
|
||||
last_action, reward = [-1] * self.n_agents, [0.] * self.n_agents
|
||||
done, rew_log = [False] * self.n_agents, 0
|
||||
|
||||
if self.reset_memory_after_epoch:
|
||||
tm.reset()
|
||||
|
||||
tm.add(observation=obs, action=last_action,
|
||||
logits=torch.zeros(self.n_agents, 1, self.cfg[nms.AGENT][nms.N_ACTIONS]),
|
||||
values=torch.zeros(self.n_agents, 1), reward=reward, done=done, **last_hiddens)
|
||||
|
||||
while not all(done):
|
||||
out = self.forward(obs, last_action, **last_hiddens)
|
||||
action = self.get_actions(out)
|
||||
_, next_obs, reward, done, info = env.step(action)
|
||||
done = [done] * self.n_agents if isinstance(done, bool) else done
|
||||
|
||||
if self.cfg[nms.ENV][nms.TRAIN_RENDER]:
|
||||
env.render()
|
||||
|
||||
last_hiddens = dict(hidden_actor=out[nms.HIDDEN_ACTOR],
|
||||
hidden_critic=out[nms.HIDDEN_CRITIC])
|
||||
|
||||
logits = torch.stack([tensor.squeeze(0) for tensor in out.get(nms.LOGITS, None)], dim=0)
|
||||
values = torch.stack([tensor.squeeze(0) for tensor in out.get(nms.CRITIC, None)], dim=0)
|
||||
|
||||
tm.add(observation=obs, action=action, reward=reward, done=done,
|
||||
logits=logits, values=values,
|
||||
**last_hiddens)
|
||||
|
||||
obs = next_obs
|
||||
last_action = action
|
||||
|
||||
if (global_steps+1) % n_steps == 0 or all(done):
|
||||
with torch.inference_mode(False):
|
||||
self.learn(tm)
|
||||
|
||||
global_steps += 1
|
||||
rew_log += sum(reward)
|
||||
reward_queue.extend(reward)
|
||||
|
||||
if checkpointer is not None:
|
||||
checkpointer.step([
|
||||
(f'agent#{i}', agent)
|
||||
for i, agent in enumerate([self.net] if not isinstance(self.net, List) else self.net)
|
||||
])
|
||||
|
||||
if global_steps >= max_steps:
|
||||
break
|
||||
if global_steps%100 == 0:
|
||||
print(f'reward at episode: {episode} = {rew_log}')
|
||||
episode += 1
|
||||
df_results.append([episode, rew_log, *reward])
|
||||
df_results = pd.DataFrame(df_results,
|
||||
columns=['steps', 'reward', *[f'agent#{i}' for i in range(self.n_agents)]]
|
||||
)
|
||||
if checkpointer is not None:
|
||||
df_results.to_csv(checkpointer.path / 'results.csv', index=False)
|
||||
return df_results
|
||||
|
||||
@torch.inference_mode(True)
|
||||
def eval_loop(self, n_episodes, render=False):
|
||||
env = self.factory
|
||||
if self.cfg[nms.ENV][nms.EVAL_RENDER]:
|
||||
env.render()
|
||||
episode, results = 0, []
|
||||
while episode < n_episodes:
|
||||
obs = env.reset()
|
||||
obs = list(obs.values())
|
||||
last_hiddens = self.init_hidden()
|
||||
last_action, reward = [-1] * self.n_agents, [0.] * self.n_agents
|
||||
done, rew_log, eps_rew = [False] * self.n_agents, 0, torch.zeros(self.n_agents)
|
||||
while not all(done):
|
||||
out = self.forward(obs, last_action, **last_hiddens)
|
||||
action = self.get_actions(out)
|
||||
_, next_obs, reward, done, info = env.step(action)
|
||||
|
||||
if self.cfg[nms.ENV][nms.EVAL_RENDER]:
|
||||
env.render()
|
||||
|
||||
if isinstance(done, bool):
|
||||
done = [done] * obs[0].shape[0]
|
||||
obs = next_obs
|
||||
last_action = action
|
||||
last_hiddens = dict(hidden_actor=out.get(nms.HIDDEN_ACTOR, None),
|
||||
hidden_critic=out.get(nms.HIDDEN_CRITIC, None)
|
||||
)
|
||||
eps_rew += torch.tensor(reward)
|
||||
results.append(eps_rew.tolist() + [sum(eps_rew).item()] + [episode])
|
||||
episode += 1
|
||||
agent_columns = [f'agent#{i}' for i in range(self.cfg[nms.ENV][nms.N_AGENTS])]
|
||||
results = pd.DataFrame(results, columns=agent_columns + ['sum', 'episode'])
|
||||
results = pd.melt(results, id_vars=['episode'], value_vars=agent_columns + ['sum'],
|
||||
value_name='reward', var_name='agent')
|
||||
return results
|
||||
|
||||
@staticmethod
|
||||
def compute_advantages(critic, reward, done, gamma, gae_coef=0.0):
|
||||
tds = (reward + gamma * (1.0 - done) * critic[:, 1:].detach()) - critic[:, :-1]
|
||||
|
||||
if gae_coef <= 0:
|
||||
return tds
|
||||
|
||||
gae = torch.zeros_like(tds[:, -1])
|
||||
gaes = []
|
||||
for t in range(tds.shape[1]-1, -1, -1):
|
||||
gae = tds[:, t] + gamma * gae_coef * (1.0 - done[:, t]) * gae
|
||||
gaes.insert(0, gae)
|
||||
gaes = torch.stack(gaes, dim=1)
|
||||
return gaes
|
||||
|
||||
def actor_critic(self, tm, network, gamma, entropy_coef, vf_coef, gae_coef=0.0, **kwargs):
|
||||
obs, actions, done, reward = tm.observation, tm.action, tm.done[:, 1:], tm.reward[:, 1:]
|
||||
|
||||
out = network(obs, actions, tm.hidden_actor[:, 0].squeeze(0), tm.hidden_critic[:, 0].squeeze(0))
|
||||
logits = out[nms.LOGITS][:, :-1] # last one only needed for v_{t+1}
|
||||
critic = out[nms.CRITIC]
|
||||
|
||||
entropy_loss = Categorical(logits=logits).entropy().mean(-1)
|
||||
advantages = self.compute_advantages(critic, reward, done, gamma, gae_coef)
|
||||
value_loss = advantages.pow(2).mean(-1) # n_agent
|
||||
|
||||
# policy loss
|
||||
log_ap = torch.log_softmax(logits, -1)
|
||||
log_ap = torch.gather(log_ap, dim=-1, index=actions[:, 1:].unsqueeze(-1)).squeeze()
|
||||
a2c_loss = -(advantages.detach() * log_ap).mean(-1)
|
||||
# weighted loss
|
||||
loss = a2c_loss + vf_coef*value_loss - entropy_coef * entropy_loss
|
||||
return loss.mean()
|
||||
|
||||
def learn(self, tm: MARLActorCriticMemory, **kwargs):
|
||||
loss = self.actor_critic(tm, self.net, **self.cfg[nms.ALGORITHM], **kwargs)
|
||||
# remove next_obs, will be added in next iter
|
||||
self.optimizer.zero_grad()
|
||||
loss.backward()
|
||||
torch.nn.utils.clip_grad_norm_(self.net.parameters(), 0.5)
|
||||
self.optimizer.step()
|
||||
|
@ -1,8 +0,0 @@
|
||||
marl_factory_grid>environment>rules.py#SpawnEntity.on_reset()
|
||||
marl_factory_grid>environment>rewards.py
|
||||
marl_factory_grid>modules>clean_up>groups.py#DirtPiles.trigger_spawn()
|
||||
marl_factory_grid>environment>rules.py#AgentSpawnRule
|
||||
marl_factory_grid>utils>states.py#GameState.__init__()
|
||||
marl_factory_grid>environment>factory.py>Factory#render
|
||||
marl_factory_grid>environment>factory.py>Factory#set_recorder
|
||||
marl_factory_grid>utils>renderer.py>Renderer#render
|
@ -1,57 +0,0 @@
|
||||
import torch
|
||||
from marl_factory_grid.algorithms.marl.base_ac import BaseActorCritic, nms
|
||||
from marl_factory_grid.algorithms.utils import instantiate_class
|
||||
from pathlib import Path
|
||||
from natsort import natsorted
|
||||
from marl_factory_grid.algorithms.marl.memory import MARLActorCriticMemory
|
||||
|
||||
|
||||
class LoopIAC(BaseActorCritic):
|
||||
|
||||
def __init__(self, cfg):
|
||||
super(LoopIAC, self).__init__(cfg)
|
||||
|
||||
def setup(self):
|
||||
self.net = [
|
||||
instantiate_class(self.cfg[nms.AGENT]) for _ in range(self.n_agents)
|
||||
]
|
||||
self.optimizer = [
|
||||
torch.optim.RMSprop(self.net[ag_i].parameters(), lr=3e-4, eps=1e-5) for ag_i in range(self.n_agents)
|
||||
]
|
||||
|
||||
def load_state_dict(self, path: Path):
|
||||
paths = natsorted(list(path.glob('*.pt')))
|
||||
for path, net in zip(paths, self.net):
|
||||
net.load_state_dict(torch.load(path))
|
||||
|
||||
@staticmethod
|
||||
def merge_dicts(ds): # todo could be recursive for more than 1 hierarchy
|
||||
d = {}
|
||||
for k in ds[0].keys():
|
||||
d[k] = [d[k] for d in ds]
|
||||
return d
|
||||
|
||||
def init_hidden(self):
|
||||
ha = [net.init_hidden_actor() for net in self.net]
|
||||
hc = [net.init_hidden_critic() for net in self.net]
|
||||
return dict(hidden_actor=ha, hidden_critic=hc)
|
||||
|
||||
def forward(self, observations, actions, hidden_actor, hidden_critic):
|
||||
outputs = [
|
||||
net(
|
||||
self._as_torch(observations[ag_i]).unsqueeze(0).unsqueeze(0), # agent x time
|
||||
self._as_torch(actions[ag_i]).unsqueeze(0),
|
||||
hidden_actor[ag_i],
|
||||
hidden_critic[ag_i]
|
||||
) for ag_i, net in enumerate(self.net)
|
||||
]
|
||||
return self.merge_dicts(outputs)
|
||||
|
||||
def learn(self, tms: MARLActorCriticMemory, **kwargs):
|
||||
for ag_i in range(self.n_agents):
|
||||
tm, net = tms(ag_i), self.net[ag_i]
|
||||
loss = self.actor_critic(tm, net, **self.cfg[nms.ALGORITHM], **kwargs)
|
||||
self.optimizer[ag_i].zero_grad()
|
||||
loss.backward()
|
||||
torch.nn.utils.clip_grad_norm_(net.parameters(), 0.5)
|
||||
self.optimizer[ag_i].step()
|
@ -1,66 +0,0 @@
|
||||
from marl_factory_grid.algorithms.marl.base_ac import Names as nms
|
||||
from marl_factory_grid.algorithms.marl.snac import LoopSNAC
|
||||
from marl_factory_grid.algorithms.marl.memory import MARLActorCriticMemory
|
||||
import torch
|
||||
from torch.distributions import Categorical
|
||||
from marl_factory_grid.algorithms.utils import instantiate_class
|
||||
|
||||
|
||||
class LoopMAPPO(LoopSNAC):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(LoopMAPPO, self).__init__(*args, **kwargs)
|
||||
self.reset_memory_after_epoch = False
|
||||
|
||||
def setup(self):
|
||||
self.net = instantiate_class(self.cfg[nms.AGENT])
|
||||
self.optimizer = torch.optim.Adam(self.net.parameters(), lr=3e-4, eps=1e-5)
|
||||
|
||||
def learn(self, tm: MARLActorCriticMemory, **kwargs):
|
||||
if len(tm) >= self.cfg['algorithm']['buffer_size']:
|
||||
# only learn when buffer is full
|
||||
for batch_i in range(self.cfg['algorithm']['n_updates']):
|
||||
batch = tm.chunk_dataloader(chunk_len=self.cfg['algorithm']['n_steps'],
|
||||
k=self.cfg['algorithm']['batch_size'])
|
||||
loss = self.mappo(batch, self.net, **self.cfg[nms.ALGORITHM], **kwargs)
|
||||
self.optimizer.zero_grad()
|
||||
loss.backward()
|
||||
torch.nn.utils.clip_grad_norm_(self.net.parameters(), 0.5)
|
||||
self.optimizer.step()
|
||||
|
||||
def monte_carlo_returns(self, rewards, done, gamma):
|
||||
rewards_ = []
|
||||
discounted_reward = torch.zeros_like(rewards[:, -1])
|
||||
for t in range(rewards.shape[1]-1, -1, -1):
|
||||
discounted_reward = rewards[:, t] + (gamma * (1.0 - done[:, t]) * discounted_reward)
|
||||
rewards_.insert(0, discounted_reward)
|
||||
rewards_ = torch.stack(rewards_, dim=1)
|
||||
return rewards_
|
||||
|
||||
def mappo(self, batch, network, gamma, entropy_coef, vf_coef, clip_range, **__):
|
||||
out = network(batch[nms.OBSERVATION], batch[nms.ACTION], batch[nms.HIDDEN_ACTOR], batch[nms.HIDDEN_CRITIC])
|
||||
logits = out[nms.LOGITS][:, :-1] # last one only needed for v_{t+1}
|
||||
|
||||
old_log_probs = torch.log_softmax(batch[nms.LOGITS], -1)
|
||||
old_log_probs = torch.gather(old_log_probs, index=batch[nms.ACTION][:, 1:].unsqueeze(-1), dim=-1).squeeze()
|
||||
|
||||
# monte carlo returns
|
||||
mc_returns = self.monte_carlo_returns(batch[nms.REWARD], batch[nms.DONE], gamma)
|
||||
mc_returns = (mc_returns - mc_returns.mean()) / (mc_returns.std() + 1e-8) # todo: norm across agent ok?
|
||||
advantages = mc_returns - out[nms.CRITIC][:, :-1]
|
||||
|
||||
# policy loss
|
||||
log_ap = torch.log_softmax(logits, -1)
|
||||
log_ap = torch.gather(log_ap, dim=-1, index=batch[nms.ACTION][:, 1:].unsqueeze(-1)).squeeze()
|
||||
ratio = (log_ap - old_log_probs).exp()
|
||||
surr1 = ratio * advantages.detach()
|
||||
surr2 = torch.clamp(ratio, 1 - clip_range, 1 + clip_range) * advantages.detach()
|
||||
policy_loss = -torch.min(surr1, surr2).mean(-1)
|
||||
|
||||
# entropy & value loss
|
||||
entropy_loss = Categorical(logits=logits).entropy().mean(-1)
|
||||
value_loss = advantages.pow(2).mean(-1) # n_agent
|
||||
|
||||
# weighted loss
|
||||
loss = policy_loss + vf_coef*value_loss - entropy_coef * entropy_loss
|
||||
|
||||
return loss.mean()
|
@ -1,221 +0,0 @@
|
||||
import numpy as np
|
||||
from collections import deque
|
||||
import torch
|
||||
from typing import Union
|
||||
from torch import Tensor
|
||||
from torch.utils.data import Dataset, ConcatDataset
|
||||
import random
|
||||
|
||||
|
||||
class ActorCriticMemory(object):
|
||||
def __init__(self, capacity=10):
|
||||
self.capacity = capacity
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
self.__actions = LazyTensorFiFoQueue(maxlen=self.capacity+1)
|
||||
self.__hidden_actor = LazyTensorFiFoQueue(maxlen=self.capacity+1)
|
||||
self.__hidden_critic = LazyTensorFiFoQueue(maxlen=self.capacity+1)
|
||||
self.__states = LazyTensorFiFoQueue(maxlen=self.capacity+1)
|
||||
self.__rewards = LazyTensorFiFoQueue(maxlen=self.capacity+1)
|
||||
self.__dones = LazyTensorFiFoQueue(maxlen=self.capacity+1)
|
||||
self.__logits = LazyTensorFiFoQueue(maxlen=self.capacity+1)
|
||||
self.__values = LazyTensorFiFoQueue(maxlen=self.capacity+1)
|
||||
|
||||
def __len__(self):
|
||||
return len(self.__rewards) - 1
|
||||
|
||||
@property
|
||||
def observation(self, sls=slice(0, None)): # add time dimension through stacking
|
||||
return self.__states[sls].unsqueeze(0) # 1 x time x hidden dim
|
||||
|
||||
@property
|
||||
def hidden_actor(self, sls=slice(0, None)): # 1 x n_layers x dim
|
||||
return self.__hidden_actor[sls].unsqueeze(0) # 1 x time x n_layers x dim
|
||||
|
||||
@property
|
||||
def hidden_critic(self, sls=slice(0, None)): # 1 x n_layers x dim
|
||||
return self.__hidden_critic[sls].unsqueeze(0) # 1 x time x n_layers x dim
|
||||
|
||||
@property
|
||||
def reward(self, sls=slice(0, None)):
|
||||
return self.__rewards[sls].squeeze().unsqueeze(0) # 1 x time
|
||||
|
||||
@property
|
||||
def action(self, sls=slice(0, None)):
|
||||
return self.__actions[sls].long().squeeze().unsqueeze(0) # 1 x time
|
||||
|
||||
@property
|
||||
def done(self, sls=slice(0, None)):
|
||||
return self.__dones[sls].float().squeeze().unsqueeze(0) # 1 x time
|
||||
|
||||
@property
|
||||
def logits(self, sls=slice(0, None)): # assumes a trailing 1 for time dimension - common when using output from NN
|
||||
return self.__logits[sls].squeeze().unsqueeze(0) # 1 x time x actions
|
||||
|
||||
@property
|
||||
def values(self, sls=slice(0, None)):
|
||||
return self.__values[sls].squeeze().unsqueeze(0) # 1 x time x actions
|
||||
|
||||
def add_observation(self, state: Union[Tensor, np.ndarray]):
|
||||
self.__states.append(state if isinstance(state, Tensor) else torch.from_numpy(state))
|
||||
|
||||
def add_hidden_actor(self, hidden: Tensor):
|
||||
# layers x hidden dim
|
||||
self.__hidden_actor.append(hidden)
|
||||
|
||||
def add_hidden_critic(self, hidden: Tensor):
|
||||
# layers x hidden dim
|
||||
self.__hidden_critic.append(hidden)
|
||||
|
||||
def add_action(self, action: Union[int, Tensor]):
|
||||
if not isinstance(action, Tensor):
|
||||
action = torch.tensor(action)
|
||||
self.__actions.append(action)
|
||||
|
||||
def add_reward(self, reward: Union[float, Tensor]):
|
||||
if not isinstance(reward, Tensor):
|
||||
reward = torch.tensor(reward)
|
||||
self.__rewards.append(reward)
|
||||
|
||||
def add_done(self, done: bool):
|
||||
if not isinstance(done, Tensor):
|
||||
done = torch.tensor(done)
|
||||
self.__dones.append(done)
|
||||
|
||||
def add_logits(self, logits: Tensor):
|
||||
self.__logits.append(logits)
|
||||
|
||||
def add_values(self, values: Tensor):
|
||||
self.__values.append(values)
|
||||
|
||||
def add(self, **kwargs):
|
||||
for k, v in kwargs.items():
|
||||
func = getattr(ActorCriticMemory, f'add_{k}')
|
||||
func(self, v)
|
||||
|
||||
|
||||
class MARLActorCriticMemory(object):
|
||||
def __init__(self, n_agents, capacity):
|
||||
self.n_agents = n_agents
|
||||
self.memories = [
|
||||
ActorCriticMemory(capacity) for _ in range(n_agents)
|
||||
]
|
||||
|
||||
def __call__(self, agent_i):
|
||||
return self.memories[agent_i]
|
||||
|
||||
def __len__(self):
|
||||
return len(self.memories[0]) # todo add assertion check!
|
||||
|
||||
def reset(self):
|
||||
for mem in self.memories:
|
||||
mem.reset()
|
||||
|
||||
def add(self, **kwargs):
|
||||
for agent_i in range(self.n_agents):
|
||||
for k, v in kwargs.items():
|
||||
func = getattr(ActorCriticMemory, f'add_{k}')
|
||||
func(self.memories[agent_i], v[agent_i])
|
||||
|
||||
def __getattr__(self, attr):
|
||||
all_attrs = [getattr(mem, attr) for mem in self.memories]
|
||||
return torch.cat(all_attrs, 0) # agent x time ...
|
||||
|
||||
def chunk_dataloader(self, chunk_len, k):
|
||||
datasets = [ExperienceChunks(mem, chunk_len, k) for mem in self.memories]
|
||||
dataset = ConcatDataset(datasets)
|
||||
data = [dataset[i] for i in range(len(dataset))]
|
||||
data = custom_collate_fn(data)
|
||||
return data
|
||||
|
||||
|
||||
def custom_collate_fn(batch):
|
||||
elem = batch[0]
|
||||
return {key: torch.cat([d[key] for d in batch], dim=0) for key in elem}
|
||||
|
||||
|
||||
class ExperienceChunks(Dataset):
|
||||
def __init__(self, memory, chunk_len, k):
|
||||
assert chunk_len <= len(memory), 'chunk_len cannot be longer than the size of the memory'
|
||||
self.memory = memory
|
||||
self.chunk_len = chunk_len
|
||||
self.k = k
|
||||
|
||||
@property
|
||||
def whitelist(self):
|
||||
whitelist = torch.ones(len(self.memory) - self.chunk_len)
|
||||
for d in self.memory.done.squeeze().nonzero().flatten():
|
||||
whitelist[max((0, d-self.chunk_len-1)):d+2] = 0
|
||||
whitelist[0] = 0
|
||||
return whitelist.tolist()
|
||||
|
||||
def sample(self, start=1):
|
||||
cl = self.chunk_len
|
||||
sample = dict(observation=self.memory.observation[:, start:start+cl+1],
|
||||
action=self.memory.action[:, start-1:start+cl],
|
||||
hidden_actor=self.memory.hidden_actor[:, start-1],
|
||||
hidden_critic=self.memory.hidden_critic[:, start-1],
|
||||
reward=self.memory.reward[:, start:start + cl],
|
||||
done=self.memory.done[:, start:start + cl],
|
||||
logits=self.memory.logits[:, start:start + cl],
|
||||
values=self.memory.values[:, start:start + cl])
|
||||
return sample
|
||||
|
||||
def __len__(self):
|
||||
return self.k
|
||||
|
||||
def __getitem__(self, i):
|
||||
idx = random.choices(range(0, len(self.memory) - self.chunk_len), weights=self.whitelist, k=1)
|
||||
return self.sample(idx[0])
|
||||
|
||||
|
||||
class LazyTensorFiFoQueue:
|
||||
def __init__(self, maxlen):
|
||||
self.maxlen = maxlen
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
self.__lazy_queue = deque(maxlen=self.maxlen)
|
||||
self.shape = None
|
||||
self.queue = None
|
||||
|
||||
def shape_init(self, tensor: Tensor):
|
||||
self.shape = torch.Size([self.maxlen, *tensor.shape])
|
||||
|
||||
def build_tensor_queue(self):
|
||||
if len(self.__lazy_queue) > 0:
|
||||
block = torch.stack(list(self.__lazy_queue), dim=0)
|
||||
l = block.shape[0]
|
||||
if self.queue is None:
|
||||
self.queue = block
|
||||
elif self.true_len() <= self.maxlen:
|
||||
self.queue = torch.cat((self.queue, block), dim=0)
|
||||
else:
|
||||
self.queue = torch.cat((self.queue[l:], block), dim=0)
|
||||
self.__lazy_queue.clear()
|
||||
|
||||
def append(self, data):
|
||||
if self.shape is None:
|
||||
self.shape_init(data)
|
||||
self.__lazy_queue.append(data)
|
||||
if len(self.__lazy_queue) >= self.maxlen:
|
||||
self.build_tensor_queue()
|
||||
|
||||
def true_len(self):
|
||||
return len(self.__lazy_queue) + (0 if self.queue is None else self.queue.shape[0])
|
||||
|
||||
def __len__(self):
|
||||
return min((self.true_len(), self.maxlen))
|
||||
|
||||
def __str__(self):
|
||||
return f'LazyTensorFiFoQueue\tmaxlen: {self.maxlen}, shape: {self.shape}, ' \
|
||||
f'len: {len(self)}, true_len: {self.true_len()}, elements in lazy queue: {len(self.__lazy_queue)}'
|
||||
|
||||
def __getitem__(self, item_or_slice):
|
||||
self.build_tensor_queue()
|
||||
return self.queue[item_or_slice]
|
||||
|
||||
|
||||
|
||||
|
@ -7,8 +7,8 @@ agent:
|
||||
hidden_size_critic: 64
|
||||
use_agent_embedding: False
|
||||
env:
|
||||
classname: marl_factory_grid.configs.custom
|
||||
env_name: "custom/MultiAgentConfigs/dirt_quadrant_train_config"
|
||||
classname: marl_factory_grid.environment.configs.marl_eval
|
||||
env_name: "marl_eval/dirt_quadrant_eval_config"
|
||||
n_agents: 2
|
||||
max_steps: 250
|
||||
pomdp_r: 2
|
@ -7,8 +7,8 @@ agent:
|
||||
hidden_size_critic: 64
|
||||
use_agent_embedding: False
|
||||
env:
|
||||
classname: marl_factory_grid.configs.custom
|
||||
env_name: "custom/two_rooms_one_door_modified_train_config"
|
||||
classname: marl_factory_grid.environment.configs.marl_eval
|
||||
env_name: "marl_eval/two_rooms_eval_config"
|
||||
n_agents: 2
|
||||
max_steps: 250
|
||||
pomdp_r: 2
|
@ -1,103 +0,0 @@
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
|
||||
class RecurrentAC(nn.Module):
|
||||
def __init__(self, observation_size, n_actions, obs_emb_size,
|
||||
action_emb_size, hidden_size_actor, hidden_size_critic,
|
||||
n_agents, use_agent_embedding=True):
|
||||
super(RecurrentAC, self).__init__()
|
||||
observation_size = np.prod(observation_size)
|
||||
self.n_layers = 1
|
||||
self.n_actions = n_actions
|
||||
self.use_agent_embedding = use_agent_embedding
|
||||
self.hidden_size_actor = hidden_size_actor
|
||||
self.hidden_size_critic = hidden_size_critic
|
||||
self.action_emb_size = action_emb_size
|
||||
self.obs_proj = nn.Linear(observation_size, obs_emb_size)
|
||||
self.action_emb = nn.Embedding(n_actions+1, action_emb_size, padding_idx=0)
|
||||
self.agent_emb = nn.Embedding(n_agents, action_emb_size)
|
||||
mix_in_size = obs_emb_size+action_emb_size if not use_agent_embedding else obs_emb_size+n_agents*action_emb_size
|
||||
self.mix = nn.Sequential(nn.Tanh(),
|
||||
nn.Linear(mix_in_size, obs_emb_size),
|
||||
nn.Tanh(),
|
||||
nn.Linear(obs_emb_size, obs_emb_size)
|
||||
)
|
||||
self.gru_actor = nn.GRU(obs_emb_size, hidden_size_actor, batch_first=True, num_layers=self.n_layers)
|
||||
self.gru_critic = nn.GRU(obs_emb_size, hidden_size_critic, batch_first=True, num_layers=self.n_layers)
|
||||
self.action_head = nn.Sequential(
|
||||
nn.Linear(hidden_size_actor, hidden_size_actor),
|
||||
nn.Tanh(),
|
||||
nn.Linear(hidden_size_actor, n_actions)
|
||||
)
|
||||
# spectral_norm(nn.Linear(hidden_size_actor, hidden_size_actor)),
|
||||
self.critic_head = nn.Sequential(
|
||||
nn.Linear(hidden_size_critic, hidden_size_critic),
|
||||
nn.Tanh(),
|
||||
nn.Linear(hidden_size_critic, 1)
|
||||
)
|
||||
#self.action_head[-1].weight.data.uniform_(-3e-3, 3e-3)
|
||||
#self.action_head[-1].bias.data.uniform_(-3e-3, 3e-3)
|
||||
|
||||
def init_hidden_actor(self):
|
||||
return torch.zeros(1, self.n_layers, self.hidden_size_actor)
|
||||
|
||||
def init_hidden_critic(self):
|
||||
return torch.zeros(1, self.n_layers, self.hidden_size_critic)
|
||||
|
||||
def forward(self, observations, actions, hidden_actor=None, hidden_critic=None):
|
||||
n_agents, t, *_ = observations.shape
|
||||
obs_emb = self.obs_proj(observations.view(n_agents, t, -1).float())
|
||||
action_emb = self.action_emb(actions+1) # shift by one due to padding idx
|
||||
|
||||
if not self.use_agent_embedding:
|
||||
x_t = torch.cat((obs_emb, action_emb), -1)
|
||||
else:
|
||||
agent_emb = self.agent_emb(
|
||||
torch.cat([torch.arange(0, n_agents, 1).view(-1, 1)] * t, 1)
|
||||
)
|
||||
x_t = torch.cat((obs_emb, agent_emb, action_emb), -1)
|
||||
|
||||
mixed_x_t = self.mix(x_t)
|
||||
output_p, _ = self.gru_actor(input=mixed_x_t, hx=hidden_actor.swapaxes(1, 0))
|
||||
output_c, _ = self.gru_critic(input=mixed_x_t, hx=hidden_critic.swapaxes(1, 0))
|
||||
|
||||
logits = self.action_head(output_p)
|
||||
critic = self.critic_head(output_c).squeeze(-1)
|
||||
return dict(logits=logits, critic=critic, hidden_actor=output_p, hidden_critic=output_c)
|
||||
|
||||
|
||||
class RecurrentACL2(RecurrentAC):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self.action_head = nn.Sequential(
|
||||
nn.Linear(self.hidden_size_actor, self.hidden_size_actor),
|
||||
nn.Tanh(),
|
||||
NormalizedLinear(self.hidden_size_actor, self.n_actions, trainable_magnitude=True)
|
||||
)
|
||||
|
||||
|
||||
class NormalizedLinear(nn.Linear):
|
||||
def __init__(self, in_features: int, out_features: int,
|
||||
device=None, dtype=None, trainable_magnitude=False):
|
||||
super(NormalizedLinear, self).__init__(in_features, out_features, False, device, dtype)
|
||||
self.d_sqrt = in_features**0.5
|
||||
self.trainable_magnitude = trainable_magnitude
|
||||
self.scale = nn.Parameter(torch.tensor([1.]), requires_grad=trainable_magnitude)
|
||||
|
||||
def forward(self, in_array):
|
||||
normalized_input = F.normalize(in_array, dim=-1, p=2, eps=1e-5)
|
||||
normalized_weight = F.normalize(self.weight, dim=-1, p=2, eps=1e-5)
|
||||
return F.linear(normalized_input, normalized_weight) * self.d_sqrt * self.scale
|
||||
|
||||
|
||||
class L2Norm(nn.Module):
|
||||
def __init__(self, in_features, trainable_magnitude=False):
|
||||
super(L2Norm, self).__init__()
|
||||
self.d_sqrt = in_features**0.5
|
||||
self.scale = nn.Parameter(torch.tensor([1.]), requires_grad=trainable_magnitude)
|
||||
|
||||
def forward(self, x):
|
||||
return F.normalize(x, dim=-1, p=2, eps=1e-5) * self.d_sqrt * self.scale
|
@ -1,55 +0,0 @@
|
||||
import torch
|
||||
from torch.distributions import Categorical
|
||||
from marl_factory_grid.algorithms.marl.iac import LoopIAC
|
||||
from marl_factory_grid.algorithms.marl.base_ac import nms
|
||||
from marl_factory_grid.algorithms.marl.memory import MARLActorCriticMemory
|
||||
|
||||
|
||||
class LoopSEAC(LoopIAC):
|
||||
def __init__(self, cfg):
|
||||
super(LoopSEAC, self).__init__(cfg)
|
||||
|
||||
def actor_critic(self, tm, networks, gamma, entropy_coef, vf_coef, gae_coef=0.0, **kwargs):
|
||||
obs, actions, done, reward = tm.observation, tm.action, tm.done[:, 1:], tm.reward[:, 1:]
|
||||
outputs = [net(obs, actions, tm.hidden_actor[:, 0], tm.hidden_critic[:, 0]) for net in networks]
|
||||
|
||||
with torch.inference_mode(True):
|
||||
true_action_logp = torch.stack([
|
||||
torch.log_softmax(out[nms.LOGITS][ag_i, :-1], -1)
|
||||
.gather(index=actions[ag_i, 1:, None], dim=-1)
|
||||
for ag_i, out in enumerate(outputs)
|
||||
], 0).squeeze()
|
||||
|
||||
losses = []
|
||||
|
||||
for ag_i, out in enumerate(outputs):
|
||||
logits = out[nms.LOGITS][:, :-1] # last one only needed for v_{t+1}
|
||||
critic = out[nms.CRITIC]
|
||||
|
||||
entropy_loss = Categorical(logits=logits[ag_i]).entropy().mean()
|
||||
advantages = self.compute_advantages(critic, reward, done, gamma, gae_coef)
|
||||
|
||||
# policy loss
|
||||
log_ap = torch.log_softmax(logits, -1)
|
||||
log_ap = torch.gather(log_ap, dim=-1, index=actions[:, 1:].unsqueeze(-1)).squeeze()
|
||||
|
||||
# importance weights
|
||||
iw = (log_ap - true_action_logp).exp().detach() # importance_weights
|
||||
|
||||
a2c_loss = (-iw*log_ap * advantages.detach()).mean(-1)
|
||||
|
||||
value_loss = (iw*advantages.pow(2)).mean(-1) # n_agent
|
||||
|
||||
# weighted loss
|
||||
loss = (a2c_loss + vf_coef*value_loss - entropy_coef * entropy_loss).mean()
|
||||
losses.append(loss)
|
||||
|
||||
return losses
|
||||
|
||||
def learn(self, tms: MARLActorCriticMemory, **kwargs):
|
||||
losses = self.actor_critic(tms, self.net, **self.cfg[nms.ALGORITHM], **kwargs)
|
||||
for ag_i, loss in enumerate(losses):
|
||||
self.optimizer[ag_i].zero_grad()
|
||||
loss.backward()
|
||||
torch.nn.utils.clip_grad_norm_(self.net[ag_i].parameters(), 0.5)
|
||||
self.optimizer[ag_i].step()
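# --- Illustrative sketch (added example, not part of the original file) ----
# Hedged, minimal restatement of the SEAC-style importance weighting used in
# actor_critic() above: experience generated under another agent's policy is
# re-weighted by the detached probability ratio pi_own / pi_behaviour before
# entering the policy and value losses. The numbers below are made up.
demo_own_logp = torch.log(torch.tensor([0.30, 0.10, 0.60]))    # log pi_i(a_t)
demo_behav_logp = torch.log(torch.tensor([0.25, 0.20, 0.55]))  # log pi_j(a_t)
demo_iw = (demo_own_logp - demo_behav_logp).exp().detach()     # importance weights
demo_adv = torch.tensor([0.5, -0.2, 1.0])
demo_a2c_loss = (-demo_iw * demo_own_logp * demo_adv).mean()   # mirrors a2c_loss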
|
@ -7,8 +7,8 @@ agent:
|
||||
hidden_size_critic: 64
|
||||
use_agent_embedding: False
|
||||
env:
|
||||
classname: marl_factory_grid.configs.custom
|
||||
env_name: "custom/dirt_quadrant_train_config"
|
||||
classname: marl_factory_grid.environment.configs.rl
|
||||
env_name: "rl/dirt_quadrant_train_config"
|
||||
n_agents: 1
|
||||
max_steps: 250
|
||||
pomdp_r: 2
|
@ -7,8 +7,8 @@ agent:
|
||||
hidden_size_critic: 64
|
||||
use_agent_embedding: False
|
||||
env:
|
||||
classname: marl_factory_grid.configs.custom
|
||||
env_name: "custom/two_rooms_one_door_modified_train_config"
|
||||
classname: marl_factory_grid.environment.configs.rl
|
||||
env_name: "rl/two_rooms_train_config"
|
||||
n_agents: 1
|
||||
max_steps: 250
|
||||
pomdp_r: 2
|
@ -1,33 +0,0 @@
|
||||
from marl_factory_grid.algorithms.marl.base_ac import BaseActorCritic
|
||||
from marl_factory_grid.algorithms.marl.base_ac import nms
|
||||
import torch
|
||||
from torch.distributions import Categorical
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class LoopSNAC(BaseActorCritic):
|
||||
def __init__(self, cfg):
|
||||
super().__init__(cfg)
|
||||
|
||||
def load_state_dict(self, path: Path):
|
||||
path2weights = list(path.glob('*.pt'))
|
||||
assert len(path2weights) == 1, f'Expected a single set of weights but got {len(path2weights)}'
|
||||
self.net.load_state_dict(torch.load(path2weights[0]))
|
||||
|
||||
def init_hidden(self):
|
||||
hidden_actor = self.net.init_hidden_actor()
|
||||
hidden_critic = self.net.init_hidden_critic()
|
||||
return dict(hidden_actor=torch.cat([hidden_actor] * self.n_agents, 0),
|
||||
hidden_critic=torch.cat([hidden_critic] * self.n_agents, 0)
|
||||
)
|
||||
|
||||
def get_actions(self, out):
|
||||
actions = Categorical(logits=out[nms.LOGITS]).sample().squeeze()
|
||||
return actions
|
||||
|
||||
def forward(self, observations, actions, hidden_actor, hidden_critic):
|
||||
out = self.net(self._as_torch(observations).unsqueeze(1),
|
||||
self._as_torch(actions).unsqueeze(1),
|
||||
hidden_actor, hidden_critic
|
||||
)
|
||||
return out
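# --- Illustrative sketch (added example, not part of the original file) ----
# Hedged example of the sampling performed by get_actions() above: one
# categorical distribution per agent, parameterised by the network's logits.
# Uses only torch and Categorical, both imported at the top of this file.
demo_logits = torch.randn(3, 1, 5)                    # (n_agents, t=1, n_actions)
demo_actions = Categorical(logits=demo_logits).sample().squeeze()
# demo_actions: tensor of 3 integer action ids, one per agent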
|
@ -37,7 +37,6 @@ class TSPBaseAgent(ABC):
|
||||
self._position_graph = self.generate_pos_graph()
|
||||
self._static_route = None
|
||||
self.cached_route = None
|
||||
self.fallback_action = None
|
||||
self.action_list = []
|
||||
|
||||
@abstractmethod
|
||||
@ -50,46 +49,6 @@ class TSPBaseAgent(ABC):
|
||||
"""
|
||||
return 0
|
||||
|
||||
def calculate_tsp_route(self, target_identifier):
|
||||
"""
|
||||
Calculate the TSP route to reach a target.
|
||||
|
||||
:param target_identifier: Identifier of the target entity
|
||||
:type target_identifier: str
|
||||
|
||||
:return: TSP route
|
||||
:rtype: List[int]
|
||||
"""
|
||||
target_positions = [x for x in self._env.state[target_identifier].positions if x != c.VALUE_NO_POS]
|
||||
|
||||
# if there are cached routes, search for one matching the current and target position
|
||||
if self._env.state.route_cache and (
|
||||
route := self._env.state.get_cached_route(self.state.pos, target_positions)) is not None:
|
||||
# print(f"Retrieved cached route: {route}")
|
||||
return route
|
||||
# if none are found, calculate tsp route and cache it
|
||||
else:
|
||||
start_time = time.time()
|
||||
if self.local_optimization:
|
||||
nodes = \
|
||||
[self.state.pos] + \
|
||||
[x for x in target_positions if max(abs(np.subtract(x, self.state.pos))) < 3]
|
||||
try:
|
||||
while len(nodes) < 7:
|
||||
nodes += [next(x for x in target_positions if x not in nodes)]
|
||||
except StopIteration:
|
||||
nodes = [self.state.pos] + target_positions
|
||||
|
||||
else:
|
||||
nodes = [self.state.pos] + target_positions
|
||||
|
||||
route = tsp.traveling_salesman_problem(self._position_graph,
|
||||
nodes=nodes, cycle=True, method=tsp.greedy_tsp)
|
||||
duration = time.time() - start_time
|
||||
print("TSP calculation took {:.2f} seconds to execute".format(duration))
|
||||
self._env.state.cache_route(route)
|
||||
return route
|
||||
|
||||
def _use_door_or_move(self, door, target):
|
||||
"""
|
||||
Helper method to decide whether to use a door or move towards a target.
|
||||
@ -108,6 +67,47 @@ class TSPBaseAgent(ABC):
|
||||
action = self._predict_move(target)
|
||||
return action
|
||||
|
||||
def calculate_tsp_route(self, target_identifier):
|
||||
"""
|
||||
Calculate the TSP route to reach a target.
|
||||
|
||||
:param target_identifier: Identifier of the target entity
|
||||
:type target_identifier: str
|
||||
|
||||
:return: TSP route
|
||||
:rtype: List[int]
|
||||
"""
|
||||
start_time = time.time()
|
||||
|
||||
if self.cached_route is not None:
|
||||
#print(f" Used cached route: {self.cached_route}")
|
||||
return copy.deepcopy(self.cached_route)
|
||||
|
||||
else:
|
||||
positions = [x for x in self._env.state[target_identifier].positions if x != c.VALUE_NO_POS]
|
||||
if self.local_optimization:
|
||||
nodes = \
|
||||
[self.state.pos] + \
|
||||
[x for x in positions if max(abs(np.subtract(x, self.state.pos))) < 3]
|
||||
try:
|
||||
while len(nodes) < 7:
|
||||
nodes += [next(x for x in positions if x not in nodes)]
|
||||
except StopIteration:
|
||||
nodes = [self.state.pos] + positions
|
||||
|
||||
else:
|
||||
nodes = [self.state.pos] + positions
|
||||
|
||||
route = tsp.traveling_salesman_problem(self._position_graph,
|
||||
nodes=nodes, cycle=True, method=tsp.greedy_tsp)
|
||||
self.cached_route = copy.deepcopy(route)
|
||||
#print(f"Cached route: {self.cached_route}")
|
||||
|
||||
end_time = time.time()
|
||||
duration = end_time - start_time
|
||||
#print("TSP calculation took {:.2f} seconds to execute".format(duration))
|
||||
return route
|
||||
|
||||
def _door_is_close(self, state):
|
||||
"""
|
||||
Check if a door is close to the agent's position.
|
||||
@ -173,11 +173,8 @@ class TSPBaseAgent(ABC):
|
||||
action = next(action for action, pos_diff in MOVEMAP.items() if
|
||||
np.all(diff == pos_diff) and action in allowed_directions)
|
||||
except StopIteration:
|
||||
print(f"No valid action found for pos diff: {diff}. Using fallback action: {self.fallback_action}.")
|
||||
if self.fallback_action and any(self.fallback_action == action.name for action in self.state.actions):
|
||||
action = self.fallback_action
|
||||
else:
|
||||
action = choice(self.state.actions).name
|
||||
print(f"No valid action found for pos diff: {diff}. Using fallback action.")
|
||||
action = choice(self.state.actions).name
|
||||
else:
|
||||
action = choice(self.state.actions).name
|
||||
# noinspection PyUnboundLocalVariable
|
||||
|
@ -1,76 +0,0 @@
|
||||
import numpy as np
|
||||
|
||||
from marl_factory_grid.algorithms.static.TSP_base_agent import TSPBaseAgent
|
||||
|
||||
from marl_factory_grid.modules.items import constants as i
|
||||
from marl_factory_grid.environment import constants as c
|
||||
|
||||
future_planning = 7
|
||||
inventory_size = 3
|
||||
|
||||
MODE_GET = 'Mode_Get'
|
||||
MODE_BRING = 'Mode_Bring'
|
||||
|
||||
|
||||
class TSPItemAgent(TSPBaseAgent):
|
||||
|
||||
def __init__(self, *args, mode=MODE_GET, **kwargs):
|
||||
"""
|
||||
Initializes a TSPItemAgent that collects items in the environment, stores them in his inventory and drops them off
|
||||
at a drop-off location.
|
||||
|
||||
:param mode: Mode of the agent, either MODE_GET or MODE_BRING.
|
||||
"""
|
||||
super(TSPItemAgent, self).__init__(*args, **kwargs)
|
||||
self.mode = mode
|
||||
self.fallback_action = c.NOOP
|
||||
|
||||
def predict(self, *_, **__):
|
||||
item_at_position = self._env.state[i.ITEM].by_pos(self.state.pos)
|
||||
dropoff_at_position = self._env.state[i.DROP_OFF].by_pos(self.state.pos)
|
||||
if item_at_position:
|
||||
# Translate the action_object to an integer to have the same output as any other model
|
||||
action = i.ITEM_ACTION
|
||||
elif dropoff_at_position:
|
||||
# Translate the action_object to an integer to have the same output as any other model
|
||||
action = i.ITEM_ACTION
|
||||
elif door := self._door_is_close(self._env.state):
|
||||
action = self._use_door_or_move(door, i.DROP_OFF if self.mode == MODE_BRING else i.ITEM)
|
||||
else:
|
||||
action = self._choose()
|
||||
self.action_list.append(action)
|
||||
# Translate the action_object to an integer to have the same output as any other model
|
||||
try:
|
||||
action_obj = next(action_i for action_i, a in enumerate(self.state.actions) if a.name == action)
|
||||
except (StopIteration, UnboundLocalError):
|
||||
print('Will not happen')
|
||||
raise EnvironmentError
|
||||
# noinspection PyUnboundLocalVariable
|
||||
if self.mode == MODE_BRING and len(self._env[i.INVENTORY].by_entity(self.state)):
|
||||
pass
|
||||
elif self.mode == MODE_BRING and not len(self._env[i.INVENTORY].by_entity(self.state)):
|
||||
self.mode = MODE_GET
|
||||
elif self.mode == MODE_GET and len(self._env[i.INVENTORY].by_entity(self.state)) > inventory_size:
|
||||
self.mode = MODE_BRING
|
||||
else:
|
||||
pass
|
||||
return action_obj
|
||||
|
||||
def _choose(self):
|
||||
"""
|
||||
Internal Usage. Chooses the action based on the agent's mode and the environment state.
|
||||
|
||||
:return: Chosen action.
|
||||
:rtype: int
|
||||
"""
|
||||
target = i.DROP_OFF if self.mode == MODE_BRING else i.ITEM
|
||||
if len(self._env.state[i.ITEM]) >= 1:
|
||||
action = self._predict_move(target)
|
||||
|
||||
elif len(self._env[i.INVENTORY].by_entity(self.state)):
|
||||
self.mode = MODE_BRING
|
||||
action = self._predict_move(target)
|
||||
else:
|
||||
action = int(np.random.randint(self._env.action_space.n))
|
||||
# noinspection PyUnboundLocalVariable
|
||||
return action
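# --- Illustrative sketch (added example, not part of the original file) ----
# Hedged restatement of the mode switching in predict() above, expressed as a
# hypothetical helper: the agent flips from collecting (MODE_GET) to
# delivering (MODE_BRING) once it carries more than inventory_size items,
# and back to MODE_GET once its inventory is empty again.
def _next_mode(current_mode, items_carried):
    if current_mode == MODE_BRING and items_carried == 0:
        return MODE_GET
    if current_mode == MODE_GET and items_carried > inventory_size:
        return MODE_BRING
    return current_mode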
|
@ -1,27 +0,0 @@
|
||||
from random import randint
|
||||
|
||||
from marl_factory_grid.algorithms.static.TSP_base_agent import TSPBaseAgent
|
||||
|
||||
future_planning = 7
|
||||
|
||||
|
||||
class TSPRandomAgent(TSPBaseAgent):
|
||||
|
||||
def __init__(self, n_actions, *args, **kwargs):
|
||||
"""
|
||||
Initializes a TSPRandomAgent that performs random actions from within his action space.
|
||||
|
||||
:param n_actions: Number of possible actions.
|
||||
:type n_actions: int
|
||||
"""
|
||||
super(TSPRandomAgent, self).__init__(*args, **kwargs)
|
||||
self.n_action = n_actions
|
||||
|
||||
def predict(self, *_, **__):
|
||||
"""
|
||||
Predicts the next action randomly.
|
||||
|
||||
:return: Predicted action.
|
||||
:rtype: int
|
||||
"""
|
||||
return randint(0, self.n_action - 1)
|
@ -58,7 +58,7 @@ def load_yaml_file(path: Path):
|
||||
|
||||
def add_env_props(cfg):
|
||||
# Path to config File
|
||||
env_path = Path(f'../marl_factory_grid/configs/{cfg["env"]["env_name"]}.yaml')
|
||||
env_path = Path(f'../marl_factory_grid/environment/configs/{cfg["env"]["env_name"]}.yaml')
|
||||
|
||||
# Env Init
|
||||
factory = Factory(env_path)
|
||||
|
@ -1,66 +0,0 @@
|
||||
General:
|
||||
env_seed: 69
|
||||
individual_rewards: true
|
||||
level_name: obs_test_map
|
||||
pomdp_r: 0
|
||||
verbose: True
|
||||
tests: false
|
||||
|
||||
Agents:
|
||||
Wolfgang:
|
||||
Actions:
|
||||
- Noop
|
||||
Observations:
|
||||
- Walls
|
||||
- Doors
|
||||
- Other
|
||||
- DirtPiles
|
||||
Positions:
|
||||
- (1, 3)
|
||||
|
||||
Soeren:
|
||||
Actions:
|
||||
- Noop
|
||||
Observations:
|
||||
- Walls
|
||||
- Doors
|
||||
- Other
|
||||
- DirtPiles
|
||||
Positions:
|
||||
- (1, 1)
|
||||
|
||||
Juergen:
|
||||
Actions:
|
||||
- Noop
|
||||
Observations:
|
||||
- Walls
|
||||
- Doors
|
||||
- Other
|
||||
- DirtPiles
|
||||
Positions:
|
||||
- (1, 2)
|
||||
|
||||
Walter:
|
||||
Actions:
|
||||
- Noop
|
||||
Observations:
|
||||
- Walls
|
||||
- Doors
|
||||
- Other
|
||||
- DirtPiles
|
||||
Positions:
|
||||
- (1, 4)
|
||||
|
||||
|
||||
Entities:
|
||||
DirtPiles:
|
||||
Doors:
|
||||
|
||||
Rules:
|
||||
# Utilities
|
||||
WatchCollisions:
|
||||
done_at_collisions: false
|
||||
|
||||
# Done Conditions
|
||||
DoneAtMaxStepsReached:
|
||||
max_steps: 500
|
@ -1,92 +0,0 @@
|
||||
General:
|
||||
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
|
||||
env_seed: 69
|
||||
# Individual vs global rewards
|
||||
individual_rewards: true
|
||||
# The level.txt file to load from marl_factory_grid/levels
|
||||
level_name: rooms
|
||||
# Radius of Partially observable Markov decision process
|
||||
pomdp_r: 3
|
||||
# Print all messages and events
|
||||
verbose: false
|
||||
# Run tests
|
||||
tests: false
|
||||
|
||||
# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
|
||||
# other agents aim to clean dirt piles.
|
||||
Agents:
|
||||
# The clean agents
|
||||
Wolfgang:
|
||||
Actions:
|
||||
- Move8
|
||||
- DoorUse
|
||||
- Clean
|
||||
- Noop
|
||||
Observations:
|
||||
- Walls
|
||||
- Doors
|
||||
- Other
|
||||
- DirtPiles
|
||||
Clones: 8
|
||||
|
||||
# The item agent
|
||||
Juergen:
|
||||
Actions:
|
||||
- Move8
|
||||
- DoorUse
|
||||
- ItemAction
|
||||
- Noop
|
||||
Observations:
|
||||
- Walls
|
||||
- Doors
|
||||
- Other
|
||||
- Items
|
||||
- DropOffLocations
|
||||
- Inventory
|
||||
|
||||
Entities:
|
||||
DirtPiles:
|
||||
coords_or_quantity: 10
|
||||
initial_amount: 2
|
||||
clean_amount: 1
|
||||
dirt_spawn_r_var: 0.1
|
||||
max_global_amount: 20
|
||||
max_local_amount: 5
|
||||
Doors:
|
||||
DropOffLocations:
|
||||
coords_or_quantity: 1
|
||||
max_dropoff_storage_size: 0
|
||||
Inventories: { }
|
||||
Items:
|
||||
coords_or_quantity: 5
|
||||
|
||||
# Rules section specifies the rules governing the dynamics of the environment.
|
||||
Rules:
|
||||
# Environment Dynamics
|
||||
# When stepping over a dirt pile, entities carry a ratio of the dirt to their next position
|
||||
EntitiesSmearDirtOnMove:
|
||||
smear_ratio: 0.2
|
||||
# Doors automatically close after a certain number of time steps
|
||||
DoorAutoClose:
|
||||
close_frequency: 7
|
||||
|
||||
# Respawn Stuff
|
||||
# Define how dirt should respawn after the initial spawn
|
||||
RespawnDirt:
|
||||
respawn_freq: 30
|
||||
# Define how items should respawn after the initial spawn
|
||||
RespawnItems:
|
||||
respawn_freq: 50
|
||||
|
||||
# Utilities
|
||||
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
|
||||
# Can be omitted/ignored if you do not want to take care of collisions at all.
|
||||
WatchCollisions:
|
||||
done_at_collisions: false
|
||||
|
||||
# Done Conditions
|
||||
# Define the conditions for the environment to stop. Either success or a fail conditions.
|
||||
# The environment stops when all dirt is cleaned
|
||||
DoneOnAllDirtCleaned:
|
||||
DoneAtMaxStepsReached:
|
||||
max_steps: 500
|
@ -1,73 +0,0 @@
|
||||
General:
|
||||
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
|
||||
env_seed: 69
|
||||
# Individual vs global rewards
|
||||
individual_rewards: true
|
||||
# The level.txt file to load from marl_factory_grid/levels
|
||||
level_name: quadrant
|
||||
# Radius of Partially observable Markov decision process
|
||||
pomdp_r: 0 # default 3
|
||||
# Print all messages and events
|
||||
verbose: false
|
||||
# Run tests
|
||||
tests: false
|
||||
|
||||
# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
|
||||
# other agents aim to clean dirt piles.
|
||||
Agents:
|
||||
# The clean agents
|
||||
Sigmund:
|
||||
Actions:
|
||||
- Move4
|
||||
#- Clean
|
||||
#- Noop
|
||||
Observations:
|
||||
- DirtPiles
|
||||
- Self
|
||||
Positions:
|
||||
- (9,1)
|
||||
- (4,5)
|
||||
- (1,1)
|
||||
- (4,5)
|
||||
- (9,1)
|
||||
- (9,9)
|
||||
Wolfgang:
|
||||
Actions:
|
||||
- Move4
|
||||
#- Clean
|
||||
#- Noop
|
||||
Observations:
|
||||
- DirtPiles
|
||||
- Self
|
||||
Positions:
|
||||
- (9,5)
|
||||
- (4,5)
|
||||
- (1,1)
|
||||
- (4,5)
|
||||
- (9,5)
|
||||
- (9,9)
|
||||
|
||||
Entities:
|
||||
DirtPiles:
|
||||
coords_or_quantity: (9,9), (1,1), (4,5) # (4,7), (2,4), (1, 1) #(1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
|
||||
initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field can remove the dirt in one action
|
||||
clean_amount: 1
|
||||
dirt_spawn_r_var: 0
|
||||
max_global_amount: 12
|
||||
max_local_amount: 1
|
||||
|
||||
# Rules section specifies the rules governing the dynamics of the environment.
|
||||
Rules:
|
||||
|
||||
# Utilities
|
||||
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
|
||||
# Can be omitted/ignored if you do not want to take care of collisions at all.
|
||||
WatchCollisions:
|
||||
done_at_collisions: false
|
||||
|
||||
# Done Conditions
|
||||
# Define the conditions for the environment to stop. Either success or a fail conditions.
|
||||
# The environment stops when all dirt is cleaned
|
||||
DoneOnAllDirtCleaned:
|
||||
#DoneAtMaxStepsReached: # An episode should last for at most max_steps steps
|
||||
#max_steps: 100
|
@ -1,146 +0,0 @@
|
||||
# Default Configuration File
|
||||
|
||||
General:
|
||||
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
|
||||
env_seed: 69
|
||||
# Individual vs global rewards
|
||||
individual_rewards: true
|
||||
# The level.txt file to load from marl_factory_grid/levels
|
||||
level_name: large
|
||||
# View Radius; 0 = full observability
|
||||
pomdp_r: 3
|
||||
# Print all messages and events
|
||||
verbose: false
|
||||
# Run tests
|
||||
tests: false
|
||||
|
||||
# Agents section defines the characteristics of different agents in the environment.
|
||||
|
||||
# An Agent requires a list of actions and observations.
|
||||
# Possible actions: Noop, Charge, Clean, DestAction, DoorUse, ItemAction, MachineAction, Move8, Move4, North, NorthEast, ...
|
||||
# Possible observations: All, Combined, GlobalPosition, Battery, ChargePods, DirtPiles, Destinations, Doors, Items, Inventory, DropOffLocations, Maintainers, ...
|
||||
# You can use 'clone' as the agent name to have multiple instances with either a list of names or an int specifying the number of clones.
|
||||
Agents:
|
||||
Wolfgang:
|
||||
Actions:
|
||||
- Noop
|
||||
- Charge
|
||||
- Clean
|
||||
- DestAction
|
||||
- DoorUse
|
||||
- ItemAction
|
||||
- Move8
|
||||
Observations:
|
||||
- Combined:
|
||||
- Other
|
||||
- Walls
|
||||
- GlobalPosition
|
||||
- Battery
|
||||
- ChargePods
|
||||
- DirtPiles
|
||||
- Destinations
|
||||
- Doors
|
||||
- Items
|
||||
- Inventory
|
||||
- DropOffLocations
|
||||
- Maintainers
|
||||
|
||||
# Entities section defines the initial parameters and behaviors of different entities in the environment.
|
||||
# Entities all spawn using coords_or_quantity, a number of entities or coordinates to place them.
|
||||
Entities:
|
||||
# Batteries: Entities representing power sources for agents.
|
||||
Batteries:
|
||||
initial_charge: 0.8
|
||||
per_action_costs: 0.02
|
||||
|
||||
# ChargePods: Entities representing charging stations for Batteries.
|
||||
ChargePods:
|
||||
coords_or_quantity: 2
|
||||
|
||||
# Destinations: Entities representing target locations for agents.
|
||||
# - spawn_mode: GROUPED or SINGLE. Determines how destinations are spawned.
|
||||
Destinations:
|
||||
coords_or_quantity: 1
|
||||
spawn_mode: GROUPED
|
||||
|
||||
# DirtPiles: Entities representing piles of dirt.
|
||||
# - initial_amount: Initial amount of dirt in each pile.
|
||||
# - clean_amount: Amount of dirt cleaned in each cleaning action.
|
||||
# - dirt_spawn_r_var: Random variation in dirt spawn amounts.
|
||||
# - max_global_amount: Maximum total amount of dirt allowed in the environment.
|
||||
# - max_local_amount: Maximum amount of dirt allowed in one position.
|
||||
DirtPiles:
|
||||
coords_or_quantity: 10
|
||||
initial_amount: 2
|
||||
clean_amount: 1
|
||||
dirt_spawn_r_var: 0.1
|
||||
max_global_amount: 20
|
||||
max_local_amount: 5
|
||||
|
||||
# Doors are spawned using the level map.
|
||||
Doors:
|
||||
|
||||
# DropOffLocations: Entities representing locations where agents can drop off items.
|
||||
# - max_dropoff_storage_size: Maximum storage capacity at each drop-off location.
|
||||
DropOffLocations:
|
||||
coords_or_quantity: 1
|
||||
max_dropoff_storage_size: 0
|
||||
|
||||
# GlobalPositions.
|
||||
GlobalPositions: { }
|
||||
|
||||
# Inventories: Entities representing inventories for agents.
|
||||
Inventories: { }
|
||||
|
||||
# Items: Entities representing items in the environment.
|
||||
Items:
|
||||
coords_or_quantity: 5
|
||||
|
||||
# Machines: Entities representing machines in the environment.
|
||||
Machines:
|
||||
coords_or_quantity: 2
|
||||
|
||||
# Maintainers: Entities representing maintainers that aim to maintain machines.
|
||||
Maintainers:
|
||||
coords_or_quantity: 1
|
||||
|
||||
|
||||
# Rules section specifies the rules governing the dynamics of the environment.
|
||||
Rules:
|
||||
# Environment Dynamics
|
||||
# When stepping over a dirt pile, entities carry a ratio of the dirt to their next position
|
||||
EntitiesSmearDirtOnMove:
|
||||
smear_ratio: 0.2
|
||||
# Doors automatically close after a certain number of time steps
|
||||
DoorAutoClose:
|
||||
close_frequency: 10
|
||||
# Maintainers move at every time step
|
||||
MoveMaintainers:
|
||||
|
||||
# Respawn Stuff
|
||||
# Define how dirt should respawn after the initial spawn
|
||||
RespawnDirt:
|
||||
respawn_freq: 15
|
||||
# Define how items should respawn after the initial spawn
|
||||
RespawnItems:
|
||||
respawn_freq: 15
|
||||
|
||||
# Utilities
|
||||
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
|
||||
# Can be omitted/ignored if you do not want to take care of collisions at all.
|
||||
WatchCollisions:
|
||||
done_at_collisions: false
|
||||
|
||||
# Done Conditions
|
||||
# Define the conditions for the environment to stop. Either success or a fail conditions.
|
||||
# The environment stops when an agent reaches a destination
|
||||
DoneAtDestinationReach:
|
||||
# The environment stops when all dirt is cleaned
|
||||
DoneOnAllDirtCleaned:
|
||||
# The environment stops when a battery is discharged
|
||||
DoneAtBatteryDischarge:
|
||||
# The environment stops when a maintainer reports a collision
|
||||
DoneAtMaintainerCollision:
|
||||
# The environment stops after max steps
|
||||
DoneAtMaxStepsReached:
|
||||
max_steps: 500
|
@ -1,89 +0,0 @@
|
||||
# General env. settings.
|
||||
General:
|
||||
# Just the best seed.
|
||||
env_seed: 69
|
||||
# Each agent receives an individual reward.
|
||||
individual_rewards: true
|
||||
# level file to load from .\levels\.
|
||||
level_name: eight_puzzle
|
||||
# Partial Observability. 0 = Full Observation.
|
||||
pomdp_r: 0
|
||||
# Please do not spam me.
|
||||
verbose: false
|
||||
# Do not touch, WIP
|
||||
tests: false
|
||||
|
||||
# RL Surrogates
|
||||
Agents:
|
||||
# This defines the name of the agent. UTF-8
|
||||
Wolfgang:
|
||||
# Section which defines the available Actions per Agent
|
||||
Actions:
|
||||
# Move4 adds 4 actions [`North`, `East`, `South`, `West`]
|
||||
Move4:
|
||||
# Reward specification which differ from the default.
|
||||
# Agent does a valid move in the environment, i.e. he actually moves.
|
||||
valid_reward: -0.1
|
||||
# Agent wants to move, but fails.
|
||||
fail_reward: 0
|
||||
# NOOP aka agent does not do a thing.
|
||||
Noop:
|
||||
# The Agent decides not to do anything, which is always valid.
|
||||
valid_reward: 0
|
||||
# Does not do anything, just using the same interface.
|
||||
fail_reward: 0
|
||||
# What the agent wants to see.
|
||||
Observations:
|
||||
# The agent...
|
||||
# sees other agents, but not himself.
|
||||
- Other
|
||||
# wants to see walls
|
||||
- Walls
|
||||
# sees his associated Destination (singular). Use the plural `Destinations` to see all destinations.
|
||||
- Destination
|
||||
# You want to have 7 clones; it is also possible to name them by giving a list of names.
|
||||
Clones: 7
|
||||
# Agents are blocking their grid position from being entered by others.
|
||||
is_blocking_pos: true
|
||||
# Apart from agents, which additional entities do you want to load?
|
||||
Entities:
|
||||
# Observable destinations, which can be reached by stepping on the same position. Has additional parameters...
|
||||
Destinations:
|
||||
# Let them spawn on closed doors and agent positions
|
||||
ignore_blocking: true
|
||||
# For 8-Puzzle, we need a special spawn rule...
|
||||
spawnrule:
|
||||
# ...which spawns a single destination directly underneath its associated agent.
|
||||
SpawnDestinationOnAgent: {} # There are no parameters, so we state empty kwargs.
|
||||
|
||||
# This section defines which operations are performed besides the agents' actions.
|
||||
# Without this section nothing happens, not even Done-condition checks.
|
||||
# Also, situation-based rewards are specified this way.
|
||||
Rules:
|
||||
## Utilities
|
||||
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
|
||||
# Can be omitted/ignored if you do not want to take care of collisions at all.
|
||||
# This does not mean that agents cannot collide; collisions are just ignored.
|
||||
WatchCollisions:
|
||||
reward: 0
|
||||
done_at_collisions: false
|
||||
|
||||
# In 8 Puzzle, do not randomize the start positions; rather, move a random agent onto the single free position n times.
|
||||
DoRandomInitialSteps:
|
||||
# How many times?
|
||||
random_steps: 2
|
||||
|
||||
## Done Conditions
|
||||
# Maximum steps per episode. There is no reward for failing.
|
||||
DoneAtMaxStepsReached:
|
||||
# After how many steps should the episode end?
|
||||
max_steps: 200
|
||||
|
||||
# For 8 Puzzle we need a done condition that checks whether destinations have been reached, so...
|
||||
DoneAtDestinationReach:
|
||||
# On every step, should there be a reward for agents that reach their associated destination? No!
|
||||
dest_reach_reward: 0 # Do not touch. This is useful in other settings!
|
||||
# Reward should only be given when all destinations are reached in parallel!
|
||||
condition: "simultaneous"
|
||||
# Reward if this is the case. Granted to each agent when all agents are at their target position simultaneously.
|
||||
reward_at_done: 1
|
@ -1,92 +0,0 @@
|
||||
General:
|
||||
# Your Seed
|
||||
env_seed: 69
|
||||
# Individual vs global rewards
|
||||
individual_rewards: true
|
||||
# The level.txt file to load from marl_factory_grid/levels
|
||||
level_name: narrow_corridor
|
||||
# View Radius; 0 = full observability
|
||||
pomdp_r: 0
|
||||
# print all messages and events
|
||||
verbose: true
|
||||
# Run tests
|
||||
tests: false
|
||||
|
||||
Agents:
|
||||
# Agents are identified by their name
|
||||
Wolfgang:
|
||||
# The available actions for this particular agent
|
||||
Actions:
|
||||
# Able to do nothing
|
||||
- Noop
|
||||
# Able to move in all 8 directions
|
||||
- Move8
|
||||
# Stuff the agent can observe (per 2d slice)
|
||||
# use "Combined" if you want to merge multiple slices into one
|
||||
Observations:
|
||||
# He sees walls
|
||||
- Walls
|
||||
# he sees the other agent; "Karl-Heinz" in this setting would be fine, too
|
||||
- Other
|
||||
# He can see the Destinations that are assigned to him (hence the singular)
|
||||
- Destination
|
||||
# Available Spawn Positions as list
|
||||
Positions:
|
||||
- (2, 1)
|
||||
- (2, 5)
|
||||
# Agents block their grid position from being entered by others, so that
|
||||
# they cannot end up on the same position
|
||||
is_blocking_pos: true
|
||||
# See Above....
|
||||
Karl-Heinz:
|
||||
Actions:
|
||||
- Noop
|
||||
- Move8
|
||||
Observations:
|
||||
- Walls
|
||||
- Other
|
||||
- Destination
|
||||
Positions:
|
||||
- (2, 1)
|
||||
- (2, 5)
|
||||
is_blocking_pos: true
|
||||
|
||||
# Other noteworthy Entities
|
||||
Entities:
|
||||
# The destinations or positional targets to reach
|
||||
Destinations:
|
||||
# Let them spawn on closed doors and agent positions
|
||||
ignore_blocking: true
|
||||
# We need a special spawn rule...
|
||||
spawnrule:
|
||||
# ...which assigns the destinations per agent
|
||||
SpawnDestinationsPerAgent:
|
||||
# we use this parameter
|
||||
coords_or_quantity:
|
||||
# to enable and assign special positions per agent
|
||||
Wolfgang:
|
||||
- (2, 1)
|
||||
- (2, 5)
|
||||
Karl-Heinz:
|
||||
- (2, 1)
|
||||
- (2, 5)
|
||||
# Whether you want to provide a numeric Position observation.
|
||||
# GlobalPositions:
|
||||
# normalized: false
|
||||
|
||||
# Define the env. dynamics
|
||||
Rules:
|
||||
# Utilities
|
||||
# This rule checks for collisions; it also assigns the (negative) reward
|
||||
WatchCollisions:
|
||||
reward: -0.1
|
||||
reward_at_done: -1
|
||||
done_at_collisions: false
|
||||
# Done Conditions
|
||||
# Load any of the rules, to check for done conditions.
|
||||
DoneAtDestinationReach:
|
||||
reward_at_done: 1
|
||||
# We want to give rewards only, when all targets have been reached.
|
||||
condition: "all"
|
||||
DoneAtMaxStepsReached:
|
||||
max_steps: 200
|
@ -1,70 +0,0 @@
|
||||
General:
|
||||
# Your Seed
|
||||
env_seed: 69
|
||||
# Individual vs global rewards
|
||||
individual_rewards: true
|
||||
level_name: simple_crossing
|
||||
# View Radius; 0 = full observability
|
||||
pomdp_r: 0
|
||||
verbose: false
|
||||
tests: false
|
||||
|
||||
Agents:
|
||||
Agent_horizontal:
|
||||
Actions:
|
||||
- Noop
|
||||
- Move4
|
||||
Observations:
|
||||
- Walls
|
||||
- Other
|
||||
- Destination
|
||||
# Available Spawn Positions as list
|
||||
Positions:
|
||||
- (2,1)
|
||||
# It is okay to collide with other agents, so that
|
||||
# they end up on the same position
|
||||
is_blocking_pos: false
|
||||
Agent_vertical:
|
||||
Actions:
|
||||
- Noop
|
||||
- Move4
|
||||
Observations:
|
||||
- Walls
|
||||
- Other
|
||||
- Destination
|
||||
Positions:
|
||||
- (1,2)
|
||||
is_blocking_pos: false
|
||||
|
||||
# Other noteworthy Entities
|
||||
Entities:
|
||||
Destinations:
|
||||
# Let them spawn on closed doors and agent positions
|
||||
ignore_blocking: true
|
||||
spawnrule:
|
||||
SpawnDestinationsPerAgent:
|
||||
coords_or_quantity:
|
||||
Agent_horizontal:
|
||||
- (2,3)
|
||||
Agent_vertical:
|
||||
- (3,2)
|
||||
# Whether you want to provide a numeric Position observation.
|
||||
# GlobalPositions:
|
||||
# normalized: false
|
||||
|
||||
# Define the env. dynamics
|
||||
Rules:
|
||||
# Utilities
|
||||
# This rule checks for collisions; it also assigns the (negative) reward
|
||||
WatchCollisions:
|
||||
reward: -0.1
|
||||
reward_at_done: -1
|
||||
done_at_collisions: false
|
||||
# Done Conditions
|
||||
# Load any of the rules, to check for done conditions.
|
||||
DoneAtDestinationReach:
|
||||
reward_at_done: 1
|
||||
# We want to give rewards only, when all targets have been reached.
|
||||
condition: "all"
|
||||
DoneAtMaxStepsReached:
|
||||
max_steps: 200
|
@ -1,124 +0,0 @@
|
||||
Agents:
|
||||
# Clean test agent:
|
||||
# Actions:
|
||||
# - Noop
|
||||
# - Charge
|
||||
# - Clean
|
||||
# - DoorUse
|
||||
# - Move8
|
||||
# Observations:
|
||||
# - Combined:
|
||||
# - Other
|
||||
# - Walls
|
||||
# - GlobalPosition
|
||||
# - Battery
|
||||
# - ChargePods
|
||||
# - DirtPiles
|
||||
# - Destinations
|
||||
# - Doors
|
||||
# - Maintainers
|
||||
# Clones: 0
|
||||
# Item test agent:
|
||||
# Actions:
|
||||
# - Noop
|
||||
# - Charge
|
||||
# - DestAction
|
||||
# - DoorUse
|
||||
# - ItemAction
|
||||
# - Move8
|
||||
# Observations:
|
||||
# - Combined:
|
||||
# - Other
|
||||
# - Walls
|
||||
# - GlobalPosition
|
||||
# - Battery
|
||||
# - ChargePods
|
||||
# - Destinations
|
||||
# - Doors
|
||||
# - Items
|
||||
# - Inventory
|
||||
# - DropOffLocations
|
||||
# - Maintainers
|
||||
# Clones: 0
|
||||
Target test agent:
|
||||
Actions:
|
||||
- Noop
|
||||
- Charge
|
||||
- DoorUse
|
||||
- Move8
|
||||
Observations:
|
||||
- Combined:
|
||||
- Other
|
||||
- Walls
|
||||
- GlobalPosition
|
||||
- Battery
|
||||
- Destinations
|
||||
- Doors
|
||||
- Maintainers
|
||||
Clones: 1
|
||||
|
||||
Entities:
|
||||
|
||||
Batteries:
|
||||
initial_charge: 0.8
|
||||
per_action_costs: 0.02
|
||||
ChargePods:
|
||||
coords_or_quantity: 2
|
||||
Destinations:
|
||||
coords_or_quantity: 1
|
||||
spawn_mode: GROUPED
|
||||
DirtPiles:
|
||||
coords_or_quantity: 10
|
||||
initial_amount: 2
|
||||
clean_amount: 1
|
||||
dirt_spawn_r_var: 0.1
|
||||
max_global_amount: 20
|
||||
max_local_amount: 5
|
||||
Doors:
|
||||
DropOffLocations:
|
||||
coords_or_quantity: 1
|
||||
max_dropoff_storage_size: 0
|
||||
GlobalPositions: {}
|
||||
Inventories: {}
|
||||
Items:
|
||||
coords_or_quantity: 5
|
||||
Machines:
|
||||
coords_or_quantity: 2
|
||||
Maintainers:
|
||||
coords_or_quantity: 1
|
||||
|
||||
General:
|
||||
env_seed: 69
|
||||
individual_rewards: true
|
||||
level_name: quadrant
|
||||
pomdp_r: 3
|
||||
verbose: false
|
||||
tests: false
|
||||
|
||||
Rules:
|
||||
# Environment Dynamics
|
||||
EntitiesSmearDirtOnMove:
|
||||
smear_ratio: 0.2
|
||||
DoorAutoClose:
|
||||
close_frequency: 10
|
||||
MoveMaintainers:
|
||||
|
||||
# Respawn Stuff
|
||||
RespawnDirt:
|
||||
respawn_freq: 15
|
||||
RespawnItems:
|
||||
respawn_freq: 15
|
||||
|
||||
# Utilities
|
||||
WatchCollisions:
|
||||
done_at_collisions: false
|
||||
|
||||
# Done Conditions
|
||||
DoneAtMaxStepsReached:
|
||||
max_steps: 20
|
||||
|
||||
Tests:
|
||||
# MaintainerTest: {}
|
||||
# DirtAgentTest: {}
|
||||
# ItemAgentTest: {}
|
||||
# TargetAgentTest: {}
|
@ -1,69 +0,0 @@
|
||||
General:
|
||||
env_seed: 69
|
||||
# Individual vs global rewards
|
||||
individual_rewards: true
|
||||
# The level.txt file to load from marl_factory_grid/levels
|
||||
level_name: two_rooms
|
||||
# View Radius; 0 = full observability
|
||||
pomdp_r: 3
|
||||
# Print all messages and events
|
||||
verbose: false
|
||||
# Run tests
|
||||
tests: false
|
||||
|
||||
# In "two rooms one door" scenario 2 agents spawn in 2 different rooms that are connected by a single door. Their aim
|
||||
# is to reach the destination in the room they didn't spawn in, leading to a conflict at the door.
|
||||
Agents:
|
||||
Wolfgang:
|
||||
Actions:
|
||||
- Move8
|
||||
- Noop
|
||||
- DestAction
|
||||
- DoorUse
|
||||
Observations:
|
||||
- Walls
|
||||
- Other
|
||||
- Doors
|
||||
- Destination
|
||||
Sigmund:
|
||||
Actions:
|
||||
- Move8
|
||||
- Noop
|
||||
- DestAction
|
||||
- DoorUse
|
||||
Observations:
|
||||
- Combined:
|
||||
- Other
|
||||
- Walls
|
||||
- Destination
|
||||
- Doors
|
||||
|
||||
Entities:
|
||||
Destinations:
|
||||
spawnrule:
|
||||
SpawnDestinationsPerAgent:
|
||||
coords_or_quantity:
|
||||
Wolfgang:
|
||||
- (6,12)
|
||||
Sigmund:
|
||||
- (6, 2)
|
||||
|
||||
Doors: { }
|
||||
GlobalPositions: { }
|
||||
|
||||
Rules:
|
||||
# Environment Dynamics
|
||||
DoorAutoClose:
|
||||
close_frequency: 10
|
||||
|
||||
# Utilities
|
||||
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
|
||||
WatchCollisions:
|
||||
done_at_collisions: false
|
||||
|
||||
# Init
|
||||
AssignGlobalPositions: { }
|
||||
|
||||
# Done Conditions
|
||||
DoneAtMaxStepsReached:
|
||||
max_steps: 10
|
@ -3,7 +3,7 @@ General:
|
||||
# Individual vs global rewards
|
||||
individual_rewards: true
|
||||
# The level.txt file to load from marl_factory_grid/levels
|
||||
level_name: two_rooms_modified
|
||||
level_name: two_rooms
|
||||
# View Radius; 0 = full observability
|
||||
pomdp_r: 0
|
||||
# Print all messages and events
|
@ -3,7 +3,7 @@ General:
|
||||
# Individual vs global rewards
|
||||
individual_rewards: true
|
||||
# The level.txt file to load from marl_factory_grid/levels
|
||||
level_name: two_rooms_modified
|
||||
level_name: two_rooms
|
||||
# View Radius; 0 = full observability
|
||||
pomdp_r: 0
|
||||
# Print all messages and events
|
@ -3,7 +3,7 @@ General:
|
||||
# Individual vs global rewards
|
||||
individual_rewards: true
|
||||
# The level.txt file to load from marl_factory_grid/levels
|
||||
level_name: two_rooms_modified
|
||||
level_name: two_rooms
|
||||
# View Radius; 0 = full observability
|
||||
pomdp_r: 0
|
||||
# Print all messages and events
|
@ -3,7 +3,7 @@ General:
|
||||
# Individual vs global rewards
|
||||
individual_rewards: true
|
||||
# The level.txt file to load from marl_factory_grid/levels
|
||||
level_name: two_rooms_modified
|
||||
level_name: two_rooms
|
||||
# View Radius; 0 = full observability
|
||||
pomdp_r: 0
|
||||
# Print all messages and events
|
@ -3,7 +3,7 @@ General:
|
||||
# Individual vs global rewards
|
||||
individual_rewards: true
|
||||
# The level.txt file to load from marl_factory_grid/levels
|
||||
level_name: two_rooms_modified
|
||||
level_name: two_rooms
|
||||
# View Radius; 0 = full observability
|
||||
pomdp_r: 0
|
||||
# Print all messages and events
|
@ -109,7 +109,6 @@ class Factory(gym.Env):
|
||||
|
||||
# expensive - don't use unless required!
|
||||
self._renderer = None
|
||||
self._recorder = None
|
||||
|
||||
# Init entities
|
||||
entities = self.map.do_init()
|
||||
@ -278,7 +277,7 @@ class Factory(gym.Env):
|
||||
for render_entity in render_entities:
|
||||
if render_entity.name == c.AGENT:
|
||||
render_entity.aux = self.obs_builder.curr_lightmaps[render_entity.real_name]
|
||||
return self._renderer.render(render_entities, self._recorder)
|
||||
return self._renderer.render(render_entities)
|
||||
|
||||
def set_recorder(self, recorder):
|
||||
self._recorder = recorder
|
||||
|
@ -1,5 +1,5 @@
|
||||
MOVEMENTS_VALID: float = -1 # default: -0.001
|
||||
MOVEMENTS_FAIL: float = -1 # default: -0.05
|
||||
MOVEMENTS_VALID: float = -1
|
||||
MOVEMENTS_FAIL: float = -1
|
||||
NOOP: float = -1
|
||||
COLLISION: float = -1
|
||||
COLLISION_DONE: float = -1
|
||||
|
@ -1,11 +1,6 @@
|
||||
import unittest
|
||||
from typing import List
|
||||
|
||||
import marl_factory_grid.modules.maintenance.constants as M
|
||||
from marl_factory_grid.environment.entity.agent import Agent
|
||||
from marl_factory_grid.modules import Door, Machine, DirtPile, Item, DropOffLocation, ItemAction
|
||||
from marl_factory_grid.utils.results import TickResult, DoneResult, ActionResult
|
||||
import marl_factory_grid.environment.constants as c
|
||||
from marl_factory_grid.utils.results import TickResult, DoneResult
|
||||
|
||||
|
||||
class Test(unittest.TestCase):
|
||||
@ -41,235 +36,3 @@ class Test(unittest.TestCase):
|
||||
|
||||
def on_check_done(self, state) -> List[DoneResult]:
|
||||
return []
|
||||
|
||||
|
||||
class MaintainerTest(Test):
|
||||
|
||||
def __init__(self):
|
||||
"""
|
||||
Tests whether the maintainer performs the correct actions and whether his actions register correctly in the env.
|
||||
"""
|
||||
super().__init__()
|
||||
self.temp_state_dict = {}
|
||||
pass
|
||||
|
||||
def tick_step(self, state) -> List[TickResult]:
|
||||
for maintainer in state.entities[M.MAINTAINERS]:
|
||||
self.assertIsInstance(maintainer.state, (ActionResult, TickResult))
|
||||
# print(f"state validity maintainer: {maintainer.state.validity}")
|
||||
|
||||
# will open doors when standing in front
|
||||
if maintainer._closed_door_in_path(state):
|
||||
self.assertEqual(maintainer.get_move_action(state).name, 'use_door')
|
||||
|
||||
# if maintainer._next and not maintainer._path:
|
||||
# finds valid targets when at target location
|
||||
# route = maintainer.calculate_route(maintainer._last[-1], state.floortile_graph)
|
||||
# if entities_at_target_location := [entity for entity in state.entities.by_pos(route[-1])]:
|
||||
# self.assertTrue(any(isinstance(e, Machine) for e in entities_at_target_location))
|
||||
return []
|
||||
|
||||
def tick_post_step(self, state) -> List[TickResult]:
|
||||
# do maintainers' actions have correct effects on environment i.e. doors open, machines heal
|
||||
for maintainer in state.entities[M.MAINTAINERS]:
|
||||
if maintainer._path and self.temp_state_dict != {}:
|
||||
if maintainer.identifier in self.temp_state_dict:
|
||||
last_action = self.temp_state_dict[maintainer.identifier]
|
||||
if last_action.identifier == 'DoorUse':
|
||||
if door := next((entity for entity in state.entities.get_entities_near_pos(maintainer.pos) if
|
||||
isinstance(entity, Door)), None):
|
||||
agents_near_door = [agent for agent in state.entities.get_entities_near_pos(door.pos) if
|
||||
isinstance(agent, Agent)]
|
||||
if len(agents_near_door) < 2:
|
||||
self.assertTrue(door.is_open)
|
||||
if last_action.identifier == 'MachineAction':
|
||||
if machine := next((entity for entity in state.entities.get_entities_near_pos(maintainer.pos) if
|
||||
isinstance(entity, Machine)), None):
|
||||
self.assertEqual(machine.health, 100)
|
||||
return []
|
||||
|
||||
def on_check_done(self, state) -> List[DoneResult]:
|
||||
# clear the dict; since the maintainer identifier increments each run, the dict would otherwise fill up over episodes
|
||||
self.temp_state_dict = {}
|
||||
for maintainer in state.entities[M.MAINTAINERS]:
|
||||
temp_state = maintainer._status
|
||||
if isinstance(temp_state, (ActionResult, TickResult)):
|
||||
# print(f"maintainer {temp_state}")
|
||||
self.temp_state_dict[maintainer.identifier] = temp_state
|
||||
else:
|
||||
self.temp_state_dict[maintainer.identifier] = None
|
||||
return []
|
||||
|
||||
|
||||
class DirtAgentTest(Test):
|
||||
|
||||
def __init__(self):
|
||||
"""
|
||||
Tests whether the dirt agent will perform the correct actions and whether the actions register correctly in the
|
||||
environment.
|
||||
"""
|
||||
super().__init__()
|
||||
self.temp_state_dict = {}
|
||||
pass
|
||||
|
||||
def on_init(self, state, lvl_map):
|
||||
return []
|
||||
|
||||
def on_reset(self):
|
||||
return []
|
||||
|
||||
def tick_step(self, state) -> List[TickResult]:
|
||||
for dirtagent in [a for a in state.entities[c.AGENT] if "Clean" in a.identifier]: # isinstance TSPDirtAgent
|
||||
# state usually is an actionresult but after a crash, tickresults are reported
|
||||
self.assertIsInstance(dirtagent.state, (ActionResult, TickResult))
|
||||
# print(f"state validity dirtagent: {dirtagent.state.validity}")
|
||||
return []
|
||||
|
||||
def tick_post_step(self, state) -> List[TickResult]:
|
||||
# do agents' actions have correct effects on environment i.e. doors open, dirt is cleaned
|
||||
for dirtagent in [a for a in state.entities[c.AGENT] if "Clean" in a.identifier]: # isinstance TSPDirtAgent
|
||||
if self.temp_state_dict != {}:
|
||||
last_action = self.temp_state_dict[dirtagent.identifier]
|
||||
if last_action.identifier == 'DoorUse':
|
||||
if door := next((entity for entity in state.entities.get_entities_near_pos(dirtagent.pos) if
|
||||
isinstance(entity, Door)), None):
|
||||
agents_near_door = [agent for agent in state.entities.get_entities_near_pos(door.pos) if
|
||||
isinstance(agent, Agent)]
|
||||
if len(agents_near_door) < 2:
|
||||
# self.assertTrue(door.is_open)
|
||||
if door.is_closed:
|
||||
print("door should be open but seems closed.")
|
||||
if last_action.identifier == 'Clean':
|
||||
if dirt := next((entity for entity in state.entities.get_entities_near_pos(dirtagent.pos) if
|
||||
isinstance(entity, DirtPile)), None):
|
||||
# print(f"dirt left on pos: {dirt.amount}")
|
||||
self.assertTrue(dirt.amount < 5) # get dirt amount one step before - clean amount
|
||||
return []
|
||||
|
||||
def on_check_done(self, state) -> List[DoneResult]:
|
||||
for dirtagent in [a for a in state.entities[c.AGENT] if "Clean" in a.identifier]: # isinstance TSPDirtAgent
|
||||
temp_state = dirtagent._status
|
||||
if isinstance(temp_state, (ActionResult, TickResult)):
|
||||
# print(f"dirtagent {temp_state}")
|
||||
self.temp_state_dict[dirtagent.identifier] = temp_state
|
||||
else:
|
||||
self.temp_state_dict[dirtagent.identifier] = None
|
||||
return []
|
||||
|
||||
|
||||
class ItemAgentTest(Test):
|
||||
|
||||
def __init__(self):
|
||||
"""
|
||||
Tests whether the item agent will perform the correct actions and whether the actions register correctly in the
|
||||
environment.
|
||||
"""
|
||||
super().__init__()
|
||||
self.temp_state_dict = {}
|
||||
pass
|
||||
|
||||
def on_init(self, state, lvl_map):
|
||||
return []
|
||||
|
||||
def on_reset(self):
|
||||
return []
|
||||
|
||||
def tick_step(self, state) -> List[TickResult]:
|
||||
for itemagent in [a for a in state.entities[c.AGENT] if "Item" in a.identifier]: # isinstance TSPItemAgent
|
||||
# state usually is an actionresult but after a crash, tickresults are reported
|
||||
self.assertIsInstance(itemagent.state, (ActionResult, TickResult))
|
||||
# self.assertEqual(agent.state.validity, True)
|
||||
# print(f"state validity itemagent: {itemagent.state.validity}")
|
||||
|
||||
return []
|
||||
|
||||
def tick_post_step(self, state) -> List[TickResult]:
|
||||
# do agents' actions have correct effects on environment i.e. doors open, items are picked up and dropped off
|
||||
for itemagent in [a for a in state.entities[c.AGENT] if "Item" in a.identifier]: # isinstance TSPItemAgent
|
||||
|
||||
if self.temp_state_dict != {}: # and
|
||||
last_action = self.temp_state_dict[itemagent.identifier]
|
||||
if last_action.identifier == 'DoorUse':
|
||||
if door := next((entity for entity in state.entities.get_entities_near_pos(itemagent.pos) if
|
||||
isinstance(entity, Door)), None):
|
||||
agents_near_door = [agent for agent in state.entities.get_entities_near_pos(door.pos) if
|
||||
isinstance(agent, Agent)]
|
||||
if len(agents_near_door) < 2:
|
||||
# self.assertTrue(door.is_open)
|
||||
if door.is_closed:
|
||||
print("door should be open but seems closed.")
|
||||
|
||||
# if last_action.identifier == 'ItemAction':
|
||||
# If it was a pick-up action the item should be in the agents inventory and not in his neighboring
|
||||
# positions anymore
|
||||
# nearby_items = [e for e in state.entities.get_entities_near_pos(itemagent.pos) if
|
||||
# isinstance(e, Item)]
|
||||
# self.assertNotIn(Item, nearby_items)
|
||||
# self.assertTrue(itemagent.bound_entity) # where is the inventory
|
||||
#
|
||||
# If it was a drop-off action the item should not be in the agents inventory anymore but instead in
|
||||
# the drop-off locations inventory
|
||||
#
|
||||
# if nearby_drop_offs := [e for e in state.entities.get_entities_near_pos(itemagent.pos) if
|
||||
# isinstance(e, DropOffLocation)]:
|
||||
# dol = nearby_drop_offs[0]
|
||||
# self.assertTrue(dol.bound_entity) # item in drop-off location?
|
||||
# self.assertNotIn(Item, state.entities.get_entities_near_pos(itemagent.pos))
|
||||
|
||||
return []
|
||||
|
||||
def on_check_done(self, state) -> List[DoneResult]:
|
||||
for itemagent in [a for a in state.entities[c.AGENT] if "Item" in a.identifier]: # isinstance TSPItemAgent
|
||||
temp_state = itemagent._status
|
||||
# print(f"itemagent {temp_state}")
|
||||
self.temp_state_dict[itemagent.identifier] = temp_state
|
||||
return []
|
||||
|
||||
|
||||
class TargetAgentTest(Test):
|
||||
|
||||
def __init__(self):
|
||||
"""
|
||||
Tests whether the target agent will perform the correct actions and whether the actions register correctly in the
|
||||
environment.
|
||||
"""
|
||||
super().__init__()
|
||||
self.temp_state_dict = {}
|
||||
pass
|
||||
|
||||
def on_init(self, state, lvl_map):
|
||||
return []
|
||||
|
||||
def on_reset(self):
|
||||
return []
|
||||
|
||||
def tick_step(self, state) -> List[TickResult]:
|
||||
for targetagent in [a for a in state.entities[c.AGENT] if "Target" in a.identifier]:
|
||||
# state usually is an actionresult but after a crash, tickresults are reported
|
||||
self.assertIsInstance(targetagent.state, (ActionResult, TickResult))
|
||||
# print(f"state validity targetagent: {targetagent.state.validity}")
|
||||
return []
|
||||
|
||||
def tick_post_step(self, state) -> List[TickResult]:
|
||||
# do agents' actions have correct effects on environment i.e. doors open, targets are destinations
|
||||
for targetagent in [a for a in state.entities[c.AGENT] if "Target" in a.identifier]:
|
||||
if self.temp_state_dict != {}:
|
||||
last_action = self.temp_state_dict[targetagent.identifier]
|
||||
if last_action.identifier == 'DoorUse':
|
||||
if door := next((entity for entity in state.entities.get_entities_near_pos(targetagent.pos) if
|
||||
isinstance(entity, Door)), None):
|
||||
agents_near_door = [agent for agent in state.entities.get_entities_near_pos(door.pos) if
|
||||
isinstance(agent, Agent)]
|
||||
if len(agents_near_door) < 2:
|
||||
# self.assertTrue(door.is_open)
|
||||
if door.is_closed:
|
||||
print("door should be open but seems closed.")
|
||||
|
||||
return []
|
||||
|
||||
def on_check_done(self, state) -> List[DoneResult]:
|
||||
for targetagent in [a for a in state.entities[c.AGENT] if "Target" in a.identifier]:
|
||||
temp_state = targetagent._status
|
||||
# print(f"targetagent {temp_state}")
|
||||
self.temp_state_dict[targetagent.identifier] = temp_state
|
||||
return []
|
||||
|
@ -1,5 +0,0 @@
|
||||
#####
|
||||
#---#
|
||||
#---#
|
||||
#---#
|
||||
#####
|
@ -1,24 +0,0 @@
|
||||
##############################################################
|
||||
#-----------#---#--------------------------------------------#
|
||||
#-----------#---#--------------------------------------------#
|
||||
#-----------#---#------##------##------##------##------##----#
|
||||
#-----------#---D------##------##------##------##------##----#
|
||||
#-----------D---#--------------------------------------------#
|
||||
#-----------#---#--------------------------------------------#
|
||||
#############---####################D####################D####
|
||||
#------------------------------------------------------------#
|
||||
#------------------------------------------------------------#
|
||||
#------------------------------------------------------------#
|
||||
####################-####################################D####
|
||||
#-----------------#---#------------------------------#-------#
|
||||
#-----------------#---D------------------------------#-------#
|
||||
#-----------------D---#------------------------------#-------#
|
||||
#-----------------#---#######D#############D##########-------#
|
||||
#-----------------#---D------------------------------D-------#
|
||||
###################---#------------------------------#-------#
|
||||
#-----------------#---#######D#############D##########-------#
|
||||
#-----------------D---#------------------------------#-------#
|
||||
#-----------------#---#------------------------------#-------#
|
||||
#-----------------#---#------------------------------D-------#
|
||||
#-----------------#---#------------------------------#-------#
|
||||
##############################################################
|
@ -1,47 +0,0 @@
|
||||
###########################################################################################################################
|
||||
#-----------#---#--------------------------------------------#-----------#---#--------------------------------------------#
|
||||
#-----------#---#--------------------------------------------#-----------#---#--------------------------------------------#
|
||||
#-----------#---#------##------##------##------##------##----#-----------#---#------##------##------##------##------##----#
|
||||
#-----------#---D------##------##------##------##------##----#-----------#---D------##------##------##------##------##----#
|
||||
#-----------D---#--------------------------------------------#-----------D---#--------------------------------------------#
|
||||
#-----------#---#--------------------------------------------#-----------#---#--------------------------------------------#
#############---####################D####################D################---####################D####################D####
#------------------------------------------------------------#------------------------------------------------------------#
#------------------------------------------------------------D------------------------------------------------------------#
#------------------------------------------------------------#------------------------------------------------------------#
####################-####################################D#######################-####################################D####
#-----------------#---#------------------------------#-------#-----------------#---#------------------------------#-------#
#-----------------#---D------------------------------#-------#-----------------#---D------------------------------#-------#
#-----------------D---#------------------------------#-------#-----------------D---#------------------------------#-------#
#-----------------#---#######D#############D##########-------#-----------------#---#######D#############D##########-------#
#-----------------#---D------------------------------D-------#-----------------#---D------------------------------D-------#
###################---#------------------------------#-------###################---#------------------------------#-------#
#-----------------#---#######D#############D##########-------#-----------------#---#######D#############D##########-------#
#-----------------D---#------------------------------#-------D-----------------D---#------------------------------#-------#
#-----------------#---#------------------------------#-------#-----------------#---#------------------------------#-------#
#-----------------#---#------------------------------D-------#-----------------#---#------------------------------D-------#
#-----------------#---#------------------------------#-------#-----------------#---#------------------------------#-------#
##############D############################################################D###############################################
#-----------#---#--------------------------------------------#-----------#---#--------------------------------------------#
#-----------#---#--------------------------------------------#-----------#---#--------------------------------------------#
#-----------#---#------##------##------##------##------##----#-----------#---#------##------##------##------##------##----#
#-----------#---D------##------##------##------##------##----#-----------#---D------##------##------##------##------##----#
#-----------D---#--------------------------------------------#-----------D---#--------------------------------------------#
#-----------#---#--------------------------------------------#-----------#---#--------------------------------------------#
#############---####################D####################D################---####################D####################D####
#------------------------------------------------------------#------------------------------------------------------------#
#------------------------------------------------------------D------------------------------------------------------------#
#------------------------------------------------------------#------------------------------------------------------------#
###################---###################################D######################---###################################D####
#-----------------#---#------------------------------#-------#-----------------#---#------------------------------#-------#
#-----------------#---D------------------------------#-------#-----------------#---D------------------------------#-------#
#-----------------D---#------------------------------#-------#-----------------D---#------------------------------#-------#
#-----------------#---#######D#############D##########-------#-----------------#---#######D#############D##########-------#
#-----------------#---D------------------------------D-------#-----------------#---D------------------------------D-------#
###################---#------------------------------#-------###################---#------------------------------#-------#
#-----------------#---#######D#############D##########-------#-----------------#---#######D#############D##########-------#
#-----------------D---#------------------------------#-------#-----------------D---#------------------------------#-------#
#-----------------#---#------------------------------#-------#-----------------#---#------------------------------#-------#
#-----------------#---#------------------------------D-------#-----------------#---#------------------------------D-------#
#-----------------#---#------------------------------#-------#-----------------#---#------------------------------#-------#
###########################################################################################################################
@ -1,5 +0,0 @@
#######
###-###
#-----#
###-###
#######
@ -1,12 +0,0 @@
############
#----------#
#-#######--#
#-#-----D--#
#-#######--#
#-D-----D--#
#-#-#-#-#-##
#----------#
#----------#
#----------#
#----------#
############
@ -1,13 +0,0 @@
###############
#333x33#444444#
#333#33#444444#
#333333xx#4444#
#333333#444444#
#333333#444444#
###x#######D###
#1111##2222222#
#11111#2222#22#
#11111D2222222#
#11111#2222222#
#11111#2222222#
###############
@ -1,13 +0,0 @@
############
#----------#
#--######--#
#----------#
#--######--#
#----------#
#--######--#
#----------#
#--######--#
#----------#
#--######--#
#----------#
############
@ -1,12 +0,0 @@
############
#----------#
#---#------#
#--------#-#
#----------#
#--#-------#
#----------#
#----#-----#
#----------#
#-------#--#
#----------#
############
@ -1,5 +0,0 @@
#####
##-##
#---#
##-##
#####
@ -1,13 +1,7 @@
###############
#111111#222222#
#111111#222222#
#111111#222222#
#111111#222222#
#111111#222222#
#111111D222222#
#111111#222222#
#111111#222222#
#111111#222222#
#111111#222222#
#111111#222222#
#------#------#
#------#------#
#------D------#
#------#------#
#------#------#
###############
@ -1,7 +0,0 @@
###############
#111111#222222#
#111111#222222#
#111111D222222#
#111111#222222#
#111111#222222#
###############
@ -1,10 +1,6 @@
from .batteries import *
from .clean_up import *
from .destinations import *
from .doors import *
from .items import *
from .machines import *
from .maintenance import *

"""
modules
@ -1,4 +0,0 @@
from .actions import Charge
from .entitites import ChargePod, Battery
from .groups import ChargePods, Batteries
from .rules import DoneAtBatteryDischarge, BatteryDecharge
@ -1,31 +0,0 @@
from typing import Union

from marl_factory_grid.environment.actions import Action
from marl_factory_grid.utils.results import ActionResult

from marl_factory_grid.modules.batteries import constants as b
from marl_factory_grid.environment import constants as c
from marl_factory_grid.utils import helpers as h


class Charge(Action):

    def __init__(self):
        """
        Checks if a charge pod is present at the agent's position.
        If found, it attempts to charge the battery using the charge pod.
        """
        super().__init__(b.ACTION_CHARGE, b.REWARD_CHARGE_VALID, b.REWARD_CHARGE_FAIL)

    def do(self, entity, state) -> Union[None, ActionResult]:
        if charge_pod := h.get_first(state[b.CHARGE_PODS].by_pos(entity.pos)):
            valid = charge_pod.charge_battery(entity, state)
            if valid:
                state.print(f'{entity.name} just charged batteries at {charge_pod.name}.')
            else:
                state.print(f'{entity.name} failed to charge batteries at {charge_pod.name}.')
        else:
            valid = c.NOT_VALID
            state.print(f'{entity.name} failed to charge batteries at {entity.pos}.')

        return self.get_result(valid, entity)
Binary file not shown.
@ -1,17 +0,0 @@
# Battery Env
CHARGE_PODS = 'ChargePods'
BATTERIES = 'Batteries'
BATTERY_DISCHARGED = 'DISCHARGED'
CHARGE_POD_SYMBOL = 1

ACTION_CHARGE = 'do_charge_action'

REWARD_CHARGE_VALID: float = 0.1
REWARD_CHARGE_FAIL: float = -0.1
REWARD_BATTERY_DISCHARGED: float = -1.0
REWARD_DISCHARGE_DONE: float = -1.0


GROUPED = "grouped"
SINGLE = "single"
MODES = [GROUPED, SINGLE]
@ -1,119 +0,0 @@
from marl_factory_grid.environment import constants as c
from marl_factory_grid.environment.entity.agent import Agent
from marl_factory_grid.environment.entity.entity import Entity
from marl_factory_grid.environment.entity.object import Object
from marl_factory_grid.modules.batteries import constants as b
from marl_factory_grid.utils.utility_classes import RenderEntity


class Battery(Object):

    @property
    def var_can_be_bound(self):
        return True

    @property
    def is_discharged(self) -> bool:
        """
        Indicates whether the Battery's charge level is at 0 or not.

        :return: Whether this battery is empty.
        """
        return self.charge_level == 0

    @property
    def obs_tag(self):
        return self.name

    @property
    def encoding(self):
        return self.charge_level

    def __init__(self, initial_charge_level, owner, *args, **kwargs):
        """
        Represents a battery entity in the environment that can be bound to an agent and charged at charge pods.

        :param initial_charge_level: The current charge level of the battery, ranging from 0 to 1.
        :type initial_charge_level: float

        :param owner: The entity to which the battery is bound.
        :type owner: Entity
        """
        super(Battery, self).__init__(*args, **kwargs)
        self.charge_level = initial_charge_level
        self.bind_to(owner)

    def do_charge_action(self, amount) -> bool:
        """
        Updates the Battery's charge level according to the passed value.

        :param amount: Amount added to the Battery's charge level.
        :returns: Whether the battery could be charged. If not, it was already fully charged.
        """
        if self.charge_level < 1:
            # noinspection PyTypeChecker
            self.charge_level = min(1, amount + self.charge_level)
            return c.VALID
        else:
            return c.NOT_VALID

    def decharge(self, amount) -> bool:
        """
        Decreases the charge value of a battery. Currently only triggered by the battery-decharge rule.
        """
        if self.charge_level != 0:
            # noinspection PyTypeChecker
            self.charge_level = max(0, self.charge_level - amount)
            return c.VALID
        else:
            return c.NOT_VALID

    def summarize_state(self):
        summary = super().summarize_state()
        summary.update(dict(belongs_to=self._bound_entity.name, chargeLevel=self.charge_level))
        return summary


class ChargePod(Entity):

    @property
    def encoding(self):
        return b.CHARGE_POD_SYMBOL

    def __init__(self, *args, charge_rate: float = 0.4, multi_charge: bool = False, **kwargs):
        """
        Represents a charging pod for batteries in the environment.

        :param charge_rate: The rate at which the charging pod charges batteries. Defaults to 0.4.
        :type charge_rate: float

        :param multi_charge: Indicates whether the charging pod supports charging multiple batteries simultaneously.
            Defaults to False.
        :type multi_charge: bool
        """
        super(ChargePod, self).__init__(*args, **kwargs)
        self.charge_rate = charge_rate
        self.multi_charge = multi_charge

    def charge_battery(self, entity, state) -> bool:
        """
        Triggers the battery charge action if possible. Impossible if the battery is at full charge or more than one
        agent is at the charge pod's position.

        :returns: Whether the action was successful (valid) or not.
        """
        battery = state[b.BATTERIES].by_entity(entity)
        if battery.charge_level >= 1.0:
            return c.NOT_VALID
        if len([x for x in state[c.AGENT].by_pos(entity.pos)]) > 1:
            return c.NOT_VALID
        valid = battery.do_charge_action(self.charge_rate)
        return valid

    def render(self):
        return RenderEntity(b.CHARGE_PODS, self.pos)

    def summarize_state(self) -> dict:
        summary = super().summarize_state()
        summary.update(charge_rate=self.charge_rate)
        return summary
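For readers skimming the diff, the charge/discharge logic above boils down to two clamped updates. The stand-alone sketch below restates that arithmetic without any environment objects; the function names and numeric values are illustrative only.

```python
# Minimal sketch of Battery.do_charge_action / Battery.decharge, stand-alone.
def charge(charge_level: float, amount: float) -> float:
    # Charging clamps at a full battery (1.0).
    return min(1.0, charge_level + amount)

def decharge(charge_level: float, amount: float) -> float:
    # Discharging clamps at an empty battery (0.0).
    return max(0.0, charge_level - amount)

level = 0.85
level = charge(level, 0.4)      # ChargePod's default charge_rate -> clamped to 1.0
level = decharge(level, 0.02)   # BatteryDecharge's default per-action cost -> 0.98
```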
@ -1,52 +0,0 @@
from typing import Union, List, Tuple

from marl_factory_grid.environment import constants as c
from marl_factory_grid.environment.groups.collection import Collection
from marl_factory_grid.modules.batteries.entitites import ChargePod, Battery
from marl_factory_grid.utils.results import Result


class Batteries(Collection):
    _entity = Battery

    @property
    def var_has_position(self):
        return False

    @property
    def var_can_be_bound(self):
        return True

    def __init__(self, size, initial_charge_level=1.0, *args, **kwargs):
        """
        A collection of batteries that is in charge of spawning batteries. (Spawned batteries are bound to agents.)

        :param size: The maximum allowed size of the collection. Ensures that the collection does not exceed this size.
        :type size: int

        :param initial_charge_level: The initial charge level of the battery.
        :type initial_charge_level: float
        """
        super(Batteries, self).__init__(size, *args, **kwargs)
        self.initial_charge_level = initial_charge_level

    def spawn(self, coords_or_quantity: Union[int, List[Tuple[(int, int)]]], *entity_args, **entity_kwargs):
        batteries = [self._entity(self.initial_charge_level, agent) for _, agent in enumerate(entity_args[0])]
        self.add_items(batteries)

    def trigger_spawn(self, state, *entity_args, coords_or_quantity=None, **entity_kwargs):
        self.spawn(0, state[c.AGENT])
        return Result(identifier=f'{self.name}_spawn', validity=c.VALID, value=len(self))


class ChargePods(Collection):
    _entity = ChargePod

    def __init__(self, *args, **kwargs):
        """
        A collection of charge pods in the environment.
        """
        super(ChargePods, self).__init__(*args, **kwargs)

    def __repr__(self):
        return super(ChargePods, self).__repr__()
@ -1,128 +0,0 @@
from typing import List, Union

from marl_factory_grid.environment import constants as c
from marl_factory_grid.environment.rules import Rule
from marl_factory_grid.modules.batteries import constants as b
from marl_factory_grid.utils.results import TickResult, DoneResult


class BatteryDecharge(Rule):

    def __init__(self, initial_charge: float = 0.8, per_action_costs: Union[dict, float] = 0.02,
                 battery_charge_reward: float = b.REWARD_CHARGE_VALID,
                 battery_failed_reward: float = b.REWARD_CHARGE_FAIL,
                 battery_discharge_reward: float = b.REWARD_BATTERY_DISCHARGED,
                 paralyze_agents_on_discharge: bool = False):
        f"""
        Enables the battery charge/discharge functionality.

        :type paralyze_agents_on_discharge: bool
        :param paralyze_agents_on_discharge: Whether agents are still able to perform actions when discharged.
        :type per_action_costs: Union[dict, float] = 0.02
        :param per_action_costs: 1. dict: with an action name as key, provide a value for each
                                    (maybe walking is less tedious than opening a door? Just saying...).
                                 2. float: each action "costs" the same.
        ----
        !!! Does not introduce any Env.-Done condition.
        !!! Batteries can only be charged if the agent possesses the "Charge" action.
        !!! Batteries can only be charged if there are "Charge Pods" and they have been spawned!
        ----
        :type initial_charge: float
        :param initial_charge: How much juice the batteries start with.
        :type battery_discharge_reward: float
        :param battery_discharge_reward: Negative reward, when agents let their batteries discharge.
                                         Default: {b.REWARD_BATTERY_DISCHARGED}
        :type battery_failed_reward: float
        :param battery_failed_reward: Negative reward, when agents cannot charge but try to (overcharge, not on a station).
                                      Default: {b.REWARD_CHARGE_FAIL}
        :type battery_charge_reward: float
        :param battery_charge_reward: Positive reward, when agents actually charge their battery.
                                      Default: {b.REWARD_CHARGE_VALID}
        """
        super().__init__()
        self.paralyze_agents_on_discharge = paralyze_agents_on_discharge
        self.battery_discharge_reward = battery_discharge_reward
        self.battery_failed_reward = battery_failed_reward
        self.battery_charge_reward = battery_charge_reward
        self.per_action_costs = per_action_costs
        self.initial_charge = initial_charge

    def tick_step(self, state) -> List[TickResult]:
        batteries = state[b.BATTERIES]
        results = []

        for agent in state[c.AGENT]:
            if isinstance(self.per_action_costs, dict):
                energy_consumption = self.per_action_costs[agent.state.identifier]
            else:
                energy_consumption = self.per_action_costs

            batteries.by_entity(agent).decharge(energy_consumption)

            results.append(TickResult(self.name, entity=agent, validity=c.VALID, value=energy_consumption))

        return results

    def tick_post_step(self, state) -> List[TickResult]:
        results = []
        for btry in state[b.BATTERIES]:
            if btry.is_discharged:
                state.print(f'Battery of {btry.bound_entity.name} is discharged!')
                results.append(
                    TickResult(self.name, entity=btry.bound_entity, reward=self.battery_discharge_reward,
                               validity=c.VALID)
                )
                if self.paralyze_agents_on_discharge:
                    btry.bound_entity.paralyze(self.name)
                    results.append(
                        TickResult("Paralyzed", entity=btry.bound_entity, validity=c.VALID)
                    )
                    state.print(f'{btry.bound_entity.name} has just been paralyzed!')
            if btry.bound_entity.var_is_paralyzed and not btry.is_discharged:
                btry.bound_entity.de_paralyze(self.name)
                results.append(
                    TickResult("De-Paralyzed", entity=btry.bound_entity, validity=c.VALID)
                )
                state.print(f'{btry.bound_entity.name} has just been de-paralyzed!')
        return results


class DoneAtBatteryDischarge(BatteryDecharge):

    def __init__(self, reward_discharge_done=b.REWARD_DISCHARGE_DONE, mode: str = b.SINGLE, **kwargs):
        f"""
        Enables the battery charge/discharge functionality. Additionally, the episode ends when batteries are
        discharged, depending on the chosen mode.

        :type mode: str
        :param mode: Does this Done rule trigger when any battery is discharged, or only when all batteries are?
        :type per_action_costs: Union[dict, float] = 0.02
        :param per_action_costs: 1. dict: with an action name as key, provide a value for each
                                    (maybe walking is less tedious than opening a door? Just saying...).
                                 2. float: each action "costs" the same.

        :type initial_charge: float
        :param initial_charge: How much juice the batteries start with.
        :type reward_discharge_done: float
        :param reward_discharge_done: Global negative reward, when agents let their batteries discharge.
                                      Default: {b.REWARD_DISCHARGE_DONE}
        :type battery_discharge_reward: float
        :param battery_discharge_reward: Negative reward, when agents let their batteries discharge.
                                         Default: {b.REWARD_BATTERY_DISCHARGED}
        :type battery_failed_reward: float
        :param battery_failed_reward: Negative reward, when agents cannot charge but try to (overcharge, not on a station).
                                      Default: {b.REWARD_CHARGE_FAIL}
        :type battery_charge_reward: float
        :param battery_charge_reward: Positive reward, when agents actually charge their battery.
                                      Default: {b.REWARD_CHARGE_VALID}
        """
        super().__init__(**kwargs)
        self.mode = mode
        self.reward_discharge_done = reward_discharge_done

    def on_check_done(self, state) -> List[DoneResult]:
        any_discharged = (self.mode == b.SINGLE and any(battery.is_discharged for battery in state[b.BATTERIES]))
        all_discharged = (self.mode == b.GROUPED and all(battery.is_discharged for battery in state[b.BATTERIES]))
        if any_discharged or all_discharged:
            return [DoneResult(self.name, validity=c.VALID, reward=self.reward_discharge_done)]
        else:
            return [DoneResult(self.name, validity=c.NOT_VALID)]
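A hedged wiring sketch for the two rules above: the import path and keyword arguments mirror the code shown in this diff, while the per-action cost keys other than 'do_charge_action' are placeholder action identifiers, not names taken from the project.

```python
from marl_factory_grid.modules.batteries import BatteryDecharge, DoneAtBatteryDischarge
from marl_factory_grid.modules.batteries import constants as b

# Flat cost: every action drains the same amount of charge per tick.
uniform_decharge = BatteryDecharge(initial_charge=0.8, per_action_costs=0.02)

# Per-action costs: keys are action identifiers ('do_charge_action' comes from the
# constants above; the other keys are placeholders for whatever actions an agent has).
weighted_decharge = BatteryDecharge(
    initial_charge=1.0,
    per_action_costs={'do_charge_action': 0.0, 'example_move': 0.01, 'example_clean_up': 0.05},
)

# Same decharge behaviour, but the episode ends once any single battery runs dry.
done_on_any_empty = DoneAtBatteryDischarge(mode=b.SINGLE, reward_discharge_done=-1.0)
```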
@ -1,11 +0,0 @@
from .actions import ItemAction
from .entitites import Item, DropOffLocation
from .groups import DropOffLocations, Items, Inventory, Inventories

"""
items
=====

Todo

"""
@ -1,63 +0,0 @@
from typing import Union

from marl_factory_grid.environment.actions import Action
from marl_factory_grid.utils.results import ActionResult

from marl_factory_grid.modules.items import constants as i
from marl_factory_grid.environment import constants as c


class ItemAction(Action):

    def __init__(self, failed_dropoff_reward: float | None = None, valid_dropoff_reward: float | None = None, **kwargs):
        """
        Allows an entity to pick up or drop off items in the environment.

        :param failed_dropoff_reward: The reward assigned when a drop-off action fails. Default is None.
        :type failed_dropoff_reward: float | None
        :param valid_dropoff_reward: The reward assigned when a drop-off action is successful. Default is None.
        :type valid_dropoff_reward: float | None
        """
        super().__init__(i.ITEM_ACTION, i.REWARD_PICK_UP_FAIL, i.REWARD_PICK_UP_VALID, **kwargs)
        self.failed_drop_off_reward = failed_dropoff_reward if failed_dropoff_reward is not None else i.REWARD_DROP_OFF_FAIL
        self.valid_drop_off_reward = valid_dropoff_reward if valid_dropoff_reward is not None else i.REWARD_DROP_OFF_VALID

    def get_dropoff_result(self, validity, entity) -> ActionResult:
        """
        Generates an ActionResult for a drop-off action based on its validity.

        :param validity: Whether the drop-off action is valid.
        :type validity: bool

        :param entity: The entity performing the action.
        :type entity: Entity

        :return: ActionResult for the drop-off action.
        :rtype: ActionResult
        """
        reward = self.valid_drop_off_reward if validity else self.failed_drop_off_reward
        return ActionResult(self.__class__.__name__, validity, reward=reward, entity=entity)

    def do(self, entity, state) -> Union[None, ActionResult]:
        inventory = state[i.INVENTORY].by_entity(entity)
        if drop_off := state[i.DROP_OFF].by_pos(entity.pos):
            if inventory:
                valid = drop_off.place_item(inventory.pop())
            else:
                valid = c.NOT_VALID
            if valid:
                state.print(f'{entity.name} just dropped off an item at {drop_off.pos}.')
            else:
                state.print(f'{entity.name} just tried to drop off at {entity.pos}, but failed.')
            return self.get_dropoff_result(valid, entity)

        elif items := state[i.ITEM].by_pos(entity.pos):
            item = items[0]
            item.change_parent_collection(inventory)
            item.set_pos(c.VALUE_NO_POS)
            state.print(f'{entity.name} just picked up an item at {entity.pos}')
            return self.get_result(c.VALID, entity)

        else:
            state.print(f'{entity.name} just tried to pick up an item at {entity.pos}, but failed.')
            return self.get_result(c.NOT_VALID, entity)
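A small, assumed usage sketch for the ItemAction defined above; the numeric reward values are illustrative, and only the keyword names from the signature shown in the diff are used.

```python
from marl_factory_grid.modules.items.actions import ItemAction

# With no overrides, the defaults from marl_factory_grid.modules.items.constants apply.
default_item_action = ItemAction()

# Custom drop-off rewards; the pick-up rewards stay at the module defaults.
tuned_item_action = ItemAction(failed_dropoff_reward=-0.2, valid_dropoff_reward=0.3)
```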
Binary file not shown.
Some files were not shown because too many files have changed in this diff.