mirror of
https://github.com/illiumst/marl-factory-grid.git
synced 2025-06-26 13:11:36 +02:00
Merge branch 'main' into documentation
# Conflicts: # marl_factory_grid/configs/default_config.yaml # marl_factory_grid/configs/eight_puzzle.yaml # random_testrun.py
This commit is contained in:
@ -1,69 +1,89 @@
|
||||
# General env. settings.
|
||||
General:
|
||||
# Just the best seed.
|
||||
env_seed: 69
|
||||
# Individual vs global rewards
|
||||
# Each agent receives an individual reward.
|
||||
individual_rewards: true
|
||||
# The level.txt file to load from marl_factory_grid/levels
|
||||
# level file to load from .\levels\.
|
||||
level_name: eight_puzzle
|
||||
# View Radius; 0 = full observability
|
||||
# Partial Observability. 0 = Full Observation.
|
||||
pomdp_r: 0
|
||||
# Print all messages and events
|
||||
verbose: True
|
||||
# Run tests
|
||||
# Please do not spam me.
|
||||
verbose: false
|
||||
# Do not touch, WIP
|
||||
tests: false
|
||||
|
||||
# In the "eight puzzle" there are 8 agents standing on a 3x3 map, each with a specific destination to reach.
|
||||
# RL Surrogates
|
||||
Agents:
|
||||
# This defines the name of the agent. UTF-8
|
||||
Wolfgang:
|
||||
# Section which defines the available Actions per Agent
|
||||
Actions:
|
||||
Noop:
|
||||
fail_reward: -0
|
||||
valid_reward: 0
|
||||
# Move4 adds 4 actions [`North`, `East`, `South`, `West`]
|
||||
Move4:
|
||||
fail_reward: -0.1
|
||||
valid_reward: -.01
|
||||
# Reward specification which differ from the default.
|
||||
# Agent does a valid move in the environment. He actually moves.
|
||||
valid_reward: -0.1
|
||||
# Agent wants to move, but fails.
|
||||
fail_reward: 0
|
||||
# NOOP aka agent does not do a thing.
|
||||
Noop:
|
||||
# The Agent decides to not do anything. Which is always valid.
|
||||
valid_reward: 0
|
||||
# Does not do anything, just using the same interface.
|
||||
fail_reward: 0
|
||||
# What the agent wants to see.
|
||||
Observations:
|
||||
# The agent...
|
||||
# sees other agents, but not himself.
|
||||
- Other
|
||||
# wants to see walls
|
||||
- Walls
|
||||
# sees his associated Destination (singular). Use the Plural for `see all destinations`.
|
||||
- Destination
|
||||
Clones:
|
||||
- Juergen
|
||||
- Soeren
|
||||
- Walter
|
||||
- Siggi
|
||||
- Dennis
|
||||
- Karl-Heinz
|
||||
- Kevin
|
||||
# multiple agents can not stand on the same location
|
||||
# You want to have 7 clones; it is also possible to name them by giving the names as a list.
|
||||
Clones: 7
|
||||
# Agents are blocking their grid position from being entered by others.
|
||||
is_blocking_pos: true
|
||||
|
||||
# Apart from agents, which additional entities do you want to load?
|
||||
Entities:
|
||||
# Observable destinations, which can be reached by stepping on the same position. Has additional parameters...
|
||||
Destinations:
|
||||
# Let them spawn on closed doors and agent positions
|
||||
ignore_blocking: true
|
||||
# We need a special spawn rule...
|
||||
# For 8-Puzzle, we need a special spawn rule...
|
||||
spawnrule:
|
||||
# ...which assigns the destinations per agent
|
||||
SpawnDestinationsPerAgent:
|
||||
# we use this parameter
|
||||
coords_or_quantity:
|
||||
# to enable and assign special positions per agent
|
||||
Wolfgang: 1
|
||||
Karl-Heinz: 1
|
||||
Kevin: 1
|
||||
Juergen: 1
|
||||
Soeren: 1
|
||||
Walter: 1
|
||||
Siggi: 1
|
||||
Dennis: 1
|
||||
# ...which spawn a single position just underneath an associated agent.
|
||||
SpawnDestinationOnAgent: {} # There are no parameters, so we state empty kwargs.
|
||||
|
||||
# This section defines which operations are performed beside agent action.
|
||||
# Without this section nothing happens, not even Done-condition checks.
|
||||
# Also, situation-based rewards are specified this way.
|
||||
Rules:
|
||||
# Utilities
|
||||
## Utilities
|
||||
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
|
||||
# Can be omitted/ignored if you do not want to take care of collisions at all.
|
||||
# This does not mean that agents cannot collide; collisions are just ignored.
|
||||
WatchCollisions:
|
||||
reward: 0
|
||||
done_at_collisions: false
|
||||
|
||||
# Done Conditions
|
||||
# Done when all agents are standing on the correct destination at the same time
|
||||
DoneAtDestinationReach:
|
||||
condition: simultaneous
|
||||
# In 8 Puzzle, do not randomize the start positions, rather move a random agent onto the single free position n-times.
|
||||
DoRandomInitialSteps:
|
||||
# How many times?
|
||||
random_steps: 2
|
||||
|
||||
## Done Conditions
|
||||
# Maximum steps per episode. There is no reward for failing.
|
||||
DoneAtMaxStepsReached:
|
||||
max_steps: 500
|
||||
# After how many steps should the episode end?
|
||||
max_steps: 200
|
||||
|
||||
# For 8 Puzzle we need a done condition that checks whether destinations have been reached, so...
|
||||
DoneAtDestinationReach:
|
||||
# On every step, should there be a reward for agents that reach their associated destination? No!
|
||||
dest_reach_reward: 0 # Do not touch. This is useful in other settings!
|
||||
# Reward should only be given when all destinations are reached in parallel!
|
||||
condition: "simultanious"
|
||||
# Reward if this is the case. Granted to each agent when all agents are at their target position simultaneously.
|
||||
reward_at_done: 1
|
||||
|
Reference in New Issue
Block a user