Merge branch 'main' into documentation

# Conflicts:
#	marl_factory_grid/configs/default_config.yaml
#	marl_factory_grid/configs/eight_puzzle.yaml
#	random_testrun.py
This commit is contained in:
Chanumask
2023-11-28 12:06:26 +01:00
20 changed files with 269 additions and 179 deletions

View File

@ -1,69 +1,89 @@
# General env. settings.
General:
# Just the best seed.
env_seed: 69
# Individual vs global rewards
# Each agent receives an individual Reward.
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
# level file to load from .\levels\.
level_name: eight_puzzle
# View Radius; 0 = full observability
# Partial Observability. 0 = Full Observation.
pomdp_r: 0
# Print all messages and events
verbose: True
# Run tests
# Please do not spam me.
verbose: false
# Do not touch, WIP
tests: false
# In the "eight puzzle" there are 8 agents standing on a 3x3 map, each with a specific destination to reach.
# RL Surrogates
Agents:
# This defines the name of the agent. UTF-8
Wolfgang:
# Section which defines the available Actions per Agent
Actions:
Noop:
fail_reward: -0
valid_reward: 0
# Move4 adds 4 actions [`North`, `East`, `South`, `West`]
Move4:
fail_reward: -0.1
valid_reward: -.01
# Reward specification which differs from the default.
# Agent does a valid move in the environment. He actually moves.
valid_reward: -0.1
# Agent wants to move, but fails.
fail_reward: 0
# NOOP aka agent does not do a thing.
Noop:
# The Agent decides to not do anything. Which is always valid.
valid_reward: 0
# Does not do anything, just using the same interface.
fail_reward: 0
# What the agent wants to see.
Observations:
# The agent...
# sees other agents, but not himself.
- Other
# wants to see walls
- Walls
# sees his associated Destination (singular). Use the Plural for `see all destinations`.
- Destination
Clones:
- Juergen
- Soeren
- Walter
- Siggi
- Dennis
- Karl-Heinz
- Kevin
# multiple agents can not stand on the same location
# You want to have 7 clones, also possible to name them by giving names as list.
Clones: 7
# Agents are blocking their grid position from being entered by others.
is_blocking_pos: true
# Apart from agents, which additional entities do you want to load?
Entities:
# Observable destinations, which can be reached by stepping on the same position. Has additional parameters...
Destinations:
# Let them spawn on closed doors and agent positions
ignore_blocking: true
# We need a special spawn rule...
# For 8-Puzzle, we need a special spawn rule...
spawnrule:
# ...which assigns the destinations per agent
SpawnDestinationsPerAgent:
# we use this parameter
coords_or_quantity:
# to enable and assign special positions per agent
Wolfgang: 1
Karl-Heinz: 1
Kevin: 1
Juergen: 1
Soeren: 1
Walter: 1
Siggi: 1
Dennis: 1
# ...which spawn a single position just underneath an associated agent.
SpawnDestinationOnAgent: {} # There are no parameters, so we state empty kwargs.
# This section defines which operations are performed beside agent action.
# Without this section nothing happens, not even Done-condition checks.
# Also, situation-based rewards are specified this way.
Rules:
# Utilities
## Utilities
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
# Can be omitted/ignored if you do not want to take care of collisions at all.
# This does not mean that agents cannot collide; it's just ignored.
WatchCollisions:
reward: 0
done_at_collisions: false
# Done Conditions
# Done when all agents are standing on the correct destination at the same time
DoneAtDestinationReach:
condition: simultaneous
# In 8 Puzzle, do not randomize the start positions, rather move a random agent onto the single free position n-times.
DoRandomInitialSteps:
# How many times?
random_steps: 2
## Done Conditions
# Maximum steps per episode. There is no reward for failing.
DoneAtMaxStepsReached:
max_steps: 500
# After how many steps should the episode end?
max_steps: 200
# For 8 Puzzle we need a done condition that checks whether destinations have been reached, so...
DoneAtDestinationReach:
# On every step, should there be a reward for agents that reach their associated destination? No!
dest_reach_reward: 0 # Do not touch. This is useful in other settings!
# Reward should only be given when all destinations are reached in parallel!
condition: "simultanious"
# Reward if this is the case. Granted to each agent when all agents are at their target position simultaneously.
reward_at_done: 1