# General env. settings.
General:
  # Just the best seed.
  env_seed: 69
  # Each agent receives an individual reward.
  individual_rewards: true
  # Level file to load from .\levels\.
  level_name: eight_puzzle
  # Partial observability. 0 = full observation.
  pomdp_r: 0
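  # A hedged sketch of enabling partial observability: assuming pomdp_r acts as an
  # observation radius around each agent, a non-zero value would restrict the view.
  # The value 3 below is purely illustrative, not a project default.
  # pomdp_r: 3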
  # Please do not spam me.
  verbose: false
  # Do not touch, WIP.
  tests: false

# RL Surrogates
Agents:
  # This defines the name of the agent. UTF-8.
  Wolfgang:
    # Section which defines the available actions per agent.
    Actions:
      # Move4 adds the 4 actions [`North`, `East`, `South`, `West`].
      Move4:
        # Reward specification which differs from the default.
        # The agent does a valid move in the environment, i.e. it actually moves.
        valid_reward: -0.1
        # The agent wants to move, but fails.
        fail_reward: 0
      # Noop, i.e. the agent does not do anything.
      Noop:
        # The agent decides to do nothing, which is always valid.
        valid_reward: 0
        # A Noop does not fail; this entry just uses the same interface.
        fail_reward: 0
    # What the agent wants to see.
    Observations:
      # The agent...
      # ...sees other agents, but not itself.
      - Other
      # ...wants to see walls.
      - Walls
      # ...sees its associated Destination (singular). Use the plural to see all destinations (see the commented example below).
      - Destination
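      # Illustrative alternative to the singular entry above: the plural form lets
      # the agent observe every destination, not just its own. Shown commented out,
      # since the 8-puzzle setup relies on the singular observation.
      # - Destinations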
    # You want to have 7 clones; it is also possible to name them by giving a list of names (see the commented example below).
    Clones: 7
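    # Illustrative sketch of the named-clones variant mentioned above; the names
    # below are placeholders, not identifiers used by the project:
    # Clones: [Wolfgang_2, Wolfgang_3, Wolfgang_4]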
    # Agents block their grid position from being entered by others.
    is_blocking_pos: true

# Apart from agents, which additional entities do you want to load?
Entities:
  # Observable destinations, which can be reached by stepping on the same position. Has additional parameters...
  Destinations:
    # Let them spawn on closed doors and agent positions.
    ignore_blocking: true
    # For the 8-puzzle, we need a special spawn rule...
    spawnrule:
      # ...which spawns a single destination directly underneath an associated agent.
      SpawnDestinationOnAgent: {} # There are no parameters, so we state empty kwargs.
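      # For contrast, a rule that does take parameters would pass them as kwargs in
      # the same place. Rule and parameter names below are purely hypothetical and
      # only illustrate the syntax:
      # SomeOtherSpawnRule: {some_parameter: 2}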

# This section defines which operations are performed besides agent actions.
# Without this section nothing happens, not even done-condition checks.
# Also, situation-based rewards are specified this way.
Rules:
  ## Utilities
  # This rule defines the collision mechanic, introduces a related done condition and lets you specify rewards.
  # It can be omitted/ignored if you do not want to take care of collisions at all.
  # This does not mean that agents cannot collide; collisions are simply ignored.
  WatchCollisions:
    reward: 0
    done_at_collisions: false
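  # Illustrative alternative configuration (values are examples, not defaults):
  # penalize collisions and end the episode as soon as one occurs.
  # WatchCollisions:
  #   reward: -1
  #   done_at_collisions: true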

  # For the 8-puzzle, do not randomize the start positions; instead, move a random agent onto the single free position n times.
  DoRandomInitialSteps:
    # How many times?
    random_steps: 2

  ## Done Conditions
  # Maximum steps per episode. There is no reward for failing.
  DoneAtMaxStepsReached:
    # After how many steps should the episode end?
    max_steps: 200

  # For the 8-puzzle we need a done condition that checks whether destinations have been reached, so...
  DoneAtDestinationReach:
    # On every step, should there be a reward for agents that reach their associated destination? No!
    dest_reach_reward: 0 # Do not touch. This is useful in other settings!
    # The reward should only be given when all destinations are reached simultaneously!
    condition: "simultanious"
    # Reward if this is the case. Granted to each agent when all agents are at their target position simultaneously.
    reward_at_done: 1
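  # Illustrative sketch of the "other settings" hinted at above: a shaped-reward
  # variant that also pays agents whenever they reach their own destination
  # (values are examples only and not suited to the 8-puzzle task):
  # DoneAtDestinationReach:
  #   dest_reach_reward: 0.5
  #   condition: "simultanious"
  #   reward_at_done: 1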