---
# General environment settings.
General:
  # Just the best seed.
  env_seed: 69
  # Each agent receives an individual reward.
  individual_rewards: true
  # Level file to load from .\levels\.
  level_name: eight_puzzle
  # Partial observability. 0 = full observation.
  pomdp_r: 0
  # Please do not spam me.
  verbose: false
  # Do not touch; work in progress.
  tests: false

# RL surrogates.
Agents:
  # This key defines the name of the agent (UTF-8).
  Wolfgang:
    # Section which defines the available actions per agent.
    Actions:
      # Move4 adds 4 actions: `North`, `East`, `South`, `West`.
      Move4:
        # Reward specification which differs from the default.
        # The agent does a valid move in the environment, i.e. it actually
        # moves.
        valid_reward: -0.1
        # The agent wants to move, but fails.
        fail_reward: 0
      # Noop, i.e. the agent does not do a thing.
      Noop:
        # The agent decides not to do anything, which is always valid.
        valid_reward: 0
        # Does not do anything; it just uses the same interface.
        fail_reward: 0
    # What the agent wants to see.
    Observations:
      # The agent sees other agents, but not itself.
      - Other
      # The agent wants to see walls.
      - Walls
      # The agent sees its associated destination (singular).
      # Use the plural to see all destinations.
      - Destination
    # You want to have 7 clones; it is also possible to name them by giving
    # the names as a list.
    Clones: 7
    # Agents block their grid position from being entered by others.
    is_blocking_pos: true

# Apart from agents, which additional entities do you want to load?
Entities:
  # Observable destinations, which can be reached by stepping on the same
  # position. Has additional parameters...
  Destinations:
    # Let them spawn on closed doors and agent positions.
    ignore_blocking: true
    # For 8-puzzle, we need a special spawn rule...
    spawnrule:
      # ...which spawns a single position just underneath an associated agent.
      # There are no parameters, so we state empty kwargs.
      SpawnDestinationOnAgent: {}

# This section defines which operations are performed besides agent actions.
# Without this section nothing happens, not even done-condition checks.
# Also, situation-based rewards are specified this way.
Rules:
  # -- Utilities --
  # This rule defines the collision mechanic, introduces a related done
  # condition and lets you specify rewards.
  # It can be omitted/ignored if you do not want to take care of collisions
  # at all. This does not mean that agents cannot collide; collisions are
  # just ignored.
  WatchCollisions:
    reward: 0
    done_at_collisions: false

  # In 8-puzzle, do not randomize the start positions; rather move a random
  # agent onto the single free position n times.
  DoRandomInitialSteps:
    # How many times?
    random_steps: 2

  # -- Done conditions --
  # Maximum steps per episode. There is no reward for failing.
  DoneAtMaxStepsReached:
    # After how many steps should the episode end?
    max_steps: 200

  # For 8-puzzle we need a done condition that checks whether destinations
  # have been reached, so...
  DoneAtDestinationReach:
    # On every step, should there be a reward for agents that reach their
    # associated destination? No!
    # Do not touch. This is useful in other settings!
    dest_reach_reward: 0
    # Reward should only be given when all destinations are reached in
    # parallel!
    # NOTE(review): the spelling of this value is presumably matched verbatim
    # by the consuming code -- confirm before "correcting" it.
    condition: "simultanious"
    # Reward if this is the case. Granted to each agent when all agents are
    # at their target position simultaneously.
    reward_at_done: 1