# Environment configuration: two cleaning agents (Wolfgang, Reiner) on the
# `quadrant` level with twelve dirt-pile coordinates.
#
# NOTE: this file must keep one key per line. If the structure is collapsed
# onto a single line, the first " #" turns the rest of the file into a comment
# and the document parses as `General: null`.

General:
  # RNG seed, so that the same "random" numbers are sampled on every run and
  # different runs stay comparable.
  env_seed: 69
  # Individual vs. global rewards.
  individual_rewards: true
  # The level .txt file to load from marl_factory_grid/levels.
  level_name: quadrant
  # Radius of the partially observable Markov decision process (POMDP).
  pomdp_r: 0  # default 3
  # Print all messages and events.
  verbose: false
  # Run tests.
  tests: false

# In the "clean and bring" scenario one agent aims to pick up all items and
# drop them at drop-off locations while all other agents aim to clean dirt
# piles.
# NOTE(review): this file only defines cleaning agents and DirtPiles; no item
# or drop-off entities are configured here -- confirm the scenario description
# above is still intended.
Agents:
  # The clean agents
  Wolfgang:
    Actions:
      - Move4
      - Clean
      - Noop
    Observations:
      - Walls
      - Other
      - DirtPiles
      - Self
    Positions:
      - (9,1)
  Reiner:
    Actions:
      - Move4
      - Clean
      - Noop
    Observations:
      - Walls
      - Other
      - DirtPiles
      - Self
    Positions:
      - (9,8)  # (9, 4)

Entities:
  DirtPiles:
    # Kept as one plain string; presumably parsed into coordinate tuples by
    # the consumer -- TODO confirm.
    coords_or_quantity: (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
    # <1 to ensure that the robot which first attempts to clean this field
    # can remove the dirt in one action.
    initial_amount: 0.5
    clean_amount: 1
    dirt_spawn_r_var: 0
    max_global_amount: 12
    max_local_amount: 1

# Rules section specifies the rules governing the dynamics of the environment.
Rules:
  # Utilities
  # This rule defines the collision mechanic, introduces a related
  # DoneCondition and lets you specify rewards.
  # Can be omitted/ignored if you do not want to take care of collisions at
  # all.
  WatchCollisions:
    done_at_collisions: false

  # Done Conditions
  # Define the conditions for the environment to stop: either success or fail
  # conditions.
  # The environment stops when all dirt is cleaned.
  # (No parameters are given; the key is intentionally left without a value.)
  DoneOnAllDirtCleaned:
  DoneAtMaxStepsReached:
    max_steps: 200