Merge remote-tracking branch 'origin/unit_testing' into unit_testing

This commit is contained in:
Chanumask
2024-03-21 15:05:57 +01:00
98 changed files with 2608 additions and 554 deletions

View File

@ -1,17 +1,35 @@
General:
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
env_seed: 69
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: rooms
# Radius of Partially observable Markov decision process
pomdp_r: 3
# Print all messages and events
verbose: true
# Run tests
tests: false
# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
# other agents aim to clean dirt piles.
Agents:
# The clean agents
Wolfgang:
Actions:
- Move8
- DoorUse
- Clean
- Noop
- Move8
- DoorUse
- Clean
- Noop
Observations:
- Walls
- Doors
- Other
- DirtPiles
- Walls
- Doors
- Other
- DirtPiles
Clones: 8
# The item agent
Juergen:
Actions:
- Move8
@ -38,37 +56,37 @@ Entities:
DropOffLocations:
coords_or_quantity: 1
max_dropoff_storage_size: 0
Inventories: {}
Inventories: { }
Items:
coords_or_quantity: 5
General:
env_seed: 69
individual_rewards: true
level_name: rooms
pomdp_r: 3
verbose: True
tests: false
# Rules section specifies the rules governing the dynamics of the environment.
Rules:
# Environment Dynamics
# When stepping over a dirt pile, entities carry a ratio of the dirt to their next position
EntitiesSmearDirtOnMove:
smear_ratio: 0.2
# Doors automatically close after a certain number of time steps
DoorAutoClose:
close_frequency: 7
# Respawn Stuff
# Define how dirt should respawn after the initial spawn
RespawnDirt:
respawn_freq: 30
# Define how items should respawn after the initial spawn
RespawnItems:
respawn_freq: 50
# Utilities
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
# Can be omitted/ignored if you do not want to take care of collisions at all.
WatchCollisions:
done_at_collisions: false
# Done Conditions
# Define the conditions for the environment to stop. Either success or a fail conditions.
# The environment stops when all dirt is cleaned
DoneOnAllDirtCleaned:
DoneAtMaxStepsReached:
max_steps: 500

View File

@ -1,37 +1,74 @@
# Default Configuration File
General:
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
env_seed: 69
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: large
# View Radius; 0 = full observatbility
pomdp_r: 3
# Print all messages and events
verbose: false
# Run tests
tests: false
# Agents section defines the characteristics of different agents in the environment.
# An Agent requires a list of actions and observations.
# Possible actions: Noop, Charge, Clean, DestAction, DoorUse, ItemAction, MachineAction, Move8, Move4, North, NorthEast, ...
# Possible observations: All, Combined, GlobalPosition, Battery, ChargePods, DirtPiles, Destinations, Doors, Items, Inventory, DropOffLocations, Maintainers, ...
# You can use 'clone' as the agent name to have multiple instances with either a list of names or an int specifying the number of clones.
Agents:
Wolfgang:
Actions:
- Noop
- Charge
- Clean
- DestAction
- DoorUse
- ItemAction
- Move8
- Noop
- Charge
- Clean
- DestAction
- DoorUse
- ItemAction
- Move8
Observations:
- Combined:
- Other
- Walls
- GlobalPosition
- Battery
- ChargePods
- DirtPiles
- Destinations
- Doors
- Items
- Inventory
- DropOffLocations
- Maintainers
Entities:
- Combined:
- Other
- Walls
- GlobalPosition
- Battery
- ChargePods
- DirtPiles
- Destinations
- Doors
- Items
- Inventory
- DropOffLocations
- Maintainers
# Entities section defines the initial parameters and behaviors of different entities in the environment.
# Entities all spawn using coords_or_quantity, a number of entities or coordinates to place them.
Entities:
# Batteries: Entities representing power sources for agents.
Batteries:
initial_charge: 0.8
per_action_costs: 0.02
# ChargePods: Entities representing charging stations for Batteries.
ChargePods:
coords_or_quantity: 2
# Destinations: Entities representing target locations for agents.
# - spawn_mode: GROUPED or SINGLE. Determines how destinations are spawned.
Destinations:
coords_or_quantity: 1
spawn_mode: GROUPED
# DirtPiles: Entities representing piles of dirt.
# - initial_amount: Initial amount of dirt in each pile.
# - clean_amount: Amount of dirt cleaned in each cleaning action.
# - dirt_spawn_r_var: Random variation in dirt spawn amounts.
# - max_global_amount: Maximum total amount of dirt allowed in the environment.
# - max_local_amount: Maximum amount of dirt allowed in one position.
DirtPiles:
coords_or_quantity: 10
initial_amount: 2
@ -39,50 +76,71 @@ Entities:
dirt_spawn_r_var: 0.1
max_global_amount: 20
max_local_amount: 5
# Doors are spawned using the level map.
Doors:
# DropOffLocations: Entities representing locations where agents can drop off items.
# - max_dropoff_storage_size: Maximum storage capacity at each drop-off location.
DropOffLocations:
coords_or_quantity: 1
max_dropoff_storage_size: 0
GlobalPositions: {}
Inventories: {}
# GlobalPositions.
GlobalPositions: { }
# Inventories: Entities representing inventories for agents.
Inventories: { }
# Items: Entities representing items in the environment.
Items:
coords_or_quantity: 5
# Machines: Entities representing machines in the environment.
Machines:
coords_or_quantity: 2
# Maintainers: Entities representing maintainers that aim to maintain machines.
Maintainers:
coords_or_quantity: 1
Zones: {}
General:
env_seed: 69
individual_rewards: true
level_name: large
pomdp_r: 3
verbose: False
tests: false
# Rules section specifies the rules governing the dynamics of the environment.
Rules:
# Environment Dynamics
# When stepping over a dirt pile, entities carry a ratio of the dirt to their next position
EntitiesSmearDirtOnMove:
smear_ratio: 0.2
# Doors automatically close after a certain number of time steps
DoorAutoClose:
close_frequency: 10
# Maintainers move at every time step
MoveMaintainers:
# Respawn Stuff
# Define how dirt should respawn after the initial spawn
RespawnDirt:
respawn_freq: 15
# Define how items should respawn after the initial spawn
RespawnItems:
respawn_freq: 15
# Utilities
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
# Can be omitted/ignored if you do not want to take care of collisions at all.
WatchCollisions:
done_at_collisions: false
# Done Conditions
# Define the conditions for the environment to stop. Either success or a fail conditions.
# The environment stops when an agent reaches a destination
DoneAtDestinationReach:
# The environment stops when all dirt is cleaned
DoneOnAllDirtCleaned:
# The environment stops when a battery is discharged
DoneAtBatteryDischarge:
# The environment stops when a maintainer reports a collision
DoneAtMaintainerCollision:
# The environment stops after max steps
DoneAtMaxStepsReached:
max_steps: 500

View File

@ -84,6 +84,6 @@ Rules:
# On every step, should there be a reward for agets that reach their associated destination? No!
dest_reach_reward: 0 # Do not touch. This is usefull in other settings!
# Reward should only be given when all destiantions are reached in parallel!
condition: "simultanious"
condition: "simultaneous"
# Reward if this is the case. Granted to each agent when all agents are at their target position simultaniously.
reward_at_done: 1

View File

@ -1,14 +1,16 @@
General:
# Your Seed
env_seed: 69
# Individual or global rewards?
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load
# The level.txt file to load from marl_factory_grid/levels
level_name: narrow_corridor
# View Radius; 0 = full observatbility
pomdp_r: 0
# print all messages and events
verbose: true
# Run tests
tests: false
Agents:
# Agents are identified by their name

View File

@ -1,51 +1,61 @@
General:
env_seed: 69
# Individual vs global rewards
individual_rewards: true
# The level.txt file to load from marl_factory_grid/levels
level_name: two_rooms
# View Radius; 0 = full observatbility
pomdp_r: 3
# Print all messages and events
verbose: false
# Run tests
tests: false
Entities:
Destinations: {}
Doors: {}
GlobalPositions: {}
Zones: {}
Rules:
# Init:
AssignGlobalPositions: {}
ZoneInit: {}
AgentSingleZonePlacement: {}
IndividualDestinationZonePlacement: {}
# Env Rules
MaxStepsReached:
max_steps: 10
Collision:
done_at_collisions: false
DoorAutoClose:
close_frequency: 10
# In "two rooms one door" scenario 2 agents spawn in 2 different rooms that are connected by a single door. Their aim
# is to reach the destination in the room they didn't spawn in leading to a conflict at the door.
Agents:
Wolfgang:
Actions:
- Move8
- Noop
- DestAction
- DoorUse
- Move8
- Noop
- DestAction
- DoorUse
Observations:
- Walls
- Other
- Doors
- Destination
- Walls
- Other
- Doors
- Destination
Sigmund:
Actions:
- Move8
- Noop
- DestAction
- DoorUse
- Move8
- Noop
- DestAction
- DoorUse
Observations:
- Combined:
- Other
- Walls
- Destination
- Doors
- Combined:
- Other
- Walls
- Destination
- Doors
Entities:
Destinations: { }
Doors: { }
GlobalPositions: { }
Rules:
# Environment Dynamics
DoorAutoClose:
close_frequency: 10
# Utilities
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
WatchCollisions:
done_at_collisions: false
# Init
AssignGlobalPositions: { }
# Done Conditions
MaxStepsReached:
max_steps: 10