mirror of
https://github.com/illiumst/marl-factory-grid.git
synced 2025-07-05 17:11:35 +02:00
Merge branch 'main' into 'unit_testing'
# Conflicts: # marl_factory_grid/algorithms/static/TSP_dirt_agent.py # marl_factory_grid/utils/config_parser.py
This commit is contained in:
@ -1,17 +1,35 @@
|
||||
General:
|
||||
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
|
||||
env_seed: 69
|
||||
# Individual vs global rewards
|
||||
individual_rewards: true
|
||||
# The level.txt file to load from marl_factory_grid/levels
|
||||
level_name: rooms
|
||||
# View radius of the partially observable Markov decision process (POMDP)
|
||||
pomdp_r: 3
|
||||
# Print all messages and events
|
||||
verbose: true
|
||||
# Run tests
|
||||
tests: false
|
||||
|
||||
# In the "clean and bring" Scenario one agent aims to pick up all items and drop them at drop-off locations while all
|
||||
# other agents aim to clean dirt piles.
|
||||
Agents:
|
||||
# The clean agents
|
||||
Wolfgang:
|
||||
Actions:
|
||||
- Move8
|
||||
- DoorUse
|
||||
- Clean
|
||||
- Noop
|
||||
- Move8
|
||||
- DoorUse
|
||||
- Clean
|
||||
- Noop
|
||||
Observations:
|
||||
- Walls
|
||||
- Doors
|
||||
- Other
|
||||
- DirtPiles
|
||||
- Walls
|
||||
- Doors
|
||||
- Other
|
||||
- DirtPiles
|
||||
Clones: 8
|
||||
|
||||
# The item agent
|
||||
Juergen:
|
||||
Actions:
|
||||
- Move8
|
||||
@ -38,37 +56,37 @@ Entities:
|
||||
DropOffLocations:
|
||||
coords_or_quantity: 1
|
||||
max_dropoff_storage_size: 0
|
||||
Inventories: {}
|
||||
Inventories: { }
|
||||
Items:
|
||||
coords_or_quantity: 5
|
||||
|
||||
|
||||
General:
|
||||
env_seed: 69
|
||||
individual_rewards: true
|
||||
level_name: rooms
|
||||
pomdp_r: 3
|
||||
verbose: True
|
||||
tests: false
|
||||
|
||||
# Rules section specifies the rules governing the dynamics of the environment.
|
||||
Rules:
|
||||
# Environment Dynamics
|
||||
# When stepping over a dirt pile, entities carry a ratio of the dirt to their next position
|
||||
EntitiesSmearDirtOnMove:
|
||||
smear_ratio: 0.2
|
||||
# Doors automatically close after a certain number of time steps
|
||||
DoorAutoClose:
|
||||
close_frequency: 7
|
||||
|
||||
# Respawn Stuff
|
||||
# Define how dirt should respawn after the initial spawn
|
||||
RespawnDirt:
|
||||
respawn_freq: 30
|
||||
# Define how items should respawn after the initial spawn
|
||||
RespawnItems:
|
||||
respawn_freq: 50
|
||||
|
||||
# Utilities
|
||||
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
|
||||
# Can be omitted/ignored if you do not want to take care of collisions at all.
|
||||
WatchCollisions:
|
||||
done_at_collisions: false
|
||||
|
||||
# Done Conditions
|
||||
# Define the conditions for the environment to stop: either success or failure conditions.
|
||||
# The environment stops when all dirt is cleaned
|
||||
DoneOnAllDirtCleaned:
|
||||
DoneAtMaxStepsReached:
|
||||
max_steps: 500
|
||||
|
@ -1,37 +1,74 @@
|
||||
# Default Configuration File
|
||||
|
||||
General:
|
||||
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
|
||||
env_seed: 69
|
||||
# Individual vs global rewards
|
||||
individual_rewards: true
|
||||
# The level.txt file to load from marl_factory_grid/levels
|
||||
level_name: large
|
||||
# View Radius; 0 = full observability
|
||||
pomdp_r: 3
|
||||
# Print all messages and events
|
||||
verbose: false
|
||||
# Run tests
|
||||
tests: false
|
||||
|
||||
# Agents section defines the characteristics of different agents in the environment.
|
||||
|
||||
# An Agent requires a list of actions and observations.
|
||||
# Possible actions: Noop, Charge, Clean, DestAction, DoorUse, ItemAction, MachineAction, Move8, Move4, North, NorthEast, ...
|
||||
# Possible observations: All, Combined, GlobalPosition, Battery, ChargePods, DirtPiles, Destinations, Doors, Items, Inventory, DropOffLocations, Maintainers, ...
|
||||
# You can use 'clone' as the agent name to have multiple instances with either a list of names or an int specifying the number of clones.
|
||||
Agents:
|
||||
Wolfgang:
|
||||
Actions:
|
||||
- Noop
|
||||
- Charge
|
||||
- Clean
|
||||
- DestAction
|
||||
- DoorUse
|
||||
- ItemAction
|
||||
- Move8
|
||||
- Noop
|
||||
- Charge
|
||||
- Clean
|
||||
- DestAction
|
||||
- DoorUse
|
||||
- ItemAction
|
||||
- Move8
|
||||
Observations:
|
||||
- Combined:
|
||||
- Other
|
||||
- Walls
|
||||
- GlobalPosition
|
||||
- Battery
|
||||
- ChargePods
|
||||
- DirtPiles
|
||||
- Destinations
|
||||
- Doors
|
||||
- Items
|
||||
- Inventory
|
||||
- DropOffLocations
|
||||
- Maintainers
|
||||
Entities:
|
||||
- Combined:
|
||||
- Other
|
||||
- Walls
|
||||
- GlobalPosition
|
||||
- Battery
|
||||
- ChargePods
|
||||
- DirtPiles
|
||||
- Destinations
|
||||
- Doors
|
||||
- Items
|
||||
- Inventory
|
||||
- DropOffLocations
|
||||
- Maintainers
|
||||
|
||||
# Entities section defines the initial parameters and behaviors of different entities in the environment.
|
||||
# Entities all spawn using coords_or_quantity, a number of entities or coordinates to place them.
|
||||
Entities:
|
||||
# Batteries: Entities representing power sources for agents.
|
||||
Batteries:
|
||||
initial_charge: 0.8
|
||||
per_action_costs: 0.02
|
||||
|
||||
# ChargePods: Entities representing charging stations for Batteries.
|
||||
ChargePods:
|
||||
coords_or_quantity: 2
|
||||
|
||||
# Destinations: Entities representing target locations for agents.
|
||||
# - spawn_mode: GROUPED or SINGLE. Determines how destinations are spawned.
|
||||
Destinations:
|
||||
coords_or_quantity: 1
|
||||
spawn_mode: GROUPED
|
||||
|
||||
# DirtPiles: Entities representing piles of dirt.
|
||||
# - initial_amount: Initial amount of dirt in each pile.
|
||||
# - clean_amount: Amount of dirt cleaned in each cleaning action.
|
||||
# - dirt_spawn_r_var: Random variation in dirt spawn amounts.
|
||||
# - max_global_amount: Maximum total amount of dirt allowed in the environment.
|
||||
# - max_local_amount: Maximum amount of dirt allowed in one position.
|
||||
DirtPiles:
|
||||
coords_or_quantity: 10
|
||||
initial_amount: 2
|
||||
@ -39,50 +76,71 @@ Entities:
|
||||
dirt_spawn_r_var: 0.1
|
||||
max_global_amount: 20
|
||||
max_local_amount: 5
|
||||
|
||||
# Doors are spawned using the level map.
|
||||
Doors:
|
||||
|
||||
# DropOffLocations: Entities representing locations where agents can drop off items.
|
||||
# - max_dropoff_storage_size: Maximum storage capacity at each drop-off location.
|
||||
DropOffLocations:
|
||||
coords_or_quantity: 1
|
||||
max_dropoff_storage_size: 0
|
||||
GlobalPositions: {}
|
||||
Inventories: {}
|
||||
|
||||
# GlobalPositions.
|
||||
GlobalPositions: { }
|
||||
|
||||
# Inventories: Entities representing inventories for agents.
|
||||
Inventories: { }
|
||||
|
||||
# Items: Entities representing items in the environment.
|
||||
Items:
|
||||
coords_or_quantity: 5
|
||||
|
||||
# Machines: Entities representing machines in the environment.
|
||||
Machines:
|
||||
coords_or_quantity: 2
|
||||
|
||||
# Maintainers: Entities representing maintainers that aim to maintain machines.
|
||||
Maintainers:
|
||||
coords_or_quantity: 1
|
||||
Zones: {}
|
||||
|
||||
General:
|
||||
env_seed: 69
|
||||
individual_rewards: true
|
||||
level_name: large
|
||||
pomdp_r: 3
|
||||
verbose: False
|
||||
tests: false
|
||||
|
||||
# Rules section specifies the rules governing the dynamics of the environment.
|
||||
Rules:
|
||||
# Environment Dynamics
|
||||
# When stepping over a dirt pile, entities carry a ratio of the dirt to their next position
|
||||
EntitiesSmearDirtOnMove:
|
||||
smear_ratio: 0.2
|
||||
# Doors automatically close after a certain number of time steps
|
||||
DoorAutoClose:
|
||||
close_frequency: 10
|
||||
# Maintainers move at every time step
|
||||
MoveMaintainers:
|
||||
|
||||
# Respawn Stuff
|
||||
# Define how dirt should respawn after the initial spawn
|
||||
RespawnDirt:
|
||||
respawn_freq: 15
|
||||
# Define how items should respawn after the initial spawn
|
||||
RespawnItems:
|
||||
respawn_freq: 15
|
||||
|
||||
# Utilities
|
||||
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
|
||||
# Can be omitted/ignored if you do not want to take care of collisions at all.
|
||||
WatchCollisions:
|
||||
done_at_collisions: false
|
||||
|
||||
# Done Conditions
|
||||
# Define the conditions for the environment to stop: either success or failure conditions.
|
||||
# The environment stops when an agent reaches a destination
|
||||
DoneAtDestinationReach:
|
||||
# The environment stops when all dirt is cleaned
|
||||
DoneOnAllDirtCleaned:
|
||||
# The environment stops when a battery is discharged
|
||||
DoneAtBatteryDischarge:
|
||||
# The environment stops when a maintainer reports a collision
|
||||
DoneAtMaintainerCollision:
|
||||
# The environment stops after max steps
|
||||
DoneAtMaxStepsReached:
|
||||
max_steps: 500
|
||||
|
@ -84,6 +84,6 @@ Rules:
|
||||
# On every step, should there be a reward for agents that reach their associated destination? No!
|
||||
dest_reach_reward: 0 # Do not touch. This is useful in other settings!
|
||||
# Reward should only be given when all destinations are reached in parallel!
|
||||
condition: "simultanious"
|
||||
condition: "simultaneous"
|
||||
# Reward if this is the case. Granted to each agent when all agents are at their target position simultaneously.
|
||||
reward_at_done: 1
|
||||
|
@ -1,14 +1,16 @@
|
||||
General:
|
||||
# Your Seed
|
||||
env_seed: 69
|
||||
# Individual or global rewards?
|
||||
# Individual vs global rewards
|
||||
individual_rewards: true
|
||||
# The level.txt file to load
|
||||
# The level.txt file to load from marl_factory_grid/levels
|
||||
level_name: narrow_corridor
|
||||
# View Radius; 0 = full observability
|
||||
pomdp_r: 0
|
||||
# print all messages and events
|
||||
verbose: true
|
||||
# Run tests
|
||||
tests: false
|
||||
|
||||
Agents:
|
||||
# Agents are identified by their name
|
||||
|
@ -1,51 +1,61 @@
|
||||
General:
|
||||
env_seed: 69
|
||||
# Individual vs global rewards
|
||||
individual_rewards: true
|
||||
# The level.txt file to load from marl_factory_grid/levels
|
||||
level_name: two_rooms
|
||||
# View Radius; 0 = full observability
|
||||
pomdp_r: 3
|
||||
# Print all messages and events
|
||||
verbose: false
|
||||
# Run tests
|
||||
tests: false
|
||||
|
||||
Entities:
|
||||
Destinations: {}
|
||||
Doors: {}
|
||||
GlobalPositions: {}
|
||||
Zones: {}
|
||||
|
||||
Rules:
|
||||
# Init:
|
||||
AssignGlobalPositions: {}
|
||||
ZoneInit: {}
|
||||
AgentSingleZonePlacement: {}
|
||||
IndividualDestinationZonePlacement: {}
|
||||
# Env Rules
|
||||
MaxStepsReached:
|
||||
max_steps: 10
|
||||
Collision:
|
||||
done_at_collisions: false
|
||||
DoorAutoClose:
|
||||
close_frequency: 10
|
||||
|
||||
# In "two rooms one door" scenario 2 agents spawn in 2 different rooms that are connected by a single door. Their aim
|
||||
# is to reach the destination in the room they didn't spawn in leading to a conflict at the door.
|
||||
Agents:
|
||||
Wolfgang:
|
||||
Actions:
|
||||
- Move8
|
||||
- Noop
|
||||
- DestAction
|
||||
- DoorUse
|
||||
- Move8
|
||||
- Noop
|
||||
- DestAction
|
||||
- DoorUse
|
||||
Observations:
|
||||
- Walls
|
||||
- Other
|
||||
- Doors
|
||||
- Destination
|
||||
- Walls
|
||||
- Other
|
||||
- Doors
|
||||
- Destination
|
||||
Sigmund:
|
||||
Actions:
|
||||
- Move8
|
||||
- Noop
|
||||
- DestAction
|
||||
- DoorUse
|
||||
- Move8
|
||||
- Noop
|
||||
- DestAction
|
||||
- DoorUse
|
||||
Observations:
|
||||
- Combined:
|
||||
- Other
|
||||
- Walls
|
||||
- Destination
|
||||
- Doors
|
||||
- Combined:
|
||||
- Other
|
||||
- Walls
|
||||
- Destination
|
||||
- Doors
|
||||
|
||||
Entities:
|
||||
Destinations: { }
|
||||
Doors: { }
|
||||
GlobalPositions: { }
|
||||
|
||||
Rules:
|
||||
# Environment Dynamics
|
||||
DoorAutoClose:
|
||||
close_frequency: 10
|
||||
|
||||
# Utilities
|
||||
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
|
||||
WatchCollisions:
|
||||
done_at_collisions: false
|
||||
|
||||
# Init
|
||||
AssignGlobalPositions: { }
|
||||
|
||||
# Done Conditions
|
||||
MaxStepsReached:
|
||||
max_steps: 10
|
||||
|
Reference in New Issue
Block a user