mirror of
https://github.com/illiumst/marl-factory-grid.git
synced 2025-10-23 18:46:52 +02:00
updated readme
This commit is contained in:
203
README.md
203
README.md
@@ -3,7 +3,7 @@
|
|||||||
Tackling emergent dysfunctions (EDYs) in cooperation with Fraunhofer-IKS
|
Tackling emergent dysfunctions (EDYs) in cooperation with Fraunhofer-IKS
|
||||||
|
|
||||||
## Setup
|
## Setup
|
||||||
Just install this environment by `pip install marl-factory-grid`.
|
Install this environment using `pip install marl-factory-grid`.
|
||||||
|
|
||||||
## First Steps
|
## First Steps
|
||||||
|
|
||||||
@@ -13,59 +13,157 @@ Most of the env. objects (entities, rules and assets) can be loaded automatically
|
|||||||
Just define what your environment needs in a *yaml*-configfile like:
|
Just define what your environment needs in a *yaml*-configfile like:
|
||||||
|
|
||||||
<details><summary>Example ConfigFile</summary>
|
<details><summary>Example ConfigFile</summary>
|
||||||
|
|
||||||
|
# Default Configuration File
|
||||||
|
|
||||||
General:
|
General:
|
||||||
level_name: rooms
|
# RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
|
||||||
env_seed: 69
|
env_seed: 69
|
||||||
verbose: !!bool False
|
# Individual vs global rewards
|
||||||
pomdp_r: 5
|
individual_rewards: true
|
||||||
individual_rewards: !!bool True
|
# The level.txt file to load from marl_factory_grid/levels
|
||||||
|
level_name: large
|
||||||
Entities:
|
# View Radius; 0 = full observability
|
||||||
Defaults: {}
|
pomdp_r: 3
|
||||||
Doors:
|
# Print all messages and events
|
||||||
closed_on_init: True
|
verbose: false
|
||||||
auto_close_interval: 10
|
# Run tests
|
||||||
indicate_area: False
|
tests: false
|
||||||
Destinations: {}
|
|
||||||
|
# Agents section defines the characteristics of different agents in the environment.
|
||||||
|
|
||||||
|
# An Agent requires a list of actions and observations.
|
||||||
|
# Possible actions: Noop, Charge, Clean, DestAction, DoorUse, ItemAction, MachineAction, Move8, Move4, North, NorthEast, ...
|
||||||
|
# Possible observations: All, Combined, GlobalPosition, Battery, ChargePods, DirtPiles, Destinations, Doors, Items, Inventory, DropOffLocations, Maintainers, ...
|
||||||
|
# You can use 'clone' as the agent name to have multiple instances with either a list of names or an int specifying the number of clones.
|
||||||
Agents:
|
Agents:
|
||||||
Wolfgang:
|
Wolfgang:
|
||||||
Actions:
|
Actions:
|
||||||
- Move8
|
- Noop
|
||||||
- Noop
|
- Charge
|
||||||
- DoorUse
|
- Clean
|
||||||
- ItemAction
|
- DestAction
|
||||||
Observations:
|
- DoorUse
|
||||||
- All
|
- ItemAction
|
||||||
- Placeholder
|
- Move8
|
||||||
- Walls
|
Observations:
|
||||||
- Items
|
- Combined:
|
||||||
- Placeholder
|
- Other
|
||||||
- Doors
|
- Walls
|
||||||
- Doors
|
- GlobalPosition
|
||||||
Armin:
|
- Battery
|
||||||
Actions:
|
- ChargePods
|
||||||
- Move4
|
- DirtPiles
|
||||||
- ItemAction
|
- Destinations
|
||||||
- DoorUse
|
- Doors
|
||||||
Observations:
|
- Items
|
||||||
- Combined:
|
- Inventory
|
||||||
- Agent['Wolfgang']
|
- DropOffLocations
|
||||||
- Walls
|
- Maintainers
|
||||||
- Doors
|
|
||||||
- Items
|
# Entities section defines the initial parameters and behaviors of different entities in the environment.
|
||||||
|
# Entities all spawn using coords_or_quantity, a number of entities or coordinates to place them.
|
||||||
|
Entities:
|
||||||
|
# Batteries: Entities representing power sources for agents.
|
||||||
|
Batteries:
|
||||||
|
initial_charge: 0.8
|
||||||
|
per_action_costs: 0.02
|
||||||
|
|
||||||
|
# ChargePods: Entities representing charging stations for Batteries.
|
||||||
|
ChargePods:
|
||||||
|
coords_or_quantity: 2
|
||||||
|
|
||||||
|
# Destinations: Entities representing target locations for agents.
|
||||||
|
# - spawn_mode: GROUPED or SINGLE. Determines how destinations are spawned.
|
||||||
|
Destinations:
|
||||||
|
coords_or_quantity: 1
|
||||||
|
spawn_mode: GROUPED
|
||||||
|
|
||||||
|
# DirtPiles: Entities representing piles of dirt.
|
||||||
|
# - initial_amount: Initial amount of dirt in each pile.
|
||||||
|
# - clean_amount: Amount of dirt cleaned in each cleaning action.
|
||||||
|
# - dirt_spawn_r_var: Random variation in dirt spawn amounts.
|
||||||
|
# - max_global_amount: Maximum total amount of dirt allowed in the environment.
|
||||||
|
# - max_local_amount: Maximum amount of dirt allowed in one position.
|
||||||
|
DirtPiles:
|
||||||
|
coords_or_quantity: 10
|
||||||
|
initial_amount: 2
|
||||||
|
clean_amount: 1
|
||||||
|
dirt_spawn_r_var: 0.1
|
||||||
|
max_global_amount: 20
|
||||||
|
max_local_amount: 5
|
||||||
|
|
||||||
|
# Doors are spawned using the level map.
|
||||||
|
Doors:
|
||||||
|
|
||||||
|
# DropOffLocations: Entities representing locations where agents can drop off items.
|
||||||
|
# - max_dropoff_storage_size: Maximum storage capacity at each drop-off location.
|
||||||
|
DropOffLocations:
|
||||||
|
coords_or_quantity: 1
|
||||||
|
max_dropoff_storage_size: 0
|
||||||
|
|
||||||
|
# GlobalPositions.
|
||||||
|
GlobalPositions: { }
|
||||||
|
|
||||||
|
# Inventories: Entities representing inventories for agents.
|
||||||
|
Inventories: { }
|
||||||
|
|
||||||
|
# Items: Entities representing items in the environment.
|
||||||
|
Items:
|
||||||
|
coords_or_quantity: 5
|
||||||
|
|
||||||
|
# Machines: Entities representing machines in the environment.
|
||||||
|
Machines:
|
||||||
|
coords_or_quantity: 2
|
||||||
|
|
||||||
|
# Maintainers: Entities representing maintainers that aim to maintain machines.
|
||||||
|
Maintainers:
|
||||||
|
coords_or_quantity: 1
|
||||||
|
|
||||||
|
# Zones: Entities representing zones in the environment.
|
||||||
|
Zones: { }
|
||||||
|
|
||||||
|
|
||||||
|
# Rules section specifies the rules governing the dynamics of the environment.
|
||||||
Rules:
|
Rules:
|
||||||
Defaults: {}
|
# Environment Dynamics
|
||||||
WatchCollisions:
|
# When stepping over a dirt pile, entities carry a ratio of the dirt to their next position
|
||||||
done_at_collisions: !!bool True
|
EntitiesSmearDirtOnMove:
|
||||||
ItemRespawn:
|
smear_ratio: 0.2
|
||||||
spawn_freq: 5
|
# Doors automatically close after a certain number of time steps
|
||||||
DoorAutoClose: {}
|
DoorAutoClose:
|
||||||
|
close_frequency: 10
|
||||||
|
# Maintainers move at every time step
|
||||||
|
MoveMaintainers:
|
||||||
|
|
||||||
|
# Respawn Stuff
|
||||||
|
# Define how dirt should respawn after the initial spawn
|
||||||
|
RespawnDirt:
|
||||||
|
respawn_freq: 15
|
||||||
|
# Define how items should respawn after the initial spawn
|
||||||
|
RespawnItems:
|
||||||
|
respawn_freq: 15
|
||||||
|
|
||||||
|
# Utilities
|
||||||
|
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
|
||||||
|
# Can be omitted/ignored if you do not want to take care of collisions at all.
|
||||||
|
WatchCollisions:
|
||||||
|
done_at_collisions: false
|
||||||
|
|
||||||
|
# Done Conditions
|
||||||
|
# Define the conditions for the environment to stop; these can be either success or failure conditions.
|
||||||
|
# The environment stops when an agent reaches a destination
|
||||||
|
DoneAtDestinationReach:
|
||||||
|
# The environment stops when all dirt is cleaned
|
||||||
|
DoneOnAllDirtCleaned:
|
||||||
|
# The environment stops when a battery is discharged
|
||||||
|
DoneAtBatteryDischarge:
|
||||||
|
# The environment stops when a maintainer reports a collision
|
||||||
|
DoneAtMaintainerCollision:
|
||||||
|
# The environment stops after max steps
|
||||||
|
DoneAtMaxStepsReached:
|
||||||
|
max_steps: 500
|
||||||
|
|
||||||
Assets:
|
|
||||||
- Defaults
|
|
||||||
- Items
|
|
||||||
- Doors
|
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
Have a look in [\quickstart](./quickstart) for further configuration examples.
|
Have a look in [\quickstart](./quickstart) for further configuration examples.
|
||||||
@@ -80,12 +178,11 @@ General:
|
|||||||
level_name: rooms # 'double', 'large', 'simple', ...
|
level_name: rooms # 'double', 'large', 'simple', ...
|
||||||
```
|
```
|
||||||
... or create your own, maybe with the help of [asciiflow.com](https://asciiflow.com/#/).
|
... or create your own, maybe with the help of [asciiflow.com](https://asciiflow.com/#/).
|
||||||
Make sure to use `#` as [Walls](marl_factory_grid/environment/entity/wall.py), `-` as free (walkable) [Floor](marl_factory_grid/environment/entity/wall.py)-Tiles, `D` for [Walls](./modules/doors/entities.py).
|
Make sure to use `#` as [Walls](marl_factory_grid/environment/entity/wall.py), `-` as free (walkable) floor, `D` for [Doors](./modules/doors/entities.py).
|
||||||
Other Entities (define your own) may bring their own `Symbols`
|
Other Entities (define your own) may bring their own `Symbols`
|
||||||
|
|
||||||
#### Entities
|
#### Entities
|
||||||
Entites, either [Objects](marl_factory_grid/environment/entity/object.py) for tracking stats
|
Entities are [Objects](marl_factory_grid/environment/entity/object.py) that can additionally be assigned a position.
|
||||||
or env. [Entity](marl_factory_grid/environment/entity/entity.py) which can interact.
|
|
||||||
Abstract Entities are provided.
|
Abstract Entities are provided.
|
||||||
|
|
||||||
#### Groups
|
#### Groups
|
||||||
|
@@ -8,7 +8,7 @@ General:
|
|||||||
# Radius of Partially observable Markov decision process
|
# Radius of Partially observable Markov decision process
|
||||||
pomdp_r: 3
|
pomdp_r: 3
|
||||||
# Print all messages and events
|
# Print all messages and events
|
||||||
verbose: True
|
verbose: true
|
||||||
# Run tests
|
# Run tests
|
||||||
tests: false
|
tests: false
|
||||||
|
|
||||||
@@ -79,7 +79,8 @@ Rules:
|
|||||||
respawn_freq: 50
|
respawn_freq: 50
|
||||||
|
|
||||||
# Utilities
|
# Utilities
|
||||||
# Define what happens on entity collisions
|
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
|
||||||
|
# Can be omitted/ignored if you do not want to take care of collisions at all.
|
||||||
WatchCollisions:
|
WatchCollisions:
|
||||||
done_at_collisions: false
|
done_at_collisions: false
|
||||||
|
|
||||||
|
@@ -129,7 +129,8 @@ Rules:
|
|||||||
respawn_freq: 15
|
respawn_freq: 15
|
||||||
|
|
||||||
# Utilities
|
# Utilities
|
||||||
# Define what happens on entity collisions
|
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
|
||||||
|
# Can be omitted/ignored if you do not want to take care of collisions at all.
|
||||||
WatchCollisions:
|
WatchCollisions:
|
||||||
done_at_collisions: false
|
done_at_collisions: false
|
||||||
|
|
||||||
|
@@ -84,6 +84,6 @@ Rules:
|
|||||||
# On every step, should there be a reward for agents that reach their associated destination? No!
|
# On every step, should there be a reward for agents that reach their associated destination? No!
|
||||||
dest_reach_reward: 0 # Do not touch. This is useful in other settings!
|
dest_reach_reward: 0 # Do not touch. This is useful in other settings!
|
||||||
# Reward should only be given when all destinations are reached in parallel!
|
# Reward should only be given when all destinations are reached in parallel!
|
||||||
condition: "simultanious"
|
condition: "simultaneous"
|
||||||
# Reward if this is the case. Granted to each agent when all agents are at their target position simultaneously.
|
# Reward if this is the case. Granted to each agent when all agents are at their target position simultaneously.
|
||||||
reward_at_done: 1
|
reward_at_done: 1
|
||||||
|
@@ -50,7 +50,7 @@ Rules:
|
|||||||
close_frequency: 10
|
close_frequency: 10
|
||||||
|
|
||||||
# Utilities
|
# Utilities
|
||||||
# Define what happens on entity collisions
|
# This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
|
||||||
WatchCollisions:
|
WatchCollisions:
|
||||||
done_at_collisions: false
|
done_at_collisions: false
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user