updated readme

2025-10-23 18:46:52 +02:00 · 2023-11-28 12:27:25 +01:00
parent c9ac92f044
commit 72a1c0a149
5 changed files with 157 additions and 58 deletions
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
 Tackling emergent dysfunctions (EDYs) in cooperation with Fraunhofer-IKS
 ## Setup
-Just install this environment by `pip install marl-factory-grid`.
+Install this environment using `pip install marl-factory-grid`.
 ## First Steps
@@ -13,59 +13,157 @@ Most of the env. objects (entites, rules and assets) can be loaded automatically
 Just define what your environment needs in a *yaml*-configfile like:
 <details><summary>Example ConfigFile</summary>    
    # Default Configuration File
    General:
-    level_name: rooms
+      # RNG-seed to sample the same "random" numbers every time, to make the different runs comparable.
-    env_seed: 69
+      env_seed: 69
-    verbose: !!bool False
+      # Individual vs global rewards
-    pomdp_r: 5
+      individual_rewards: true
-    individual_rewards: !!bool True
+      # The level.txt file to load from marl_factory_grid/levels
-
+      level_name: large
-    Entities:
+      # View Radius; 0 = full observability
-        Defaults: {}
+      pomdp_r: 3
-        Doors:
+      # Print all messages and events
-            closed_on_init: True
+      verbose: false
-            auto_close_interval: 10
+      # Run tests
-            indicate_area: False
+      tests: false
-        Destinations: {}
+    
-
+    # Agents section defines the characteristics of different agents in the environment.
    # An Agent requires a list of actions and observations.
    # Possible actions: Noop, Charge, Clean, DestAction, DoorUse, ItemAction, MachineAction, Move8, Move4, North, NorthEast, ...
    # Possible observations: All, Combined, GlobalPosition, Battery, ChargePods, DirtPiles, Destinations, Doors, Items, Inventory, DropOffLocations, Maintainers, ...
    # You can use 'clone' as the agent name to have multiple instances with either a list of names or an int specifying the number of clones.
    Agents:
-        Wolfgang:
+      Wolfgang:
-            Actions:
+        Actions:
-                - Move8
+          - Noop
-                - Noop
+          - Charge
-                - DoorUse
+          - Clean
-                - ItemAction
+          - DestAction
-            Observations:
+          - DoorUse
-                - All
+          - ItemAction
-                - Placeholder
+          - Move8
-                - Walls
+        Observations:
-                - Items
+          - Combined:
-                - Placeholder
+              - Other
-                - Doors
+              - Walls
-                - Doors
+          - GlobalPosition
-        Armin:
+          - Battery
-            Actions:
+          - ChargePods
-                - Move4
+          - DirtPiles
-                - ItemAction
+          - Destinations
-                - DoorUse
+          - Doors
-            Observations:
+          - Items
-                - Combined:
+          - Inventory
-                    - Agent['Wolfgang']
+          - DropOffLocations
-                    - Walls
+          - Maintainers
-                    - Doors
+    
-                    - Items
+    # Entities section defines the initial parameters and behaviors of different entities in the environment.
    # Entities all spawn using coords_or_quantity, a number of entities or coordinates to place them.
    Entities:
      # Batteries: Entities representing power sources for agents.
      Batteries:
        initial_charge: 0.8
        per_action_costs: 0.02
      # ChargePods: Entities representing charging stations for Batteries.
      ChargePods:
        coords_or_quantity: 2
      # Destinations: Entities representing target locations for agents.
      # - spawn_mode: GROUPED or SINGLE. Determines how destinations are spawned.
      Destinations:
        coords_or_quantity: 1
        spawn_mode: GROUPED
      # DirtPiles: Entities representing piles of dirt.
      # - initial_amount: Initial amount of dirt in each pile.
      # - clean_amount: Amount of dirt cleaned in each cleaning action.
      # - dirt_spawn_r_var: Random variation in dirt spawn amounts.
      # - max_global_amount: Maximum total amount of dirt allowed in the environment.
      # - max_local_amount: Maximum amount of dirt allowed in one position.
      DirtPiles:
        coords_or_quantity: 10
        initial_amount: 2
        clean_amount: 1
        dirt_spawn_r_var: 0.1
        max_global_amount: 20
        max_local_amount: 5
      # Doors are spawned using the level map.
      Doors:
      # DropOffLocations: Entities representing locations where agents can drop off items.
      # - max_dropoff_storage_size: Maximum storage capacity at each drop-off location.
      DropOffLocations:
        coords_or_quantity: 1
        max_dropoff_storage_size: 0
      # GlobalPositions.
      GlobalPositions: { }
      # Inventories: Entities representing inventories for agents.
      Inventories: { }
      # Items: Entities representing items in the environment.
      Items:
        coords_or_quantity: 5
      # Machines: Entities representing machines in the environment.
      Machines:
        coords_or_quantity: 2
      # Maintainers: Entities representing maintainers that aim to maintain machines.
      Maintainers:
        coords_or_quantity: 1
      # Zones: Entities representing zones in the environment.
      Zones: { }
    # Rules section specifies the rules governing the dynamics of the environment.
    Rules:
-        Defaults: {}
+      # Environment Dynamics
-        WatchCollisions:
+      # When stepping over a dirt pile, entities carry a ratio of the dirt to their next position
-            done_at_collisions: !!bool True
+      EntitiesSmearDirtOnMove:
-        ItemRespawn:
+        smear_ratio: 0.2
-            spawn_freq: 5
+      # Doors automatically close after a certain number of time steps
-        DoorAutoClose: {}
+      DoorAutoClose:
        close_frequency: 10
      # Maintainers move at every time step
      MoveMaintainers:
      # Respawn Stuff
      # Define how dirt should respawn after the initial spawn
      RespawnDirt:
        respawn_freq: 15
      # Define how items should respawn after the initial spawn
      RespawnItems:
        respawn_freq: 15
      # Utilities
      # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
      # Can be omitted/ignored if you do not want to take care of collisions at all.
      WatchCollisions:
        done_at_collisions: false
      # Done Conditions
      # Define the conditions for the environment to stop. Either success or failure conditions.
      # The environment stops when an agent reaches a destination
      DoneAtDestinationReach:
      # The environment stops when all dirt is cleaned
      DoneOnAllDirtCleaned:
      # The environment stops when a battery is discharged
      DoneAtBatteryDischarge:
      # The environment stops when a maintainer reports a collision
      DoneAtMaintainerCollision:
      # The environment stops after max steps
      DoneAtMaxStepsReached:
        max_steps: 500
    Assets:
    - Defaults
    - Items
    - Doors
   </details>
 Have a look in [\quickstart](./quickstart) for further configuration examples.
@@ -80,12 +178,11 @@ General:
    level_name: rooms  # 'double', 'large', 'simple', ...
 ```
 ... or create your own, maybe with the help of [asciiflow.com](https://asciiflow.com/#/).
-Make sure to use `#` as [Walls](marl_factory_grid/environment/entity/wall.py), `-` as free (walkable) [Floor](marl_factory_grid/environment/entity/wall.py)-Tiles, `D` for [Walls](./modules/doors/entities.py).
+Make sure to use `#` as [Walls](marl_factory_grid/environment/entity/wall.py), `-` as free (walkable) floor, `D` for [Doors](./modules/doors/entities.py).
 Other Entities (define your own) may bring their own `Symbols`
 #### Entities
-Entites, either [Objects](marl_factory_grid/environment/entity/object.py) for tracking stats 
+Entities are [Objects](marl_factory_grid/environment/entity/object.py) that can additionally be assigned a position.
 or env. [Entity](marl_factory_grid/environment/entity/entity.py) which can interact.
 Abstract Entities are provided.
 #### Groups
--- a/marl_factory_grid/configs/clean_and_bring.yaml
+++ b/marl_factory_grid/configs/clean_and_bring.yaml
@@ -8,7 +8,7 @@ General:
  # Radius of Partially observable Markov decision process
  pomdp_r: 3
  # Print all messages and events
-  verbose: True
+  verbose: true
  # Run tests
  tests: false
@@ -79,7 +79,8 @@ Rules:
    respawn_freq: 50
  # Utilities
-  # Define what happens on entity collisions
+  # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
  # Can be omitted/ignored if you do not want to take care of collisions at all.
  WatchCollisions:
    done_at_collisions: false
--- a/marl_factory_grid/configs/default_config.yaml
+++ b/marl_factory_grid/configs/default_config.yaml
@@ -129,7 +129,8 @@ Rules:
    respawn_freq: 15
  # Utilities
-  # Define what happens on entity collisions
+  # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
  # Can be omitted/ignored if you do not want to take care of collisions at all.
  WatchCollisions:
    done_at_collisions: false
--- a/marl_factory_grid/configs/eight_puzzle.yaml
+++ b/marl_factory_grid/configs/eight_puzzle.yaml
@@ -84,6 +84,6 @@ Rules:
    # On every step, should there be a reward for agents that reach their associated destination? No!
    dest_reach_reward: 0  # Do not touch. This is useful in other settings!
    # Reward should only be given when all destinations are reached in parallel!
-    condition: "simultanious"
+    condition: "simultaneous"
    # Reward if this is the case. Granted to each agent when all agents are at their target position simultaneously.
    reward_at_done: 1
--- a/marl_factory_grid/configs/two_rooms_one_door.yaml
+++ b/marl_factory_grid/configs/two_rooms_one_door.yaml
@@ -50,7 +50,7 @@ Rules:
    close_frequency: 10
  # Utilities
-  # Define what happens on entity collisions
+  # This rule defines the collision mechanic, introduces a related DoneCondition and lets you specify rewards.
  WatchCollisions:
    done_at_collisions: false