Mirror of https://github.com/illiumst/marl-factory-grid.git (synced 2025-09-18 00:21:58 +02:00)
major redesign of observations and entities
quickstart/all_test_config.yaml (new file, 111 lines)
@@ -0,0 +1,111 @@
---
General:
  level_name: large
  env_seed: 69
  verbose: !!bool False
  pomdp_r: 3
  individual_rewards: !!bool True

Entities:
  Defaults: {}
  DirtPiles:
    initial_dirt_ratio: 0.01  # On INIT, on max how many tiles does the dirt spawn in percent.
    dirt_spawn_r_var: 0.5  # How much does the dirt spawn amount vary?
    initial_amount: 1
    max_local_amount: 3  # Max dirt amount per tile.
    max_global_amount: 30  # Max dirt amount in the whole environment.
  Doors:
    closed_on_init: True
    auto_close_interval: 10
    indicate_area: False
  Batteries: {}
  ChargePods: {}
  Destinations: {}
  ReachedDestinations: {}
  Items: {}
  Inventories: {}
  DropOffLocations: {}

Agents:
  Wolfgang:
    Actions:
      - Move8
      - DoorUse
      - CleanUp
    Observations:
      - Self
      - Placeholder
      - Walls
      - DirtPiles
      - Placeholder
      - Doors
      - Doors
  Bjoern:
    Actions:
      # Move4, Noop
      - Move8
      - DoorUse
      - ItemAction
    Observations:
      - Defaults
      - Combined:
          - Other
          - Walls
      - Items
      - Inventory
  Karl-Heinz:
    Actions:
      - Move8
      - DoorUse
    Observations:
      # Wall, Only Other Agents
      - Defaults
      - Combined:
          - Other
          - Self
          - Walls
          - Doors
      - Destinations
  Manfred:
    Actions:
      - Move8
      - ItemAction
      - DoorUse
      - CleanUp
      - DestAction
      - BtryCharge
    Observations:
      - Defaults
      - Battery
      - Destinations
      - DirtPiles
      - Doors
      - Items
      - Inventory
      - DropOffLocations
Rules:
  Defaults: {}
  Collision:
    done_at_collisions: !!bool False
  DirtRespawnRule:
    spawn_freq: 15
  DirtSmearOnMove:
    smear_amount: 0.12
  DoorAutoClose: {}
  DirtAllCleanDone: {}
  Btry: {}
  BtryDoneAtDischarge: {}
  DestinationReach: {}
  DestinationSpawn: {}
  DestinationDone: {}
  ItemRules: {}

Assets:
  - Defaults
  - Dirt
  - Door
  - Machine
  - Item
  - Destination
  - DropOffLocation
  - Chargepod
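Outside the environment itself, the new config can be inspected with plain PyYAML. The short sketch below loads the file and prints each agent's action list; it assumes only that PyYAML is installed and that the file sits at the path added in this commit, and it does not go through whatever loader marl-factory-grid actually uses.

# Minimal sketch: read quickstart/all_test_config.yaml and list each agent's actions.
# Assumes PyYAML; key names mirror the config above.
from pathlib import Path
import yaml

cfg = yaml.safe_load(Path('quickstart/all_test_config.yaml').read_text())

print(cfg['General']['level_name'])   # -> 'large'
print(cfg['General']['pomdp_r'])      # -> 3
for name, agent in cfg['Agents'].items():
    print(name, agent['Actions'])     # e.g. Wolfgang ['Move8', 'DoorUse', 'CleanUp']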
@@ -134,7 +134,7 @@ if __name__ == '__main__':
         else:
             assert combined_env_kwargs[key] == val, "Check the combinations you try to make!"

-    # Update and combine all kwargs to account for multiple agents etc.
+    # Update and combine all kwargs to account for multiple agent etc.
     # We cannot capture all configuration cases!
     for key, val in factory_kwargs.items():
         if key not in combined_env_kwargs:
quickstart/default_config.yaml (new file, 68 lines)
@@ -0,0 +1,68 @@
---
General:
  level_name: rooms
  env_seed: 69
  verbose: !!bool False
  pomdp_r: 5
  individual_rewards: !!bool True

Entities:
  Defaults: {}
  DirtPiles:
    initial_dirt_ratio: 0.3  # On INIT, on max how many tiles does the dirt spawn in percent.
    dirt_spawn_r_var: 0.05  # How much does the dirt spawn amount vary?
    initial_amount: 3
    max_local_amount: 5  # Max dirt amount per tile.
    max_global_amount: 20  # Max dirt amount in the whole environment.
  Doors:
    closed_on_init: True
    auto_close_interval: 10
    indicate_area: False
Agents:
  Wolfgang:
    Actions:
      - Move8
      - Noop
      - DoorUse
      - CleanUp
    Observations:
      - Self
      - Placeholder
      - Walls
      - DirtPiles
      - Placeholder
      - Doors
      - Doors
  Björn:
    Actions:
      # Move4, Noop
      - Move4
      - DoorUse
      - CleanUp
    Observations:
      - Defaults
      - Combined
  Jürgen:
    Actions:
      # Move4, Noop
      - Defaults
      - DoorUse
      - CleanUp
    Observations:
      - Walls
      - Placeholder
      - Agent[Björn]
Rules:
  Defaults: {}
  Collision:
    done_at_collisions: !!bool False
  DirtRespawnRule:
    spawn_freq: 5
  DirtSmearOnMove:
    smear_amount: 0.12
  DoorAutoClose: {}
  DirtAllCleanDone: {}
Assets:
  - Defaults
  - Dirt
  - Doors
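The default config follows the same top-level layout (General, Entities, Agents, Rules, Assets). A quick structural check like the sketch below can catch a missing section or an agent without actions before the environment is built; the required keys here are inferred from the two files in this commit, not from any published schema.

# Sketch of a structural sanity check for the quickstart configs.
# REQUIRED_SECTIONS is an assumption taken from the files above, not an official schema.
import yaml

REQUIRED_SECTIONS = ('General', 'Entities', 'Agents', 'Rules', 'Assets')

def check_config(path: str) -> None:
    with open(path) as f:
        cfg = yaml.safe_load(f)
    missing = [key for key in REQUIRED_SECTIONS if key not in cfg]
    if missing:
        raise ValueError(f'{path}: missing sections {missing}')
    for name, agent in cfg['Agents'].items():
        for part in ('Actions', 'Observations'):
            if part not in agent:
                raise ValueError(f'{path}: agent {name} has no {part}')

check_config('quickstart/default_config.yaml')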
@@ -55,7 +55,7 @@ if __name__ == '__main__':
     # Define a global studi save path
     start_time = int(time.time())
     study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}'
-    # Create an identifier, which is unique for every combination and easy to read in filesystem
+    # Create an _identifier, which is unique for every combination and easy to read in filesystem
     identifier = f'{model_class.__name__}_{env_class.__name__}_{start_time}'
     exp_path = study_root_path / identifier

@@ -67,9 +67,9 @@ if __name__ == '__main__':
     # 'ObservationProperties' are for specifying how the agent sees the env.
     obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT, # Agents won`t be shown in the obs at all
                                       omit_agent_self=True, # This is default
-                                      additional_agent_placeholder=None, # We will not take care of future agents
+                                      additional_agent_placeholder=None, # We will not take care of future agent
                                       frames_to_stack=3, # To give the agent a notion of time
-                                      pomdp_r=2 # the agents view-radius
+                                      pomdp_r=2 # the agent view-radius
                                       )
     # 'MovementProperties' are for specifying how the agent is allowed to move in the env.
     move_props = MovementProperties(allow_diagonal_movement=True, # Euclidean style (vertices)

@@ -169,7 +169,7 @@ if __name__ == '__main__':
     # Evaluation starts here #####################################################
     # First Iterate over every model and monitor "as trained"
     print('Start Measurement Tracking')
-    # For trained policy in study_root_path / identifier
+    # For trained policy in study_root_path / _identifier
     for policy_path in [x for x in exp_path.iterdir() if x.is_dir()]:

         # retrieve model class
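The hunks above keep touching pomdp_r and frames_to_stack. As a rough illustration only, and assuming the common square-window convention of 2*r + 1 cells per side (which this diff does not spell out), an observation with pomdp_r=2 and frames_to_stack=3 would cover a stack of three 5x5 views:

# Back-of-the-envelope footprint of a partial observation.
# The (2*r + 1) side length is an assumed convention, not taken from this diff.
def obs_window(pomdp_r: int, frames_to_stack: int) -> tuple[int, int, int]:
    side = 2 * pomdp_r + 1
    return (frames_to_stack, side, side)

print(obs_window(pomdp_r=2, frames_to_stack=3))  # -> (3, 5, 5)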
@@ -54,7 +54,7 @@ if __name__ == '__main__':
     # Define a global studi save path
     start_time = int(time.time())
     study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}'
-    # Create an identifier, which is unique for every combination and easy to read in filesystem
+    # Create an _identifier, which is unique for every combination and easy to read in filesystem
     identifier = f'{model_class.__name__}_{env_class.__name__}_{start_time}'
     exp_path = study_root_path / identifier

@@ -66,9 +66,9 @@ if __name__ == '__main__':
     # 'ObservationProperties' are for specifying how the agent sees the env.
     obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT, # Agents won`t be shown in the obs at all
                                       omit_agent_self=True, # This is default
-                                      additional_agent_placeholder=None, # We will not take care of future agents
+                                      additional_agent_placeholder=None, # We will not take care of future agent
                                       frames_to_stack=3, # To give the agent a notion of time
-                                      pomdp_r=2 # the agents view-radius
+                                      pomdp_r=2 # the agent view-radius
                                       )
     # 'MovementProperties' are for specifying how the agent is allowed to move in the env.
     move_props = MovementProperties(allow_diagonal_movement=True, # Euclidean style (vertices)

@@ -159,7 +159,7 @@ if __name__ == '__main__':
     # Evaluation starts here #####################################################
     # First Iterate over every model and monitor "as trained"
     print('Start Measurement Tracking')
-    # For trained policy in study_root_path / identifier
+    # For trained policy in study_root_path / _identifier
     for policy_path in [x for x in exp_path.iterdir() if x.is_dir()]:

         # retrieve model class
@@ -54,7 +54,7 @@ if __name__ == '__main__':
     # Define a global studi save path
     start_time = int(time.time())
     study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}'
-    # Create an identifier, which is unique for every combination and easy to read in filesystem
+    # Create an _identifier, which is unique for every combination and easy to read in filesystem
     identifier = f'{model_class.__name__}_{env_class.__name__}_{start_time}'
     exp_path = study_root_path / identifier

@@ -66,9 +66,9 @@ if __name__ == '__main__':
     # 'ObservationProperties' are for specifying how the agent sees the env.
     obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT, # Agents won`t be shown in the obs at all
                                       omit_agent_self=True, # This is default
-                                      additional_agent_placeholder=None, # We will not take care of future agents
+                                      additional_agent_placeholder=None, # We will not take care of future agent
                                       frames_to_stack=3, # To give the agent a notion of time
-                                      pomdp_r=2 # the agents' view-radius
+                                      pomdp_r=2 # the agent' view-radius
                                       )
     # 'MovementProperties' are for specifying how the agent is allowed to move in the env.
     move_props = MovementProperties(allow_diagonal_movement=True, # Euclidean style (vertices)

@@ -161,7 +161,7 @@ if __name__ == '__main__':
     # Evaluation starts here #####################################################
     # First Iterate over every model and monitor "as trained"
     print('Start Measurement Tracking')
-    # For trained policy in study_root_path / identifier
+    # For trained policy in study_root_path / _identifier
     for policy_path in [x for x in exp_path.iterdir() if x.is_dir()]:

         # retrieve model class
@@ -54,7 +54,7 @@ if __name__ == '__main__':
     # Define a global studi save path
     start_time = int(time.time())
     study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}'
-    # Create an identifier, which is unique for every combination and easy to read in filesystem
+    # Create an _identifier, which is unique for every combination and easy to read in filesystem
     identifier = f'{model_class.__name__}_{env_class.__name__}_{start_time}'
     exp_path = study_root_path / identifier

@@ -65,9 +65,9 @@ if __name__ == '__main__':
     # 'ObservationProperties' are for specifying how the agent sees the env.
     obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT, # Agents won`t be shown in the obs at all
                                       omit_agent_self=True, # This is default
-                                      additional_agent_placeholder=None, # We will not take care of future agents
+                                      additional_agent_placeholder=None, # We will not take care of future agent
                                       frames_to_stack=3, # To give the agent a notion of time
-                                      pomdp_r=2 # the agents view-radius
+                                      pomdp_r=2 # the agent view-radius
                                       )
     # 'MovementProperties' are for specifying how the agent is allowed to move in the env.
     move_props = MovementProperties(allow_diagonal_movement=True, # Euclidean style (vertices)

@@ -157,7 +157,7 @@ if __name__ == '__main__':
     # Evaluation starts here #####################################################
     # First Iterate over every model and monitor "as trained"
     print('Start Measurement Tracking')
-    # For trained policy in study_root_path / identifier
+    # For trained policy in study_root_path / _identifier
     for policy_path in [x for x in exp_path.iterdir() if x.is_dir()]:

         # retrieve model class