First commit for our new MARL algorithms library; contains working implementations of IAC, SNAC and SEAC

2022-01-21 15:31:07 +01:00
parent 3e19970a60
commit ffc47752a7
24 changed files with 762 additions and 847 deletions
--- a/environments/factory/init.py
+++ b/environments/factory/init.py
@@ -1,22 +1,25 @@
-def make(env_name, pomdp_r=2, max_steps=400, stack_n_frames=3, n_agents=1,  individual_rewards=False):
+def make(env_name, pomdp_r=2, max_steps=400, stack_n_frames=3, n_agents=1, individual_rewards=False):
    import yaml
    from pathlib import Path
    from environments.factory.combined_factories import DirtItemFactory
    from environments.factory.factory_item import ItemFactory, ItemProperties
-    from environments.factory.factory_dirt import DirtProperties, DirtFactory
-    from environments.utility_classes import MovementProperties, ObservationProperties, AgentRenderOptions
+    from environments.factory.factory_dirt import DirtProperties, DirtFactory, RewardsDirt
+    from environments.utility_classes import AgentRenderOptions

    with (Path(__file__).parent / 'levels' / 'parameters' / f'{env_name}.yaml').open('r') as stream:
        dictionary = yaml.load(stream, Loader=yaml.FullLoader)

-    obs_props = ObservationProperties(render_agents=AgentRenderOptions.COMBINED,
-                                      frames_to_stack=stack_n_frames, pomdp_r=pomdp_r)
+    obs_props = dict(render_agents=AgentRenderOptions.COMBINED,
+                     pomdp_r=pomdp_r,
+                     indicate_door_area=True,
+                     show_global_position_info=False,
+                     frames_to_stack=stack_n_frames)

-    factory_kwargs = dict(n_agents=n_agents, individual_rewards=individual_rewards,
-                          max_steps=max_steps, obs_prop=obs_props,
-                          mv_prop=MovementProperties(**dictionary['movement_props']),
-                          dirt_prop=DirtProperties(**dictionary['dirt_props']),
-                          record_episodes=False, verbose=False, **dictionary['factory_props']
+    factory_kwargs = dict(**dictionary,
+                          n_agents=n_agents,
+                          individual_rewards=individual_rewards,
+                          max_steps=max_steps,
+                          obs_prop=obs_props,
+                          verbose=False,
                          )
-
    return DirtFactory(**factory_kwargs).__enter__()