From 6a24e7b518da0e0aed15663a43b022b7f037169e Mon Sep 17 00:00:00 2001 From: Steffen Illium Date: Thu, 4 Aug 2022 14:57:48 +0200 Subject: [PATCH] Adjustments and Documentation, recording and new environments, refactoring --- environments/factory/__init__.py | 7 +- environments/factory/additional/__init__.py | 0 .../factory/additional/btry/__init__.py | 0 .../additional/btry/btry_collections.py | 50 ++++ .../factory/additional/btry/btry_objects.py | 67 +++++ .../factory/additional/btry/btry_util.py | 30 ++ .../additional/btry/factory_battery.py | 139 +++++++++ .../{ => additional}/combined_factories.py | 19 +- .../factory/additional/dest/__init__.py | 0 .../additional/dest/dest_collections.py | 44 +++ .../factory/additional/dest/dest_enitites.py | 44 +++ .../factory/additional/dest/dest_util.py | 41 +++ .../{ => additional/dest}/factory_dest.py | 146 ++-------- .../factory/additional/dirt/__init__.py | 0 .../additional/dirt/dirt_collections.py | 42 +++ .../factory/additional/dirt/dirt_entity.py | 26 ++ .../factory/additional/dirt/dirt_util.py | 30 ++ .../{ => additional/dirt}/factory_dirt.py | 114 +------- .../factory/additional/item/__init__.py | 0 .../{ => additional/item}/factory_item.py | 173 +---------- .../additional/item/item_collections.py | 87 ++++++ .../factory/additional/item/item_entities.py | 61 ++++ .../factory/additional/item/item_util.py | 31 ++ environments/factory/base/base_factory.py | 22 +- environments/factory/base/objects.py | 1 - environments/factory/base/registers.py | 2 +- environments/factory/factory_battery.py | 273 ------------------ .../factory_dirt_stationary_machines.py | 9 +- environments/helpers.py | 39 ++- environments/logging/envmonitor.py | 4 +- environments/logging/recorder.py | 51 ++-- environments/utility_classes.py | 17 +- plotting/compare_runs.py | 9 +- quickstart/combine_and_monitor_rerun.py | 187 ++++++++++++ .../single_agent_train_battery_target_env.py | 203 +++++++++++++ quickstart/single_agent_train_dest_env.py | 193 +++++++++++++ quickstart/single_agent_train_dirt_env.py | 45 +-- quickstart/single_agent_train_item_env.py | 191 ++++++++++++ reload_agent.py | 8 +- studies/e_1.py | 6 +- studies/single_run_with_export.py | 9 +- 41 files changed, 1660 insertions(+), 760 deletions(-) create mode 100644 environments/factory/additional/__init__.py create mode 100644 environments/factory/additional/btry/__init__.py create mode 100644 environments/factory/additional/btry/btry_collections.py create mode 100644 environments/factory/additional/btry/btry_objects.py create mode 100644 environments/factory/additional/btry/btry_util.py create mode 100644 environments/factory/additional/btry/factory_battery.py rename environments/factory/{ => additional}/combined_factories.py (78%) create mode 100644 environments/factory/additional/dest/__init__.py create mode 100644 environments/factory/additional/dest/dest_collections.py create mode 100644 environments/factory/additional/dest/dest_enitites.py create mode 100644 environments/factory/additional/dest/dest_util.py rename environments/factory/{ => additional/dest}/factory_dest.py (63%) create mode 100644 environments/factory/additional/dirt/__init__.py create mode 100644 environments/factory/additional/dirt/dirt_collections.py create mode 100644 environments/factory/additional/dirt/dirt_entity.py create mode 100644 environments/factory/additional/dirt/dirt_util.py rename environments/factory/{ => additional/dirt}/factory_dirt.py (71%) create mode 100644 environments/factory/additional/item/__init__.py rename 
environments/factory/{ => additional/item}/factory_item.py (59%) create mode 100644 environments/factory/additional/item/item_collections.py create mode 100644 environments/factory/additional/item/item_entities.py create mode 100644 environments/factory/additional/item/item_util.py delete mode 100644 environments/factory/factory_battery.py create mode 100644 quickstart/combine_and_monitor_rerun.py create mode 100644 quickstart/single_agent_train_battery_target_env.py create mode 100644 quickstart/single_agent_train_dest_env.py create mode 100644 quickstart/single_agent_train_item_env.py diff --git a/environments/factory/__init__.py b/environments/factory/__init__.py index a34815a..55211ad 100644 --- a/environments/factory/__init__.py +++ b/environments/factory/__init__.py @@ -2,8 +2,11 @@ def make(env_name, pomdp_r=2, max_steps=400, stack_n_frames=3, n_agents=1, indiv import yaml from pathlib import Path from environments.factory.combined_factories import DirtItemFactory - from environments.factory.factory_item import ItemFactory, ItemProperties - from environments.factory.factory_dirt import DirtProperties, DirtFactory, RewardsDirt + from environments.factory.factory_item import ItemFactory + from environments.factory.additional.item.item_util import ItemProperties + from environments.factory.factory_dirt import DirtFactory + from environments.factory.dirt_util import DirtProperties + from environments.factory.dirt_util import RewardsDirt from environments.utility_classes import AgentRenderOptions with (Path(__file__).parent / 'levels' / 'parameters' / f'{env_name}.yaml').open('r') as stream: diff --git a/environments/factory/additional/__init__.py b/environments/factory/additional/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/environments/factory/additional/btry/__init__.py b/environments/factory/additional/btry/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/environments/factory/additional/btry/btry_collections.py b/environments/factory/additional/btry/btry_collections.py new file mode 100644 index 0000000..350324e --- /dev/null +++ b/environments/factory/additional/btry/btry_collections.py @@ -0,0 +1,50 @@ +from environments.factory.additional.btry.btry_objects import Battery, ChargePod +from environments.factory.base.registers import EnvObjectCollection, EntityCollection + + +class BatteriesRegister(EnvObjectCollection): + + _accepted_objects = Battery + + def __init__(self, *args, **kwargs): + super(BatteriesRegister, self).__init__(*args, individual_slices=True, + is_blocking_light=False, can_be_shadowed=False, **kwargs) + self.is_observable = True + + def spawn_batteries(self, agents, initial_charge_level): + batteries = [self._accepted_objects(initial_charge_level, agent, self) for _, agent in enumerate(agents)] + self.add_additional_items(batteries) + + def summarize_states(self, n_steps=None): + # as dict with additional nesting + # return dict(items=super(Inventories, cls).summarize_states()) + return super(BatteriesRegister, self).summarize_states(n_steps=n_steps) + + # Todo Move this to Mixin! 
+ def by_entity(self, entity): + try: + return next((x for x in self if x.belongs_to_entity(entity))) + except StopIteration: + return None + + def idx_by_entity(self, entity): + try: + return next((idx for idx, x in enumerate(self) if x.belongs_to_entity(entity))) + except StopIteration: + return None + + def as_array_by_entity(self, entity): + return self._array[self.idx_by_entity(entity)] + + +class ChargePods(EntityCollection): + + _accepted_objects = ChargePod + + def __repr__(self): + super(ChargePods, self).__repr__() + + def summarize_states(self, n_steps=None): + # as dict with additional nesting + # return dict(items=super(Inventories, cls).summarize_states()) + return super(ChargePods, self).summarize_states(n_steps=n_steps) diff --git a/environments/factory/additional/btry/btry_objects.py b/environments/factory/additional/btry/btry_objects.py new file mode 100644 index 0000000..36cae74 --- /dev/null +++ b/environments/factory/additional/btry/btry_objects.py @@ -0,0 +1,67 @@ +from environments import helpers as h +from environments.factory.base.objects import BoundingMixin, EnvObject, Entity +from environments.factory.additional.btry.btry_util import Constants as c + + +class Battery(BoundingMixin, EnvObject): + + @property + def is_discharged(self): + return self.charge_level == 0 + + def __init__(self, initial_charge_level: float, *args, **kwargs): + super(Battery, self).__init__(*args, **kwargs) + self.charge_level = initial_charge_level + + def encoding(self): + return self.charge_level + + def do_charge_action(self, amount): + if self.charge_level < 1: + # noinspection PyTypeChecker + self.charge_level = min(1, amount + self.charge_level) + return c.VALID + else: + return c.NOT_VALID + + def decharge(self, amount) -> c: + if self.charge_level != 0: + # noinspection PyTypeChecker + self.charge_level = max(0, amount + self.charge_level) + self._collection.notify_change_to_value(self) + return c.VALID + else: + return c.NOT_VALID + + def summarize_state(self, **_): + attr_dict = {key: str(val) for key, val in self.__dict__.items() if not key.startswith('_') and key != 'data'} + attr_dict.update(dict(name=self.name)) + return attr_dict + + +class ChargePod(Entity): + + @property + def encoding(self): + return c.CHARGE_POD + + def __init__(self, *args, charge_rate: float = 0.4, + multi_charge: bool = False, **kwargs): + super(ChargePod, self).__init__(*args, **kwargs) + self.charge_rate = charge_rate + self.multi_charge = multi_charge + + def charge_battery(self, battery: Battery): + if battery.charge_level == 1.0: + return c.NOT_VALID + if sum(guest for guest in self.tile.guests if 'agent' in guest.name.lower()) > 1: + return c.NOT_VALID + valid = battery.do_charge_action(self.charge_rate) + return valid + + def summarize_state(self, n_steps=None) -> dict: + if n_steps == h.STEPS_START: + summary = super().summarize_state(n_steps=n_steps) + return summary + else: + {} diff --git a/environments/factory/additional/btry/btry_util.py b/environments/factory/additional/btry/btry_util.py new file mode 100644 index 0000000..e619be1 --- /dev/null +++ b/environments/factory/additional/btry/btry_util.py @@ -0,0 +1,30 @@ +from typing import NamedTuple, Union + +from environments.helpers import Constants as BaseConstants, EnvActions as BaseActions + + +class Constants(BaseConstants): + # Battery Env + CHARGE_PODS = 'Charge_Pod' + BATTERIES = 'BATTERIES' + BATTERY_DISCHARGED = 'DISCHARGED' + CHARGE_POD = 1 + + +class Actions(BaseActions): + CHARGE = 'do_charge_action' + + +class 
RewardsBtry(NamedTuple): + CHARGE_VALID: float = 0.1 + CHARGE_FAIL: float = -0.1 + BATTERY_DISCHARGED: float = -1.0 + + +class BatteryProperties(NamedTuple): + initial_charge: float = 0.8 # + charge_rate: float = 0.4 # + charge_locations: int = 20 # + per_action_costs: Union[dict, float] = 0.02 + done_when_discharged: bool = False + multi_charge: bool = False diff --git a/environments/factory/additional/btry/factory_battery.py b/environments/factory/additional/btry/factory_battery.py new file mode 100644 index 0000000..3271c6e --- /dev/null +++ b/environments/factory/additional/btry/factory_battery.py @@ -0,0 +1,139 @@ +from typing import Dict, List + +import numpy as np + +from environments.factory.additional.btry.btry_collections import BatteriesRegister, ChargePods +from environments.factory.additional.btry.btry_util import Constants, Actions, RewardsBtry, BatteryProperties +from environments.factory.base.base_factory import BaseFactory +from environments.factory.base.objects import Agent, Action +from environments.factory.base.renderer import RenderEntity + +c = Constants +a = Actions + + +class BatteryFactory(BaseFactory): + + def __init__(self, *args, btry_prop=BatteryProperties(), rewards_btry: RewardsBtry = RewardsBtry(), + **kwargs): + if isinstance(btry_prop, dict): + btry_prop = BatteryProperties(**btry_prop) + if isinstance(rewards_btry, dict): + rewards_btry = RewardsBtry(**rewards_btry) + self.btry_prop = btry_prop + self.rewards_dest = rewards_btry + super().__init__(*args, **kwargs) + + def per_agent_raw_observations_hook(self, agent) -> Dict[str, np.typing.ArrayLike]: + additional_raw_observations = super().per_agent_raw_observations_hook(agent) + additional_raw_observations.update({c.BATTERIES: self[c.BATTERIES].as_array_by_entity(agent)}) + return additional_raw_observations + + def observations_hook(self) -> Dict[str, np.typing.ArrayLike]: + additional_observations = super().observations_hook() + additional_observations.update({c.CHARGE_PODS: self[c.CHARGE_PODS].as_array()}) + return additional_observations + + @property + def entities_hook(self): + super_entities = super().entities_hook + + empty_tiles = self[c.FLOOR].empty_tiles[:self.btry_prop.charge_locations] + charge_pods = ChargePods.from_tiles( + empty_tiles, self._level_shape, + entity_kwargs=dict(charge_rate=self.btry_prop.charge_rate, + multi_charge=self.btry_prop.multi_charge) + ) + + batteries = BatteriesRegister(self._level_shape if not self._pomdp_r else ((self.pomdp_diameter,) * 2), + ) + batteries.spawn_batteries(self[c.AGENT], self.btry_prop.initial_charge) + super_entities.update({c.BATTERIES: batteries, c.CHARGE_PODS: charge_pods}) + return super_entities + + def step_hook(self) -> (List[dict], dict): + super_reward_info = super(BatteryFactory, self).step_hook() + + # Decharge + batteries = self[c.BATTERIES] + + for agent in self[c.AGENT]: + if isinstance(self.btry_prop.per_action_costs, dict): + energy_consumption = self.btry_prop.per_action_costs[agent.temp_action] + else: + energy_consumption = self.btry_prop.per_action_costs + + batteries.by_entity(agent).decharge(energy_consumption) + + return super_reward_info + + def do_charge_action(self, agent) -> (dict, dict): + if charge_pod := self[c.CHARGE_PODS].by_pos(agent.pos): + valid = charge_pod.charge_battery(self[c.BATTERIES].by_entity(agent)) + if valid: + info_dict = {f'{agent.name}_{a.CHARGE}_VALID': 1} + self.print(f'{agent.name} just charged batteries at {charge_pod.name}.') + else: + info_dict = {f'{agent.name}_{a.CHARGE}_FAIL': 1} + 
self.print(f'{agent.name} failed to charged batteries at {charge_pod.name}.') + else: + valid = c.NOT_VALID + info_dict = {f'{agent.name}_{a.CHARGE}_FAIL': 1} + # info_dict = {f'{agent.name}_no_charger': 1} + self.print(f'{agent.name} failed to charged batteries at {agent.pos}.') + reward = dict(value=self.rewards_dest.CHARGE_VALID if valid else self.rewards_dest.CHARGE_FAIL, + reason=a.CHARGE, info=info_dict) + return valid, reward + + def do_additional_actions(self, agent: Agent, action: Action) -> (bool, dict): + action_result = super().do_additional_actions(agent, action) + if action_result is None: + if action == a.CHARGE: + action_result = self.do_charge_action(agent) + return action_result + else: + return None + else: + return action_result + pass + + def reset_hook(self) -> (List[dict], dict): + super_reward_info = super(BatteryFactory, self).reset_hook() + # There is Nothing to reset. + return super_reward_info + + def check_additional_done(self) -> (bool, dict): + super_done, super_dict = super(BatteryFactory, self).check_additional_done() + if super_done: + return super_done, super_dict + else: + if self.btry_prop.done_when_discharged: + if btry_done := any(battery.is_discharged for battery in self[c.BATTERIES]): + super_dict.update(DISCHARGE_DONE=1) + return btry_done, super_dict + else: + pass + else: + pass + return super_done, super_dict + + def per_agent_reward_hook(self, agent: Agent) -> List[dict]: + reward_event_list = super(BatteryFactory, self).per_agent_reward_hook(agent) + if self[c.BATTERIES].by_entity(agent).is_discharged: + self.print(f'{agent.name} Battery is discharged!') + info_dict = {f'{agent.name}_{c.BATTERY_DISCHARGED}': 1} + reward_event_list.append({'value': self.rewards_dest.BATTERY_DISCHARGED, + 'reason': c.BATTERY_DISCHARGED, + 'info': info_dict} + ) + else: + # All Fine + pass + return reward_event_list + + def render_assets_hook(self): + # noinspection PyUnresolvedReferences + additional_assets = super().render_assets_hook() + charge_pods = [RenderEntity(c.CHARGE_PODS, charge_pod.tile.pos) for charge_pod in self[c.CHARGE_PODS]] + additional_assets.extend(charge_pods) + return additional_assets diff --git a/environments/factory/combined_factories.py b/environments/factory/additional/combined_factories.py similarity index 78% rename from environments/factory/combined_factories.py rename to environments/factory/additional/combined_factories.py index 8451b45..1c43488 100644 --- a/environments/factory/combined_factories.py +++ b/environments/factory/additional/combined_factories.py @@ -1,12 +1,15 @@ import random -from environments.factory.factory_battery import BatteryFactory, BatteryProperties -from environments.factory.factory_dest import DestFactory -from environments.factory.factory_dirt import DirtFactory, DirtProperties -from environments.factory.factory_item import ItemFactory - # noinspection PyAbstractClass +from environments.factory.additional.btry.btry_util import BatteryProperties +from environments.factory.additional.btry.factory_battery import BatteryFactory +from environments.factory.additional.dest.factory_dest import DestFactory +from environments.factory.additional.dirt.dirt_util import DirtProperties +from environments.factory.additional.dirt.factory_dirt import DirtFactory +from environments.factory.additional.item.factory_item import ItemFactory + + class DirtItemFactory(ItemFactory, DirtFactory): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -24,6 +27,12 @@ class DirtDestItemFactory(ItemFactory, 
DirtFactory, DestFactory): super().__init__(*args, **kwargs) +# noinspection PyAbstractClass +class DestBatteryFactory(BatteryFactory, DestFactory): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + if __name__ == '__main__': from environments.utility_classes import AgentRenderOptions as ARO, ObservationProperties diff --git a/environments/factory/additional/dest/__init__.py b/environments/factory/additional/dest/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/environments/factory/additional/dest/dest_collections.py b/environments/factory/additional/dest/dest_collections.py new file mode 100644 index 0000000..6919898 --- /dev/null +++ b/environments/factory/additional/dest/dest_collections.py @@ -0,0 +1,44 @@ +from environments.factory.base.registers import EntityCollection +from environments.factory.additional.dest.dest_util import Constants as c +from environments.factory.additional.dest.dest_enitites import Destination + + +class Destinations(EntityCollection): + + _accepted_objects = Destination + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.is_blocking_light = False + self.can_be_shadowed = False + + def as_array(self): + self._array[:] = c.FREE_CELL + # ToDo: Switch to new Style Array Put + # indices = list(zip(range(len(cls)), *zip(*[x.pos for x in cls]))) + # np.put(cls._array, [np.ravel_multi_index(x, cls._array.shape) for x in indices], cls.encodings) + for item in self: + if item.pos != c.NO_POS: + self._array[0, item.x, item.y] = item.encoding + return self._array + + def __repr__(self): + return super(Destinations, self).__repr__() + + def summarize_states(self, n_steps=None): + return {} + + +class ReachedDestinations(Destinations): + _accepted_objects = Destination + + def __init__(self, *args, **kwargs): + super(ReachedDestinations, self).__init__(*args, **kwargs) + self.can_be_shadowed = False + self.is_blocking_light = False + + def summarize_states(self, n_steps=None): + return {} + + def __repr__(self): + return super(ReachedDestinations, self).__repr__() diff --git a/environments/factory/additional/dest/dest_enitites.py b/environments/factory/additional/dest/dest_enitites.py new file mode 100644 index 0000000..5050a6d --- /dev/null +++ b/environments/factory/additional/dest/dest_enitites.py @@ -0,0 +1,44 @@ +from collections import defaultdict + +from environments.factory.base.objects import Entity, Agent +from environments.factory.additional.dest.dest_util import Constants as c + + +class Destination(Entity): + + @property + def any_agent_has_dwelled(self): + return bool(len(self._per_agent_times)) + + @property + def currently_dwelling_names(self): + return self._per_agent_times.keys() + + @property + def encoding(self): + return c.DESTINATION + + def __init__(self, *args, dwell_time: int = 0, **kwargs): + super(Destination, self).__init__(*args, **kwargs) + self.dwell_time = dwell_time + self._per_agent_times = defaultdict(lambda: dwell_time) + + def do_wait_action(self, agent: Agent): + self._per_agent_times[agent.name] -= 1 + return c.VALID + + def leave(self, agent: Agent): + del self._per_agent_times[agent.name] + + @property + def is_considered_reached(self): + agent_at_position = any(c.AGENT.lower() in x.name.lower() for x in self.tile.guests_that_can_collide) + return (agent_at_position and not self.dwell_time) or any(x == 0 for x in self._per_agent_times.values()) + + def agent_is_dwelling(self, agent: Agent): + return self._per_agent_times[agent.name] < self.dwell_time + 
+ def summarize_state(self, n_steps=None) -> dict: + state_summary = super().summarize_state(n_steps=n_steps) + state_summary.update(per_agent_times=self._per_agent_times) + return state_summary diff --git a/environments/factory/additional/dest/dest_util.py b/environments/factory/additional/dest/dest_util.py new file mode 100644 index 0000000..0746543 --- /dev/null +++ b/environments/factory/additional/dest/dest_util.py @@ -0,0 +1,41 @@ +from typing import NamedTuple + +from environments.helpers import Constants as BaseConstants, EnvActions as BaseActions + + +class Constants(BaseConstants): + # Destination Env + DEST = 'Destination' + DESTINATION = 1 + DESTINATION_DONE = 0.5 + DEST_REACHED = 'ReachedDestination' + + +class Actions(BaseActions): + WAIT_ON_DEST = 'WAIT' + + +class RewardsDest(NamedTuple): + + WAIT_VALID: float = 0.1 + WAIT_FAIL: float = -0.1 + DEST_REACHED: float = 5.0 + + +class DestModeOptions(object): + DONE = 'DONE' + GROUPED = 'GROUPED' + PER_DEST = 'PER_DEST' + + +class DestProperties(NamedTuple): + n_dests: int = 1 # How many destinations are there + dwell_time: int = 0 # How long does the agent need to "wait" on a destination + spawn_frequency: int = 0 + spawn_in_other_zone: bool = True # + spawn_mode: str = DestModeOptions.DONE + + assert dwell_time >= 0, 'dwell_time cannot be < 0!' + assert spawn_frequency >= 0, 'spawn_frequency cannot be < 0!' + assert n_dests >= 0, 'n_destinations cannot be < 0!' + assert (spawn_mode == DestModeOptions.DONE) != bool(spawn_frequency) diff --git a/environments/factory/factory_dest.py b/environments/factory/additional/dest/factory_dest.py similarity index 63% rename from environments/factory/factory_dest.py rename to environments/factory/additional/dest/factory_dest.py index b5c1477..be85402 100644 --- a/environments/factory/factory_dest.py +++ b/environments/factory/additional/dest/factory_dest.py @@ -1,132 +1,19 @@ import time -from collections import defaultdict from enum import Enum -from typing import List, Union, NamedTuple, Dict +from typing import List, Union, Dict import numpy as np import random +from environments.factory.additional.dest.dest_collections import Destinations, ReachedDestinations +from environments.factory.additional.dest.dest_enitites import Destination +from environments.factory.additional.dest.dest_util import Constants, Actions, RewardsDest, DestModeOptions, \ + DestProperties from environments.factory.base.base_factory import BaseFactory -from environments.helpers import Constants as BaseConstants -from environments.helpers import EnvActions as BaseActions -from environments.factory.base.objects import Agent, Entity, Action -from environments.factory.base.registers import Entities, EntityCollection +from environments.factory.base.objects import Agent, Action +from environments.factory.base.registers import Entities from environments.factory.base.renderer import RenderEntity - -class Constants(BaseConstants): - # Destination Env - DEST = 'Destination' - DESTINATION = 1 - DESTINATION_DONE = 0.5 - DEST_REACHED = 'ReachedDestination' - - -class Actions(BaseActions): - WAIT_ON_DEST = 'WAIT' - - -class RewardsDest(NamedTuple): - - WAIT_VALID: float = 0.1 - WAIT_FAIL: float = -0.1 - DEST_REACHED: float = 5.0 - - -class Destination(Entity): - - @property - def any_agent_has_dwelled(self): - return bool(len(self._per_agent_times)) - - @property - def currently_dwelling_names(self): - return self._per_agent_times.keys() - - @property - def encoding(self): - return c.DESTINATION - - def __init__(self, *args, 
dwell_time: int = 0, **kwargs): - super(Destination, self).__init__(*args, **kwargs) - self.dwell_time = dwell_time - self._per_agent_times = defaultdict(lambda: dwell_time) - - def do_wait_action(self, agent: Agent): - self._per_agent_times[agent.name] -= 1 - return c.VALID - - def leave(self, agent: Agent): - del self._per_agent_times[agent.name] - - @property - def is_considered_reached(self): - agent_at_position = any(c.AGENT.lower() in x.name.lower() for x in self.tile.guests_that_can_collide) - return (agent_at_position and not self.dwell_time) or any(x == 0 for x in self._per_agent_times.values()) - - def agent_is_dwelling(self, agent: Agent): - return self._per_agent_times[agent.name] < self.dwell_time - - def summarize_state(self, n_steps=None) -> dict: - state_summary = super().summarize_state(n_steps=n_steps) - state_summary.update(per_agent_times=self._per_agent_times) - return state_summary - - -class Destinations(EntityCollection): - - _accepted_objects = Destination - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.is_blocking_light = False - self.can_be_shadowed = False - - def as_array(self): - self._array[:] = c.FREE_CELL - # ToDo: Switch to new Style Array Put - # indices = list(zip(range(len(cls)), *zip(*[x.pos for x in cls]))) - # np.put(cls._array, [np.ravel_multi_index(x, cls._array.shape) for x in indices], cls.encodings) - for item in self: - if item.pos != c.NO_POS: - self._array[0, item.x, item.y] = item.encoding - return self._array - - def __repr__(self): - super(Destinations, self).__repr__() - - -class ReachedDestinations(Destinations): - _accepted_objects = Destination - - def __init__(self, *args, **kwargs): - super(ReachedDestinations, self).__init__(*args, **kwargs) - self.can_be_shadowed = False - self.is_blocking_light = False - - def summarize_states(self, n_steps=None): - return {} - - -class DestModeOptions(object): - DONE = 'DONE' - GROUPED = 'GROUPED' - PER_DEST = 'PER_DEST' - - -class DestProperties(NamedTuple): - n_dests: int = 1 # How many destinations are there - dwell_time: int = 0 # How long does the agent need to "wait" on a destination - spawn_frequency: int = 0 - spawn_in_other_zone: bool = True # - spawn_mode: str = DestModeOptions.DONE - - assert dwell_time >= 0, 'dwell_time cannot be < 0!' - assert spawn_frequency >= 0, 'spawn_frequency cannot be < 0!' - assert n_dests >= 0, 'n_destinations cannot be < 0!' - assert (spawn_mode == DestModeOptions.DONE) != bool(spawn_frequency) - - c = Constants a = Actions @@ -135,7 +22,7 @@ a = Actions class DestFactory(BaseFactory): # noinspection PyMissingConstructor - def __init__(self, *args, dest_prop: DestProperties = DestProperties(), rewards_dest: RewardsDest = RewardsDest(), + def __init__(self, *args, dest_prop: DestProperties = DestProperties(), rewards_dest: RewardsDest = RewardsDest(), env_seed=time.time_ns(), **kwargs): if isinstance(dest_prop, dict): dest_prop = DestProperties(**dest_prop) @@ -151,6 +38,7 @@ class DestFactory(BaseFactory): def actions_hook(self) -> Union[Action, List[Action]]: # noinspection PyUnresolvedReferences super_actions = super().actions_hook + # If targets are considers reached after some time, agents need an action for that. 
if self.dest_prop.dwell_time: super_actions.append(Action(enum_ident=a.WAIT_ON_DEST)) return super_actions @@ -207,7 +95,7 @@ class DestFactory(BaseFactory): if destinations_to_spawn: n_dest_to_spawn = len(destinations_to_spawn) if self.dest_prop.spawn_mode != DestModeOptions.GROUPED: - destinations = [Destination(tile, c.DEST) for tile in self[c.FLOOR].empty_tiles[:n_dest_to_spawn]] + destinations = [Destination(tile, self[c.DEST]) for tile in self[c.FLOOR].empty_tiles[:n_dest_to_spawn]] self[c.DEST].add_additional_items(destinations) for dest in destinations_to_spawn: del self._dest_spawn_timer[dest] @@ -229,9 +117,10 @@ class DestFactory(BaseFactory): super_reward_info = super().step_hook() for key, val in self._dest_spawn_timer.items(): self._dest_spawn_timer[key] = min(self.dest_prop.spawn_frequency, self._dest_spawn_timer[key] + 1) + for dest in list(self[c.DEST].values()): if dest.is_considered_reached: - dest.change_parent_collection(self[c.DEST]) + dest.change_parent_collection(self[c.DEST_REACHED]) self._dest_spawn_timer[dest.name] = 0 self.print(f'{dest.name} is reached now, removing...') else: @@ -251,18 +140,19 @@ class DestFactory(BaseFactory): additional_observations.update({c.DEST: self[c.DEST].as_array()}) return additional_observations - def per_agent_reward_hook(self, agent: Agent) -> Dict[str, dict]: + def per_agent_reward_hook(self, agent: Agent) -> List[dict]: # noinspection PyUnresolvedReferences - reward_event_dict = super().per_agent_reward_hook(agent) + reward_event_list = super().per_agent_reward_hook(agent) if len(self[c.DEST_REACHED]): for reached_dest in list(self[c.DEST_REACHED]): if agent.pos == reached_dest.pos: self.print(f'{agent.name} just reached destination at {agent.pos}') self[c.DEST_REACHED].delete_env_object(reached_dest) info_dict = {f'{agent.name}_{c.DEST_REACHED}': 1} - reward_event_dict.update({c.DEST_REACHED: {'reward': self.rewards_dest.DEST_REACHED, - 'info': info_dict}}) - return reward_event_dict + reward_event_list.append({'value': self.rewards_dest.DEST_REACHED, + 'reason': c.DEST_REACHED, + 'info': info_dict}) + return reward_event_list def render_assets_hook(self, mode='human'): # noinspection PyUnresolvedReferences diff --git a/environments/factory/additional/dirt/__init__.py b/environments/factory/additional/dirt/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/environments/factory/additional/dirt/dirt_collections.py b/environments/factory/additional/dirt/dirt_collections.py new file mode 100644 index 0000000..79ae2d4 --- /dev/null +++ b/environments/factory/additional/dirt/dirt_collections.py @@ -0,0 +1,42 @@ +from environments.factory.additional.dirt.dirt_entity import Dirt +from environments.factory.additional.dirt.dirt_util import DirtProperties +from environments.factory.base.objects import Floor +from environments.factory.base.registers import EntityCollection +from environments.factory.additional.dirt.dirt_util import Constants as c + + +class DirtRegister(EntityCollection): + + _accepted_objects = Dirt + + @property + def amount(self): + return sum([dirt.amount for dirt in self]) + + @property + def dirt_properties(self): + return self._dirt_properties + + def __init__(self, dirt_properties, *args): + super(DirtRegister, self).__init__(*args) + self._dirt_properties: DirtProperties = dirt_properties + + def spawn_dirt(self, then_dirty_tiles) -> bool: + if isinstance(then_dirty_tiles, Floor): + then_dirty_tiles = [then_dirty_tiles] + for tile in then_dirty_tiles: + if not self.amount > 
self.dirt_properties.max_global_amount: + dirt = self.by_pos(tile.pos) + if dirt is None: + dirt = Dirt(tile, self, amount=self.dirt_properties.max_spawn_amount) + self.add_item(dirt) + else: + new_value = dirt.amount + self.dirt_properties.max_spawn_amount + dirt.set_new_amount(min(new_value, self.dirt_properties.max_local_amount)) + else: + return c.NOT_VALID + return c.VALID + + def __repr__(self): + s = super(DirtRegister, self).__repr__() + return f'{s[:-1]}, {self.amount})' diff --git a/environments/factory/additional/dirt/dirt_entity.py b/environments/factory/additional/dirt/dirt_entity.py new file mode 100644 index 0000000..69c94ca --- /dev/null +++ b/environments/factory/additional/dirt/dirt_entity.py @@ -0,0 +1,26 @@ +from environments.factory.base.objects import Entity + + +class Dirt(Entity): + + @property + def amount(self): + return self._amount + + @property + def encoding(self): + # Edit this if you want items to be drawn in the ops differntly + return self._amount + + def __init__(self, *args, amount=None, **kwargs): + super(Dirt, self).__init__(*args, **kwargs) + self._amount = amount + + def set_new_amount(self, amount): + self._amount = amount + self._collection.notify_change_to_value(self) + + def summarize_state(self, **kwargs): + state_dict = super().summarize_state(**kwargs) + state_dict.update(amount=float(self.amount)) + return state_dict diff --git a/environments/factory/additional/dirt/dirt_util.py b/environments/factory/additional/dirt/dirt_util.py new file mode 100644 index 0000000..b5a23bf --- /dev/null +++ b/environments/factory/additional/dirt/dirt_util.py @@ -0,0 +1,30 @@ +from typing import NamedTuple + +from environments.helpers import Constants as BaseConstants, EnvActions as BaseActions + + +class Constants(BaseConstants): + DIRT = 'Dirt' + + +class Actions(BaseActions): + CLEAN_UP = 'do_cleanup_action' + + +class RewardsDirt(NamedTuple): + CLEAN_UP_VALID: float = 0.5 + CLEAN_UP_FAIL: float = -0.1 + CLEAN_UP_LAST_PIECE: float = 4.5 + + +class DirtProperties(NamedTuple): + initial_dirt_ratio: float = 0.3 # On INIT, on max how many tiles does the dirt spawn in percent. + initial_dirt_spawn_r_var: float = 0.05 # How much does the dirt spawn amount vary? + clean_amount: float = 1 # How much does the robot clean with one actions. + max_spawn_ratio: float = 0.20 # On max how many tiles does the dirt spawn in percent. + max_spawn_amount: float = 0.3 # How much dirt does spawn per tile at max. + spawn_frequency: int = 0 # Spawn Frequency in Steps. + max_local_amount: int = 2 # Max dirt amount per tile. + max_global_amount: int = 20 # Max dirt amount in the whole environment. + dirt_smear_amount: float = 0.2 # Agents smear dirt, when not cleaning up in place. 
+ done_when_clean: bool = True diff --git a/environments/factory/factory_dirt.py b/environments/factory/additional/dirt/factory_dirt.py similarity index 71% rename from environments/factory/factory_dirt.py rename to environments/factory/additional/dirt/factory_dirt.py index ecd4dc9..dcdcd9d 100644 --- a/environments/factory/factory_dirt.py +++ b/environments/factory/additional/dirt/factory_dirt.py @@ -1,111 +1,22 @@ import time from pathlib import Path -from typing import List, Union, NamedTuple, Dict +from typing import List, Union, Dict import random import numpy as np -from algorithms.TSP_dirt_agent import TSPDirtAgent -from environments.helpers import Constants as BaseConstants -from environments.helpers import EnvActions as BaseActions +from environments.factory.additional.dirt.dirt_collections import DirtRegister +from environments.factory.additional.dirt.dirt_entity import Dirt +from environments.factory.additional.dirt.dirt_util import Constants, Actions, RewardsDirt, DirtProperties from environments.factory.base.base_factory import BaseFactory -from environments.factory.base.objects import Agent, Action, Entity, Floor -from environments.factory.base.registers import Entities, EntityCollection +from environments.factory.base.objects import Agent, Action +from environments.factory.base.registers import Entities from environments.factory.base.renderer import RenderEntity from environments.utility_classes import ObservationProperties -class Constants(BaseConstants): - DIRT = 'Dirt' - - -class Actions(BaseActions): - CLEAN_UP = 'do_cleanup_action' - - -class RewardsDirt(NamedTuple): - CLEAN_UP_VALID: float = 0.5 - CLEAN_UP_FAIL: float = -0.1 - CLEAN_UP_LAST_PIECE: float = 4.5 - - -class DirtProperties(NamedTuple): - initial_dirt_ratio: float = 0.3 # On INIT, on max how many tiles does the dirt spawn in percent. - initial_dirt_spawn_r_var: float = 0.05 # How much does the dirt spawn amount vary? - clean_amount: float = 1 # How much does the robot clean with one actions. - max_spawn_ratio: float = 0.20 # On max how many tiles does the dirt spawn in percent. - max_spawn_amount: float = 0.3 # How much dirt does spawn per tile at max. - spawn_frequency: int = 0 # Spawn Frequency in Steps. - max_local_amount: int = 2 # Max dirt amount per tile. - max_global_amount: int = 20 # Max dirt amount in the whole environment. - dirt_smear_amount: float = 0.2 # Agents smear dirt, when not cleaning up in place. 
- done_when_clean: bool = True - - -class Dirt(Entity): - - @property - def amount(self): - return self._amount - - @property - def encoding(self): - # Edit this if you want items to be drawn in the ops differntly - return self._amount - - def __init__(self, *args, amount=None, **kwargs): - super(Dirt, self).__init__(*args, **kwargs) - self._amount = amount - - def set_new_amount(self, amount): - self._amount = amount - self._collection.notify_change_to_value(self) - - def summarize_state(self, **kwargs): - state_dict = super().summarize_state(**kwargs) - state_dict.update(amount=float(self.amount)) - return state_dict - - -class DirtRegister(EntityCollection): - - _accepted_objects = Dirt - - @property - def amount(self): - return sum([dirt.amount for dirt in self]) - - @property - def dirt_properties(self): - return self._dirt_properties - - def __init__(self, dirt_properties, *args): - super(DirtRegister, self).__init__(*args) - self._dirt_properties: DirtProperties = dirt_properties - - def spawn_dirt(self, then_dirty_tiles) -> bool: - if isinstance(then_dirty_tiles, Floor): - then_dirty_tiles = [then_dirty_tiles] - for tile in then_dirty_tiles: - if not self.amount > self.dirt_properties.max_global_amount: - dirt = self.by_pos(tile.pos) - if dirt is None: - dirt = Dirt(tile, self, amount=self.dirt_properties.max_spawn_amount) - self.add_item(dirt) - else: - new_value = dirt.amount + self.dirt_properties.max_spawn_amount - dirt.set_new_amount(min(new_value, self.dirt_properties.max_local_amount)) - else: - return c.NOT_VALID - return c.VALID - - def __repr__(self): - s = super(DirtRegister, self).__repr__() - return f'{s[:-1]}, {self.amount})' - - def softmax(x): """Compute softmax values for each sets of scores in x.""" e_x = np.exp(x - np.max(x)) @@ -200,8 +111,8 @@ class DirtFactory(BaseFactory): super_reward_info = super().step_hook() if smear_amount := self.dirt_prop.dirt_smear_amount: for agent in self[c.AGENT]: - if agent.temp_valid and agent.last_pos != c.NO_POS: - if self._actions.is_moving_action(agent.temp_action): + if agent.step_result['action_valid'] and agent.last_pos != c.NO_POS: + if self._actions.is_moving_action(agent.step_result['action_name']): if old_pos_dirt := self[c.DIRT].by_pos(agent.last_pos): if smeared_dirt := round(old_pos_dirt.amount * smear_amount, 2): old_pos_dirt.set_new_amount(max(0, old_pos_dirt.amount-smeared_dirt)) @@ -248,8 +159,8 @@ class DirtFactory(BaseFactory): additional_observations.update({c.DIRT: self[c.DIRT].as_array()}) return additional_observations - def gather_additional_info(self, agent: Agent) -> dict: - event_reward_dict = super().per_agent_reward_hook(agent) + def post_step_hook(self) -> List[Dict[str, int]]: + super_post_step = super(DirtFactory, self).post_step_hook() info_dict = dict() dirt = [dirt.amount for dirt in self[c.DIRT]] @@ -264,8 +175,8 @@ class DirtFactory(BaseFactory): info_dict.update(dirt_amount=current_dirt_amount) info_dict.update(dirty_tile_count=dirty_tile_count) - event_reward_dict.update({'info': info_dict}) - return event_reward_dict + super_post_step.append(info_dict) + return super_post_step if __name__ == '__main__': @@ -304,7 +215,6 @@ if __name__ == '__main__': # inject_agents=[TSPDirtAgent], ) - factory.save_params(Path('rewards_param')) # noinspection DuplicatedCode n_actions = factory.action_space.n - 1 diff --git a/environments/factory/additional/item/__init__.py b/environments/factory/additional/item/__init__.py new file mode 100644 index 0000000..e69de29 diff --git 
a/environments/factory/factory_item.py b/environments/factory/additional/item/factory_item.py similarity index 59% rename from environments/factory/factory_item.py rename to environments/factory/additional/item/factory_item.py index 8d91e7f..dc6b3e8 100644 --- a/environments/factory/factory_item.py +++ b/environments/factory/additional/item/factory_item.py @@ -1,179 +1,16 @@ import time -from collections import deque -from typing import List, Union, NamedTuple, Dict +from typing import List, Union, Dict import numpy as np import random +from environments.factory.additional.item.item_collections import ItemRegister, Inventories, DropOffLocations +from environments.factory.additional.item.item_util import Constants, Actions, RewardsItem, ItemProperties from environments.factory.base.base_factory import BaseFactory -from environments.helpers import Constants as BaseConstants -from environments.helpers import EnvActions as BaseActions -from environments import helpers as h -from environments.factory.base.objects import Agent, Entity, Action, Floor -from environments.factory.base.registers import Entities, EntityCollection, BoundEnvObjCollection, ObjectCollection +from environments.factory.base.objects import Agent, Action +from environments.factory.base.registers import Entities from environments.factory.base.renderer import RenderEntity - -class Constants(BaseConstants): - NO_ITEM = 0 - ITEM_DROP_OFF = 1 - # Item Env - ITEM = 'Item' - INVENTORY = 'Inventory' - DROP_OFF = 'Drop_Off' - - -class Actions(BaseActions): - ITEM_ACTION = 'ITEMACTION' - - -class RewardsItem(NamedTuple): - DROP_OFF_VALID: float = 0.1 - DROP_OFF_FAIL: float = -0.1 - PICK_UP_FAIL: float = -0.1 - PICK_UP_VALID: float = 0.1 - - -class Item(Entity): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self._auto_despawn = -1 - - @property - def auto_despawn(self): - return self._auto_despawn - - @property - def encoding(self): - # Edit this if you want items to be drawn in the ops differently - return 1 - - def set_auto_despawn(self, auto_despawn): - self._auto_despawn = auto_despawn - - def set_tile_to(self, no_pos_tile): - assert self._collection.__class__.__name__ != ItemRegister.__class__ - self._tile = no_pos_tile - - -class ItemRegister(EntityCollection): - - _accepted_objects = Item - - def spawn_items(self, tiles: List[Floor]): - items = [Item(tile, self) for tile in tiles] - self.add_additional_items(items) - - def despawn_items(self, items: List[Item]): - items = [items] if isinstance(items, Item) else items - for item in items: - del self[item] - - -class Inventory(BoundEnvObjCollection): - - @property - def name(self): - return f'{self.__class__.__name__}({self._bound_entity.name})' - - def __init__(self, agent: Agent, capacity: int, *args, **kwargs): - super(Inventory, self).__init__(agent, *args, is_blocking_light=False, can_be_shadowed=False, **kwargs) - self.capacity = capacity - - def as_array(self): - if self._array is None: - self._array = np.zeros((1, *self._shape)) - return super(Inventory, self).as_array() - - def summarize_states(self, **kwargs): - attr_dict = {key: str(val) for key, val in self.__dict__.items() if not key.startswith('_') and key != 'data'} - attr_dict.update(dict(items={key: val.summarize_state(**kwargs) for key, val in self.items()})) - attr_dict.update(dict(name=self.name)) - return attr_dict - - def pop(self): - item_to_pop = self[0] - self.delete_env_object(item_to_pop) - return item_to_pop - - -class Inventories(ObjectCollection): - - _accepted_objects 
= Inventory - is_blocking_light = False - can_be_shadowed = False - - def __init__(self, obs_shape, *args, **kwargs): - super(Inventories, self).__init__(*args, is_per_agent=True, individual_slices=True, **kwargs) - self._obs_shape = obs_shape - - def as_array(self): - return np.stack([inventory.as_array() for inv_idx, inventory in enumerate(self)]) - - def spawn_inventories(self, agents, capacity): - inventories = [self._accepted_objects(agent, capacity, self._obs_shape) - for _, agent in enumerate(agents)] - self.add_additional_items(inventories) - - def idx_by_entity(self, entity): - try: - return next((idx for idx, inv in enumerate(self) if inv.belongs_to_entity(entity))) - except StopIteration: - return None - - def by_entity(self, entity): - try: - return next((inv for inv in self if inv.belongs_to_entity(entity))) - except StopIteration: - return None - - def summarize_states(self, **kwargs): - return {key: val.summarize_states(**kwargs) for key, val in self.items()} - - -class DropOffLocation(Entity): - - @property - def encoding(self): - return Constants.ITEM_DROP_OFF - - def __init__(self, *args, storage_size_until_full: int = 5, auto_item_despawn_interval: int = 5, **kwargs): - super(DropOffLocation, self).__init__(*args, **kwargs) - self.auto_item_despawn_interval = auto_item_despawn_interval - self.storage = deque(maxlen=storage_size_until_full or None) - - def place_item(self, item: Item): - if self.is_full: - raise RuntimeWarning("There is currently no way to clear the storage or make it unfull.") - return c.NOT_VALID - else: - self.storage.append(item) - item.set_auto_despawn(self.auto_item_despawn_interval) - return c.VALID - - @property - def is_full(self): - return False if not self.storage.maxlen else self.storage.maxlen == len(self.storage) - - def summarize_state(self, n_steps=None) -> dict: - if n_steps == h.STEPS_START: - return super().summarize_state(n_steps=n_steps) - - -class DropOffLocations(EntityCollection): - - _accepted_objects = DropOffLocation - - -class ItemProperties(NamedTuple): - n_items: int = 5 # How many items are there at the same time - spawn_frequency: int = 10 # Spawn Frequency in Steps - n_drop_off_locations: int = 5 # How many DropOff locations are there at the same time - max_dropoff_storage_size: int = 0 # How many items are needed until the dropoff is full - max_agent_inventory_capacity: int = 5 # How many items are needed until the agent inventory is full - - c = Constants a = Actions diff --git a/environments/factory/additional/item/item_collections.py b/environments/factory/additional/item/item_collections.py new file mode 100644 index 0000000..7a12b88 --- /dev/null +++ b/environments/factory/additional/item/item_collections.py @@ -0,0 +1,87 @@ +from typing import List + +import numpy as np + +from environments.factory.base.objects import Floor, Agent +from environments.factory.base.registers import EntityCollection, BoundEnvObjCollection, ObjectCollection +from environments.factory.additional.item.item_entities import Item, DropOffLocation + + +class ItemRegister(EntityCollection): + + _accepted_objects = Item + + def spawn_items(self, tiles: List[Floor]): + items = [Item(tile, self) for tile in tiles] + self.add_additional_items(items) + + def despawn_items(self, items: List[Item]): + items = [items] if isinstance(items, Item) else items + for item in items: + del self[item] + + +class Inventory(BoundEnvObjCollection): + + @property + def name(self): + return f'{self.__class__.__name__}({self._bound_entity.name})' + + def 
__init__(self, agent: Agent, capacity: int, *args, **kwargs): + super(Inventory, self).__init__(agent, *args, is_blocking_light=False, can_be_shadowed=False, **kwargs) + self.capacity = capacity + + def as_array(self): + if self._array is None: + self._array = np.zeros((1, *self._shape)) + return super(Inventory, self).as_array() + + def summarize_states(self, **kwargs): + attr_dict = {key: str(val) for key, val in self.__dict__.items() if not key.startswith('_') and key != 'data'} + attr_dict.update(dict(items={key: val.summarize_state(**kwargs) for key, val in self.items()})) + attr_dict.update(dict(name=self.name)) + return attr_dict + + def pop(self): + item_to_pop = self[0] + self.delete_env_object(item_to_pop) + return item_to_pop + + +class Inventories(ObjectCollection): + + _accepted_objects = Inventory + is_blocking_light = False + can_be_shadowed = False + + def __init__(self, obs_shape, *args, **kwargs): + super(Inventories, self).__init__(*args, is_per_agent=True, individual_slices=True, **kwargs) + self._obs_shape = obs_shape + + def as_array(self): + return np.stack([inventory.as_array() for inv_idx, inventory in enumerate(self)]) + + def spawn_inventories(self, agents, capacity): + inventories = [self._accepted_objects(agent, capacity, self._obs_shape) + for _, agent in enumerate(agents)] + self.add_additional_items(inventories) + + def idx_by_entity(self, entity): + try: + return next((idx for idx, inv in enumerate(self) if inv.belongs_to_entity(entity))) + except StopIteration: + return None + + def by_entity(self, entity): + try: + return next((inv for inv in self if inv.belongs_to_entity(entity))) + except StopIteration: + return None + + def summarize_states(self, **kwargs): + return {key: val.summarize_states(**kwargs) for key, val in self.items()} + + +class DropOffLocations(EntityCollection): + + _accepted_objects = DropOffLocation diff --git a/environments/factory/additional/item/item_entities.py b/environments/factory/additional/item/item_entities.py new file mode 100644 index 0000000..c8566c8 --- /dev/null +++ b/environments/factory/additional/item/item_entities.py @@ -0,0 +1,61 @@ +from collections import deque + +from environments import helpers as h +from environments.factory.additional.item.item_util import Constants +from environments.factory.base.objects import Entity + + +class Item(Entity): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._auto_despawn = -1 + + @property + def auto_despawn(self): + return self._auto_despawn + + @property + def encoding(self): + # Edit this if you want items to be drawn in the ops differently + return 1 + + def set_auto_despawn(self, auto_despawn): + self._auto_despawn = auto_despawn + + def set_tile_to(self, no_pos_tile): + self._tile = no_pos_tile + + def summarize_state(self, **_) -> dict: + super_summarization = super(Item, self).summarize_state() + super_summarization.update(dict(auto_despawn=self.auto_despawn)) + return super_summarization + + +class DropOffLocation(Entity): + + @property + def encoding(self): + return Constants.ITEM_DROP_OFF + + def __init__(self, *args, storage_size_until_full: int = 5, auto_item_despawn_interval: int = 5, **kwargs): + super(DropOffLocation, self).__init__(*args, **kwargs) + self.auto_item_despawn_interval = auto_item_despawn_interval + self.storage = deque(maxlen=storage_size_until_full or None) + + def place_item(self, item: Item): + if self.is_full: + raise RuntimeWarning("There is currently no way to clear the storage or make it unfull.") + 
return c.NOT_VALID + else: + self.storage.append(item) + item.set_auto_despawn(self.auto_item_despawn_interval) + return Constants.VALID + + @property + def is_full(self): + return False if not self.storage.maxlen else self.storage.maxlen == len(self.storage) + + def summarize_state(self, n_steps=None) -> dict: + if n_steps == h.STEPS_START: + return super().summarize_state(n_steps=n_steps) diff --git a/environments/factory/additional/item/item_util.py b/environments/factory/additional/item/item_util.py new file mode 100644 index 0000000..f9f4eb8 --- /dev/null +++ b/environments/factory/additional/item/item_util.py @@ -0,0 +1,31 @@ +from typing import NamedTuple + +from environments.helpers import Constants as BaseConstants, EnvActions as BaseActions + + +class Constants(BaseConstants): + NO_ITEM = 0 + ITEM_DROP_OFF = 1 + # Item Env + ITEM = 'Item' + INVENTORY = 'Inventory' + DROP_OFF = 'Drop_Off' + + +class Actions(BaseActions): + ITEM_ACTION = 'ITEMACTION' + + +class RewardsItem(NamedTuple): + DROP_OFF_VALID: float = 0.1 + DROP_OFF_FAIL: float = -0.1 + PICK_UP_FAIL: float = -0.1 + PICK_UP_VALID: float = 0.1 + + +class ItemProperties(NamedTuple): + n_items: int = 5 # How many items are there at the same time + spawn_frequency: int = 10 # Spawn Frequency in Steps + n_drop_off_locations: int = 5 # How many DropOff locations are there at the same time + max_dropoff_storage_size: int = 0 # How many items are needed until the dropoff is full + max_agent_inventory_capacity: int = 5 # How many items are needed until the agent inventory is full diff --git a/environments/factory/base/base_factory.py b/environments/factory/base/base_factory.py index cd397e4..7233270 100644 --- a/environments/factory/base/base_factory.py +++ b/environments/factory/base/base_factory.py @@ -180,6 +180,7 @@ class BaseFactory(gym.Env): self._entities.add_additional_items({c.DOORS: doors}) # Actions + # TODO: Move this to Agent init, so that agents can have individual action sets. self._actions = Actions(self.mv_prop, can_use_doors=self.parse_doors) if additional_actions := self.actions_hook: self._actions.add_additional_items(additional_actions) @@ -308,7 +309,8 @@ class BaseFactory(gym.Env): info.update(self._summarize_state()) # Post step Hook for later use - info.update(self.post_step_hook()) + for post_step_info in self.post_step_hook(): + info.update(post_step_info) obs, _ = self._build_observations() @@ -367,14 +369,16 @@ class BaseFactory(gym.Env): agent_obs = global_agent_obs.copy() agent_obs[(0, *agent.pos)] -= agent.encoding else: - agent_obs = global_agent_obs + agent_obs = global_agent_obs.copy() else: + # agent_obs == None!!!!! 
agent_obs = global_agent_obs # Build Level Observations if self.obs_prop.render_agents == a_obs.LEVEL: + assert agent_obs is not None lvl_obs = lvl_obs.copy() - lvl_obs += global_agent_obs + lvl_obs += agent_obs obs_dict[c.WALLS] = lvl_obs if self.obs_prop.render_agents in [a_obs.SEPERATE, a_obs.COMBINED] and agent_obs is not None: @@ -600,7 +604,9 @@ class BaseFactory(gym.Env): for reward in agent.step_result['rewards']: combined_info_dict.update(reward['info']) + # Combine Info dicts into a global one combined_info_dict = dict(combined_info_dict) + combined_info_dict.update(info) global_reward_sum = sum(global_env_rewards) @@ -616,9 +622,11 @@ class BaseFactory(gym.Env): def start_recording(self): self._record_episodes = True + return self._record_episodes def stop_recording(self): self._record_episodes = False + return not self._record_episodes # noinspection PyGlobalUndefined def render(self, mode='human'): @@ -719,12 +727,12 @@ class BaseFactory(gym.Env): return {} @abc.abstractmethod - def per_agent_reward_hook(self, agent: Agent) -> Dict[str, dict]: - return {} + def per_agent_reward_hook(self, agent: Agent) -> List[dict]: + return [] @abc.abstractmethod - def post_step_hook(self) -> dict: - return {} + def post_step_hook(self) -> List[dict]: + return [] @abc.abstractmethod def per_agent_raw_observations_hook(self, agent) -> Dict[str, np.typing.ArrayLike]: diff --git a/environments/factory/base/objects.py b/environments/factory/base/objects.py index 8980b39..0e6fbab 100644 --- a/environments/factory/base/objects.py +++ b/environments/factory/base/objects.py @@ -119,7 +119,6 @@ class Entity(EnvObject): def __repr__(self): return super(Entity, self).__repr__() + f'(@{self.pos})' -# With Position in Env # TODO: Missing Documentation diff --git a/environments/factory/base/registers.py b/environments/factory/base/registers.py index 66ca818..5bc800a 100644 --- a/environments/factory/base/registers.py +++ b/environments/factory/base/registers.py @@ -117,7 +117,7 @@ class EnvObjectCollection(ObjectCollection): return self._array def summarize_states(self, n_steps=None): - return [val.summarize_state(n_steps=n_steps) for val in self.values()] + return [entity.summarize_state(n_steps=n_steps) for entity in self.values()] def notify_change_to_free(self, env_object: EnvObject): self._array_change_notifyer(env_object, value=c.FREE_CELL) diff --git a/environments/factory/factory_battery.py b/environments/factory/factory_battery.py deleted file mode 100644 index 0ab132c..0000000 --- a/environments/factory/factory_battery.py +++ /dev/null @@ -1,273 +0,0 @@ -from typing import Union, NamedTuple, Dict, List - -import numpy as np - -from environments.factory.base.base_factory import BaseFactory -from environments.factory.base.objects import Agent, Action, Entity, EnvObject, BoundingMixin -from environments.factory.base.registers import EntityCollection, EnvObjectCollection -from environments.factory.base.renderer import RenderEntity -from environments.helpers import Constants as BaseConstants -from environments.helpers import EnvActions as BaseActions - -from environments import helpers as h - - -class Constants(BaseConstants): - # Battery Env - CHARGE_PODS = 'Charge_Pod' - BATTERIES = 'BATTERIES' - BATTERY_DISCHARGED = 'DISCHARGED' - CHARGE_POD = 1 - - -class Actions(BaseActions): - CHARGE = 'do_charge_action' - - -class RewardsBtry(NamedTuple): - CHARGE_VALID: float = 0.1 - CHARGE_FAIL: float = -0.1 - BATTERY_DISCHARGED: float = -1.0 - - -class BatteryProperties(NamedTuple): - initial_charge: float 
= 0.8 # - charge_rate: float = 0.4 # - charge_locations: int = 20 # - per_action_costs: Union[dict, float] = 0.02 - done_when_discharged = False - multi_charge: bool = False - - -c = Constants -a = Actions - - -class Battery(BoundingMixin, EnvObject): - - @property - def is_discharged(self): - return self.charge_level == 0 - - def __init__(self, initial_charge_level: float, *args, **kwargs): - super(Battery, self).__init__(*args, **kwargs) - self.charge_level = initial_charge_level - - def encoding(self): - return self.charge_level - - def do_charge_action(self, amount): - if self.charge_level < 1: - # noinspection PyTypeChecker - self.charge_level = min(1, amount + self.charge_level) - return c.VALID - else: - return c.NOT_VALID - - def decharge(self, amount) -> c: - if self.charge_level != 0: - # noinspection PyTypeChecker - self.charge_level = max(0, amount + self.charge_level) - self._collection.notify_change_to_value(self) - return c.VALID - else: - return c.NOT_VALID - - def summarize_state(self, **_): - attr_dict = {key: str(val) for key, val in self.__dict__.items() if not key.startswith('_') and key != 'data'} - attr_dict.update(dict(name=self.name)) - return attr_dict - - -class BatteriesRegister(EnvObjectCollection): - - _accepted_objects = Battery - - def __init__(self, *args, **kwargs): - super(BatteriesRegister, self).__init__(*args, individual_slices=True, - is_blocking_light=False, can_be_shadowed=False, **kwargs) - self.is_observable = True - - def spawn_batteries(self, agents, initial_charge_level): - batteries = [self._accepted_objects(initial_charge_level, agent, self) for _, agent in enumerate(agents)] - self.add_additional_items(batteries) - - def summarize_states(self, n_steps=None): - # as dict with additional nesting - # return dict(items=super(Inventories, cls).summarize_states()) - return super(BatteriesRegister, self).summarize_states(n_steps=n_steps) - - # Todo Move this to Mixin! 
- def by_entity(self, entity): - try: - return next((x for x in self if x.belongs_to_entity(entity))) - except StopIteration: - return None - - def idx_by_entity(self, entity): - try: - return next((idx for idx, x in enumerate(self) if x.belongs_to_entity(entity))) - except StopIteration: - return None - - def as_array_by_entity(self, entity): - return self._array[self.idx_by_entity(entity)] - - -class ChargePod(Entity): - - @property - def encoding(self): - return c.CHARGE_POD - - def __init__(self, *args, charge_rate: float = 0.4, - multi_charge: bool = False, **kwargs): - super(ChargePod, self).__init__(*args, **kwargs) - self.charge_rate = charge_rate - self.multi_charge = multi_charge - - def charge_battery(self, battery: Battery): - if battery.charge_level == 1.0: - return c.NOT_VALID - if sum(guest for guest in self.tile.guests if 'agent' in guest.name.lower()) > 1: - return c.NOT_VALID - valid = battery.do_charge_action(self.charge_rate) - return valid - - def summarize_state(self, n_steps=None) -> dict: - if n_steps == h.STEPS_START: - summary = super().summarize_state(n_steps=n_steps) - return summary - - -class ChargePods(EntityCollection): - - _accepted_objects = ChargePod - - def __repr__(self): - super(ChargePods, self).__repr__() - - -class BatteryFactory(BaseFactory): - - def __init__(self, *args, btry_prop=BatteryProperties(), rewards_dest: RewardsBtry = RewardsBtry(), - **kwargs): - if isinstance(btry_prop, dict): - btry_prop = BatteryProperties(**btry_prop) - if isinstance(rewards_dest, dict): - rewards_dest = BatteryProperties(**rewards_dest) - self.btry_prop = btry_prop - self.rewards_dest = rewards_dest - super().__init__(*args, **kwargs) - - def per_agent_raw_observations_hook(self, agent) -> Dict[str, np.typing.ArrayLike]: - additional_raw_observations = super().per_agent_raw_observations_hook(agent) - additional_raw_observations.update({c.BATTERIES: self[c.BATTERIES].as_array_by_entity(agent)}) - return additional_raw_observations - - def observations_hook(self) -> Dict[str, np.typing.ArrayLike]: - additional_observations = super().observations_hook() - additional_observations.update({c.CHARGE_PODS: self[c.CHARGE_PODS].as_array()}) - return additional_observations - - @property - def entities_hook(self): - super_entities = super().entities_hook - - empty_tiles = self[c.FLOOR].empty_tiles[:self.btry_prop.charge_locations] - charge_pods = ChargePods.from_tiles( - empty_tiles, self._level_shape, - entity_kwargs=dict(charge_rate=self.btry_prop.charge_rate, - multi_charge=self.btry_prop.multi_charge) - ) - - batteries = BatteriesRegister(self._level_shape if not self._pomdp_r else ((self.pomdp_diameter,) * 2), - ) - batteries.spawn_batteries(self[c.AGENT], self.btry_prop.initial_charge) - super_entities.update({c.BATTERIES: batteries, c.CHARGE_PODS: charge_pods}) - return super_entities - - def step_hook(self) -> (List[dict], dict): - super_reward_info = super(BatteryFactory, self).step_hook() - - # Decharge - batteries = self[c.BATTERIES] - - for agent in self[c.AGENT]: - if isinstance(self.btry_prop.per_action_costs, dict): - energy_consumption = self.btry_prop.per_action_costs[agent.temp_action] - else: - energy_consumption = self.btry_prop.per_action_costs - - batteries.by_entity(agent).decharge(energy_consumption) - - return super_reward_info - - def do_charge_action(self, agent) -> (dict, dict): - if charge_pod := self[c.CHARGE_PODS].by_pos(agent.pos): - valid = charge_pod.charge_battery(self[c.BATTERIES].by_entity(agent)) - if valid: - info_dict = 
{f'{agent.name}_{a.CHARGE}_VALID': 1} - self.print(f'{agent.name} just charged batteries at {charge_pod.name}.') - else: - info_dict = {f'{agent.name}_{a.CHARGE}_FAIL': 1} - self.print(f'{agent.name} failed to charged batteries at {charge_pod.name}.') - else: - valid = c.NOT_VALID - info_dict = {f'{agent.name}_{a.CHARGE}_FAIL': 1} - # info_dict = {f'{agent.name}_no_charger': 1} - self.print(f'{agent.name} failed to charged batteries at {agent.pos}.') - reward = dict(value=self.rewards_dest.CHARGE_VALID if valid else self.rewards_dest.CHARGE_FAIL, - reason=a.CHARGE, info=info_dict) - return valid, reward - - def do_additional_actions(self, agent: Agent, action: Action) -> (bool, dict): - action_result = super().do_additional_actions(agent, action) - if action_result is None: - if action == a.CHARGE: - action_result = self.do_charge_action(agent) - return action_result - else: - return None - else: - return action_result - pass - - def reset_hook(self) -> None: - # There is Nothing to reset. - pass - - def check_additional_done(self) -> (bool, dict): - super_done, super_dict = super(BatteryFactory, self).check_additional_done() - if super_done: - return super_done, super_dict - else: - if self.btry_prop.done_when_discharged: - if btry_done := any(battery.is_discharged for battery in self[c.BATTERIES]): - super_dict.update(DISCHARGE_DONE=1) - return btry_done, super_dict - else: - pass - else: - pass - pass - - def per_agent_reward_hook(self, agent: Agent) -> Dict[str, dict]: - reward_event_dict = super(BatteryFactory, self).per_agent_reward_hook(agent) - if self[c.BATTERIES].by_entity(agent).is_discharged: - self.print(f'{agent.name} Battery is discharged!') - info_dict = {f'{agent.name}_{c.BATTERY_DISCHARGED}': 1} - reward_event_dict.update({c.BATTERY_DISCHARGED: {'reward': self.rewards_dest.BATTERY_DISCHARGED, - 'info': info_dict}} - ) - else: - # All Fine - pass - return reward_event_dict - - def render_assets_hook(self): - # noinspection PyUnresolvedReferences - additional_assets = super().render_assets_hook() - charge_pods = [RenderEntity(c.CHARGE_PODS, charge_pod.tile.pos) for charge_pod in self[c.CHARGE_PODS]] - additional_assets.extend(charge_pods) - return additional_assets diff --git a/environments/factory/factory_dirt_stationary_machines.py b/environments/factory/factory_dirt_stationary_machines.py index 377666e..7c79444 100644 --- a/environments/factory/factory_dirt_stationary_machines.py +++ b/environments/factory/factory_dirt_stationary_machines.py @@ -3,7 +3,9 @@ from typing import Dict, List, Union import numpy as np from environments.factory.base.objects import Agent, Entity, Action -from environments.factory.factory_dirt import Dirt, DirtRegister, DirtFactory +from environments.factory.factory_dirt import DirtFactory +from environments.factory.additional.dirt.dirt_collections import DirtRegister +from environments.factory.additional.dirt.dirt_entity import Dirt from environments.factory.base.objects import Floor from environments.factory.base.registers import Floors, Entities, EntityCollection @@ -28,7 +30,6 @@ class StationaryMachinesDirtFactory(DirtFactory): def entities_hook(self) -> Dict[(str, Entities)]: super_entities = super().entities_hook() - return super_entities def reset_hook(self) -> None: @@ -48,8 +49,8 @@ class StationaryMachinesDirtFactory(DirtFactory): super_per_agent_raw_observations = super().per_agent_raw_observations_hook(agent) return super_per_agent_raw_observations - def per_agent_reward_hook(self, agent: Agent) -> Dict[str, dict]: - pass + def 
per_agent_reward_hook(self, agent: Agent) -> List[dict]: + return super(StationaryMachinesDirtFactory, self).per_agent_reward_hook(agent) def pre_step_hook(self) -> None: pass diff --git a/environments/helpers.py b/environments/helpers.py index 526b612..5298092 100644 --- a/environments/helpers.py +++ b/environments/helpers.py @@ -173,9 +173,9 @@ class RewardsBase(NamedTuple): class ObservationTranslator: - def __init__(self, obs_shape_2d: (int, int), this_named_observation_space: Dict[str, dict], + def __init__(self, this_named_observation_space: Dict[str, dict], *per_agent_named_obs_spaces: Dict[str, dict], - placeholder_fill_value: Union[int, str] = 'N'): + placeholder_fill_value: Union[int, str, None] = None): """ This is a helper class, which converts agents observations from joined environments. For example, agents trained in different environments may expect different observations. @@ -183,8 +183,6 @@ class ObservationTranslator: A string identifier based approach is used. Currently, it is not possible to mix different obs shapes. - :param obs_shape_2d: The shape of the observation the agents expect. - :type obs_shape_2d: tuple(int, int) :param this_named_observation_space: `Named observation space` of the joined environment. :type this_named_observation_space: Dict[str, dict] @@ -196,15 +194,15 @@ class ObservationTranslator: :type placeholder_fill_value: Union[int, str] = 'N') """ - assert len(obs_shape_2d) == 2 - self.obs_shape = obs_shape_2d if isinstance(placeholder_fill_value, str): if placeholder_fill_value.lower() in ['normal', 'n']: - self.random_fill = lambda: np.random.normal(size=self.obs_shape) + self.random_fill = np.random.normal elif placeholder_fill_value.lower() in ['uniform', 'u']: - self.random_fill = lambda: np.random.uniform(size=self.obs_shape) + self.random_fill = np.random.uniform else: - raise ValueError('Please chooe between "uniform" or "normal"') + raise ValueError('Please chooe between "uniform" or "normal" ("u", "n").') + elif isinstance(placeholder_fill_value, int): + raise NotImplementedError('"Future Work."') else: self.random_fill = None @@ -213,9 +211,21 @@ class ObservationTranslator: def translate_observation(self, agent_idx: int, obs: np.ndarray): target_obs_space = self._per_agent_named_obs_space[agent_idx] - translation = [idx_space_dict for name, idx_space_dict in target_obs_space.items()] - flat_translation = [x for y in translation for x in y] - return np.take(obs, flat_translation, axis=1 if obs.ndim == 4 else 0) + translation = dict() + for name, idxs in target_obs_space.items(): + if name in self._this_named_obs_space: + for target_idx, this_idx in zip(idxs, self._this_named_obs_space[name]): + taken_slice = np.take(obs, [this_idx], axis=1 if obs.ndim == 4 else 0) + translation[target_idx] = taken_slice + elif random_fill := self.random_fill: + for target_idx in idxs: + translation[target_idx] = random_fill(size=obs.shape[:-3] + (1,) + obs.shape[-2:]) + else: + for target_idx in idxs: + translation[target_idx] = np.zeros(shape=(obs.shape[:-3] + (1,) + obs.shape[-2:])) + + translation = dict(sorted(translation.items())) + return np.concatenate(list(translation.values()), axis=-3) def translate_observations(self, observations: List[ArrayLike]): return [self.translate_observation(idx, observation) for idx, observation in enumerate(observations)] @@ -241,7 +251,10 @@ class ActionTranslator: """ self._target_named_action_space = target_named_action_space - self._per_agent_named_action_space = list(per_agent_named_action_space) + if 
isinstance(per_agent_named_action_space, (list, tuple)): + self._per_agent_named_action_space = per_agent_named_action_space + else: + self._per_agent_named_action_space = list(per_agent_named_action_space) self._per_agent_idx_actions = [{idx: a for a, idx in x.items()} for x in self._per_agent_named_action_space] def translate_action(self, agent_idx: int, action: int): diff --git a/environments/logging/envmonitor.py b/environments/logging/envmonitor.py index 2e18922..ad3df0d 100644 --- a/environments/logging/envmonitor.py +++ b/environments/logging/envmonitor.py @@ -71,8 +71,8 @@ class EnvMonitor(BaseCallback): pass return - def save_run(self, filepath: Union[Path, str], auto_plotting_keys=None): - filepath = Path(filepath) + def save_run(self, filepath: Union[Path, str, None] = None, auto_plotting_keys=None): + filepath = Path(filepath or self._filepath) filepath.parent.mkdir(exist_ok=True, parents=True) with filepath.open('wb') as f: pickle.dump(self._monitor_df.reset_index(), f, protocol=pickle.HIGHEST_PROTOCOL) diff --git a/environments/logging/recorder.py b/environments/logging/recorder.py index 795d4b9..9abf683 100644 --- a/environments/logging/recorder.py +++ b/environments/logging/recorder.py @@ -1,4 +1,5 @@ from collections import defaultdict +from os import PathLike from pathlib import Path from typing import Union @@ -12,11 +13,14 @@ from environments.factory.base.base_factory import REC_TAC class EnvRecorder(BaseCallback): - def __init__(self, env, entities='all'): + def __init__(self, env, entities: str = 'all', filepath: Union[str, PathLike] = None, freq: int = 0): super(EnvRecorder, self).__init__() + self.filepath = filepath self.unwrapped = env + self.freq = freq self._recorder_dict = defaultdict(list) self._recorder_out_list = list() + self._episode_counter = 1 if isinstance(entities, str): if entities.lower() == 'all': self._entities = None @@ -29,46 +33,47 @@ class EnvRecorder(BaseCallback): return getattr(self.unwrapped, item) def reset(self): - self.unwrapped.start_recording() + self._on_training_start() return self.unwrapped.reset() def _on_training_start(self) -> None: - self.unwrapped._record_episodes = True - pass + assert self.start_recording() def _read_info(self, env_idx, info: dict): if info_dict := {key.replace(REC_TAC, ''): val for key, val in info.items() if key.startswith(f'{REC_TAC}')}: if self._entities: info_dict = {k: v for k, v in info_dict.items() if k in self._entities} - - info_dict.update(episode=(self.num_timesteps + env_idx)) self._recorder_dict[env_idx].append(info_dict) else: pass - return + return True def _read_done(self, env_idx, done): if done: self._recorder_out_list.append({'steps': self._recorder_dict[env_idx], - 'episode': len(self._recorder_out_list)}) + 'episode': self._episode_counter}) self._recorder_dict[env_idx] = list() else: pass def step(self, actions): step_result = self.unwrapped.step(actions) - # 0, 1, 2 , 3 = idx - # _, _, done_bool, info_obj = step_result - self._read_info(0, step_result[3]) - self._read_done(0, step_result[2]) + self._on_step() return step_result - def save_records(self, filepath: Union[Path, str], save_occupation_map=False, save_trajectory_map=False): - filepath = Path(filepath) + def finalize(self): + self._on_training_end() + return True + + def save_records(self, filepath: Union[Path, str, None] = None, save_occupation_map=False, save_trajectory_map=False): + filepath = Path(filepath or self.filepath) filepath.parent.mkdir(exist_ok=True, parents=True) # cls.out_file.unlink(missing_ok=True) with 
filepath.open('w') as f: - out_dict = {'episodes': self._recorder_out_list, 'header': self.unwrapped.params} + out_dict = {'n_episodes': self._episode_counter, + 'header': self.unwrapped.params, + 'episodes': self._recorder_out_list + } try: simplejson.dump(out_dict, f, indent=4) except TypeError: @@ -76,6 +81,7 @@ if save_occupation_map: a = np.zeros((15, 15)) + # noinspection PyTypeChecker for episode in out_dict['episodes']: df = pd.DataFrame([y for x in episode['steps'] for y in x['Agents']]) @@ -94,15 +100,22 @@ raise NotImplementedError('This has not yet been implemented.') def _on_step(self) -> bool: + do_record = self.freq == -1 or self._episode_counter % self.freq == 0 for env_idx, info in enumerate(self.locals.get('infos', [])): - self._read_info(env_idx, info) - + if do_record: + self._read_info(env_idx, info) dones = list(enumerate(self.locals.get('dones', []))) dones.extend(list(enumerate(self.locals.get('done', [])))) for env_idx, done in dones: - self._read_done(env_idx, done) - + if do_record: + self._read_done(env_idx, done) + if done: + self._episode_counter += 1 return True def _on_training_end(self) -> None: + for env_idx in range(len(self._recorder_dict)): + if self._recorder_dict[env_idx]: + self._recorder_out_list.append({'steps': self._recorder_dict[env_idx], + 'episode': self._episode_counter}) pass diff --git a/environments/utility_classes.py b/environments/utility_classes.py index c91d42b..6c8f8c1 100644 --- a/environments/utility_classes.py +++ b/environments/utility_classes.py @@ -3,6 +3,21 @@ import gym from gym.wrappers.frame_stack import FrameStack +class EnvCombiner(object): + + def __init__(self, *envs_cls): + self._env_dict = {env_cls.__name__: env_cls for env_cls in envs_cls} + + @staticmethod + def combine_cls(name, *envs_cls): + return type(name, envs_cls, {}) + + def build(self): + name = f'{"".join([x.lower().replace("factory", "").capitalize() for x in self._env_dict.keys()])}Factory' + + return self.combine_cls(name, *self._env_dict.values()) + + class AgentRenderOptions(object): """ Class that specifies the available options for the way agents are represented in the env observation. @@ -46,7 +61,7 @@ class ObservationProperties(NamedTuple): Property holder; for setting multiple related parameters through a single parameter. Comes with default values. """ - """How to represent agents in the observation space. This may also alters the obs-shape.""" + """How to represent agents in the observation space. 
This may also alter the obs-shape.""" render_agents: AgentRenderOptions = AgentRenderOptions.SEPERATE """Obserations are build per agent; whether the current agent should be represented in its own observation.""" diff --git a/plotting/compare_runs.py b/plotting/compare_runs.py index 7496479..1963372 100644 --- a/plotting/compare_runs.py +++ b/plotting/compare_runs.py @@ -76,10 +76,11 @@ def compare_seed_runs(run_path: Union[str, PathLike], use_tex: bool = False): skip_n = round(df_melted['Episode'].max() * 0.02) df_melted = df_melted[df_melted['Episode'] % skip_n == 0] - if run_path.is_dir(): - prepare_plot(run_path / f'{run_path}_monitor_lineplot.png', df_melted, use_tex=use_tex) - elif run_path.exists() and run_path.is_file(): - prepare_plot(run_path.parent / f'{run_path.parent}_monitor_lineplot.png', df_melted, use_tex=use_tex) + run_path.mkdir(parents=True, exist_ok=True) + if run_path.exists() and run_path.is_file(): + prepare_plot(run_path.parent / f'{run_path.name}_monitor_lineplot.png', df_melted, use_tex=use_tex) + else: + prepare_plot(run_path / f'{run_path.name}_monitor_lineplot.png', df_melted, use_tex=use_tex) print('Plotting done.') diff --git a/quickstart/combine_and_monitor_rerun.py b/quickstart/combine_and_monitor_rerun.py new file mode 100644 index 0000000..6c6aea8 --- /dev/null +++ b/quickstart/combine_and_monitor_rerun.py @@ -0,0 +1,187 @@ +import sys +from pathlib import Path + +############################################## +# keep this for stand alone script execution # +############################################## +from environments.factory.base.base_factory import BaseFactory +from environments.logging.recorder import EnvRecorder + +try: + # noinspection PyUnboundLocalVariable + if __package__ is None: + DIR = Path(__file__).resolve().parent + sys.path.insert(0, str(DIR.parent)) + __package__ = DIR.name + else: + DIR = None +except NameError: + DIR = None + pass +############################################## +############################################## +############################################## + + +import simplejson + +from environments import helpers as h +from environments.factory.additional.combined_factories import DestBatteryFactory +from environments.factory.additional.dest.factory_dest import DestFactory +from environments.factory.additional.dirt.factory_dirt import DirtFactory +from environments.factory.additional.item.factory_item import ItemFactory +from environments.helpers import ObservationTranslator, ActionTranslator +from environments.logging.envmonitor import EnvMonitor +from environments.utility_classes import ObservationProperties, AgentRenderOptions, MovementProperties + + +def policy_model_kwargs(): + return dict(ent_coef=0.01) + + +def dqn_model_kwargs(): + return dict(buffer_size=50000, + learning_starts=64, + batch_size=64, + target_update_interval=5000, + exploration_fraction=0.25, + exploration_final_eps=0.025 + ) + + +def encapsule_env_factory(env_fctry, env_kwrgs): + + def _init(): + with env_fctry(**env_kwrgs) as init_env: + return init_env + + return _init + + +if __name__ == '__main__': + + # Define Global Env Parameters + # Define properties object parameters + factory_kwargs = dict( + max_steps=400, parse_doors=True, + level_name='rooms', + doors_have_area=True, verbose=False, + mv_prop=MovementProperties(allow_diagonal_movement=True, + allow_square_movement=True, + allow_no_op=False), + obs_prop=ObservationProperties( + frames_to_stack=3, + cast_shadows=True, + omit_agent_self=True, + 
render_agents=AgentRenderOptions.LEVEL, + additional_agent_placeholder=None, + ) + ) + + # Bundle all environments with global kwargs and parameters + # Todo: find a better solution, like auto module loading + env_map = {'DirtFactory': DirtFactory, + 'ItemFactory': ItemFactory, + 'DestFactory': DestFactory, + 'DestBatteryFactory': DestBatteryFactory + } + env_names = list(env_map.keys()) + + # Put all your multi-seed agents in a single folder; we do not need specific names etc. + available_models = dict() + available_envs = dict() + available_runs_kwargs = dict() + available_runs_agents = dict() + max_seed = 0 + # Define this folder + combinations_path = Path('combinations') + # Those are all differently trained combinations of models, envs and parameters + for combination in (x for x in combinations_path.iterdir() if x.is_dir()): + # These are all the models for this specific combination + for model_run in (x for x in combination.iterdir() if x.is_dir()): + model_name, env_name = model_run.name.split('_')[:2] + if model_name not in available_models: + available_models[model_name] = h.MODEL_MAP[model_name] + if env_name not in available_envs: + available_envs[env_name] = env_map[env_name] + # Those are all available seeds + for seed_run in (x for x in model_run.iterdir() if x.is_dir()): + max_seed = max(int(seed_run.name.split('_')[0]), max_seed) + # Read the env configuration from disk + with next(seed_run.glob('env_params.json')).open('r') as f: + env_kwargs = simplejson.load(f) + available_runs_kwargs[seed_run.name] = env_kwargs + # Read the trained model_path from disk + model_path = next(seed_run.glob('model.zip')) + available_runs_agents[seed_run.name] = model_path + + # We start by combining all SAME MODEL CLASSES per available Seed, across ALL available ENVIRONMENTS. + for model_name, model_cls in available_models.items(): + for seed in range(max_seed): + combined_env_kwargs = dict() + model_paths = list() + comparable_runs = {key: val for key, val in available_runs_kwargs.items() if ( + key.startswith(str(seed)) and model_name in key and key != 'key') + } + for name, run_kwargs in comparable_runs.items(): + # Select trained agent as a candidate: + model_paths.append(available_runs_agents[name]) + # Sort Env Kwargs: + for key, val in run_kwargs.items(): + if key not in combined_env_kwargs: + combined_env_kwargs.update({key: val}) + else: + assert combined_env_kwargs[key] == val, "Check the combinations you try to make!" + + # Update and combine all kwargs to account for multiple agents etc. + # We cannot capture all configuration cases! 
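+ # Fall back to the global factory_kwargs for every setting the stored runs did not specify and + # assert that keys present in both places agree, so the combined environment stays consistent.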
+ for key, val in factory_kwargs.items(): + if key not in combined_env_kwargs: + combined_env_kwargs[key] = val + else: + assert combined_env_kwargs[key] == val + combined_env_kwargs.update(n_agents=len(comparable_runs)) + + with(type("CombinedEnv", tuple(available_envs.values()), {})(**combined_env_kwargs)) as combEnv: + # EnvMonitor Init + comb = f'comb_{model_name}_{seed}' + comb_monitor_path = combinations_path / comb / f'{comb}_monitor.pick' + comb_recorder_path = combinations_path / comb / f'{comb}_recorder.pick' + comb_monitor_path.parent.mkdir(parents=True, exist_ok=True) + + monitoredCombEnv = EnvMonitor(combEnv, filepath=comb_monitor_path) + # monitoredCombEnv = EnvRecorder(monitoredCombEnv, filepath=comb_monitor_path) + + # Evaluation starts here ##################################################### + # Load all models + loaded_models = [available_models[model_name].load(model_path) for model_path in model_paths] + obs_translators = ObservationTranslator( + monitoredCombEnv.named_observation_space, + *[agent.named_observation_space for agent in loaded_models], + placeholder_fill_value='n') + act_translators = ActionTranslator( + monitoredCombEnv.named_action_space, + *(agent.named_action_space for agent in loaded_models) + ) + + for episode in range(50): + obs, _ = monitoredCombEnv.reset(), monitoredCombEnv.render() + rew, done_bool = 0, False + while not done_bool: + actions = [] + for i, model in enumerate(loaded_models): + pred = model.predict(obs_translators.translate_observation(i, obs[i]))[0] + actions.append(act_translators.translate_action(i, pred)) + + obs, step_r, done_bool, info_obj = monitoredCombEnv.step(actions) + + rew += step_r + monitoredCombEnv.render() + if done_bool: + break + print(f'Factory run {episode} done, reward is:\n {rew}') + # Eval monitor outputs are automatically stored by the monitor object + # TODO: Plotting + monitoredCombEnv.save_records(comb_monitor_path) + monitoredCombEnv.save_run() + pass diff --git a/quickstart/single_agent_train_battery_target_env.py b/quickstart/single_agent_train_battery_target_env.py new file mode 100644 index 0000000..30620f3 --- /dev/null +++ b/quickstart/single_agent_train_battery_target_env.py @@ -0,0 +1,203 @@ +import sys +import time + +from pathlib import Path +import simplejson + +import stable_baselines3 as sb3 + +# This is needed, when you put this file in a subfolder. +try: + # noinspection PyUnboundLocalVariable + if __package__ is None: + DIR = Path(__file__).resolve().parent + sys.path.insert(0, str(DIR.parent)) + __package__ = DIR.name + else: + DIR = None +except NameError: + DIR = None + pass + +from environments import helpers as h +from environments.factory.additional.dest.dest_util import DestModeOptions, DestProperties +from environments.factory.additional.btry.btry_util import BatteryProperties +from environments.logging.envmonitor import EnvMonitor +from environments.logging.recorder import EnvRecorder +from environments.factory.additional.combined_factories import DestBatteryFactory +from environments.utility_classes import MovementProperties, ObservationProperties, AgentRenderOptions + +from plotting.compare_runs import compare_seed_runs + +""" +Welcome to this quick start file. Here we will see how to: + 0. Setup I/O Paths + 1. Setup parameters for the environments (dirt-factory). + 2. Setup parameters for the agent training (SB3: PPO) and save metrics. + Run the training. + 3. Save env and agent for later analysis. + 4. Load the agent from drive + 5. 
Render the env with a run of the trained agent. + 6. Plot metrics +""" + +if __name__ == '__main__': + ######################################################### + # 0. Setup I/O Paths + # Define some general parameters + train_steps = 1e6 + n_seeds = 3 + model_class = sb3.PPO + env_class = DestBatteryFactory + + env_params_json = 'env_params.json' + + # Define a global study save path + start_time = int(time.time()) + study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}' + # Create an identifier, which is unique for every combination and easy to read in filesystem + identifier = f'{model_class.__name__}_{env_class.__name__}_{start_time}' + exp_path = study_root_path / identifier + + ######################################################### + # 1. Setup parameters for the environments (dest-battery-factory). + + + # Define property object parameters. + # 'ObservationProperties' are for specifying how the agent sees the env. + obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT, # Agents won't be shown in the obs at all + omit_agent_self=True, # This is default + additional_agent_placeholder=None, # We will not take care of future agents + frames_to_stack=3, # To give the agent a notion of time + pomdp_r=2 # the agent's view-radius + ) + # 'MovementProperties' are for specifying how the agent is allowed to move in the env. + move_props = MovementProperties(allow_diagonal_movement=True, # Euclidean style (vertices) + allow_square_movement=True, # Manhattan (edges) + allow_no_op=False) # Pause movement (do nothing) + + # 'DestProperties' control if and how destinations are spawned + # TODO: Comments + dest_props = DestProperties( + n_dests = 2, # How many destinations are there + dwell_time = 0, # How long does the agent need to "wait" on a destination + spawn_frequency = 0, + spawn_in_other_zone = True, # + spawn_mode = DestModeOptions.DONE, + ) + # 'BatteryProperties' control the agent batteries and the charge pods + btry_props = BatteryProperties( + initial_charge = 0.9, # + charge_rate = 0.4, # + charge_locations = 3, # + per_action_costs = 0.01, + done_when_discharged = True, + multi_charge = False, + ) + + # These are the EnvKwargs for initializing the env class, holding all former parameter-classes + # TODO: Comments + factory_kwargs = dict(n_agents=1, + max_steps=400, + parse_doors=True, + level_name='rooms', + doors_have_area=True, # + verbose=False, + mv_prop=move_props, # See Above + obs_prop=obs_props, # See Above + done_at_collision=True, + dest_prop=dest_props, + btry_prop=btry_props + ) + + ######################################################### + # 2. Setup parameters for the agent training (SB3: PPO) and save metrics. + agent_kwargs = dict() + + + ######################################################### + # Run the Training + for seed in range(n_seeds): + # Make a copy if you want to alter things in the training loop; like the seed. 
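+ # A shallow copy is enough here, since only top-level entries such as env_seed are changed per run.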
+ env_kwargs = factory_kwargs.copy() + env_kwargs.update(env_seed=seed) + + # Output folder + seed_path = exp_path / f'{str(seed)}_{identifier}' + seed_path.mkdir(parents=True, exist_ok=True) + + # Parameter Storage + param_path = seed_path / env_params_json + # Observation (measures) Storage + monitor_path = seed_path / 'monitor.pick' + recorder_path = seed_path / 'recorder.json' + # Model save Path for the trained model + model_save_path = seed_path / f'model.zip' + + # Env Init & Model kwargs definition + with env_class(**env_kwargs) as env_factory: + + # EnvMonitor Init + env_monitor_callback = EnvMonitor(env_factory) + + # EnvRecorder Init + env_recorder_callback = EnvRecorder(env_factory, freq=int(train_steps / 400 / 10)) + + # Model Init + model = model_class("MlpPolicy", env_factory, verbose=1, seed=seed, device='cpu') + + # Model train + model.learn(total_timesteps=int(train_steps), callback=[env_monitor_callback, env_recorder_callback]) + + ######################################################### + # 3. Save env and agent for later analysis. + # Save the trained Model, the monitor (env measures) and the env parameters + model.named_observation_space = env_factory.named_observation_space + model.named_action_space = env_factory.named_action_space + model.save(model_save_path) + env_factory.save_params(param_path) + env_monitor_callback.save_run(monitor_path) + env_recorder_callback.save_records(recorder_path, save_occupation_map=False) + + # Compare performance runs, for each seed within a model + try: + compare_seed_runs(exp_path, use_tex=False) + except ValueError: + pass + + # Train ends here ############################################################ + + # Evaluation starts here ##################################################### + # First Iterate over every model and monitor "as trained" + print('Start Measurement Tracking') + # For trained policy in study_root_path / identifier + for policy_path in [x for x in exp_path.iterdir() if x.is_dir()]: + + # retrieve model class + model_cls = next(val for key, val in h.MODEL_MAP.items() if key in policy_path.parent.name) + # Load the agent agent + model = model_cls.load(policy_path / 'model.zip', device='cpu') + # Load old env kwargs + with next(policy_path.glob(env_params_json)).open('r') as f: + env_kwargs = simplejson.load(f) + # Make the env stop ar collisions + # (you only want to have a single collision per episode hence the statistics) + env_kwargs.update(done_at_collision=True) + + # Init Env + with env_class(**env_kwargs) as env_factory: + monitored_env_factory = EnvMonitor(env_factory) + + # Evaluation Loop for i in range(n Episodes) + for episode in range(100): + # noinspection PyRedeclaration + env_state = monitored_env_factory.reset() + rew, done_bool = 0, False + while not done_bool: + action = model.predict(env_state, deterministic=True)[0] + env_state, step_r, done_bool, info_obj = monitored_env_factory.step(action) + rew += step_r + if done_bool: + break + print(f'Factory run {episode} done, reward is:\n {rew}') + monitored_env_factory.save_run(filepath=policy_path / 'eval_run_monitor.pick') + print('Measurements Done') diff --git a/quickstart/single_agent_train_dest_env.py b/quickstart/single_agent_train_dest_env.py new file mode 100644 index 0000000..755b0b4 --- /dev/null +++ b/quickstart/single_agent_train_dest_env.py @@ -0,0 +1,193 @@ +import sys +import time + +from pathlib import Path +import simplejson + +import stable_baselines3 as sb3 + +# This is needed, when you put this file in a subfolder. 
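+# The try/except block below puts the repository root on sys.path, so the 'environments' and +# 'plotting' imports further down resolve when this script is started directly from the quickstart folder.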
+try: + # noinspection PyUnboundLocalVariable + if __package__ is None: + DIR = Path(__file__).resolve().parent + sys.path.insert(0, str(DIR.parent)) + __package__ = DIR.name + else: + DIR = None +except NameError: + DIR = None + pass + +from environments import helpers as h +from environments.factory.additional.dest.dest_util import DestModeOptions, DestProperties +from environments.logging.envmonitor import EnvMonitor +from environments.logging.recorder import EnvRecorder +from environments.factory.additional.dest.factory_dest import DestFactory +from environments.utility_classes import MovementProperties, ObservationProperties, AgentRenderOptions + +from plotting.compare_runs import compare_seed_runs + +""" +Welcome to this quick start file. Here we will see how to: + 0. Setup I/O Paths + 1. Setup parameters for the environments (dest-factory). + 2. Setup parameters for the agent training (SB3: PPO) and save metrics. + Run the training. + 3. Save env and agent for later analysis. + 4. Load the agent from drive + 5. Rendering the env with a run of the trained agent. + 6. Plot metrics +""" + +if __name__ == '__main__': + ######################################################### + # 0. Setup I/O Paths + # Define some general parameters + train_steps = 1e6 + n_seeds = 3 + model_class = sb3.PPO + env_class = DestFactory + + env_params_json = 'env_params.json' + + # Define a global studi save path + start_time = int(time.time()) + study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}' + # Create an identifier, which is unique for every combination and easy to read in filesystem + identifier = f'{model_class.__name__}_{env_class.__name__}_{start_time}' + exp_path = study_root_path / identifier + + ######################################################### + # 1. Setup parameters for the environments (dest-factory). + + + # Define property object parameters. + # 'ObservationProperties' are for specifying how the agent sees the env. + obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT, # Agents won`t be shown in the obs at all + omit_agent_self=True, # This is default + additional_agent_placeholder=None, # We will not take care of future agents + frames_to_stack=3, # To give the agent a notion of time + pomdp_r=2 # the agents view-radius + ) + # 'MovementProperties' are for specifying how the agent is allowed to move in the env. + move_props = MovementProperties(allow_diagonal_movement=True, # Euclidean style (vertices) + allow_square_movement=True, # Manhattan (edges) + allow_no_op=False) # Pause movement (do nothing) + + # 'DestProperties' control if and how dest is spawned + # TODO: Comments + dest_props = DestProperties( + n_dests = 2, # How many destinations are there + dwell_time = 0, # How long does the agent need to "wait" on a destination + spawn_frequency = 0, + spawn_in_other_zone = True, # + spawn_mode = DestModeOptions.DONE, + ) + + # These are the EnvKwargs for initializing the env class, holding all former parameter-classes + # TODO: Comments + factory_kwargs = dict(n_agents=1, + max_steps=400, + parse_doors=True, + level_name='rooms', + doors_have_area=True, # + verbose=False, + mv_prop=move_props, # See Above + obs_prop=obs_props, # See Above + done_at_collision=True, + dest_prop=dest_props + ) + + ######################################################### + # 2. Setup parameters for the agent training (SB3: PPO) and save metrics. 
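+ # agent_kwargs is a placeholder for policy hyper-parameters (e.g. dict(ent_coef=0.01), as used in + # quickstart/combine_and_monitor_rerun.py); it is left empty here, so the SB3 defaults apply.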
+ agent_kwargs = dict() + + + ######################################################### + # Run the Training + for seed in range(n_seeds): + # Make a copy if you want to alter things in the training loop; like the seed. + env_kwargs = factory_kwargs.copy() + env_kwargs.update(env_seed=seed) + + # Output folder + seed_path = exp_path / f'{str(seed)}_{identifier}' + seed_path.mkdir(parents=True, exist_ok=True) + + # Parameter Storage + param_path = seed_path / env_params_json + # Observation (measures) Storage + monitor_path = seed_path / 'monitor.pick' + recorder_path = seed_path / 'recorder.json' + # Model save Path for the trained model + model_save_path = seed_path / f'model.zip' + + # Env Init & Model kwargs definition + with env_class(**env_kwargs) as env_factory: + + # EnvMonitor Init + env_monitor_callback = EnvMonitor(env_factory) + + # EnvRecorder Init + env_recorder_callback = EnvRecorder(env_factory, freq=int(train_steps / 400 / 10)) + + # Model Init + model = model_class("MlpPolicy", env_factory,verbose=1, seed=seed, device='cpu') + + # Model train + model.learn(total_timesteps=int(train_steps), callback=[env_monitor_callback, env_recorder_callback]) + + ######################################################### + # 3. Save env and agent for later analysis. + # Save the trained Model, the monitor (env measures) and the env parameters + model.named_observation_space = env_factory.named_observation_space + model.named_action_space = env_factory.named_action_space + model.save(model_save_path) + env_factory.save_params(param_path) + env_monitor_callback.save_run(monitor_path) + env_recorder_callback.save_records(recorder_path, save_occupation_map=False) + + # Compare performance runs, for each seed within a model + try: + compare_seed_runs(exp_path, use_tex=False) + except ValueError: + pass + + # Train ends here ############################################################ + + # Evaluation starts here ##################################################### + # First Iterate over every model and monitor "as trained" + print('Start Measurement Tracking') + # For trained policy in study_root_path / identifier + for policy_path in [x for x in exp_path.iterdir() if x.is_dir()]: + + # retrieve model class + model_cls = next(val for key, val in h.MODEL_MAP.items() if key in policy_path.parent.name) + # Load the agent agent + model = model_cls.load(policy_path / 'model.zip', device='cpu') + # Load old env kwargs + with next(policy_path.glob(env_params_json)).open('r') as f: + env_kwargs = simplejson.load(f) + # Make the env stop ar collisions + # (you only want to have a single collision per episode hence the statistics) + env_kwargs.update(done_at_collision=True) + + # Init Env + with env_class(**env_kwargs) as env_factory: + monitored_env_factory = EnvMonitor(env_factory) + + # Evaluation Loop for i in range(n Episodes) + for episode in range(100): + # noinspection PyRedeclaration + env_state = monitored_env_factory.reset() + rew, done_bool = 0, False + while not done_bool: + action = model.predict(env_state, deterministic=True)[0] + env_state, step_r, done_bool, info_obj = monitored_env_factory.step(action) + rew += step_r + if done_bool: + break + print(f'Factory run {episode} done, reward is:\n {rew}') + monitored_env_factory.save_run(filepath=policy_path / 'eval_run_monitor.pick') + print('Measurements Done') diff --git a/quickstart/single_agent_train_dirt_env.py b/quickstart/single_agent_train_dirt_env.py index b2025d1..8f0f9c9 100644 --- a/quickstart/single_agent_train_dirt_env.py +++ 
b/quickstart/single_agent_train_dirt_env.py @@ -1,11 +1,12 @@ import sys import time + from pathlib import Path -from matplotlib import pyplot as plt -import itertools as it +import simplejson import stable_baselines3 as sb3 +# This is needed, when you put this file in a subfolder. try: # noinspection PyUnboundLocalVariable if __package__ is None: @@ -18,19 +19,14 @@ except NameError: DIR = None pass -import simplejson -from stable_baselines3.common.vec_env import SubprocVecEnv - from environments import helpers as h -from environments.factory.factory_dirt import DirtProperties, DirtFactory from environments.logging.envmonitor import EnvMonitor +from environments.logging.recorder import EnvRecorder +from environments.factory.additional.dirt.dirt_util import DirtProperties +from environments.factory.additional.dirt.factory_dirt import DirtFactory from environments.utility_classes import MovementProperties, ObservationProperties, AgentRenderOptions -import pickle -from plotting.compare_runs import compare_seed_runs, compare_model_runs -import pandas as pd -import seaborn as sns -import multiprocessing as mp +from plotting.compare_runs import compare_seed_runs """ Welcome to this quick start file. Here we will see how to: @@ -53,6 +49,8 @@ if __name__ == '__main__': model_class = sb3.PPO env_class = DirtFactory + env_params_json = 'env_params.json' + # Define a global studi save path start_time = int(time.time()) study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}' @@ -100,7 +98,7 @@ if __name__ == '__main__': mv_prop=move_props, # See Above obs_prop=obs_props, # See Above done_at_collision=True, - dirt_props=dirt_props + dirt_prop=dirt_props ) ######################################################### @@ -120,30 +118,37 @@ if __name__ == '__main__': seed_path.mkdir(parents=True, exist_ok=True) # Parameter Storage - param_path = seed_path / f'env_params.json' + param_path = seed_path / env_params_json # Observation (measures) Storage monitor_path = seed_path / 'monitor.pick' + recorder_path = seed_path / 'recorder.json' # Model save Path for the trained model model_save_path = seed_path / f'model.zip' # Env Init & Model kwargs definition - with DirtFactory(env_kwargs) as env_factory: + with env_class(**env_kwargs) as env_factory: # EnvMonitor Init env_monitor_callback = EnvMonitor(env_factory) + # EnvRecorder Init + env_recorder_callback = EnvRecorder(env_factory, freq=int(train_steps / 400 / 10)) + # Model Init model = model_class("MlpPolicy", env_factory,verbose=1, seed=seed, device='cpu') # Model train - model.learn(total_timesteps=int(train_steps), callback=[env_monitor_callback]) + model.learn(total_timesteps=int(train_steps), callback=[env_monitor_callback, env_recorder_callback]) ######################################################### # 3. Save env and agent for later analysis. 
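+ # Attaching the named observation/action spaces to the saved model lets the ObservationTranslator and + # ActionTranslator in environments/helpers.py re-align observations and actions when several trained + # agents are combined later (see quickstart/combine_and_monitor_rerun.py).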
# Save the trained Model, the monitor (env measures) and the env parameters + model.named_observation_space = env_factory.named_observation_space + model.named_action_space = env_factory.named_action_space model.save(model_save_path) env_factory.save_params(param_path) env_monitor_callback.save_run(monitor_path) + env_recorder_callback.save_records(recorder_path, save_occupation_map=False) # Compare performance runs, for each seed within a model try: @@ -164,18 +169,19 @@ if __name__ == '__main__': # Load the agent agent model = model_cls.load(policy_path / 'model.zip', device='cpu') # Load old env kwargs - with next(policy_path.glob('*.json')).open('r') as f: + with next(policy_path.glob(env_params_json)).open('r') as f: env_kwargs = simplejson.load(f) # Make the env stop ar collisions # (you only want to have a single collision per episode hence the statistics) env_kwargs.update(done_at_collision=True) # Init Env - with env_to_run(**env_kwargs) as env_factory: + with env_class(**env_kwargs) as env_factory: monitored_env_factory = EnvMonitor(env_factory) # Evaluation Loop for i in range(n Episodes) for episode in range(100): + # noinspection PyRedeclaration env_state = monitored_env_factory.reset() rew, done_bool = 0, False while not done_bool: @@ -185,8 +191,5 @@ if __name__ == '__main__': if done_bool: break print(f'Factory run {episode} done, reward is:\n {rew}') - monitored_env_factory.save_run(filepath=policy_path / f'{baseline_monitor_file}.pick') - - # for policy_path in (y for y in policy_path.iterdir() if y.is_dir()): - # load_model_run_baseline(policy_path) + monitored_env_factory.save_run(filepath=policy_path / 'eval_run_monitor.pick') print('Measurements Done') diff --git a/quickstart/single_agent_train_item_env.py b/quickstart/single_agent_train_item_env.py new file mode 100644 index 0000000..0107dc1 --- /dev/null +++ b/quickstart/single_agent_train_item_env.py @@ -0,0 +1,191 @@ +import sys +import time + +from pathlib import Path +import simplejson + +import stable_baselines3 as sb3 + +# This is needed, when you put this file in a subfolder. +try: + # noinspection PyUnboundLocalVariable + if __package__ is None: + DIR = Path(__file__).resolve().parent + sys.path.insert(0, str(DIR.parent)) + __package__ = DIR.name + else: + DIR = None +except NameError: + DIR = None + pass + +from environments import helpers as h +from environments.factory.additional.item.factory_item import ItemFactory +from environments.factory.additional.item.item_util import ItemProperties +from environments.logging.envmonitor import EnvMonitor +from environments.logging.recorder import EnvRecorder +from environments.utility_classes import MovementProperties, ObservationProperties, AgentRenderOptions + +from plotting.compare_runs import compare_seed_runs + +""" +Welcome to this quick start file. Here we will see how to: + 0. Setup I/O Paths + 1. Setup parameters for the environments (item-factory). + 2. Setup parameters for the agent training (SB3: PPO) and save metrics. + Run the training. + 3. Save env and agent for later analysis. + 4. Load the agent from drive + 5. Rendering the env with a run of the trained agent. + 6. Plot metrics +""" + +if __name__ == '__main__': + ######################################################### + # 0. 
Setup I/O Paths + # Define some general parameters + train_steps = 1e6 + n_seeds = 3 + model_class = sb3.PPO + env_class = ItemFactory + + env_params_json = 'env_params.json' + + # Define a global studi save path + start_time = int(time.time()) + study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}' + # Create an identifier, which is unique for every combination and easy to read in filesystem + identifier = f'{model_class.__name__}_{env_class.__name__}_{start_time}' + exp_path = study_root_path / identifier + + ######################################################### + # 1. Setup parameters for the environments (item-factory). + # + # Define property object parameters. + # 'ObservationProperties' are for specifying how the agent sees the env. + obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT, # Agents won`t be shown in the obs at all + omit_agent_self=True, # This is default + additional_agent_placeholder=None, # We will not take care of future agents + frames_to_stack=3, # To give the agent a notion of time + pomdp_r=2 # the agents view-radius + ) + # 'MovementProperties' are for specifying how the agent is allowed to move in the env. + move_props = MovementProperties(allow_diagonal_movement=True, # Euclidean style (vertices) + allow_square_movement=True, # Manhattan (edges) + allow_no_op=False) # Pause movement (do nothing) + + # 'ItemProperties' control if and how item is spawned + # TODO: Comments + item_props = ItemProperties( + n_items = 7, # How many items are there at the same time + spawn_frequency = 50, # Spawn Frequency in Steps + n_drop_off_locations = 10, # How many DropOff locations are there at the same time + max_dropoff_storage_size = 0, # How many items are needed until the dropoff is full + max_agent_inventory_capacity = 5, # How many items are needed until the agent inventory is full) + ) + + # These are the EnvKwargs for initializing the env class, holding all former parameter-classes + # TODO: Comments + factory_kwargs = dict(n_agents=1, + max_steps=400, + parse_doors=True, + level_name='rooms', + doors_have_area=True, # + verbose=False, + mv_prop=move_props, # See Above + obs_prop=obs_props, # See Above + done_at_collision=True, + item_prop=item_props + ) + + ######################################################### + # 2. Setup parameters for the agent training (SB3: PPO) and save metrics. + agent_kwargs = dict() + + ######################################################### + # Run the Training + for seed in range(n_seeds): + # Make a copy if you want to alter things in the training loop; like the seed. 
+ env_kwargs = factory_kwargs.copy() + env_kwargs.update(env_seed=seed) + + # Output folder + seed_path = exp_path / f'{str(seed)}_{identifier}' + seed_path.mkdir(parents=True, exist_ok=True) + + # Parameter Storage + param_path = seed_path / env_params_json + # Observation (measures) Storage + monitor_path = seed_path / 'monitor.pick' + recorder_path = seed_path / 'recorder.json' + # Model save Path for the trained model + model_save_path = seed_path / f'model.zip' + + # Env Init & Model kwargs definition + with ItemFactory(**env_kwargs) as env_factory: + + # EnvMonitor Init + env_monitor_callback = EnvMonitor(env_factory) + + # EnvRecorder Init + env_recorder_callback = EnvRecorder(env_factory, freq=int(train_steps / 400 / 10)) + + # Model Init + model = model_class("MlpPolicy", env_factory,verbose=1, seed=seed, device='cpu') + + # Model train + model.learn(total_timesteps=int(train_steps), callback=[env_monitor_callback, env_recorder_callback]) + + ######################################################### + # 3. Save env and agent for later analysis. + # Save the trained Model, the monitor (env measures) and the env parameters + model.named_observation_space = env_factory.named_observation_space + model.named_action_space = env_factory.named_action_space + model.save(model_save_path) + env_factory.save_params(param_path) + env_monitor_callback.save_run(monitor_path) + env_recorder_callback.save_records(recorder_path, save_occupation_map=False) + + # Compare performance runs, for each seed within a model + try: + compare_seed_runs(exp_path, use_tex=False) + except ValueError: + pass + + # Train ends here ############################################################ + + # Evaluation starts here ##################################################### + # First Iterate over every model and monitor "as trained" + print('Start Measurement Tracking') + # For trained policy in study_root_path / identifier + for policy_path in [x for x in exp_path.iterdir() if x.is_dir()]: + + # retrieve model class + model_cls = next(val for key, val in h.MODEL_MAP.items() if key in policy_path.parent.name) + # Load the agent agent + model = model_cls.load(policy_path / 'model.zip', device='cpu') + # Load old env kwargs + with next(policy_path.glob(env_params_json)).open('r') as f: + env_kwargs = simplejson.load(f) + # Make the env stop ar collisions + # (you only want to have a single collision per episode hence the statistics) + env_kwargs.update(done_at_collision=True) + + # Init Env + with ItemFactory(**env_kwargs) as env_factory: + monitored_env_factory = EnvMonitor(env_factory) + + # Evaluation Loop for i in range(n Episodes) + for episode in range(100): + # noinspection PyRedeclaration + env_state = monitored_env_factory.reset() + rew, done_bool = 0, False + while not done_bool: + action = model.predict(env_state, deterministic=True)[0] + env_state, step_r, done_bool, info_obj = monitored_env_factory.step(action) + rew += step_r + if done_bool: + break + print(f'Factory run {episode} done, reward is:\n {rew}') + monitored_env_factory.save_run(filepath=policy_path / 'eval_run_monitor.pick') + print('Measurements Done') diff --git a/reload_agent.py b/reload_agent.py index 4735529..a5d3ed3 100644 --- a/reload_agent.py +++ b/reload_agent.py @@ -4,9 +4,9 @@ from pathlib import Path import yaml from stable_baselines3 import A2C, PPO, DQN -from environments.factory.factory_dirt import Constants as c +from environments.factory.additional.dirt.dirt_util import Constants -from environments.factory.factory_dirt import 
DirtFactory +from environments.factory.additional.dirt.factory_dirt import DirtFactory from environments.logging.envmonitor import EnvMonitor from environments.logging.recorder import EnvRecorder @@ -23,7 +23,7 @@ if __name__ == '__main__': seed = 13 n_agents = 1 # out_path = Path('study_out/e_1_new_reward/no_obs/dirt/A2C_new_reward/0_A2C_new_reward') - out_path = Path('study_out/reload') + out_path = Path('quickstart/combinations/single_agent_train_dirt_env_1659374984/PPO_DirtFactory_1659374984/0_PPO_DirtFactory_1659374984/') model_path = out_path with (out_path / f'env_params.json').open('r') as f: @@ -62,7 +62,7 @@ if __name__ == '__main__': if render: env.render() try: - door = next(x for x in env.unwrapped.unwrapped[c.DOORS] if x.is_open) + door = next(x for x in env.unwrapped.unwrapped[Constants.DOORS] if x.is_open) print('openDoor found') except StopIteration: pass diff --git a/studies/e_1.py b/studies/e_1.py index ee1ae28..4ceabd6 100644 --- a/studies/e_1.py +++ b/studies/e_1.py @@ -19,9 +19,11 @@ import simplejson from stable_baselines3.common.vec_env import SubprocVecEnv from environments import helpers as h -from environments.factory.factory_dirt import DirtProperties, DirtFactory +from environments.factory.factory_dirt import DirtFactory +from environments.factory.dirt_util import DirtProperties from environments.factory.combined_factories import DirtItemFactory -from environments.factory.factory_item import ItemProperties, ItemFactory +from environments.factory.factory_item import ItemFactory +from environments.factory.additional.item.item_util import ItemProperties from environments.logging.envmonitor import EnvMonitor from environments.utility_classes import MovementProperties, ObservationProperties, AgentRenderOptions import pickle diff --git a/studies/single_run_with_export.py b/studies/single_run_with_export.py index 2227f49..cfcda94 100644 --- a/studies/single_run_with_export.py +++ b/studies/single_run_with_export.py @@ -20,9 +20,12 @@ import simplejson from environments.helpers import ActionTranslator, ObservationTranslator from environments.logging.recorder import EnvRecorder from environments import helpers as h -from environments.factory.factory_dirt import DirtProperties, DirtFactory -from environments.factory.factory_item import ItemProperties, ItemFactory -from environments.factory.factory_dest import DestProperties, DestFactory, DestModeOptions +from environments.factory.factory_dirt import DirtFactory +from environments.factory.dirt_util import DirtProperties +from environments.factory.factory_item import ItemFactory +from environments.factory.additional.item.item_util import ItemProperties +from environments.factory.factory_dest import DestFactory +from environments.factory.additional.dest.dest_util import DestModeOptions, DestProperties from environments.factory.combined_factories import DirtDestItemFactory from environments.logging.envmonitor import EnvMonitor from environments.utility_classes import MovementProperties, ObservationProperties, AgentRenderOptions
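The new EnvCombiner helper in environments/utility_classes.py wraps the same type()-based class combination that quickstart/combine_and_monitor_rerun.py performs manually via type("CombinedEnv", tuple(available_envs.values()), {}). A minimal usage sketch, assuming two of the refactored factory classes as inputs (the constructor call and its values are illustrative only, not a tested configuration):

from environments.factory.additional.dirt.factory_dirt import DirtFactory
from environments.factory.additional.item.factory_item import ItemFactory
from environments.utility_classes import EnvCombiner

# Build a combined environment class via multiple inheritance over both factories.
combined_cls = EnvCombiner(DirtFactory, ItemFactory).build()

# The combined class should accept the union of both factories' kwargs,
# e.g. dirt_prop=DirtProperties(...) and item_prop=ItemProperties(...):
# combined_env = combined_cls(n_agents=1, max_steps=400, level_name='rooms')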