From 244d4eed68bda03bae20829e6e0209c9a449707c Mon Sep 17 00:00:00 2001 From: Steffen Illium Date: Sun, 8 Aug 2021 22:05:55 +0200 Subject: [PATCH] now with correct seeding per setting and preparations for items --- environments/factory/base/base_factory.py | 4 +- environments/factory/item_pickup.py | 47 +++++++++++++++++++---- environments/factory/simple_factory.py | 5 ++- main.py | 2 +- 4 files changed, 47 insertions(+), 11 deletions(-) diff --git a/environments/factory/base/base_factory.py b/environments/factory/base/base_factory.py index 9ac81a3..c21fc56 100644 --- a/environments/factory/base/base_factory.py +++ b/environments/factory/base/base_factory.py @@ -1,3 +1,4 @@ +import time from pathlib import Path from typing import List, Union, Iterable @@ -97,6 +98,7 @@ class BaseFactory(gym.Env): assert frames_to_stack != 1 and frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1." # Attribute Assignment + self._base_rng = np.random.default_rng(kwargs.get('seed', time.time_ns())) self.movement_properties = movement_properties self.level_name = level_name self._level_shape = None @@ -173,7 +175,7 @@ class BaseFactory(gym.Env): self._doors = Doors.from_tiles(tiles, context=self._tiles, has_area=self.doors_have_area) # Agent Init on random positions - self._agents = Agents.from_tiles(np.random.choice(self._tiles, self.n_agents)) + self._agents = Agents.from_tiles(self._base_rng.choice(self._tiles, self.n_agents)) entities = Entities() entities.register_additional_items([self._agents]) diff --git a/environments/factory/item_pickup.py b/environments/factory/item_pickup.py index 4cb61a0..02b197a 100644 --- a/environments/factory/item_pickup.py +++ b/environments/factory/item_pickup.py @@ -1,15 +1,15 @@ +import time +from collections import deque from typing import List, Union, NamedTuple -import random - import numpy as np from environments.helpers import Constants as c from environments import helpers as h from environments.factory.base.base_factory 
import BaseFactory -from environments.factory.base.objects import Agent, Action, Object, Slice +from environments.factory.base.objects import Agent, Action, Object, Slice, Entity from environments.factory.base.registers import Entities -from environments.factory.renderer import Renderer, Entity +from environments.factory.renderer import Renderer from environments.utility_classes import MovementProperties @@ -23,16 +23,38 @@ NO_ITEM = 0 ITEM_DROP_OFF = -1 +def inventory_slice_name(agent): + return f'{agent.name}_{INVENTORY}' + + +class DropOffLocation(Entity): + + def __init__(self, *args, storage_size_until_full: int = 5, **kwargs): + super(DropOffLocation, self).__init__(*args, **kwargs) + self.storage = deque(maxlen=storage_size_until_full) + + def place_item(self, item): + self.storage.append(item) + return True + + @property + def is_full(self): + return self.storage.maxlen == len(self.storage) + + class ItemProperties(NamedTuple): - n_items: int = 1 # How much does the robot clean with one actions. 
+ n_items: int = 1 # How many items are there at the same time spawn_frequency: int = 5 # Spawn Frequency in Steps + max_dropoff_storage_size: int = 5 # How many items are needed until the drop off is full + max_agent_storage_size: int = 5 # How many items are needed until the agent inventory is full # noinspection PyAttributeOutsideInit class ItemFactory(BaseFactory): def __init__(self, item_properties: ItemProperties, *args, **kwargs): - super(ItemFactory, self).__init__(*args, **kwargs) self.item_properties = item_properties + self._item_rng = np.random.default_rng(kwargs.get('seed', time.time_ns())) + super(ItemFactory, self).__init__(*args, **kwargs) @property def additional_actions(self) -> Union[str, List[str]]: @@ -44,7 +66,8 @@ class ItemFactory(BaseFactory): @property def additional_slices(self) -> Union[Slice, List[Slice]]: - return [Slice(ITEM, np.zeros(self._level_shape)), Slice(INVENTORY, np.zeros(self._level_shape))] + return [Slice(ITEM, np.zeros(self._level_shape))] + [ + Slice(inventory_slice_name(agent), np.zeros(self._level_shape)) for agent in self._agents] def _is_item_action(self, action): if isinstance(action, str): @@ -53,10 +76,14 @@ class ItemFactory(BaseFactory): def do_item_action(self, agent): item_slice = self._slices.by_name(ITEM).slice + inventory_slice = self._slices.by_name(inventory_slice_name(agent)).slice + if item := item_slice[agent.pos]: if item == ITEM_DROP_OFF: - self._slices.by_name(INVENTORY).slice[agent.pos] = item + valid = self._item_drop_off.place_item(inventory_slice.sum()) + + item_slice[agent.pos] = NO_ITEM return True else: @@ -75,6 +102,10 @@ class ItemFactory(BaseFactory): if self.n_items > 1: self._next_item_spawn = self.item_properties.spawn_frequency + def spawn_drop_off_location(self): + single_empty_tile = self._tiles.empty_tiles[0] + self._item_drop_off = DropOffLocation(storage_size_until_full=self.item_properties.max_dropoff_storage_size) + def calculate_reward(self) -> (int, dict): pass diff 
--git a/environments/factory/simple_factory.py b/environments/factory/simple_factory.py index e4de5e4..694e69e 100644 --- a/environments/factory/simple_factory.py +++ b/environments/factory/simple_factory.py @@ -1,3 +1,4 @@ +import time from typing import List, Union, NamedTuple import random @@ -59,6 +60,7 @@ class SimpleFactory(BaseFactory): def __init__(self, *args, dirt_properties: DirtProperties = DirtProperties(), **kwargs): self.dirt_properties = dirt_properties self._renderer = None # expensive - don't use it when not required ! + self._dirt_rng = np.random.default_rng(kwargs.get('seed', time.time_ns())) super(SimpleFactory, self).__init__(*args, **kwargs) def _flush_state(self): @@ -108,7 +110,8 @@ class SimpleFactory(BaseFactory): free_for_dirt = self._tiles.empty_tiles # randomly distribute dirt across the grid - n_dirt_tiles = max(0, int(random.uniform(0, self.dirt_properties.max_spawn_ratio) * len(free_for_dirt))) + new_spawn = self._dirt_rng.uniform(0, self.dirt_properties.max_spawn_ratio) + n_dirt_tiles = max(0, int(new_spawn * len(free_for_dirt))) for tile in free_for_dirt[:n_dirt_tiles]: new_value = dirt_slice[tile.pos] + self.dirt_properties.gain_amount dirt_slice[tile.pos] = min(new_value, self.dirt_properties.max_local_amount) diff --git a/main.py b/main.py index 6407d26..3bf7d8a 100644 --- a/main.py +++ b/main.py @@ -109,7 +109,7 @@ if __name__ == '__main__': with SimpleFactory(n_agents=1, dirt_properties=dirt_props, pomdp_radius=2, max_steps=400, parse_doors=True, movement_properties=move_props, level_name='rooms', frames_to_stack=3, omit_agent_slice_in_obs=True, combin_agent_slices_in_obs=True, record_episodes=False, - cast_shadows=True, doors_have_area=False + cast_shadows=True, doors_have_area=False, seed=seed ) as env: if modeL_type.__name__ in ["PPO", "A2C"]: