new observation properties for testing of technical limitations

Author: Steffen Illium
Date: 2021-11-05 15:59:19 +01:00
parent: b5c6105b7b
commit: d69cf75c15
9 changed files with 424 additions and 263 deletions

View File

@@ -16,7 +16,8 @@ from environments.helpers import Constants as c, Constants
 from environments import helpers as h
 from environments.factory.base.objects import Agent, Tile, Action
 from environments.factory.base.registers import Actions, Entities, Agents, Doors, FloorTiles, WallTiles, PlaceHolders
-from environments.utility_classes import MovementProperties
+from environments.utility_classes import MovementProperties, ObservationProperties
+from environments.utility_classes import AgentRenderOptions as a_obs
 import simplejson

@@ -33,7 +34,7 @@ class BaseFactory(gym.Env):
     @property
     def observation_space(self):
-        if r := self.pomdp_r:
+        if r := self._pomdp_r:
             z = self._obs_cube.shape[0]
             xy = r*2 + 1
             level_shape = (z, xy, xy)

@@ -44,24 +45,32 @@ class BaseFactory(gym.Env):
     @property
     def pomdp_diameter(self):
-        return self.pomdp_r * 2 + 1
+        return self._pomdp_r * 2 + 1

     @property
     def movement_actions(self):
         return self._actions.movement_actions

     def __enter__(self):
-        return self if self.frames_to_stack == 0 else FrameStack(self, self.frames_to_stack)
+        return self if self.obs_prop.frames_to_stack == 0 else \
+            FrameStack(self, self.obs_prop.frames_to_stack)

     def __exit__(self, exc_type, exc_val, exc_tb):
         self.close()

-    def __init__(self, level_name='simple', n_agents=1, max_steps=int(5e2), pomdp_r: Union[None, int] = 0,
-                 movement_properties: MovementProperties = MovementProperties(), parse_doors=False,
-                 combin_agent_obs: bool = False, frames_to_stack=0, record_episodes=False,
-                 omit_agent_in_obs=False, done_at_collision=False, cast_shadows=True, additional_agent_placeholder=None,
+    def __init__(self, level_name='simple', n_agents=1, max_steps=int(5e2),
+                 mv_prop: MovementProperties = MovementProperties(),
+                 obs_prop: ObservationProperties = ObservationProperties(),
+                 parse_doors=False, record_episodes=False, done_at_collision=False,
                  verbose=False, doors_have_area=True, env_seed=time.time_ns(), **kwargs):
-        assert frames_to_stack != 1 and frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1."
+        if isinstance(mv_prop, dict):
+            mv_prop = MovementProperties(**mv_prop)
+        if isinstance(obs_prop, dict):
+            obs_prop = ObservationProperties(**obs_prop)
+        assert obs_prop.frames_to_stack != 1 and \
+               obs_prop.frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1."

         if kwargs:
             print(f'Following kwargs were passed, but ignored: {kwargs}')

@@ -69,24 +78,18 @@ class BaseFactory(gym.Env):
         self.env_seed = env_seed
         self.seed(env_seed)
         self._base_rng = np.random.default_rng(self.env_seed)
-        if isinstance(movement_properties, dict):
-            movement_properties = MovementProperties(**movement_properties)
-        self.movement_properties = movement_properties
+        self.mv_prop = mv_prop
+        self.obs_prop = obs_prop
         self.level_name = level_name
         self._level_shape = None
         self.verbose = verbose
-        self.additional_agent_placeholder = additional_agent_placeholder
         self._renderer = None  # expensive - don't use it when not required !
         self._entities = Entities()

         self.n_agents = n_agents
         self.max_steps = max_steps
-        self.pomdp_r = pomdp_r
-        self.combin_agent_obs = combin_agent_obs
-        self.omit_agent_in_obs = omit_agent_in_obs
-        self.cast_shadows = cast_shadows
-        self.frames_to_stack = frames_to_stack
+        self._pomdp_r = self.obs_prop.pomdp_r

         self.done_at_collision = done_at_collision
         self.record_episodes = record_episodes

@@ -130,24 +133,32 @@ class BaseFactory(gym.Env):
         parsed_doors = h.one_hot_level(parsed_level, c.DOOR)
         if np.any(parsed_doors):
             door_tiles = [floor.by_pos(pos) for pos in np.argwhere(parsed_doors == c.OCCUPIED_CELL.value)]
-            doors = Doors.from_tiles(door_tiles, self._level_shape, context=floor)
+            doors = Doors.from_tiles(door_tiles, self._level_shape,
+                                     entity_kwargs=dict(context=floor)
+                                     )
             entities.update({c.DOORS: doors})

         # Actions
-        self._actions = Actions(self.movement_properties, can_use_doors=self.parse_doors)
+        self._actions = Actions(self.mv_prop, can_use_doors=self.parse_doors)
         if additional_actions := self.additional_actions:
             self._actions.register_additional_items(additional_actions)

         # Agents
         agents = Agents.from_tiles(floor.empty_tiles[:self.n_agents], self._level_shape,
-                                   individual_slices=not self.combin_agent_obs)
+                                   individual_slices=self.obs_prop.render_agents == a_obs.SEPERATE,
+                                   hide_from_obs_builder=self.obs_prop.render_agents == a_obs.LEVEL,
+                                   is_observable=self.obs_prop.render_agents != a_obs.NOT
+                                   )
         entities.update({c.AGENT: agents})

-        if self.additional_agent_placeholder is not None:
+        if self.obs_prop.additional_agent_placeholder is not None:
+            # TODO: Make this accept Lists for multiple placeholders
             # Empty Observations with either [0, 1, N(0, 1)]
             placeholder = PlaceHolders.from_tiles([self._NO_POS_TILE], self._level_shape,
-                                                  fill_value=self.additional_agent_placeholder)
+                                                  entity_kwargs=dict(
+                                                      fill_value=self.obs_prop.additional_agent_placeholder)
+                                                  )
             entities.update({c.AGENT_PLACEHOLDER: placeholder})

@@ -163,24 +174,11 @@ class BaseFactory(gym.Env):
         return self._entities

     def _init_obs_cube(self):
-        arrays = self._entities.observable_arrays
-        # FIXME: Move logic to Register
-        if self.omit_agent_in_obs and self.n_agents == 1:
-            del arrays[c.AGENT]
-        # This does not seem to be necesarry, because this case is allready handled by the Agent Register Class
-        # elif self.omit_agent_in_obs:
-        #     arrays[c.AGENT] = np.delete(arrays[c.AGENT], 0, axis=0)
+        arrays = self._entities.obs_arrays

         obs_cube_z = sum([a.shape[0] if not self[key].is_per_agent else 1 for key, a in arrays.items()])
         self._obs_cube = np.zeros((obs_cube_z, *self._level_shape), dtype=np.float32)
-        # Optionally Pad this obs cube for pomdp cases
-        if r := self.pomdp_r:
-            x, y = self._level_shape
-            # was c.SHADOW
-            self._padded_obs_cube = np.full((obs_cube_z, x + r*2, y + r*2), c.SHADOWED_CELL.value, dtype=np.float32)
-            self._padded_obs_cube[:, r:r+x, r:r+y] = self._obs_cube

     def reset(self) -> (np.ndarray, int, bool, dict):
         _ = self._base_init_env()
         self._init_obs_cube()

@@ -198,7 +196,6 @@ class BaseFactory(gym.Env):
         assert isinstance(actions, Iterable), f'"actions" has to be in [{int, list}]'
         self._steps += 1
-        done = False

         # Pre step Hook for later use
         self.hook_pre_step()

@@ -285,17 +282,22 @@ class BaseFactory(gym.Env):
     def _build_per_agent_obs(self, agent: Agent, state_array_dict) -> np.ndarray:
         agent_pos_is_omitted = False
         agent_omit_idx = None
-        if self.omit_agent_in_obs and self.n_agents == 1:
+        if self.obs_prop.omit_agent_self and self.n_agents == 1:
             # There is only a single agent and we want to omit the agent obs, so just remove the array.
-            del state_array_dict[c.AGENT]
-        elif self.omit_agent_in_obs and self.combin_agent_obs and self.n_agents > 1:
+            # del state_array_dict[c.AGENT]
+            # Not Needed any more,
+            pass
+        elif self.obs_prop.omit_agent_self and self.obs_prop.render_agents in [a_obs.COMBINED, ] and self.n_agents > 1:
             state_array_dict[c.AGENT][0, agent.x, agent.y] -= agent.encoding
             agent_pos_is_omitted = True
-        elif self.omit_agent_in_obs and not self.combin_agent_obs and self.n_agents > 1:
+        elif self.obs_prop.omit_agent_self and self.obs_prop.render_agents == a_obs.SEPERATE and self.n_agents > 1:
             agent_omit_idx = next((i for i, a in enumerate(self[c.AGENT]) if a == agent))

         running_idx, shadowing_idxs, can_be_shadowed_idxs = 0, [], []
+        self._obs_cube[:] = 0
+        # FIXME: Refactor this! Make a globally build observation, then add individual per-agent-obs
         for key, array in state_array_dict.items():
             # Flush state array object representation to obs cube
             if not self[key].hide_from_obs_builder:

@@ -309,12 +311,15 @@ class BaseFactory(gym.Env):
                     for array_idx in range(array.shape[0]):
                         self._obs_cube[running_idx: running_idx+z] = array[[x for x in range(array.shape[0])
                                                                             if x != agent_omit_idx]]
-                elif key == c.AGENT and self.omit_agent_in_obs and self.combin_agent_obs:
+                # Agent OBS are combined
+                elif key == c.AGENT and self.obs_prop.omit_agent_self \
+                        and self.obs_prop.render_agents == a_obs.COMBINED:
                     z = 1
                     self._obs_cube[running_idx: running_idx + z] = array
+                # Each Agent is rendered on a seperate array slice
                 else:
                     z = array.shape[0]
-                    self._obs_cube[running_idx: running_idx+z] = array
+                    self._obs_cube[running_idx: running_idx + z] = array
                 # Define which OBS SLices cast a Shadow
                 if self[key].is_blocking_light:
                     for i in range(z):

@@ -328,19 +333,14 @@ class BaseFactory(gym.Env):
         if agent_pos_is_omitted:
             state_array_dict[c.AGENT][0, agent.x, agent.y] += agent.encoding

-        if r := self.pomdp_r:
-            self._padded_obs_cube[:] = c.SHADOWED_CELL.value  # Was c.SHADOW
-            # self._padded_obs_cube[0] = c.OCCUPIED_CELL.value
-            x, y = self._level_shape
-            self._padded_obs_cube[:, r:r + x, r:r + y] = self._obs_cube
-            global_x, global_y = map(sum, zip(agent.pos, (r, r)))
-            x0, x1 = max(0, global_x - self.pomdp_r), global_x + self.pomdp_r + 1
-            y0, y1 = max(0, global_y - self.pomdp_r), global_y + self.pomdp_r + 1
-            obs = self._padded_obs_cube[:, x0:x1, y0:y1]
+        if self._pomdp_r:
+            obs = self._do_pomdp_obs_cutout(agent, self._obs_cube)
         else:
             obs = self._obs_cube

-        if self.cast_shadows:
+        obs = obs.copy()
+
+        if self.obs_prop.cast_shadows:
             obs_block_light = [obs[idx] != c.OCCUPIED_CELL.value for idx in shadowing_idxs]
             door_shadowing = False
             if self.parse_doors:

@@ -350,8 +350,8 @@ class BaseFactory(gym.Env):
                     for group in door.connectivity_subgroups:
                         if agent.last_pos not in group:
                             door_shadowing = True
-                            if self.pomdp_r:
-                                blocking = [tuple(np.subtract(x, agent.pos) + (self.pomdp_r, self.pomdp_r))
+                            if self._pomdp_r:
+                                blocking = [tuple(np.subtract(x, agent.pos) + (self._pomdp_r, self._pomdp_r))
                                             for x in group]
                                 xs, ys = zip(*blocking)
                             else:

@@ -361,8 +361,8 @@ class BaseFactory(gym.Env):
                 obs_block_light[0][xs, ys] = False
             light_block_map = Map((np.prod(obs_block_light, axis=0) != True).astype(int))
-            if self.pomdp_r:
-                light_block_map = light_block_map.do_fov(self.pomdp_r, self.pomdp_r, max(self._level_shape))
+            if self._pomdp_r:
+                light_block_map = light_block_map.do_fov(self._pomdp_r, self._pomdp_r, max(self._level_shape))
             else:
                 light_block_map = light_block_map.do_fov(*agent.pos, max(self._level_shape))
             if door_shadowing:

@@ -374,6 +374,20 @@ class BaseFactory(gym.Env):
         else:
             pass

+        # Agents observe other agents as wall
+        if self.obs_prop.render_agents == a_obs.LEVEL and self.n_agents > 1:
+            other_agent_obs = self[c.AGENT].as_array()
+            if self.obs_prop.omit_agent_self:
+                other_agent_obs[:, agent.x, agent.y] -= agent.encoding
+
+            if self.obs_prop.pomdp_r:
+                oobs = self._do_pomdp_obs_cutout(agent, other_agent_obs)[0]
+                mask = (oobs != c.SHADOWED_CELL.value).astype(int)
+                obs[0] += oobs * mask
+            else:
+                obs[0] += other_agent_obs
+
         # Additional Observation:
         for additional_obs in self.additional_obs_build():
             obs[running_idx:running_idx+additional_obs.shape[0]] = additional_obs

@@ -384,6 +398,37 @@ class BaseFactory(gym.Env):
         return obs

+    def _do_pomdp_obs_cutout(self, agent, obs_to_be_padded):
+        assert obs_to_be_padded.ndim == 3
+        r, d = self._pomdp_r, self.pomdp_diameter
+        x0, x1 = max(0, agent.x - r), min(agent.x + r + 1, self._level_shape[0])
+        y0, y1 = max(0, agent.y - r), min(agent.y + r + 1, self._level_shape[1])
+        # Other Agent Obs = oobs
+        oobs = obs_to_be_padded[:, x0:x1, y0:y1]
+        if oobs.shape[0:] != (d,) * 2:
+            if xd := oobs.shape[1] % d:
+                if agent.x > r:
+                    x0_pad = 0
+                    x1_pad = (d - xd)
+                else:
+                    x0_pad = r - agent.x
+                    x1_pad = 0
+            else:
+                x0_pad, x1_pad = 0, 0
+            if yd := oobs.shape[2] % d:
+                if agent.y > r:
+                    y0_pad = 0
+                    y1_pad = (d - yd)
+                else:
+                    y0_pad = r - agent.y
+                    y1_pad = 0
+            else:
+                y0_pad, y1_pad = 0, 0
+            oobs = np.pad(oobs, ((0, 0), (x0_pad, x1_pad), (y0_pad, y1_pad)), 'constant')
+        return oobs
+
     def get_all_tiles_with_collisions(self) -> List[Tile]:
         tiles_with_collisions = list()
         for tile in self[c.FLOOR]:

@@ -449,7 +494,7 @@ class BaseFactory(gym.Env):
         if self._actions.is_moving_action(agent.temp_action):
             if agent.temp_valid:
                 # info_dict.update(movement=1)
-                # reward += 0.00
+                reward -= 0.001
                 pass
             else:
                 reward -= 0.01

@@ -501,7 +546,7 @@ class BaseFactory(gym.Env):
     def render(self, mode='human'):
         if not self._renderer:  # lazy init
             height, width = self._obs_cube.shape[1:]
-            self._renderer = Renderer(width, height, view_radius=self.pomdp_r, fps=5)
+            self._renderer = Renderer(width, height, view_radius=self._pomdp_r, fps=5)
         walls = [RenderEntity('wall', wall.pos) for wall in self[c.WALLS]]
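
The new `_do_pomdp_obs_cutout` helper replaces the padded-obs-cube approach removed above: it slices a window of diameter 2*r + 1 around the agent out of the observation cube and pads wherever the window reaches past the map border. A minimal standalone sketch of that crop-and-pad idea, assuming a (z, H, W) float array and zero padding (the function and variable names here are illustrative, not the repository's):

    import numpy as np

    def pomdp_cutout(obs: np.ndarray, x: int, y: int, r: int) -> np.ndarray:
        # Crop a (z, H, W) cube to a (z, 2r+1, 2r+1) window centred on (x, y),
        # zero-padding on the sides where the window leaves the map.
        z, h, w = obs.shape
        d = 2 * r + 1
        x0, x1 = max(0, x - r), min(x + r + 1, h)
        y0, y1 = max(0, y - r), min(y + r + 1, w)
        window = obs[:, x0:x1, y0:y1]
        left_x = r - x if x < r else 0
        left_y = r - y if y < r else 0
        pad_x = (left_x, d - window.shape[1] - left_x)
        pad_y = (left_y, d - window.shape[2] - left_y)
        return np.pad(window, ((0, 0), pad_x, pad_y), 'constant')

For r = 2 the result is always a 5 x 5 window, no matter how close the agent stands to a wall, which keeps the observation-space shape fixed.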

View File

@@ -1,3 +1,4 @@
+import numbers
 import random
 from abc import ABC
 from typing import List, Union, Dict

@@ -91,21 +92,18 @@ class EntityObjectRegister(ObjectRegister, ABC):
         raise NotImplementedError

     @classmethod
-    def from_tiles(cls, tiles, *args, **kwargs):
+    def from_tiles(cls, tiles, *args, entity_kwargs=None, **kwargs):
         # objects_name = cls._accepted_objects.__name__
         register_obj = cls(*args, **kwargs)
-        try:
-            del kwargs['individual_slices']
-        except KeyError:
-            pass
-        entities = [cls._accepted_objects(tile, str_ident=i, **kwargs)
+        entities = [cls._accepted_objects(tile, str_ident=i, **entity_kwargs if entity_kwargs is not None else {})
                     for i, tile in enumerate(tiles)]
         register_obj.register_additional_items(entities)
         return register_obj

     @classmethod
-    def from_argwhere_coordinates(cls, positions: [(int, int)], tiles, *args, **kwargs):
-        return cls.from_tiles([tiles.by_pos(position) for position in positions], *args, **kwargs)
+    def from_argwhere_coordinates(cls, positions: [(int, int)], tiles, *args, entity_kwargs=None, **kwargs, ):
+        return cls.from_tiles([tiles.by_pos(position) for position in positions], *args, entity_kwargs=entity_kwargs,
+                              **kwargs)

     @property
     def positions(self):

@@ -166,10 +164,15 @@ class PlaceHolders(MovingEntityObjectRegister):
     # noinspection DuplicatedCode
     def as_array(self):
-        if isinstance(self.fill_value, int):
+        if isinstance(self.fill_value, numbers.Number):
             self._array[:] = self.fill_value
-        elif self.fill_value == "normal":
-            self._array = np.random.normal(size=self._array.shape)
+        elif isinstance(self.fill_value, str):
+            if self.fill_value.lower() in ['normal', 'n']:
+                self._array = np.random.normal(size=self._array.shape)
+            else:
+                raise ValueError('Choose one of: ["normal", "N"]')
+        else:
+            raise TypeError('Objects of type "str" or "number" is required here.')

         if self.individual_slices:
             return self._array

@@ -183,10 +186,12 @@ class Entities(Register):
     @property
     def observable_arrays(self):
+        # FIXME: Find a better name
         return {key: val.as_array() for key, val in self.items() if val.is_observable}

     @property
     def obs_arrays(self):
+        # FIXME: Find a better name
         return {key: val.as_array() for key, val in self.items() if val.is_observable and not val.hide_from_obs_builder}

     @property

@@ -208,6 +213,10 @@ class Entities(Register):
     def register_additional_items(self, others: Dict):
         return self.register_item(others)

+    def by_pos(self, pos: (int, int)):
+        found_entities = [y for y in (x.by_pos(pos) for x in self.values() if hasattr(x, 'by_pos')) if y is not None]
+        return found_entities
+

 class WallTiles(EntityObjectRegister):
     _accepted_objects = Wall

@@ -289,6 +298,10 @@ class Agents(MovingEntityObjectRegister):
     _accepted_objects = Agent

+    def __init__(self, *args, hide_from_obs_builder=False, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.hide_from_obs_builder = hide_from_obs_builder
+
     # noinspection DuplicatedCode
     def as_array(self):
         self._array[:] = c.FREE_CELL.value
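
The `entity_kwargs` parameter added to `from_tiles` separates two kinds of keyword arguments that previously travelled through a single **kwargs and had to be cleaned up with the try/del block: options for the register itself (for example `individual_slices` or `hide_from_obs_builder`) stay in **kwargs, while options meant for every spawned entity go into `entity_kwargs`. A short sketch mirroring the call sites changed elsewhere in this commit (`level_shape`, `floor` and `no_pos_tile` stand in for the factory's attributes):

    doors = Doors.from_tiles(door_tiles, level_shape,
                             entity_kwargs=dict(context=floor))
    placeholder = PlaceHolders.from_tiles([no_pos_tile], level_shape,
                                          entity_kwargs=dict(fill_value='N'))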

View File

@@ -14,7 +14,7 @@ from environments.factory.base.registers import Entities, MovingEntityObjectRegister
 from environments.factory.renderer import RenderEntity
 from environments.logging.recorder import RecorderCallback
+from environments.utility_classes import ObservationProperties

 CLEAN_UP_ACTION = h.EnvActions.CLEAN_UP

@@ -65,9 +65,9 @@ class DirtRegister(MovingEntityObjectRegister):
     def as_array(self):
         if self._array is not None:
             self._array[:] = c.FREE_CELL.value
-            for key, dirt in self.items():
+            for dirt in self.values():
                 if dirt.amount == 0:
-                    self.delete_item(key)
+                    self.delete_item(dirt)
                 self._array[0, dirt.x, dirt.y] = dirt.amount
         else:
             self._array = np.zeros((1, *self._level_shape))

@@ -124,21 +124,21 @@ class DirtFactory(BaseFactory):
     @property
     def additional_actions(self) -> Union[Action, List[Action]]:
         super_actions = super().additional_actions
-        if self.dirt_properties.agent_can_interact:
+        if self.dirt_prop.agent_can_interact:
             super_actions.append(Action(enum_ident=CLEAN_UP_ACTION))
         return super_actions

     @property
     def additional_entities(self) -> Dict[(Enum, Entities)]:
         super_entities = super().additional_entities
-        dirt_register = DirtRegister(self.dirt_properties, self._level_shape)
+        dirt_register = DirtRegister(self.dirt_prop, self._level_shape)
         super_entities.update(({c.DIRT: dirt_register}))
         return super_entities

-    def __init__(self, *args, dirt_properties: DirtProperties = DirtProperties(), env_seed=time.time_ns(), **kwargs):
-        if isinstance(dirt_properties, dict):
-            dirt_properties = DirtProperties(**dirt_properties)
-        self.dirt_properties = dirt_properties
+    def __init__(self, *args, dirt_prop: DirtProperties = DirtProperties(), env_seed=time.time_ns(), **kwargs):
+        if isinstance(dirt_prop, dict):
+            dirt_prop = DirtProperties(**dirt_prop)
+        self.dirt_prop = dirt_prop
         self._dirt_rng = np.random.default_rng(env_seed)
         self._dirt: DirtRegister
         kwargs.update(env_seed=env_seed)

@@ -153,7 +153,7 @@ class DirtFactory(BaseFactory):
     def clean_up(self, agent: Agent) -> c:
         if dirt := self[c.DIRT].by_pos(agent.pos):
-            new_dirt_amount = dirt.amount - self.dirt_properties.clean_amount
+            new_dirt_amount = dirt.amount - self.dirt_prop.clean_amount

             if new_dirt_amount <= 0:
                 self[c.DIRT].delete_item(dirt)

@@ -170,16 +170,16 @@ class DirtFactory(BaseFactory):
         ]
         self._dirt_rng.shuffle(free_for_dirt)
         if initial_spawn:
-            var = self.dirt_properties.initial_dirt_spawn_r_var
-            new_spawn = self.dirt_properties.initial_dirt_ratio + dirt_rng.uniform(-var, var)
+            var = self.dirt_prop.initial_dirt_spawn_r_var
+            new_spawn = self.dirt_prop.initial_dirt_ratio + dirt_rng.uniform(-var, var)
         else:
-            new_spawn = dirt_rng.uniform(0, self.dirt_properties.max_spawn_ratio)
+            new_spawn = dirt_rng.uniform(0, self.dirt_prop.max_spawn_ratio)
         n_dirt_tiles = max(0, int(new_spawn * len(free_for_dirt)))
         self[c.DIRT].spawn_dirt(free_for_dirt[:n_dirt_tiles])

     def do_additional_step(self) -> dict:
         info_dict = super().do_additional_step()
-        if smear_amount := self.dirt_properties.dirt_smear_amount:
+        if smear_amount := self.dirt_prop.dirt_smear_amount:
             for agent in self[c.AGENT]:
                 if agent.temp_valid and agent.last_pos != c.NO_POS:
                     if self._actions.is_moving_action(agent.temp_action):

@@ -196,7 +196,7 @@ class DirtFactory(BaseFactory):
             pass  # No Dirt Spawn
         elif not self._next_dirt_spawn:
             self.trigger_dirt_spawn()
-            self._next_dirt_spawn = self.dirt_properties.spawn_frequency
+            self._next_dirt_spawn = self.dirt_prop.spawn_frequency
         else:
             self._next_dirt_spawn -= 1
         return info_dict

@@ -205,7 +205,7 @@ class DirtFactory(BaseFactory):
         valid = super().do_additional_actions(agent, action)
         if valid is None:
             if action == CLEAN_UP_ACTION:
-                if self.dirt_properties.agent_can_interact:
+                if self.dirt_prop.agent_can_interact:
                     valid = self.clean_up(agent)
                     return valid
                 else:

@@ -218,11 +218,11 @@ class DirtFactory(BaseFactory):
     def do_additional_reset(self) -> None:
         super().do_additional_reset()
         self.trigger_dirt_spawn(initial_spawn=True)
-        self._next_dirt_spawn = self.dirt_properties.spawn_frequency if self.dirt_properties.spawn_frequency else -1
+        self._next_dirt_spawn = self.dirt_prop.spawn_frequency if self.dirt_prop.spawn_frequency else -1

     def check_additional_done(self):
         super_done = super().check_additional_done()
-        done = self.dirt_properties.done_when_clean and (len(self[c.DIRT]) == 0)
+        done = self.dirt_prop.done_when_clean and (len(self[c.DIRT]) == 0)
         return super_done or done

     def calculate_additional_reward(self, agent: Agent) -> (int, dict):

@@ -256,41 +256,40 @@ class DirtFactory(BaseFactory):
 if __name__ == '__main__':
+    from environments.utility_classes import AgentRenderOptions as ARO
     render = True

-    dirt_props = DirtProperties(1, 0.05, 0.1, 3, 1, 20, 0.0)
+    dirt_props = DirtProperties(1, 0.05, 0.1, 3, 1, 20, 0)
+    obs_props = ObservationProperties(render_agents=ARO.COMBINED, omit_agent_self=True, pomdp_r=2, additional_agent_placeholder=None)

     move_props = {'allow_square_movement': True,
                   'allow_diagonal_movement': False,
-                  'allow_no_op': False}  # MovementProperties(True, True, False)
+                  'allow_no_op': False}

-    with RecorderCallback(filepath=Path('debug_out') / f'recorder_xxxx.json', occupation_map=False,
-                          trajectory_map=False) as recorder:
-        factory = DirtFactory(n_agents=1, done_at_collision=False, frames_to_stack=0,
-                              level_name='rooms', max_steps=400, combin_agent_obs=True,
-                              omit_agent_in_obs=True, parse_doors=True, pomdp_r=3,
-                              record_episodes=True, verbose=True, cast_shadows=True,
-                              movement_properties=move_props, dirt_properties=dirt_props
-                              )
-        # noinspection DuplicatedCode
-        n_actions = factory.action_space.n - 1
-        _ = factory.observation_space
-        for epoch in range(4):
-            random_actions = [[random.randint(0, n_actions) for _
-                               in range(factory.n_agents)] for _
-                               in range(factory.max_steps+1)]
-            env_state = factory.reset()
-            r = 0
-            for agent_i_action in random_actions:
-                env_state, step_r, done_bool, info_obj = factory.step(agent_i_action)
-                # recorder.read_info(0, info_obj)
-                r += step_r
-                if render:
-                    factory.render()
-                if done_bool:
-                    # recorder.read_done(0, done_bool)
-                    break
-            print(f'Factory run {epoch} done, reward is:\n {r}')
-    pass
+    factory = DirtFactory(n_agents=3, done_at_collision=False,
+                          level_name='rooms', max_steps=400,
+                          obs_prop=obs_props, parse_doors=True,
+                          record_episodes=True, verbose=True,
+                          mv_prop=move_props, dirt_prop=dirt_props
+                          )
+    # noinspection DuplicatedCode
+    n_actions = factory.action_space.n - 1
+    _ = factory.observation_space
+    for epoch in range(4):
+        random_actions = [[random.randint(0, n_actions) for _
+                           in range(factory.n_agents)] for _
+                           in range(factory.max_steps+1)]
+        env_state = factory.reset()
+        r = 0
+        for agent_i_action in random_actions:
+            env_state, step_r, done_bool, info_obj = factory.step(agent_i_action)
+            r += step_r
+            if render:
+                factory.render()
+            if done_bool:
+                break
+        print(f'Factory run {epoch} done, reward is:\n {r}')
+    pass

View File

@@ -3,6 +3,7 @@ from collections import deque, UserList
 from enum import Enum
 from typing import List, Union, NamedTuple, Dict
 import numpy as np
+import random

 from environments.factory.base.base_factory import BaseFactory
 from environments.helpers import Constants as c

@@ -18,13 +19,6 @@ NO_ITEM = 0
 ITEM_DROP_OFF = 1

-def inventory_slice_name(agent_i):
-    if isinstance(agent_i, int):
-        return f'{c.INVENTORY.name}_{c.AGENT.value}#{agent_i}'
-    else:
-        return f'{c.INVENTORY.name}_{agent_i}'
-

 class Item(MoveableEntity):

     def __init__(self, *args, **kwargs):

@@ -77,7 +71,7 @@ class Inventory(UserList):
     @property
     def name(self):
-        return self.agent.name
+        return f'{self.__class__.__name__}({self.agent.name})'

     def __init__(self, pomdp_r: int, level_shape: (int, int), agent: Agent, capacity: int):
         super(Inventory, self).__init__()

@@ -111,7 +105,8 @@ class Inventory(UserList):
     def summarize_state(self, **kwargs):
         attr_dict = {key: str(val) for key, val in self.__dict__.items() if not key.startswith('_') and key != 'data'}
-        attr_dict.update({val.name: val.summarize_state(**kwargs) for val in self})
+        attr_dict.update(dict(items={val.name: val.summarize_state(**kwargs) for val in self}))
+        attr_dict.update(dict(name=self.name))
         return attr_dict

@@ -149,6 +144,11 @@ class Inventories(ObjectRegister):
         except StopIteration:
             return None

+    def summarize_states(self, n_steps=None):
+        # as dict with additional nesting
+        # return dict(items=super(Inventories, self).summarize_states())
+        return super(Inventories, self).summarize_states(n_steps=n_steps)
+

 class DropOffLocation(Entity):

@@ -194,6 +194,9 @@ class DropOffLocations(EntityObjectRegister):
             self._array[0, item.x, item.y] = item.encoding
         return self._array

+    def __repr__(self):
+        super(DropOffLocations, self).__repr__()
+

 class ItemProperties(NamedTuple):
     n_items: int = 5  # How many items are there at the same time

@@ -207,13 +210,13 @@ class ItemProperties(NamedTuple):
 # noinspection PyAttributeOutsideInit, PyAbstractClass
 class ItemFactory(BaseFactory):
     # noinspection PyMissingConstructor
-    def __init__(self, *args, item_properties: ItemProperties = ItemProperties(), env_seed=time.time_ns(), **kwargs):
-        if isinstance(item_properties, dict):
-            item_properties = ItemProperties(**item_properties)
-        self.item_properties = item_properties
+    def __init__(self, *args, item_prop: ItemProperties = ItemProperties(), env_seed=time.time_ns(), **kwargs):
+        if isinstance(item_prop, dict):
+            item_prop = ItemProperties(**item_prop)
+        self.item_prop = item_prop
         kwargs.update(env_seed=env_seed)
         self._item_rng = np.random.default_rng(env_seed)
-        assert (item_properties.n_items <= ((1 + kwargs.get('pomdp_r', 0) * 2) ** 2)) or not kwargs.get('pomdp_r', 0)
+        assert (item_prop.n_items <= ((1 + kwargs.get('_pomdp_r', 0) * 2) ** 2)) or not kwargs.get('_pomdp_r', 0)
         super().__init__(*args, **kwargs)

     @property

@@ -228,16 +231,19 @@ class ItemFactory(BaseFactory):
         # noinspection PyUnresolvedReferences
         super_entities = super().additional_entities

-        empty_tiles = self[c.FLOOR].empty_tiles[:self.item_properties.n_drop_off_locations]
-        drop_offs = DropOffLocations.from_tiles(empty_tiles, self._level_shape,
-                                                storage_size_until_full=self.item_properties.max_dropoff_storage_size)
+        empty_tiles = self[c.FLOOR].empty_tiles[:self.item_prop.n_drop_off_locations]
+        drop_offs = DropOffLocations.from_tiles(
+            empty_tiles, self._level_shape,
+            entity_kwargs=dict(
+                storage_size_until_full=self.item_prop.max_dropoff_storage_size)
+        )
         item_register = ItemRegister(self._level_shape)
-        empty_tiles = self[c.FLOOR].empty_tiles[:self.item_properties.n_items]
+        empty_tiles = self[c.FLOOR].empty_tiles[:self.item_prop.n_items]
         item_register.spawn_items(empty_tiles)

-        inventories = Inventories(self._level_shape if not self.pomdp_r else ((self.pomdp_diameter,) * 2))
-        inventories.spawn_inventories(self[c.AGENT], self.pomdp_r,
-                                      self.item_properties.max_agent_inventory_capacity)
+        inventories = Inventories(self._level_shape if not self._pomdp_r else ((self.pomdp_diameter,) * 2))
+        inventories.spawn_inventories(self[c.AGENT], self._pomdp_r,
+                                      self.item_prop.max_agent_inventory_capacity)

         super_entities.update({c.DROP_OFF: drop_offs, c.ITEM: item_register, c.INVENTORY: inventories})
         return super_entities

@@ -270,7 +276,7 @@ class ItemFactory(BaseFactory):
         valid = super().do_additional_actions(agent, action)
         if valid is None:
             if action == h.EnvActions.ITEM_ACTION:
-                if self.item_properties.agent_can_interact:
+                if self.item_prop.agent_can_interact:
                     valid = self.do_item_action(agent)
                     return valid
                 else:

@@ -283,14 +289,14 @@ class ItemFactory(BaseFactory):
     def do_additional_reset(self) -> None:
         # noinspection PyUnresolvedReferences
         super().do_additional_reset()
-        self._next_item_spawn = self.item_properties.spawn_frequency
+        self._next_item_spawn = self.item_prop.spawn_frequency
         self.trigger_item_spawn()

     def trigger_item_spawn(self):
-        if item_to_spawns := max(0, (self.item_properties.n_items - len(self[c.ITEM]))):
+        if item_to_spawns := max(0, (self.item_prop.n_items - len(self[c.ITEM]))):
             empty_tiles = self[c.FLOOR].empty_tiles[:item_to_spawns]
             self[c.ITEM].spawn_items(empty_tiles)
-            self._next_item_spawn = self.item_properties.spawn_frequency
+            self._next_item_spawn = self.item_prop.spawn_frequency
             self.print(f'{item_to_spawns} new items have been spawned; next spawn in {self._next_item_spawn}')
         else:
             self.print('No Items are spawning, limit is reached.')

@@ -351,30 +357,41 @@ class ItemFactory(BaseFactory):
 if __name__ == '__main__':
-    import random
+    from environments.utility_classes import AgentRenderOptions as ARO, ObservationProperties

     render = True

-    item_props = ItemProperties()
-    factory = ItemFactory(item_properties=item_props, n_agents=3, done_at_collision=False, frames_to_stack=0,
-                          level_name='rooms', max_steps=4000,
-                          omit_agent_in_obs=True, parse_doors=True, pomdp_r=3,
-                          record_episodes=False, verbose=False
+    item_probs = ItemProperties()
+    obs_props = ObservationProperties(render_agents=ARO.LEVEL, omit_agent_self=True, pomdp_r=2)
+
+    move_props = {'allow_square_movement': True,
+                  'allow_diagonal_movement': False,
+                  'allow_no_op': False}
+
+    factory = ItemFactory(n_agents=3, done_at_collision=False,
+                          level_name='rooms', max_steps=400,
+                          obs_prop=obs_props, parse_doors=True,
+                          record_episodes=True, verbose=True,
+                          mv_prop=move_props, item_prop=item_probs
                           )

     # noinspection DuplicatedCode
     n_actions = factory.action_space.n - 1
     _ = factory.observation_space

-    for epoch in range(100):
-        random_actions = [[random.randint(0, n_actions) for _ in range(factory.n_agents)] for _ in range(200)]
+    for epoch in range(4):
+        random_actions = [[random.randint(0, n_actions) for _
+                           in range(factory.n_agents)] for _
+                           in range(factory.max_steps + 1)]
         env_state = factory.reset()
-        rew = 0
+        r = 0
         for agent_i_action in random_actions:
             env_state, step_r, done_bool, info_obj = factory.step(agent_i_action)
-            rew += step_r
+            r += step_r
             if render:
                 factory.render()
             if done_bool:
                 break
-        print(f'Factory run {epoch} done, reward is:\n {rew}')
+        print(f'Factory run {epoch} done, reward is:\n {r}')
+    pass

View File

@@ -1,7 +1,24 @@
-from typing import NamedTuple
+from enum import Enum
+from typing import NamedTuple, Union
+
+
+class AgentRenderOptions(object):
+    SEPERATE = 'each'
+    COMBINED = 'combined'
+    LEVEL = 'lvl'
+    NOT = 'not'


 class MovementProperties(NamedTuple):
     allow_square_movement: bool = True
     allow_diagonal_movement: bool = False
     allow_no_op: bool = False
+
+
+class ObservationProperties(NamedTuple):
+    render_agents: AgentRenderOptions = AgentRenderOptions.SEPERATE
+    omit_agent_self: bool = True
+    additional_agent_placeholder: Union[None, str, int] = None
+    cast_shadows = True
+    frames_to_stack: int = 0
+    pomdp_r: int = 0
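
With this class, observation behaviour that was previously spread over several BaseFactory constructor flags (combin_agent_obs, omit_agent_in_obs, cast_shadows, frames_to_stack, pomdp_r, additional_agent_placeholder) is bundled into a single NamedTuple that can also be supplied as a plain dict. A hedged usage sketch (the `obs_prop` and `mv_prop` keywords match the new BaseFactory signature above; the concrete values are only an example):

    from environments.factory.factory_dirt import DirtFactory, DirtProperties
    from environments.utility_classes import (AgentRenderOptions, MovementProperties,
                                              ObservationProperties)

    obs_props = ObservationProperties(render_agents=AgentRenderOptions.COMBINED,
                                      omit_agent_self=True,
                                      frames_to_stack=3,
                                      pomdp_r=2)
    move_props = MovementProperties(allow_square_movement=True)

    # a dict with the same keys is accepted too; BaseFactory.__init__ converts it
    # via ObservationProperties(**obs_prop)
    factory = DirtFactory(n_agents=2, dirt_prop=DirtProperties(),
                          obs_prop=obs_props, mv_prop=move_props)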

View File

@@ -56,7 +56,7 @@ if __name__ == '__main__':
     for modeL_type in [A2C, PPO, DQN]:  # ,RegDQN, QRDQN]:
         for seed in range(3):
             env_kwargs = dict(n_agents=1,
-                              # item_properties=item_props,
+                              # item_prop=item_props,
                               dirt_properties=dirt_props,
                               movement_properties=move_props,
                               pomdp_r=2, max_steps=1000, parse_doors=False,

View File

@@ -48,7 +48,7 @@ if __name__ == '__main__':
         env_kwargs = yaml.load(f, Loader=yaml.FullLoader)
     dirt_props = DirtProperties(clean_amount=3, gain_amount=0.2, max_global_amount=30,
                                 max_local_amount=3, spawn_frequency=1, max_spawn_ratio=0.05)
-    # env_kwargs.update(n_agents=1, dirt_properties=dirt_props)
+    # env_kwargs.update(n_agents=1, dirt_prop=dirt_props)
     env = DirtFactory(**env_kwargs)
     env = FrameStack(env, 4)

View File

@@ -5,6 +5,7 @@ import numpy as np
 import yaml

 from environments import helpers as h
+from environments.helpers import Constants as c
 from environments.factory.factory_dirt import DirtFactory
 from environments.factory.factory_dirt_item import DirtItemFactory
 from environments.logging.recorder import RecorderCallback

@@ -15,29 +16,30 @@ warnings.filterwarnings('ignore', category=UserWarning)
 if __name__ == '__main__':

-    model_name = 'DQN_1631187073'
+    model_name = 'DQN_163519000'
     run_id = 0
     seed = 69
-    out_path = Path('debug_out/DQN_1635176929/0_DQN_1635176929')
+    n_agents = 2
+    out_path = Path('debug_out/DQN_163519000/1_DQN_163519000')
     model_path = out_path

     with (out_path / f'env_params.json').open('r') as f:
         env_kwargs = yaml.load(f, Loader=yaml.FullLoader)
-        env_kwargs.update(additional_agent_placeholder=None, n_agents=4)
-        if gain_amount := env_kwargs.get('dirt_properties', {}).get('gain_amount', None):
-            env_kwargs['dirt_properties']['max_spawn_amount'] = gain_amount
-            del env_kwargs['dirt_properties']['gain_amount']
+        env_kwargs.update(additional_agent_placeholder=None, n_agents=n_agents)
+        if gain_amount := env_kwargs.get('dirt_prop', {}).get('gain_amount', None):
+            env_kwargs['dirt_prop']['max_spawn_amount'] = gain_amount
+            del env_kwargs['dirt_prop']['gain_amount']

-        env_kwargs.update(record_episodes=True)
+        env_kwargs.update(record_episodes=False)

     this_model = out_path / 'model.zip'

     model_cls = next(val for key, val in h.MODEL_MAP.items() if key in model_name)
-    models = [model_cls.load(this_model) for _ in range(4)]
+    models = [model_cls.load(this_model) for _ in range(n_agents)]

     with RecorderCallback(filepath=Path() / 'recorder_out_DQN.json') as recorder:
         # Init Env
-        with DirtItemFactory(**env_kwargs) as env:
+        with DirtFactory(**env_kwargs) as env:
             obs_shape = env.observation_space.shape
             # Evaluation Loop for i in range(n Episodes)
             for episode in range(5):

@@ -46,11 +48,11 @@ if __name__ == '__main__':
                 while not done_bool:
                     actions = [model.predict(
                         np.stack([env_state[i][j] for i in range(env_state.shape[0])]),
-                        deterministic=True)[0] for j, model in enumerate(models)]
+                        deterministic=False)[0] for j, model in enumerate(models)]
                     env_state, step_r, done_bool, info_obj = env.step(actions)
                     recorder.read_info(0, info_obj)
                     rew += step_r
-                    # env.render()
+                    env.render()
                     if done_bool:
                         recorder.read_done(0, done_bool)
                         break

View File

@@ -26,16 +26,12 @@ from environments.factory.factory_dirt import DirtProperties, DirtFactory
 from environments.factory.factory_dirt_item import DirtItemFactory
 from environments.factory.factory_item import ItemProperties, ItemFactory
 from environments.logging.monitor import MonitorCallback
-from environments.utility_classes import MovementProperties
+from environments.utility_classes import MovementProperties, ObservationProperties, AgentRenderOptions
 import pickle
 from plotting.compare_runs import compare_seed_runs, compare_model_runs, compare_all_parameter_runs
 import pandas as pd
 import seaborn as sns

-# Define a global studi save path
-start_time = 163519000  # int(time.time())
-study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}'

 """
 In this studie, we want to explore the macro behaviour of multi agents which are trained on the same task,
 but never saw each other in training.

@@ -68,6 +64,10 @@ There are further distinctions to be made:
 - We are out of distribution.
 """

+n_agents = 4
+ood_monitor_file = f'e_1_monitor_{n_agents}_agents.pick'
+baseline_monitor_file = 'e_1_baseline_monitor.pick'
+

 def policy_model_kwargs():
     return dict(ent_coef=0.05)

@@ -92,11 +92,96 @@ def encapsule_env_factory(env_fctry, env_kwrgs):
     return _init


+def load_model_run_baseline(seed_path, env_to_run):
+    # retrieve model class
+    model_cls = next(val for key, val in h.MODEL_MAP.items() if key in seed_path.parent.name)
+    # Load both agents
+    model = model_cls.load(seed_path / 'model.zip')
+    # Load old env kwargs
+    with next(seed_path.glob('*.json')).open('r') as f:
+        env_kwargs = simplejson.load(f)
+    # Monitor Init
+    with MonitorCallback(filepath=seed_path / baseline_monitor_file) as monitor:
+        # Init Env
+        with env_to_run(**env_kwargs) as env_factory:
+            # Evaluation Loop for i in range(n Episodes)
+            for episode in range(100):
+                env_state = env_factory.reset()
+                rew, done_bool = 0, False
+                while not done_bool:
+                    action = model.predict(env_state, deterministic=True)[0]
+                    env_state, step_r, done_bool, info_obj = env_factory.step(action)
+                    monitor.read_info(0, info_obj)
+                    rew += step_r
+                    if done_bool:
+                        monitor.read_done(0, done_bool)
+                        break
+                print(f'Factory run {episode} done, reward is:\n {rew}')
+    # Eval monitor outputs are automatically stored by the monitor object
+    # del model, env_kwargs, env_factory
+    # import gc
+    # gc.collect()
+
+
+def load_model_run_study(seed_path, env_to_run, additional_kwargs_dict):
+    global model_cls
+    # retrieve model class
+    model_cls = next(val for key, val in h.MODEL_MAP.items() if key in seed_path.parent.name)
+    # Load both agents
+    models = [model_cls.load(seed_path / 'model.zip') for _ in range(n_agents)]
+    # Load old env kwargs
+    with next(seed_path.glob('*.json')).open('r') as f:
+        env_kwargs = simplejson.load(f)
+    env_kwargs.update(
+        n_agents=n_agents,
+        **additional_kwargs_dict.get('post_training_kwargs', {}))
+    # Monitor Init
+    with MonitorCallback(filepath=seed_path / ood_monitor_file) as monitor:
+        # Init Env
+        with env_to_run(**env_kwargs) as env_factory:
+            # Evaluation Loop for i in range(n Episodes)
+            for episode in range(50):
+                env_state = env_factory.reset()
+                rew, done_bool = 0, False
+                while not done_bool:
+                    try:
+                        actions = [model.predict(
+                            np.stack([env_state[i][j] for i in range(env_state.shape[0])]),
+                            deterministic=False)[0] for j, model in enumerate(models)]
+                    except ValueError as e:
+                        print(e)
+                        print('Env_Kwargs are:\n')
+                        print(env_kwargs)
+                        print('Path is:\n')
+                        print(seed_path)
+                        exit()
+                    env_state, step_r, done_bool, info_obj = env_factory.step(actions)
+                    monitor.read_info(0, info_obj)
+                    rew += step_r
+                    if done_bool:
+                        monitor.read_done(0, done_bool)
+                        break
+                print(f'Factory run {episode} done, reward is:\n {rew}')
+    # Eval monitor outputs are automatically stored by the monitor object
+    del models, env_kwargs, env_factory
+    import gc
+    gc.collect()
+
+
 if __name__ == '__main__':
     train_steps = 8e5

+    # Define a global studi save path
+    start_time = '900000'  # int(time.time())
+    study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}'
+
     # Define Global Env Parameters
     # Define properties object parameters
+    obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT,
+                                      omit_agent_self=True,
+                                      frames_to_stack=3,
+                                      pomdp_r=2
+                                      )
     move_props = MovementProperties(allow_diagonal_movement=True,
                                     allow_square_movement=True,
                                     allow_no_op=False)

@@ -108,33 +193,67 @@ if __name__ == '__main__':
     item_props = ItemProperties(n_items=10, agent_can_interact=True,
                                 spawn_frequency=30, n_drop_off_locations=2,
                                 max_agent_inventory_capacity=15)
-    factory_kwargs = dict(n_agents=1,
-                          pomdp_r=2, max_steps=400, parse_doors=True,
-                          level_name='rooms', frames_to_stack=3,
-                          omit_agent_in_obs=True, combin_agent_obs=True, record_episodes=False,
-                          cast_shadows=True, doors_have_area=False, verbose=False,
-                          movement_properties=move_props
+    factory_kwargs = dict(n_agents=1, max_steps=400, parse_doors=True,
+                          level_name='rooms', record_episodes=False, doors_have_area=False,
+                          verbose=False,
+                          mv_prop=move_props,
+                          obs_prop=obs_props
                           )

     # Bundle both environments with global kwargs and parameters
-    env_map = {'dirt': (DirtFactory, dict(dirt_properties=dirt_props, **factory_kwargs)),
-               'item': (ItemFactory, dict(item_properties=item_props, **factory_kwargs)),
-               'itemdirt': (DirtItemFactory, dict(dirt_properties=dirt_props, item_properties=item_props,
+    env_map = {'dirt': (DirtFactory, dict(dirt_prop=dirt_props,
+                                          **factory_kwargs)),
+               'item': (ItemFactory, dict(item_prop=item_props,
+                                          **factory_kwargs)),
+               'itemdirt': (DirtItemFactory, dict(dirt_prop=dirt_props,
+                                                  item_prop=item_props,
                                                   **factory_kwargs))}
     env_names = list(env_map.keys())

     # Define parameter versions according with #1,2[1,0,N],3
     observation_modes = {
         # Fill-value = 0
         # DEACTIVATED 'seperate_0': dict(additional_env_kwargs=dict(additional_agent_placeholder=0)),
         # Fill-value = 1
         # DEACTIVATED 'seperate_1': dict(additional_env_kwargs=dict(additional_agent_placeholder=1)),
         # Fill-value = N(0, 1)
-        'seperate_N': dict(additional_env_kwargs=dict(additional_agent_placeholder='N')),
-        # Further Adjustments are done post-training
-        'in_lvl_obs': dict(post_training_kwargs=dict(other_agent_obs='in_lvl')),
+        'seperate_N': dict(
+            post_training_kwargs=
+            dict(obs_prop=ObservationProperties(
+                render_agents=AgentRenderOptions.COMBINED,
+                additional_agent_placeholder=None,
+                omit_agent_self=True,
+                frames_to_stack=3,
+                pomdp_r=2)
+            ),
+            additional_env_kwargs=
+            dict(obs_prop=ObservationProperties(
+                render_agents=AgentRenderOptions.NOT,
+                additional_agent_placeholder='N',
+                omit_agent_self=True,
+                frames_to_stack=3,
+                pomdp_r=2)
+            )
+        ),
+        'in_lvl_obs': dict(
+            post_training_kwargs=
+            dict(obs_prop=ObservationProperties(
+                render_agents=AgentRenderOptions.LEVEL,
+                omit_agent_self=True,
+                frames_to_stack=3,
+                pomdp_r=2)
+            )
+        ),
         # No further adjustment needed
-        'no_obs': {}
+        'no_obs': dict(
+            post_training_kwargs=
+            dict(obs_prop=ObservationProperties(
+                render_agents=AgentRenderOptions.NOT,
+                omit_agent_self=True,
+                frames_to_stack=3,
+                pomdp_r=2)
+            )
+        )
     }

     # Train starts here ############################################################

@@ -223,52 +342,27 @@ if __name__ == '__main__':
     # Evaluation starts here #####################################################
     # First Iterate over every model and monitor "as trained"
-    baseline_monitor_file = 'e_1_baseline_monitor.pick'
     if True:
-        render = False
         for observation_mode in observation_modes:
             obs_mode_path = next(x for x in study_root_path.iterdir() if x.is_dir() and x.name == observation_mode)
             # For trained policy in study_root_path / identifier
             for env_path in [x for x in obs_mode_path.iterdir() if x.is_dir()]:
                 for policy_path in [x for x in env_path.iterdir() if x. is_dir()]:
                     # Iteration
-                    for seed_path in (y for y in policy_path.iterdir() if y.is_dir()):
-                        # retrieve model class
-                        for model_cls in (val for key, val in h.MODEL_MAP.items() if key in policy_path.name):
-                            # Load both agents
-                            model = model_cls.load(seed_path / 'model.zip')
-                            # Load old env kwargs
-                            with next(seed_path.glob('*.json')).open('r') as f:
-                                env_kwargs = simplejson.load(f)
-                            # Monitor Init
-                            with MonitorCallback(filepath=seed_path / baseline_monitor_file) as monitor:
-                                # Init Env
-                                with env_map[env_path.name][0](**env_kwargs) as env_factory:
-                                    # Evaluation Loop for i in range(n Episodes)
-                                    for episode in range(100):
-                                        env_state = env_factory.reset()
-                                        rew, done_bool = 0, False
-                                        while not done_bool:
-                                            action = model.predict(env_state, deterministic=True)[0]
-                                            env_state, step_r, done_bool, info_obj = env_factory.step(action)
-                                            monitor.read_info(0, info_obj)
-                                            rew += step_r
-                                            if render:
-                                                env_factory.render()
-                                            if done_bool:
-                                                monitor.read_done(0, done_bool)
-                                                break
-                                        print(f'Factory run {episode} done, reward is:\n {rew}')
-                            # Eval monitor outputs are automatically stored by the monitor object
-                            del model, env_kwargs, env_factory
-                            import gc
-                            gc.collect()
+                    paths = list(y for y in policy_path.iterdir() if y.is_dir() \
+                                 and not (y / baseline_monitor_file).exists())
+                    import multiprocessing as mp
+                    import itertools as it
+                    pool = mp.Pool(mp.cpu_count())
+                    result = pool.starmap(load_model_run_baseline,
+                                          it.product(paths,
+                                                     (env_map[env_path.name][0],))
+                                          )
+                    # for seed_path in (y for y in policy_path.iterdir() if y.is_dir()):
+                    #     load_model_run_baseline(seed_path)

     # Then iterate over every model and monitor "ood behavior" - "is it ood?"
-    n_agents = 4
-    ood_monitor_file = f'e_1_monitor_{n_agents}_agents.pick'
     if True:
         for observation_mode in observation_modes:
             obs_mode_path = next(x for x in study_root_path.iterdir() if x.is_dir() and x.name == observation_mode)

@@ -279,44 +373,18 @@ if __name__ == '__main__':
                     # First seed path version
                     # seed_path = next((y for y in policy_path.iterdir() if y.is_dir()))
                     # Iteration
-                    for seed_path in (y for y in policy_path.iterdir() if y.is_dir()):
-                        if (seed_path / ood_monitor_file).exists():
-                            continue
-                        # retrieve model class
-                        for model_cls in (val for key, val in h.MODEL_MAP.items() if key in policy_path.name):
-                            # Load both agents
-                            models = [model_cls.load(seed_path / 'model.zip') for _ in range(n_agents)]
-                            # Load old env kwargs
-                            with next(seed_path.glob('*.json')).open('r') as f:
-                                env_kwargs = simplejson.load(f)
-                            env_kwargs.update(
-                                n_agents=n_agents, additional_agent_placeholder=None,
-                                **observation_modes[observation_mode].get('post_training_env_kwargs', {}))
-                            # Monitor Init
-                            with MonitorCallback(filepath=seed_path / ood_monitor_file) as monitor:
-                                # Init Env
-                                with env_map[env_path.name][0](**env_kwargs) as env_factory:
-                                    # Evaluation Loop for i in range(n Episodes)
-                                    for episode in range(50):
-                                        env_state = env_factory.reset()
-                                        rew, done_bool = 0, False
-                                        while not done_bool:
-                                            actions = [model.predict(
-                                                np.stack([env_state[i][j] for i in range(env_state.shape[0])]),
-                                                deterministic=False)[0] for j, model in enumerate(models)]
-                                            env_state, step_r, done_bool, info_obj = env_factory.step(actions)
-                                            monitor.read_info(0, info_obj)
-                                            rew += step_r
-                                            if done_bool:
-                                                monitor.read_done(0, done_bool)
-                                                break
-                                        print(f'Factory run {episode} done, reward is:\n {rew}')
-                            # Eval monitor outputs are automatically stored by the monitor object
-                            del models, env_kwargs, env_factory
-                            import gc
-                            gc.collect()
+                    import multiprocessing as mp
+                    import itertools as it
+                    pool = mp.Pool(mp.cpu_count())
+                    paths = list(y for y in policy_path.iterdir() if y.is_dir() \
+                                 and not (y / ood_monitor_file).exists())
+                    result = pool.starmap(load_model_run_study,
+                                          it.product(paths,
+                                                     (env_map[env_path.name][0],),
+                                                     (observation_modes[observation_mode],))
+                                          )
+                    # for seed_path in (y for y in policy_path.iterdir() if y.is_dir()):
+                    #     load_model_run_study(seed_path)

     # Plotting
     if True: