new observation properties for testing of technical limitations
This commit is contained in:
parent
b5c6105b7b
commit
d69cf75c15
@ -16,7 +16,8 @@ from environments.helpers import Constants as c, Constants
|
|||||||
from environments import helpers as h
|
from environments import helpers as h
|
||||||
from environments.factory.base.objects import Agent, Tile, Action
|
from environments.factory.base.objects import Agent, Tile, Action
|
||||||
from environments.factory.base.registers import Actions, Entities, Agents, Doors, FloorTiles, WallTiles, PlaceHolders
|
from environments.factory.base.registers import Actions, Entities, Agents, Doors, FloorTiles, WallTiles, PlaceHolders
|
||||||
from environments.utility_classes import MovementProperties
|
from environments.utility_classes import MovementProperties, ObservationProperties
|
||||||
|
from environments.utility_classes import AgentRenderOptions as a_obs
|
||||||
|
|
||||||
import simplejson
|
import simplejson
|
||||||
|
|
||||||
@ -33,7 +34,7 @@ class BaseFactory(gym.Env):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def observation_space(self):
|
def observation_space(self):
|
||||||
if r := self.pomdp_r:
|
if r := self._pomdp_r:
|
||||||
z = self._obs_cube.shape[0]
|
z = self._obs_cube.shape[0]
|
||||||
xy = r*2 + 1
|
xy = r*2 + 1
|
||||||
level_shape = (z, xy, xy)
|
level_shape = (z, xy, xy)
|
||||||
@ -44,24 +45,32 @@ class BaseFactory(gym.Env):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def pomdp_diameter(self):
|
def pomdp_diameter(self):
|
||||||
return self.pomdp_r * 2 + 1
|
return self._pomdp_r * 2 + 1
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def movement_actions(self):
|
def movement_actions(self):
|
||||||
return self._actions.movement_actions
|
return self._actions.movement_actions
|
||||||
|
|
||||||
def __enter__(self):
|
def __enter__(self):
|
||||||
return self if self.frames_to_stack == 0 else FrameStack(self, self.frames_to_stack)
|
return self if self.obs_prop.frames_to_stack == 0 else \
|
||||||
|
FrameStack(self, self.obs_prop.frames_to_stack)
|
||||||
|
|
||||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||||
self.close()
|
self.close()
|
||||||
|
|
||||||
def __init__(self, level_name='simple', n_agents=1, max_steps=int(5e2), pomdp_r: Union[None, int] = 0,
|
def __init__(self, level_name='simple', n_agents=1, max_steps=int(5e2),
|
||||||
movement_properties: MovementProperties = MovementProperties(), parse_doors=False,
|
mv_prop: MovementProperties = MovementProperties(),
|
||||||
combin_agent_obs: bool = False, frames_to_stack=0, record_episodes=False,
|
obs_prop: ObservationProperties = ObservationProperties(),
|
||||||
omit_agent_in_obs=False, done_at_collision=False, cast_shadows=True, additional_agent_placeholder=None,
|
parse_doors=False, record_episodes=False, done_at_collision=False,
|
||||||
verbose=False, doors_have_area=True, env_seed=time.time_ns(), **kwargs):
|
verbose=False, doors_have_area=True, env_seed=time.time_ns(), **kwargs):
|
||||||
assert frames_to_stack != 1 and frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1."
|
|
||||||
|
if isinstance(mv_prop, dict):
|
||||||
|
mv_prop = MovementProperties(**mv_prop)
|
||||||
|
if isinstance(obs_prop, dict):
|
||||||
|
obs_prop = ObservationProperties(**obs_prop)
|
||||||
|
|
||||||
|
assert obs_prop.frames_to_stack != 1 and \
|
||||||
|
obs_prop.frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1."
|
||||||
if kwargs:
|
if kwargs:
|
||||||
print(f'Following kwargs were passed, but ignored: {kwargs}')
|
print(f'Following kwargs were passed, but ignored: {kwargs}')
|
||||||
|
|
||||||
@ -69,24 +78,18 @@ class BaseFactory(gym.Env):
|
|||||||
self.env_seed = env_seed
|
self.env_seed = env_seed
|
||||||
self.seed(env_seed)
|
self.seed(env_seed)
|
||||||
self._base_rng = np.random.default_rng(self.env_seed)
|
self._base_rng = np.random.default_rng(self.env_seed)
|
||||||
if isinstance(movement_properties, dict):
|
self.mv_prop = mv_prop
|
||||||
movement_properties = MovementProperties(**movement_properties)
|
self.obs_prop = obs_prop
|
||||||
self.movement_properties = movement_properties
|
|
||||||
self.level_name = level_name
|
self.level_name = level_name
|
||||||
self._level_shape = None
|
self._level_shape = None
|
||||||
self.verbose = verbose
|
self.verbose = verbose
|
||||||
self.additional_agent_placeholder = additional_agent_placeholder
|
|
||||||
self._renderer = None # expensive - don't use it when not required !
|
self._renderer = None # expensive - don't use it when not required !
|
||||||
self._entities = Entities()
|
self._entities = Entities()
|
||||||
|
|
||||||
self.n_agents = n_agents
|
self.n_agents = n_agents
|
||||||
|
|
||||||
self.max_steps = max_steps
|
self.max_steps = max_steps
|
||||||
self.pomdp_r = pomdp_r
|
self._pomdp_r = self.obs_prop.pomdp_r
|
||||||
self.combin_agent_obs = combin_agent_obs
|
|
||||||
self.omit_agent_in_obs = omit_agent_in_obs
|
|
||||||
self.cast_shadows = cast_shadows
|
|
||||||
self.frames_to_stack = frames_to_stack
|
|
||||||
|
|
||||||
self.done_at_collision = done_at_collision
|
self.done_at_collision = done_at_collision
|
||||||
self.record_episodes = record_episodes
|
self.record_episodes = record_episodes
|
||||||
@ -130,24 +133,32 @@ class BaseFactory(gym.Env):
|
|||||||
parsed_doors = h.one_hot_level(parsed_level, c.DOOR)
|
parsed_doors = h.one_hot_level(parsed_level, c.DOOR)
|
||||||
if np.any(parsed_doors):
|
if np.any(parsed_doors):
|
||||||
door_tiles = [floor.by_pos(pos) for pos in np.argwhere(parsed_doors == c.OCCUPIED_CELL.value)]
|
door_tiles = [floor.by_pos(pos) for pos in np.argwhere(parsed_doors == c.OCCUPIED_CELL.value)]
|
||||||
doors = Doors.from_tiles(door_tiles, self._level_shape, context=floor)
|
doors = Doors.from_tiles(door_tiles, self._level_shape,
|
||||||
|
entity_kwargs=dict(context=floor)
|
||||||
|
)
|
||||||
entities.update({c.DOORS: doors})
|
entities.update({c.DOORS: doors})
|
||||||
|
|
||||||
# Actions
|
# Actions
|
||||||
self._actions = Actions(self.movement_properties, can_use_doors=self.parse_doors)
|
self._actions = Actions(self.mv_prop, can_use_doors=self.parse_doors)
|
||||||
if additional_actions := self.additional_actions:
|
if additional_actions := self.additional_actions:
|
||||||
self._actions.register_additional_items(additional_actions)
|
self._actions.register_additional_items(additional_actions)
|
||||||
|
|
||||||
# Agents
|
# Agents
|
||||||
agents = Agents.from_tiles(floor.empty_tiles[:self.n_agents], self._level_shape,
|
agents = Agents.from_tiles(floor.empty_tiles[:self.n_agents], self._level_shape,
|
||||||
individual_slices=not self.combin_agent_obs)
|
individual_slices=self.obs_prop.render_agents == a_obs.SEPERATE,
|
||||||
|
hide_from_obs_builder=self.obs_prop.render_agents == a_obs.LEVEL,
|
||||||
|
is_observable=self.obs_prop.render_agents != a_obs.NOT
|
||||||
|
)
|
||||||
entities.update({c.AGENT: agents})
|
entities.update({c.AGENT: agents})
|
||||||
|
|
||||||
if self.additional_agent_placeholder is not None:
|
if self.obs_prop.additional_agent_placeholder is not None:
|
||||||
|
# TODO: Make this accept Lists for multiple placeholders
|
||||||
|
|
||||||
# Empty Observations with either [0, 1, N(0, 1)]
|
# Empty Observations with either [0, 1, N(0, 1)]
|
||||||
placeholder = PlaceHolders.from_tiles([self._NO_POS_TILE], self._level_shape,
|
placeholder = PlaceHolders.from_tiles([self._NO_POS_TILE], self._level_shape,
|
||||||
fill_value=self.additional_agent_placeholder)
|
entity_kwargs=dict(
|
||||||
|
fill_value=self.obs_prop.additional_agent_placeholder)
|
||||||
|
)
|
||||||
|
|
||||||
entities.update({c.AGENT_PLACEHOLDER: placeholder})
|
entities.update({c.AGENT_PLACEHOLDER: placeholder})
|
||||||
|
|
||||||
@ -163,24 +174,11 @@ class BaseFactory(gym.Env):
|
|||||||
return self._entities
|
return self._entities
|
||||||
|
|
||||||
def _init_obs_cube(self):
|
def _init_obs_cube(self):
|
||||||
arrays = self._entities.observable_arrays
|
arrays = self._entities.obs_arrays
|
||||||
|
|
||||||
# FIXME: Move logic to Register
|
|
||||||
if self.omit_agent_in_obs and self.n_agents == 1:
|
|
||||||
del arrays[c.AGENT]
|
|
||||||
# This does not seem to be necesarry, because this case is allready handled by the Agent Register Class
|
|
||||||
# elif self.omit_agent_in_obs:
|
|
||||||
# arrays[c.AGENT] = np.delete(arrays[c.AGENT], 0, axis=0)
|
|
||||||
obs_cube_z = sum([a.shape[0] if not self[key].is_per_agent else 1 for key, a in arrays.items()])
|
obs_cube_z = sum([a.shape[0] if not self[key].is_per_agent else 1 for key, a in arrays.items()])
|
||||||
self._obs_cube = np.zeros((obs_cube_z, *self._level_shape), dtype=np.float32)
|
self._obs_cube = np.zeros((obs_cube_z, *self._level_shape), dtype=np.float32)
|
||||||
|
|
||||||
# Optionally Pad this obs cube for pomdp cases
|
|
||||||
if r := self.pomdp_r:
|
|
||||||
x, y = self._level_shape
|
|
||||||
# was c.SHADOW
|
|
||||||
self._padded_obs_cube = np.full((obs_cube_z, x + r*2, y + r*2), c.SHADOWED_CELL.value, dtype=np.float32)
|
|
||||||
self._padded_obs_cube[:, r:r+x, r:r+y] = self._obs_cube
|
|
||||||
|
|
||||||
def reset(self) -> (np.ndarray, int, bool, dict):
|
def reset(self) -> (np.ndarray, int, bool, dict):
|
||||||
_ = self._base_init_env()
|
_ = self._base_init_env()
|
||||||
self._init_obs_cube()
|
self._init_obs_cube()
|
||||||
@ -198,7 +196,6 @@ class BaseFactory(gym.Env):
|
|||||||
|
|
||||||
assert isinstance(actions, Iterable), f'"actions" has to be in [{int, list}]'
|
assert isinstance(actions, Iterable), f'"actions" has to be in [{int, list}]'
|
||||||
self._steps += 1
|
self._steps += 1
|
||||||
done = False
|
|
||||||
|
|
||||||
# Pre step Hook for later use
|
# Pre step Hook for later use
|
||||||
self.hook_pre_step()
|
self.hook_pre_step()
|
||||||
@ -285,17 +282,22 @@ class BaseFactory(gym.Env):
|
|||||||
def _build_per_agent_obs(self, agent: Agent, state_array_dict) -> np.ndarray:
|
def _build_per_agent_obs(self, agent: Agent, state_array_dict) -> np.ndarray:
|
||||||
agent_pos_is_omitted = False
|
agent_pos_is_omitted = False
|
||||||
agent_omit_idx = None
|
agent_omit_idx = None
|
||||||
if self.omit_agent_in_obs and self.n_agents == 1:
|
|
||||||
|
if self.obs_prop.omit_agent_self and self.n_agents == 1:
|
||||||
# There is only a single agent and we want to omit the agent obs, so just remove the array.
|
# There is only a single agent and we want to omit the agent obs, so just remove the array.
|
||||||
del state_array_dict[c.AGENT]
|
# del state_array_dict[c.AGENT]
|
||||||
elif self.omit_agent_in_obs and self.combin_agent_obs and self.n_agents > 1:
|
# Not Needed any more,
|
||||||
|
pass
|
||||||
|
elif self.obs_prop.omit_agent_self and self.obs_prop.render_agents in [a_obs.COMBINED, ] and self.n_agents > 1:
|
||||||
state_array_dict[c.AGENT][0, agent.x, agent.y] -= agent.encoding
|
state_array_dict[c.AGENT][0, agent.x, agent.y] -= agent.encoding
|
||||||
agent_pos_is_omitted = True
|
agent_pos_is_omitted = True
|
||||||
elif self.omit_agent_in_obs and not self.combin_agent_obs and self.n_agents > 1:
|
elif self.obs_prop.omit_agent_self and self.obs_prop.render_agents == a_obs.SEPERATE and self.n_agents > 1:
|
||||||
agent_omit_idx = next((i for i, a in enumerate(self[c.AGENT]) if a == agent))
|
agent_omit_idx = next((i for i, a in enumerate(self[c.AGENT]) if a == agent))
|
||||||
|
|
||||||
running_idx, shadowing_idxs, can_be_shadowed_idxs = 0, [], []
|
running_idx, shadowing_idxs, can_be_shadowed_idxs = 0, [], []
|
||||||
|
self._obs_cube[:] = 0
|
||||||
|
|
||||||
|
# FIXME: Refactor this! Build one global observation first, then derive the individual per-agent observations from it.
|
||||||
for key, array in state_array_dict.items():
|
for key, array in state_array_dict.items():
|
||||||
# Flush state array object representation to obs cube
|
# Flush state array object representation to obs cube
|
||||||
if not self[key].hide_from_obs_builder:
|
if not self[key].hide_from_obs_builder:
|
||||||
@ -309,12 +311,15 @@ class BaseFactory(gym.Env):
|
|||||||
for array_idx in range(array.shape[0]):
|
for array_idx in range(array.shape[0]):
|
||||||
self._obs_cube[running_idx: running_idx+z] = array[[x for x in range(array.shape[0])
|
self._obs_cube[running_idx: running_idx+z] = array[[x for x in range(array.shape[0])
|
||||||
if x != agent_omit_idx]]
|
if x != agent_omit_idx]]
|
||||||
elif key == c.AGENT and self.omit_agent_in_obs and self.combin_agent_obs:
|
# Agent OBS are combined
|
||||||
|
elif key == c.AGENT and self.obs_prop.omit_agent_self \
|
||||||
|
and self.obs_prop.render_agents == a_obs.COMBINED:
|
||||||
z = 1
|
z = 1
|
||||||
self._obs_cube[running_idx: running_idx + z] = array
|
self._obs_cube[running_idx: running_idx + z] = array
|
||||||
|
# Each agent is rendered on a separate array slice
|
||||||
else:
|
else:
|
||||||
z = array.shape[0]
|
z = array.shape[0]
|
||||||
self._obs_cube[running_idx: running_idx+z] = array
|
self._obs_cube[running_idx: running_idx + z] = array
|
||||||
# Define which OBS SLices cast a Shadow
|
# Define which OBS SLices cast a Shadow
|
||||||
if self[key].is_blocking_light:
|
if self[key].is_blocking_light:
|
||||||
for i in range(z):
|
for i in range(z):
|
||||||
@ -328,19 +333,14 @@ class BaseFactory(gym.Env):
|
|||||||
if agent_pos_is_omitted:
|
if agent_pos_is_omitted:
|
||||||
state_array_dict[c.AGENT][0, agent.x, agent.y] += agent.encoding
|
state_array_dict[c.AGENT][0, agent.x, agent.y] += agent.encoding
|
||||||
|
|
||||||
if r := self.pomdp_r:
|
if self._pomdp_r:
|
||||||
self._padded_obs_cube[:] = c.SHADOWED_CELL.value # Was c.SHADOW
|
obs = self._do_pomdp_obs_cutout(agent, self._obs_cube)
|
||||||
# self._padded_obs_cube[0] = c.OCCUPIED_CELL.value
|
|
||||||
x, y = self._level_shape
|
|
||||||
self._padded_obs_cube[:, r:r + x, r:r + y] = self._obs_cube
|
|
||||||
global_x, global_y = map(sum, zip(agent.pos, (r, r)))
|
|
||||||
x0, x1 = max(0, global_x - self.pomdp_r), global_x + self.pomdp_r + 1
|
|
||||||
y0, y1 = max(0, global_y - self.pomdp_r), global_y + self.pomdp_r + 1
|
|
||||||
obs = self._padded_obs_cube[:, x0:x1, y0:y1]
|
|
||||||
else:
|
else:
|
||||||
obs = self._obs_cube
|
obs = self._obs_cube
|
||||||
|
|
||||||
if self.cast_shadows:
|
obs = obs.copy()
|
||||||
|
|
||||||
|
if self.obs_prop.cast_shadows:
|
||||||
obs_block_light = [obs[idx] != c.OCCUPIED_CELL.value for idx in shadowing_idxs]
|
obs_block_light = [obs[idx] != c.OCCUPIED_CELL.value for idx in shadowing_idxs]
|
||||||
door_shadowing = False
|
door_shadowing = False
|
||||||
if self.parse_doors:
|
if self.parse_doors:
|
||||||
@ -350,8 +350,8 @@ class BaseFactory(gym.Env):
|
|||||||
for group in door.connectivity_subgroups:
|
for group in door.connectivity_subgroups:
|
||||||
if agent.last_pos not in group:
|
if agent.last_pos not in group:
|
||||||
door_shadowing = True
|
door_shadowing = True
|
||||||
if self.pomdp_r:
|
if self._pomdp_r:
|
||||||
blocking = [tuple(np.subtract(x, agent.pos) + (self.pomdp_r, self.pomdp_r))
|
blocking = [tuple(np.subtract(x, agent.pos) + (self._pomdp_r, self._pomdp_r))
|
||||||
for x in group]
|
for x in group]
|
||||||
xs, ys = zip(*blocking)
|
xs, ys = zip(*blocking)
|
||||||
else:
|
else:
|
||||||
@ -361,8 +361,8 @@ class BaseFactory(gym.Env):
|
|||||||
obs_block_light[0][xs, ys] = False
|
obs_block_light[0][xs, ys] = False
|
||||||
|
|
||||||
light_block_map = Map((np.prod(obs_block_light, axis=0) != True).astype(int))
|
light_block_map = Map((np.prod(obs_block_light, axis=0) != True).astype(int))
|
||||||
if self.pomdp_r:
|
if self._pomdp_r:
|
||||||
light_block_map = light_block_map.do_fov(self.pomdp_r, self.pomdp_r, max(self._level_shape))
|
light_block_map = light_block_map.do_fov(self._pomdp_r, self._pomdp_r, max(self._level_shape))
|
||||||
else:
|
else:
|
||||||
light_block_map = light_block_map.do_fov(*agent.pos, max(self._level_shape))
|
light_block_map = light_block_map.do_fov(*agent.pos, max(self._level_shape))
|
||||||
if door_shadowing:
|
if door_shadowing:
|
||||||
@ -374,6 +374,20 @@ class BaseFactory(gym.Env):
|
|||||||
else:
|
else:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# Agents observe other agents as wall
|
||||||
|
if self.obs_prop.render_agents == a_obs.LEVEL and self.n_agents > 1:
|
||||||
|
other_agent_obs = self[c.AGENT].as_array()
|
||||||
|
if self.obs_prop.omit_agent_self:
|
||||||
|
other_agent_obs[:, agent.x, agent.y] -= agent.encoding
|
||||||
|
|
||||||
|
if self.obs_prop.pomdp_r:
|
||||||
|
oobs = self._do_pomdp_obs_cutout(agent, other_agent_obs)[0]
|
||||||
|
mask = (oobs != c.SHADOWED_CELL.value).astype(int)
|
||||||
|
obs[0] += oobs * mask
|
||||||
|
|
||||||
|
else:
|
||||||
|
obs[0] += other_agent_obs
|
||||||
|
|
||||||
# Additional Observation:
|
# Additional Observation:
|
||||||
for additional_obs in self.additional_obs_build():
|
for additional_obs in self.additional_obs_build():
|
||||||
obs[running_idx:running_idx+additional_obs.shape[0]] = additional_obs
|
obs[running_idx:running_idx+additional_obs.shape[0]] = additional_obs
|
||||||
@ -384,6 +398,37 @@ class BaseFactory(gym.Env):
|
|||||||
|
|
||||||
return obs
|
return obs
|
||||||
|
|
||||||
|
def _do_pomdp_obs_cutout(self, agent, obs_to_be_padded):
    """Cut the agent-centred partial-observation window out of a 3D array.

    The window spans ``self._pomdp_r`` cells in every direction around
    ``(agent.x, agent.y)``. Where the window reaches beyond the level
    (bounds taken from ``self._level_shape``), the missing cells are
    zero-padded so that the two trailing axes of the result always have
    length ``self.pomdp_diameter``.

    :param agent: object exposing integer ``x`` and ``y`` attributes.
    :param obs_to_be_padded: array with exactly three dimensions; the first
        axis is kept as is, the two trailing axes are sliced by x and y.
    :return: array of shape ``(obs_to_be_padded.shape[0], d, d)`` with
        ``d = self.pomdp_diameter``.
    :raises AssertionError: if ``obs_to_be_padded`` is not 3-dimensional.
    """
    assert obs_to_be_padded.ndim == 3
    r, d = self._pomdp_r, self.pomdp_diameter
    x_limit, y_limit = self._level_shape[0], self._level_shape[1]

    # Clip the requested window to the part that actually lies inside the level.
    x0, x1 = max(0, agent.x - r), min(agent.x + r + 1, x_limit)
    y0, y1 = max(0, agent.y - r), min(agent.y + r + 1, y_limit)
    oobs = obs_to_be_padded[:, x0:x1, y0:y1]

    # Only the two spatial axes are compared against the window size; the
    # leading (slice) axis is never cut or padded.
    if oobs.shape[1:] != (d, d):
        # Each side is padded by exactly the amount that was clipped there.
        # Computing both sides independently also covers windows that stick
        # out of the level on both ends of an axis (level narrower than d).
        x0_pad = max(0, r - agent.x)
        x1_pad = max(0, agent.x + r + 1 - x_limit)
        y0_pad = max(0, r - agent.y)
        y1_pad = max(0, agent.y + r + 1 - y_limit)

        # NOTE(review): border cells are zero-filled (np.pad 'constant' default);
        # confirm this matches the value callers treat as "outside the level"
        # (e.g. c.SHADOWED_CELL) - TODO confirm.
        oobs = np.pad(oobs, ((0, 0), (x0_pad, x1_pad), (y0_pad, y1_pad)), 'constant')
    return oobs
|
||||||
|
|
||||||
def get_all_tiles_with_collisions(self) -> List[Tile]:
|
def get_all_tiles_with_collisions(self) -> List[Tile]:
|
||||||
tiles_with_collisions = list()
|
tiles_with_collisions = list()
|
||||||
for tile in self[c.FLOOR]:
|
for tile in self[c.FLOOR]:
|
||||||
@ -449,7 +494,7 @@ class BaseFactory(gym.Env):
|
|||||||
if self._actions.is_moving_action(agent.temp_action):
|
if self._actions.is_moving_action(agent.temp_action):
|
||||||
if agent.temp_valid:
|
if agent.temp_valid:
|
||||||
# info_dict.update(movement=1)
|
# info_dict.update(movement=1)
|
||||||
# reward += 0.00
|
reward -= 0.001
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
reward -= 0.01
|
reward -= 0.01
|
||||||
@ -501,7 +546,7 @@ class BaseFactory(gym.Env):
|
|||||||
def render(self, mode='human'):
|
def render(self, mode='human'):
|
||||||
if not self._renderer: # lazy init
|
if not self._renderer: # lazy init
|
||||||
height, width = self._obs_cube.shape[1:]
|
height, width = self._obs_cube.shape[1:]
|
||||||
self._renderer = Renderer(width, height, view_radius=self.pomdp_r, fps=5)
|
self._renderer = Renderer(width, height, view_radius=self._pomdp_r, fps=5)
|
||||||
|
|
||||||
walls = [RenderEntity('wall', wall.pos) for wall in self[c.WALLS]]
|
walls = [RenderEntity('wall', wall.pos) for wall in self[c.WALLS]]
|
||||||
|
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
import numbers
|
||||||
import random
|
import random
|
||||||
from abc import ABC
|
from abc import ABC
|
||||||
from typing import List, Union, Dict
|
from typing import List, Union, Dict
|
||||||
@ -91,21 +92,18 @@ class EntityObjectRegister(ObjectRegister, ABC):
|
|||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_tiles(cls, tiles, *args, **kwargs):
|
def from_tiles(cls, tiles, *args, entity_kwargs=None, **kwargs):
|
||||||
# objects_name = cls._accepted_objects.__name__
|
# objects_name = cls._accepted_objects.__name__
|
||||||
register_obj = cls(*args, **kwargs)
|
register_obj = cls(*args, **kwargs)
|
||||||
try:
|
entities = [cls._accepted_objects(tile, str_ident=i, **entity_kwargs if entity_kwargs is not None else {})
|
||||||
del kwargs['individual_slices']
|
|
||||||
except KeyError:
|
|
||||||
pass
|
|
||||||
entities = [cls._accepted_objects(tile, str_ident=i, **kwargs)
|
|
||||||
for i, tile in enumerate(tiles)]
|
for i, tile in enumerate(tiles)]
|
||||||
register_obj.register_additional_items(entities)
|
register_obj.register_additional_items(entities)
|
||||||
return register_obj
|
return register_obj
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_argwhere_coordinates(cls, positions: [(int, int)], tiles, *args, **kwargs):
|
def from_argwhere_coordinates(cls, positions: [(int, int)], tiles, *args, entity_kwargs=None, **kwargs, ):
|
||||||
return cls.from_tiles([tiles.by_pos(position) for position in positions], *args, **kwargs)
|
return cls.from_tiles([tiles.by_pos(position) for position in positions], *args, entity_kwargs=entity_kwargs,
|
||||||
|
**kwargs)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def positions(self):
|
def positions(self):
|
||||||
@ -166,10 +164,15 @@ class PlaceHolders(MovingEntityObjectRegister):
|
|||||||
|
|
||||||
# noinspection DuplicatedCode
|
# noinspection DuplicatedCode
|
||||||
def as_array(self):
|
def as_array(self):
|
||||||
if isinstance(self.fill_value, int):
|
if isinstance(self.fill_value, numbers.Number):
|
||||||
self._array[:] = self.fill_value
|
self._array[:] = self.fill_value
|
||||||
elif self.fill_value == "normal":
|
elif isinstance(self.fill_value, str):
|
||||||
self._array = np.random.normal(size=self._array.shape)
|
if self.fill_value.lower() in ['normal', 'n']:
|
||||||
|
self._array = np.random.normal(size=self._array.shape)
|
||||||
|
else:
|
||||||
|
raise ValueError('Choose one of: ["normal", "N"]')
|
||||||
|
else:
|
||||||
|
raise TypeError('Objects of type "str" or "number" is required here.')
|
||||||
|
|
||||||
if self.individual_slices:
|
if self.individual_slices:
|
||||||
return self._array
|
return self._array
|
||||||
@ -183,10 +186,12 @@ class Entities(Register):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def observable_arrays(self):
|
def observable_arrays(self):
|
||||||
|
# FIXME: Find a better name
|
||||||
return {key: val.as_array() for key, val in self.items() if val.is_observable}
|
return {key: val.as_array() for key, val in self.items() if val.is_observable}
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def obs_arrays(self):
|
def obs_arrays(self):
|
||||||
|
# FIXME: Find a better name
|
||||||
return {key: val.as_array() for key, val in self.items() if val.is_observable and not val.hide_from_obs_builder}
|
return {key: val.as_array() for key, val in self.items() if val.is_observable and not val.hide_from_obs_builder}
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@ -208,6 +213,10 @@ class Entities(Register):
|
|||||||
def register_additional_items(self, others: Dict):
|
def register_additional_items(self, others: Dict):
|
||||||
return self.register_item(others)
|
return self.register_item(others)
|
||||||
|
|
||||||
|
def by_pos(self, pos: (int, int)):
    """Look up ``pos`` in every contained register that offers ``by_pos``.

    :param pos: position handed unchanged to each register's ``by_pos``.
    :return: list of all non-``None`` lookup results, in the iteration order
        of ``self.values()``; empty if nothing was found.
    """
    hits = []
    for register in self.values():
        # Registers without positional lookup are skipped.
        if not hasattr(register, 'by_pos'):
            continue
        candidate = register.by_pos(pos)
        if candidate is not None:
            hits.append(candidate)
    return hits
|
||||||
|
|
||||||
|
|
||||||
class WallTiles(EntityObjectRegister):
|
class WallTiles(EntityObjectRegister):
|
||||||
_accepted_objects = Wall
|
_accepted_objects = Wall
|
||||||
@ -289,6 +298,10 @@ class Agents(MovingEntityObjectRegister):
|
|||||||
|
|
||||||
_accepted_objects = Agent
|
_accepted_objects = Agent
|
||||||
|
|
||||||
|
def __init__(self, *args, hide_from_obs_builder=False, **kwargs):
    """Initialise the register and record its ``hide_from_obs_builder`` flag.

    :param args: positional arguments forwarded unchanged to the parent initialiser.
    :param hide_from_obs_builder: keyword-only flag stored on the instance
        as ``self.hide_from_obs_builder``.
    :param kwargs: remaining keyword arguments forwarded unchanged to the
        parent initialiser.
    """
    # The parent initialiser runs first; the flag is assigned afterwards so
    # that the value passed here is the one that ends up on the instance.
    super().__init__(*args, **kwargs)
    self.hide_from_obs_builder = hide_from_obs_builder
|
||||||
|
|
||||||
# noinspection DuplicatedCode
|
# noinspection DuplicatedCode
|
||||||
def as_array(self):
|
def as_array(self):
|
||||||
self._array[:] = c.FREE_CELL.value
|
self._array[:] = c.FREE_CELL.value
|
||||||
|
@ -14,7 +14,7 @@ from environments.factory.base.registers import Entities, MovingEntityObjectRegi
|
|||||||
|
|
||||||
from environments.factory.renderer import RenderEntity
|
from environments.factory.renderer import RenderEntity
|
||||||
from environments.logging.recorder import RecorderCallback
|
from environments.logging.recorder import RecorderCallback
|
||||||
|
from environments.utility_classes import ObservationProperties
|
||||||
|
|
||||||
CLEAN_UP_ACTION = h.EnvActions.CLEAN_UP
|
CLEAN_UP_ACTION = h.EnvActions.CLEAN_UP
|
||||||
|
|
||||||
@ -65,9 +65,9 @@ class DirtRegister(MovingEntityObjectRegister):
|
|||||||
def as_array(self):
|
def as_array(self):
|
||||||
if self._array is not None:
|
if self._array is not None:
|
||||||
self._array[:] = c.FREE_CELL.value
|
self._array[:] = c.FREE_CELL.value
|
||||||
for key, dirt in self.items():
|
for dirt in self.values():
|
||||||
if dirt.amount == 0:
|
if dirt.amount == 0:
|
||||||
self.delete_item(key)
|
self.delete_item(dirt)
|
||||||
self._array[0, dirt.x, dirt.y] = dirt.amount
|
self._array[0, dirt.x, dirt.y] = dirt.amount
|
||||||
else:
|
else:
|
||||||
self._array = np.zeros((1, *self._level_shape))
|
self._array = np.zeros((1, *self._level_shape))
|
||||||
@ -124,21 +124,21 @@ class DirtFactory(BaseFactory):
|
|||||||
@property
|
@property
|
||||||
def additional_actions(self) -> Union[Action, List[Action]]:
|
def additional_actions(self) -> Union[Action, List[Action]]:
|
||||||
super_actions = super().additional_actions
|
super_actions = super().additional_actions
|
||||||
if self.dirt_properties.agent_can_interact:
|
if self.dirt_prop.agent_can_interact:
|
||||||
super_actions.append(Action(enum_ident=CLEAN_UP_ACTION))
|
super_actions.append(Action(enum_ident=CLEAN_UP_ACTION))
|
||||||
return super_actions
|
return super_actions
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def additional_entities(self) -> Dict[(Enum, Entities)]:
|
def additional_entities(self) -> Dict[(Enum, Entities)]:
|
||||||
super_entities = super().additional_entities
|
super_entities = super().additional_entities
|
||||||
dirt_register = DirtRegister(self.dirt_properties, self._level_shape)
|
dirt_register = DirtRegister(self.dirt_prop, self._level_shape)
|
||||||
super_entities.update(({c.DIRT: dirt_register}))
|
super_entities.update(({c.DIRT: dirt_register}))
|
||||||
return super_entities
|
return super_entities
|
||||||
|
|
||||||
def __init__(self, *args, dirt_properties: DirtProperties = DirtProperties(), env_seed=time.time_ns(), **kwargs):
|
def __init__(self, *args, dirt_prop: DirtProperties = DirtProperties(), env_seed=time.time_ns(), **kwargs):
|
||||||
if isinstance(dirt_properties, dict):
|
if isinstance(dirt_prop, dict):
|
||||||
dirt_properties = DirtProperties(**dirt_properties)
|
dirt_prop = DirtProperties(**dirt_prop)
|
||||||
self.dirt_properties = dirt_properties
|
self.dirt_prop = dirt_prop
|
||||||
self._dirt_rng = np.random.default_rng(env_seed)
|
self._dirt_rng = np.random.default_rng(env_seed)
|
||||||
self._dirt: DirtRegister
|
self._dirt: DirtRegister
|
||||||
kwargs.update(env_seed=env_seed)
|
kwargs.update(env_seed=env_seed)
|
||||||
@ -153,7 +153,7 @@ class DirtFactory(BaseFactory):
|
|||||||
|
|
||||||
def clean_up(self, agent: Agent) -> c:
|
def clean_up(self, agent: Agent) -> c:
|
||||||
if dirt := self[c.DIRT].by_pos(agent.pos):
|
if dirt := self[c.DIRT].by_pos(agent.pos):
|
||||||
new_dirt_amount = dirt.amount - self.dirt_properties.clean_amount
|
new_dirt_amount = dirt.amount - self.dirt_prop.clean_amount
|
||||||
|
|
||||||
if new_dirt_amount <= 0:
|
if new_dirt_amount <= 0:
|
||||||
self[c.DIRT].delete_item(dirt)
|
self[c.DIRT].delete_item(dirt)
|
||||||
@ -170,16 +170,16 @@ class DirtFactory(BaseFactory):
|
|||||||
]
|
]
|
||||||
self._dirt_rng.shuffle(free_for_dirt)
|
self._dirt_rng.shuffle(free_for_dirt)
|
||||||
if initial_spawn:
|
if initial_spawn:
|
||||||
var = self.dirt_properties.initial_dirt_spawn_r_var
|
var = self.dirt_prop.initial_dirt_spawn_r_var
|
||||||
new_spawn = self.dirt_properties.initial_dirt_ratio + dirt_rng.uniform(-var, var)
|
new_spawn = self.dirt_prop.initial_dirt_ratio + dirt_rng.uniform(-var, var)
|
||||||
else:
|
else:
|
||||||
new_spawn = dirt_rng.uniform(0, self.dirt_properties.max_spawn_ratio)
|
new_spawn = dirt_rng.uniform(0, self.dirt_prop.max_spawn_ratio)
|
||||||
n_dirt_tiles = max(0, int(new_spawn * len(free_for_dirt)))
|
n_dirt_tiles = max(0, int(new_spawn * len(free_for_dirt)))
|
||||||
self[c.DIRT].spawn_dirt(free_for_dirt[:n_dirt_tiles])
|
self[c.DIRT].spawn_dirt(free_for_dirt[:n_dirt_tiles])
|
||||||
|
|
||||||
def do_additional_step(self) -> dict:
|
def do_additional_step(self) -> dict:
|
||||||
info_dict = super().do_additional_step()
|
info_dict = super().do_additional_step()
|
||||||
if smear_amount := self.dirt_properties.dirt_smear_amount:
|
if smear_amount := self.dirt_prop.dirt_smear_amount:
|
||||||
for agent in self[c.AGENT]:
|
for agent in self[c.AGENT]:
|
||||||
if agent.temp_valid and agent.last_pos != c.NO_POS:
|
if agent.temp_valid and agent.last_pos != c.NO_POS:
|
||||||
if self._actions.is_moving_action(agent.temp_action):
|
if self._actions.is_moving_action(agent.temp_action):
|
||||||
@ -196,7 +196,7 @@ class DirtFactory(BaseFactory):
|
|||||||
pass # No Dirt Spawn
|
pass # No Dirt Spawn
|
||||||
elif not self._next_dirt_spawn:
|
elif not self._next_dirt_spawn:
|
||||||
self.trigger_dirt_spawn()
|
self.trigger_dirt_spawn()
|
||||||
self._next_dirt_spawn = self.dirt_properties.spawn_frequency
|
self._next_dirt_spawn = self.dirt_prop.spawn_frequency
|
||||||
else:
|
else:
|
||||||
self._next_dirt_spawn -= 1
|
self._next_dirt_spawn -= 1
|
||||||
return info_dict
|
return info_dict
|
||||||
@ -205,7 +205,7 @@ class DirtFactory(BaseFactory):
|
|||||||
valid = super().do_additional_actions(agent, action)
|
valid = super().do_additional_actions(agent, action)
|
||||||
if valid is None:
|
if valid is None:
|
||||||
if action == CLEAN_UP_ACTION:
|
if action == CLEAN_UP_ACTION:
|
||||||
if self.dirt_properties.agent_can_interact:
|
if self.dirt_prop.agent_can_interact:
|
||||||
valid = self.clean_up(agent)
|
valid = self.clean_up(agent)
|
||||||
return valid
|
return valid
|
||||||
else:
|
else:
|
||||||
@ -218,11 +218,11 @@ class DirtFactory(BaseFactory):
|
|||||||
def do_additional_reset(self) -> None:
|
def do_additional_reset(self) -> None:
|
||||||
super().do_additional_reset()
|
super().do_additional_reset()
|
||||||
self.trigger_dirt_spawn(initial_spawn=True)
|
self.trigger_dirt_spawn(initial_spawn=True)
|
||||||
self._next_dirt_spawn = self.dirt_properties.spawn_frequency if self.dirt_properties.spawn_frequency else -1
|
self._next_dirt_spawn = self.dirt_prop.spawn_frequency if self.dirt_prop.spawn_frequency else -1
|
||||||
|
|
||||||
def check_additional_done(self):
|
def check_additional_done(self):
|
||||||
super_done = super().check_additional_done()
|
super_done = super().check_additional_done()
|
||||||
done = self.dirt_properties.done_when_clean and (len(self[c.DIRT]) == 0)
|
done = self.dirt_prop.done_when_clean and (len(self[c.DIRT]) == 0)
|
||||||
return super_done or done
|
return super_done or done
|
||||||
|
|
||||||
def calculate_additional_reward(self, agent: Agent) -> (int, dict):
|
def calculate_additional_reward(self, agent: Agent) -> (int, dict):
|
||||||
@ -256,41 +256,40 @@ class DirtFactory(BaseFactory):
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
from environments.utility_classes import AgentRenderOptions as ARO
|
||||||
render = True
|
render = True
|
||||||
|
|
||||||
dirt_props = DirtProperties(1, 0.05, 0.1, 3, 1, 20, 0.0)
|
dirt_props = DirtProperties(1, 0.05, 0.1, 3, 1, 20, 0)
|
||||||
|
|
||||||
|
obs_props = ObservationProperties(render_agents=ARO.COMBINED, omit_agent_self=True, pomdp_r=2, additional_agent_placeholder=None)
|
||||||
|
|
||||||
move_props = {'allow_square_movement': True,
|
move_props = {'allow_square_movement': True,
|
||||||
'allow_diagonal_movement': False,
|
'allow_diagonal_movement': False,
|
||||||
'allow_no_op': False} #MovementProperties(True, True, False)
|
'allow_no_op': False}
|
||||||
|
|
||||||
with RecorderCallback(filepath=Path('debug_out') / f'recorder_xxxx.json', occupation_map=False,
|
factory = DirtFactory(n_agents=3, done_at_collision=False,
|
||||||
trajectory_map=False) as recorder:
|
level_name='rooms', max_steps=400,
|
||||||
|
obs_prop=obs_props, parse_doors=True,
|
||||||
|
record_episodes=True, verbose=True,
|
||||||
|
mv_prop=move_props, dirt_prop=dirt_props
|
||||||
|
)
|
||||||
|
|
||||||
factory = DirtFactory(n_agents=1, done_at_collision=False, frames_to_stack=0,
|
# noinspection DuplicatedCode
|
||||||
level_name='rooms', max_steps=400, combin_agent_obs=True,
|
n_actions = factory.action_space.n - 1
|
||||||
omit_agent_in_obs=True, parse_doors=True, pomdp_r=3,
|
_ = factory.observation_space
|
||||||
record_episodes=True, verbose=True, cast_shadows=True,
|
|
||||||
movement_properties=move_props, dirt_properties=dirt_props
|
|
||||||
)
|
|
||||||
|
|
||||||
# noinspection DuplicatedCode
|
for epoch in range(4):
|
||||||
n_actions = factory.action_space.n - 1
|
random_actions = [[random.randint(0, n_actions) for _
|
||||||
_ = factory.observation_space
|
in range(factory.n_agents)] for _
|
||||||
|
in range(factory.max_steps+1)]
|
||||||
for epoch in range(4):
|
env_state = factory.reset()
|
||||||
random_actions = [[random.randint(0, n_actions) for _
|
r = 0
|
||||||
in range(factory.n_agents)] for _
|
for agent_i_action in random_actions:
|
||||||
in range(factory.max_steps+1)]
|
env_state, step_r, done_bool, info_obj = factory.step(agent_i_action)
|
||||||
env_state = factory.reset()
|
r += step_r
|
||||||
r = 0
|
if render:
|
||||||
for agent_i_action in random_actions:
|
factory.render()
|
||||||
env_state, step_r, done_bool, info_obj = factory.step(agent_i_action)
|
if done_bool:
|
||||||
#recorder.read_info(0, info_obj)
|
break
|
||||||
r += step_r
|
print(f'Factory run {epoch} done, reward is:\n {r}')
|
||||||
if render:
|
pass
|
||||||
factory.render()
|
|
||||||
if done_bool:
|
|
||||||
# recorder.read_done(0, done_bool)
|
|
||||||
break
|
|
||||||
print(f'Factory run {epoch} done, reward is:\n {r}')
|
|
||||||
pass
|
|
||||||
|
@ -3,6 +3,7 @@ from collections import deque, UserList
|
|||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import List, Union, NamedTuple, Dict
|
from typing import List, Union, NamedTuple, Dict
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import random
|
||||||
|
|
||||||
from environments.factory.base.base_factory import BaseFactory
|
from environments.factory.base.base_factory import BaseFactory
|
||||||
from environments.helpers import Constants as c
|
from environments.helpers import Constants as c
|
||||||
@ -18,13 +19,6 @@ NO_ITEM = 0
|
|||||||
ITEM_DROP_OFF = 1
|
ITEM_DROP_OFF = 1
|
||||||
|
|
||||||
|
|
||||||
def inventory_slice_name(agent_i):
|
|
||||||
if isinstance(agent_i, int):
|
|
||||||
return f'{c.INVENTORY.name}_{c.AGENT.value}#{agent_i}'
|
|
||||||
else:
|
|
||||||
return f'{c.INVENTORY.name}_{agent_i}'
|
|
||||||
|
|
||||||
|
|
||||||
class Item(MoveableEntity):
|
class Item(MoveableEntity):
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
@ -77,7 +71,7 @@ class Inventory(UserList):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def name(self):
|
def name(self):
|
||||||
return self.agent.name
|
return f'{self.__class__.__name__}({self.agent.name})'
|
||||||
|
|
||||||
def __init__(self, pomdp_r: int, level_shape: (int, int), agent: Agent, capacity: int):
|
def __init__(self, pomdp_r: int, level_shape: (int, int), agent: Agent, capacity: int):
|
||||||
super(Inventory, self).__init__()
|
super(Inventory, self).__init__()
|
||||||
@ -111,7 +105,8 @@ class Inventory(UserList):
|
|||||||
|
|
||||||
def summarize_state(self, **kwargs):
|
def summarize_state(self, **kwargs):
|
||||||
attr_dict = {key: str(val) for key, val in self.__dict__.items() if not key.startswith('_') and key != 'data'}
|
attr_dict = {key: str(val) for key, val in self.__dict__.items() if not key.startswith('_') and key != 'data'}
|
||||||
attr_dict.update({val.name: val.summarize_state(**kwargs) for val in self})
|
attr_dict.update(dict(items={val.name: val.summarize_state(**kwargs) for val in self}))
|
||||||
|
attr_dict.update(dict(name=self.name))
|
||||||
return attr_dict
|
return attr_dict
|
||||||
|
|
||||||
|
|
||||||
@ -149,6 +144,11 @@ class Inventories(ObjectRegister):
|
|||||||
except StopIteration:
|
except StopIteration:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def summarize_states(self, n_steps=None):
|
||||||
|
# as dict with additional nesting
|
||||||
|
# return dict(items=super(Inventories, self).summarize_states())
|
||||||
|
return super(Inventories, self).summarize_states(n_steps=n_steps)
|
||||||
|
|
||||||
|
|
||||||
class DropOffLocation(Entity):
|
class DropOffLocation(Entity):
|
||||||
|
|
||||||
@ -194,6 +194,9 @@ class DropOffLocations(EntityObjectRegister):
|
|||||||
self._array[0, item.x, item.y] = item.encoding
|
self._array[0, item.x, item.y] = item.encoding
|
||||||
return self._array
|
return self._array
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
super(DropOffLocations, self).__repr__()
|
||||||
|
|
||||||
|
|
||||||
class ItemProperties(NamedTuple):
|
class ItemProperties(NamedTuple):
|
||||||
n_items: int = 5 # How many items are there at the same time
|
n_items: int = 5 # How many items are there at the same time
|
||||||
@ -207,13 +210,13 @@ class ItemProperties(NamedTuple):
|
|||||||
# noinspection PyAttributeOutsideInit, PyAbstractClass
|
# noinspection PyAttributeOutsideInit, PyAbstractClass
|
||||||
class ItemFactory(BaseFactory):
|
class ItemFactory(BaseFactory):
|
||||||
# noinspection PyMissingConstructor
|
# noinspection PyMissingConstructor
|
||||||
def __init__(self, *args, item_properties: ItemProperties = ItemProperties(), env_seed=time.time_ns(), **kwargs):
|
def __init__(self, *args, item_prop: ItemProperties = ItemProperties(), env_seed=time.time_ns(), **kwargs):
|
||||||
if isinstance(item_properties, dict):
|
if isinstance(item_prop, dict):
|
||||||
item_properties = ItemProperties(**item_properties)
|
item_prop = ItemProperties(**item_prop)
|
||||||
self.item_properties = item_properties
|
self.item_prop = item_prop
|
||||||
kwargs.update(env_seed=env_seed)
|
kwargs.update(env_seed=env_seed)
|
||||||
self._item_rng = np.random.default_rng(env_seed)
|
self._item_rng = np.random.default_rng(env_seed)
|
||||||
assert (item_properties.n_items <= ((1 + kwargs.get('pomdp_r', 0) * 2) ** 2)) or not kwargs.get('pomdp_r', 0)
|
assert (item_prop.n_items <= ((1 + kwargs.get('_pomdp_r', 0) * 2) ** 2)) or not kwargs.get('_pomdp_r', 0)
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@ -228,16 +231,19 @@ class ItemFactory(BaseFactory):
|
|||||||
# noinspection PyUnresolvedReferences
|
# noinspection PyUnresolvedReferences
|
||||||
super_entities = super().additional_entities
|
super_entities = super().additional_entities
|
||||||
|
|
||||||
empty_tiles = self[c.FLOOR].empty_tiles[:self.item_properties.n_drop_off_locations]
|
empty_tiles = self[c.FLOOR].empty_tiles[:self.item_prop.n_drop_off_locations]
|
||||||
drop_offs = DropOffLocations.from_tiles(empty_tiles, self._level_shape,
|
drop_offs = DropOffLocations.from_tiles(
|
||||||
storage_size_until_full=self.item_properties.max_dropoff_storage_size)
|
empty_tiles, self._level_shape,
|
||||||
|
entity_kwargs=dict(
|
||||||
|
storage_size_until_full=self.item_prop.max_dropoff_storage_size)
|
||||||
|
)
|
||||||
item_register = ItemRegister(self._level_shape)
|
item_register = ItemRegister(self._level_shape)
|
||||||
empty_tiles = self[c.FLOOR].empty_tiles[:self.item_properties.n_items]
|
empty_tiles = self[c.FLOOR].empty_tiles[:self.item_prop.n_items]
|
||||||
item_register.spawn_items(empty_tiles)
|
item_register.spawn_items(empty_tiles)
|
||||||
|
|
||||||
inventories = Inventories(self._level_shape if not self.pomdp_r else ((self.pomdp_diameter,) * 2))
|
inventories = Inventories(self._level_shape if not self._pomdp_r else ((self.pomdp_diameter,) * 2))
|
||||||
inventories.spawn_inventories(self[c.AGENT], self.pomdp_r,
|
inventories.spawn_inventories(self[c.AGENT], self._pomdp_r,
|
||||||
self.item_properties.max_agent_inventory_capacity)
|
self.item_prop.max_agent_inventory_capacity)
|
||||||
|
|
||||||
super_entities.update({c.DROP_OFF: drop_offs, c.ITEM: item_register, c.INVENTORY: inventories})
|
super_entities.update({c.DROP_OFF: drop_offs, c.ITEM: item_register, c.INVENTORY: inventories})
|
||||||
return super_entities
|
return super_entities
|
||||||
@ -270,7 +276,7 @@ class ItemFactory(BaseFactory):
|
|||||||
valid = super().do_additional_actions(agent, action)
|
valid = super().do_additional_actions(agent, action)
|
||||||
if valid is None:
|
if valid is None:
|
||||||
if action == h.EnvActions.ITEM_ACTION:
|
if action == h.EnvActions.ITEM_ACTION:
|
||||||
if self.item_properties.agent_can_interact:
|
if self.item_prop.agent_can_interact:
|
||||||
valid = self.do_item_action(agent)
|
valid = self.do_item_action(agent)
|
||||||
return valid
|
return valid
|
||||||
else:
|
else:
|
||||||
@ -283,14 +289,14 @@ class ItemFactory(BaseFactory):
|
|||||||
def do_additional_reset(self) -> None:
|
def do_additional_reset(self) -> None:
|
||||||
# noinspection PyUnresolvedReferences
|
# noinspection PyUnresolvedReferences
|
||||||
super().do_additional_reset()
|
super().do_additional_reset()
|
||||||
self._next_item_spawn = self.item_properties.spawn_frequency
|
self._next_item_spawn = self.item_prop.spawn_frequency
|
||||||
self.trigger_item_spawn()
|
self.trigger_item_spawn()
|
||||||
|
|
||||||
def trigger_item_spawn(self):
|
def trigger_item_spawn(self):
|
||||||
if item_to_spawns := max(0, (self.item_properties.n_items - len(self[c.ITEM]))):
|
if item_to_spawns := max(0, (self.item_prop.n_items - len(self[c.ITEM]))):
|
||||||
empty_tiles = self[c.FLOOR].empty_tiles[:item_to_spawns]
|
empty_tiles = self[c.FLOOR].empty_tiles[:item_to_spawns]
|
||||||
self[c.ITEM].spawn_items(empty_tiles)
|
self[c.ITEM].spawn_items(empty_tiles)
|
||||||
self._next_item_spawn = self.item_properties.spawn_frequency
|
self._next_item_spawn = self.item_prop.spawn_frequency
|
||||||
self.print(f'{item_to_spawns} new items have been spawned; next spawn in {self._next_item_spawn}')
|
self.print(f'{item_to_spawns} new items have been spawned; next spawn in {self._next_item_spawn}')
|
||||||
else:
|
else:
|
||||||
self.print('No Items are spawning, limit is reached.')
|
self.print('No Items are spawning, limit is reached.')
|
||||||
@ -351,30 +357,41 @@ class ItemFactory(BaseFactory):
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
import random
|
from environments.utility_classes import AgentRenderOptions as ARO, ObservationProperties
|
||||||
|
|
||||||
render = True
|
render = True
|
||||||
|
|
||||||
item_props = ItemProperties()
|
item_probs = ItemProperties()
|
||||||
|
|
||||||
factory = ItemFactory(item_properties=item_props, n_agents=3, done_at_collision=False, frames_to_stack=0,
|
obs_props = ObservationProperties(render_agents=ARO.LEVEL, omit_agent_self=True, pomdp_r=2)
|
||||||
level_name='rooms', max_steps=4000,
|
|
||||||
omit_agent_in_obs=True, parse_doors=True, pomdp_r=3,
|
move_props = {'allow_square_movement': True,
|
||||||
record_episodes=False, verbose=False
|
'allow_diagonal_movement': False,
|
||||||
|
'allow_no_op': False}
|
||||||
|
|
||||||
|
factory = ItemFactory(n_agents=3, done_at_collision=False,
|
||||||
|
level_name='rooms', max_steps=400,
|
||||||
|
obs_prop=obs_props, parse_doors=True,
|
||||||
|
record_episodes=True, verbose=True,
|
||||||
|
mv_prop=move_props, item_prop=item_probs
|
||||||
)
|
)
|
||||||
|
|
||||||
# noinspection DuplicatedCode
|
# noinspection DuplicatedCode
|
||||||
n_actions = factory.action_space.n - 1
|
n_actions = factory.action_space.n - 1
|
||||||
_ = factory.observation_space
|
_ = factory.observation_space
|
||||||
|
|
||||||
for epoch in range(100):
|
for epoch in range(4):
|
||||||
random_actions = [[random.randint(0, n_actions) for _ in range(factory.n_agents)] for _ in range(200)]
|
random_actions = [[random.randint(0, n_actions) for _
|
||||||
|
in range(factory.n_agents)] for _
|
||||||
|
in range(factory.max_steps + 1)]
|
||||||
env_state = factory.reset()
|
env_state = factory.reset()
|
||||||
rew = 0
|
r = 0
|
||||||
for agent_i_action in random_actions:
|
for agent_i_action in random_actions:
|
||||||
env_state, step_r, done_bool, info_obj = factory.step(agent_i_action)
|
env_state, step_r, done_bool, info_obj = factory.step(agent_i_action)
|
||||||
rew += step_r
|
r += step_r
|
||||||
if render:
|
if render:
|
||||||
factory.render()
|
factory.render()
|
||||||
if done_bool:
|
if done_bool:
|
||||||
break
|
break
|
||||||
print(f'Factory run {epoch} done, reward is:\n {rew}')
|
print(f'Factory run {epoch} done, reward is:\n {r}')
|
||||||
|
pass
|
||||||
|
@ -1,7 +1,24 @@
|
|||||||
from typing import NamedTuple
|
from enum import Enum
|
||||||
|
from typing import NamedTuple, Union
|
||||||
|
|
||||||
|
|
||||||
|
class AgentRenderOptions(object):
|
||||||
|
SEPERATE = 'each'
|
||||||
|
COMBINED = 'combined'
|
||||||
|
LEVEL = 'lvl'
|
||||||
|
NOT = 'not'
|
||||||
|
|
||||||
|
|
||||||
class MovementProperties(NamedTuple):
|
class MovementProperties(NamedTuple):
|
||||||
allow_square_movement: bool = True
|
allow_square_movement: bool = True
|
||||||
allow_diagonal_movement: bool = False
|
allow_diagonal_movement: bool = False
|
||||||
allow_no_op: bool = False
|
allow_no_op: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
class ObservationProperties(NamedTuple):
|
||||||
|
render_agents: AgentRenderOptions = AgentRenderOptions.SEPERATE
|
||||||
|
omit_agent_self: bool = True
|
||||||
|
additional_agent_placeholder: Union[None, str, int] = None
|
||||||
|
cast_shadows = True
|
||||||
|
frames_to_stack: int = 0
|
||||||
|
pomdp_r: int = 0
|
||||||
|
2
main.py
2
main.py
@ -56,7 +56,7 @@ if __name__ == '__main__':
|
|||||||
for modeL_type in [A2C, PPO, DQN]: # ,RegDQN, QRDQN]:
|
for modeL_type in [A2C, PPO, DQN]: # ,RegDQN, QRDQN]:
|
||||||
for seed in range(3):
|
for seed in range(3):
|
||||||
env_kwargs = dict(n_agents=1,
|
env_kwargs = dict(n_agents=1,
|
||||||
# item_properties=item_props,
|
# item_prop=item_props,
|
||||||
dirt_properties=dirt_props,
|
dirt_properties=dirt_props,
|
||||||
movement_properties=move_props,
|
movement_properties=move_props,
|
||||||
pomdp_r=2, max_steps=1000, parse_doors=False,
|
pomdp_r=2, max_steps=1000, parse_doors=False,
|
||||||
|
@ -48,7 +48,7 @@ if __name__ == '__main__':
|
|||||||
env_kwargs = yaml.load(f, Loader=yaml.FullLoader)
|
env_kwargs = yaml.load(f, Loader=yaml.FullLoader)
|
||||||
dirt_props = DirtProperties(clean_amount=3, gain_amount=0.2, max_global_amount=30,
|
dirt_props = DirtProperties(clean_amount=3, gain_amount=0.2, max_global_amount=30,
|
||||||
max_local_amount=3, spawn_frequency=1, max_spawn_ratio=0.05)
|
max_local_amount=3, spawn_frequency=1, max_spawn_ratio=0.05)
|
||||||
# env_kwargs.update(n_agents=1, dirt_properties=dirt_props)
|
# env_kwargs.update(n_agents=1, dirt_prop=dirt_props)
|
||||||
env = DirtFactory(**env_kwargs)
|
env = DirtFactory(**env_kwargs)
|
||||||
|
|
||||||
env = FrameStack(env, 4)
|
env = FrameStack(env, 4)
|
||||||
|
@ -5,6 +5,7 @@ import numpy as np
|
|||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
from environments import helpers as h
|
from environments import helpers as h
|
||||||
|
from environments.helpers import Constants as c
|
||||||
from environments.factory.factory_dirt import DirtFactory
|
from environments.factory.factory_dirt import DirtFactory
|
||||||
from environments.factory.factory_dirt_item import DirtItemFactory
|
from environments.factory.factory_dirt_item import DirtItemFactory
|
||||||
from environments.logging.recorder import RecorderCallback
|
from environments.logging.recorder import RecorderCallback
|
||||||
@ -15,29 +16,30 @@ warnings.filterwarnings('ignore', category=UserWarning)
|
|||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
model_name = 'DQN_1631187073'
|
model_name = 'DQN_163519000'
|
||||||
run_id = 0
|
run_id = 0
|
||||||
seed = 69
|
seed = 69
|
||||||
out_path = Path('debug_out/DQN_1635176929/0_DQN_1635176929')
|
n_agents = 2
|
||||||
|
out_path = Path('debug_out/DQN_163519000/1_DQN_163519000')
|
||||||
model_path = out_path
|
model_path = out_path
|
||||||
|
|
||||||
with (out_path / f'env_params.json').open('r') as f:
|
with (out_path / f'env_params.json').open('r') as f:
|
||||||
env_kwargs = yaml.load(f, Loader=yaml.FullLoader)
|
env_kwargs = yaml.load(f, Loader=yaml.FullLoader)
|
||||||
env_kwargs.update(additional_agent_placeholder=None, n_agents=4)
|
env_kwargs.update(additional_agent_placeholder=None, n_agents=n_agents)
|
||||||
if gain_amount := env_kwargs.get('dirt_properties', {}).get('gain_amount', None):
|
if gain_amount := env_kwargs.get('dirt_prop', {}).get('gain_amount', None):
|
||||||
env_kwargs['dirt_properties']['max_spawn_amount'] = gain_amount
|
env_kwargs['dirt_prop']['max_spawn_amount'] = gain_amount
|
||||||
del env_kwargs['dirt_properties']['gain_amount']
|
del env_kwargs['dirt_prop']['gain_amount']
|
||||||
|
|
||||||
env_kwargs.update(record_episodes=True)
|
env_kwargs.update(record_episodes=False)
|
||||||
|
|
||||||
this_model = out_path / 'model.zip'
|
this_model = out_path / 'model.zip'
|
||||||
|
|
||||||
model_cls = next(val for key, val in h.MODEL_MAP.items() if key in model_name)
|
model_cls = next(val for key, val in h.MODEL_MAP.items() if key in model_name)
|
||||||
models = [model_cls.load(this_model) for _ in range(4)]
|
models = [model_cls.load(this_model) for _ in range(n_agents)]
|
||||||
|
|
||||||
with RecorderCallback(filepath=Path() / 'recorder_out_DQN.json') as recorder:
|
with RecorderCallback(filepath=Path() / 'recorder_out_DQN.json') as recorder:
|
||||||
# Init Env
|
# Init Env
|
||||||
with DirtItemFactory(**env_kwargs) as env:
|
with DirtFactory(**env_kwargs) as env:
|
||||||
obs_shape = env.observation_space.shape
|
obs_shape = env.observation_space.shape
|
||||||
# Evaluation Loop for i in range(n Episodes)
|
# Evaluation Loop for i in range(n Episodes)
|
||||||
for episode in range(5):
|
for episode in range(5):
|
||||||
@ -46,11 +48,11 @@ if __name__ == '__main__':
|
|||||||
while not done_bool:
|
while not done_bool:
|
||||||
actions = [model.predict(
|
actions = [model.predict(
|
||||||
np.stack([env_state[i][j] for i in range(env_state.shape[0])]),
|
np.stack([env_state[i][j] for i in range(env_state.shape[0])]),
|
||||||
deterministic=True)[0] for j, model in enumerate(models)]
|
deterministic=False)[0] for j, model in enumerate(models)]
|
||||||
env_state, step_r, done_bool, info_obj = env.step(actions)
|
env_state, step_r, done_bool, info_obj = env.step(actions)
|
||||||
recorder.read_info(0, info_obj)
|
recorder.read_info(0, info_obj)
|
||||||
rew += step_r
|
rew += step_r
|
||||||
# env.render()
|
env.render()
|
||||||
if done_bool:
|
if done_bool:
|
||||||
recorder.read_done(0, done_bool)
|
recorder.read_done(0, done_bool)
|
||||||
break
|
break
|
||||||
|
254
studies/e_1.py
254
studies/e_1.py
@ -26,16 +26,12 @@ from environments.factory.factory_dirt import DirtProperties, DirtFactory
|
|||||||
from environments.factory.factory_dirt_item import DirtItemFactory
|
from environments.factory.factory_dirt_item import DirtItemFactory
|
||||||
from environments.factory.factory_item import ItemProperties, ItemFactory
|
from environments.factory.factory_item import ItemProperties, ItemFactory
|
||||||
from environments.logging.monitor import MonitorCallback
|
from environments.logging.monitor import MonitorCallback
|
||||||
from environments.utility_classes import MovementProperties
|
from environments.utility_classes import MovementProperties, ObservationProperties, AgentRenderOptions
|
||||||
import pickle
|
import pickle
|
||||||
from plotting.compare_runs import compare_seed_runs, compare_model_runs, compare_all_parameter_runs
|
from plotting.compare_runs import compare_seed_runs, compare_model_runs, compare_all_parameter_runs
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import seaborn as sns
|
import seaborn as sns
|
||||||
|
|
||||||
# Define a global studi save path
|
|
||||||
start_time = 163519000 # int(time.time())
|
|
||||||
study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}'
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
In this studie, we want to explore the macro behaviour of multi agents which are trained on the same task,
|
In this studie, we want to explore the macro behaviour of multi agents which are trained on the same task,
|
||||||
but never saw each other in training.
|
but never saw each other in training.
|
||||||
@ -68,6 +64,10 @@ There are further distinctions to be made:
|
|||||||
- We are out of distribution.
|
- We are out of distribution.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
n_agents = 4
|
||||||
|
ood_monitor_file = f'e_1_monitor_{n_agents}_agents.pick'
|
||||||
|
baseline_monitor_file = 'e_1_baseline_monitor.pick'
|
||||||
|
|
||||||
|
|
||||||
def policy_model_kwargs():
|
def policy_model_kwargs():
|
||||||
return dict(ent_coef=0.05)
|
return dict(ent_coef=0.05)
|
||||||
@ -92,11 +92,96 @@ def encapsule_env_factory(env_fctry, env_kwrgs):
|
|||||||
return _init
|
return _init
|
||||||
|
|
||||||
|
|
||||||
|
def load_model_run_baseline(seed_path, env_to_run):
|
||||||
|
# retrieve model class
|
||||||
|
model_cls = next(val for key, val in h.MODEL_MAP.items() if key in seed_path.parent.name)
|
||||||
|
# Load both agents
|
||||||
|
model = model_cls.load(seed_path / 'model.zip')
|
||||||
|
# Load old env kwargs
|
||||||
|
with next(seed_path.glob('*.json')).open('r') as f:
|
||||||
|
env_kwargs = simplejson.load(f)
|
||||||
|
# Monitor Init
|
||||||
|
with MonitorCallback(filepath=seed_path / baseline_monitor_file) as monitor:
|
||||||
|
# Init Env
|
||||||
|
with env_to_run(**env_kwargs) as env_factory:
|
||||||
|
# Evaluation Loop for i in range(n Episodes)
|
||||||
|
for episode in range(100):
|
||||||
|
env_state = env_factory.reset()
|
||||||
|
rew, done_bool = 0, False
|
||||||
|
while not done_bool:
|
||||||
|
action = model.predict(env_state, deterministic=True)[0]
|
||||||
|
env_state, step_r, done_bool, info_obj = env_factory.step(action)
|
||||||
|
monitor.read_info(0, info_obj)
|
||||||
|
rew += step_r
|
||||||
|
if done_bool:
|
||||||
|
monitor.read_done(0, done_bool)
|
||||||
|
break
|
||||||
|
print(f'Factory run {episode} done, reward is:\n {rew}')
|
||||||
|
# Eval monitor outputs are automatically stored by the monitor object
|
||||||
|
# del model, env_kwargs, env_factory
|
||||||
|
# import gc
|
||||||
|
# gc.collect()
|
||||||
|
|
||||||
|
|
||||||
|
def load_model_run_study(seed_path, env_to_run, additional_kwargs_dict):
|
||||||
|
global model_cls
|
||||||
|
# retrieve model class
|
||||||
|
model_cls = next(val for key, val in h.MODEL_MAP.items() if key in seed_path.parent.name)
|
||||||
|
# Load both agents
|
||||||
|
models = [model_cls.load(seed_path / 'model.zip') for _ in range(n_agents)]
|
||||||
|
# Load old env kwargs
|
||||||
|
with next(seed_path.glob('*.json')).open('r') as f:
|
||||||
|
env_kwargs = simplejson.load(f)
|
||||||
|
env_kwargs.update(
|
||||||
|
n_agents=n_agents,
|
||||||
|
**additional_kwargs_dict.get('post_training_kwargs', {}))
|
||||||
|
# Monitor Init
|
||||||
|
with MonitorCallback(filepath=seed_path / ood_monitor_file) as monitor:
|
||||||
|
# Init Env
|
||||||
|
with env_to_run(**env_kwargs) as env_factory:
|
||||||
|
# Evaluation Loop for i in range(n Episodes)
|
||||||
|
for episode in range(50):
|
||||||
|
env_state = env_factory.reset()
|
||||||
|
rew, done_bool = 0, False
|
||||||
|
while not done_bool:
|
||||||
|
try:
|
||||||
|
actions = [model.predict(
|
||||||
|
np.stack([env_state[i][j] for i in range(env_state.shape[0])]),
|
||||||
|
deterministic=False)[0] for j, model in enumerate(models)]
|
||||||
|
except ValueError as e:
|
||||||
|
print(e)
|
||||||
|
print('Env_Kwargs are:\n')
|
||||||
|
print(env_kwargs)
|
||||||
|
print('Path is:\n')
|
||||||
|
print(seed_path)
|
||||||
|
exit()
|
||||||
|
env_state, step_r, done_bool, info_obj = env_factory.step(actions)
|
||||||
|
monitor.read_info(0, info_obj)
|
||||||
|
rew += step_r
|
||||||
|
if done_bool:
|
||||||
|
monitor.read_done(0, done_bool)
|
||||||
|
break
|
||||||
|
print(f'Factory run {episode} done, reward is:\n {rew}')
|
||||||
|
# Eval monitor outputs are automatically stored by the monitor object
|
||||||
|
del models, env_kwargs, env_factory
|
||||||
|
import gc
|
||||||
|
gc.collect()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
train_steps = 8e5
|
train_steps = 8e5
|
||||||
|
|
||||||
|
# Define a global studi save path
|
||||||
|
start_time = '900000' # int(time.time())
|
||||||
|
study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}_{start_time}'
|
||||||
|
|
||||||
# Define Global Env Parameters
|
# Define Global Env Parameters
|
||||||
# Define properties object parameters
|
# Define properties object parameters
|
||||||
|
obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT,
|
||||||
|
omit_agent_self=True,
|
||||||
|
frames_to_stack=3,
|
||||||
|
pomdp_r=2
|
||||||
|
)
|
||||||
move_props = MovementProperties(allow_diagonal_movement=True,
|
move_props = MovementProperties(allow_diagonal_movement=True,
|
||||||
allow_square_movement=True,
|
allow_square_movement=True,
|
||||||
allow_no_op=False)
|
allow_no_op=False)
|
||||||
@ -108,33 +193,67 @@ if __name__ == '__main__':
|
|||||||
item_props = ItemProperties(n_items=10, agent_can_interact=True,
|
item_props = ItemProperties(n_items=10, agent_can_interact=True,
|
||||||
spawn_frequency=30, n_drop_off_locations=2,
|
spawn_frequency=30, n_drop_off_locations=2,
|
||||||
max_agent_inventory_capacity=15)
|
max_agent_inventory_capacity=15)
|
||||||
factory_kwargs = dict(n_agents=1,
|
factory_kwargs = dict(n_agents=1, max_steps=400, parse_doors=True,
|
||||||
pomdp_r=2, max_steps=400, parse_doors=True,
|
level_name='rooms', record_episodes=False, doors_have_area=False,
|
||||||
level_name='rooms', frames_to_stack=3,
|
verbose=False,
|
||||||
omit_agent_in_obs=True, combin_agent_obs=True, record_episodes=False,
|
mv_prop=move_props,
|
||||||
cast_shadows=True, doors_have_area=False, verbose=False,
|
obs_prop=obs_props
|
||||||
movement_properties=move_props
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Bundle both environments with global kwargs and parameters
|
# Bundle both environments with global kwargs and parameters
|
||||||
env_map = {'dirt': (DirtFactory, dict(dirt_properties=dirt_props, **factory_kwargs)),
|
env_map = {'dirt': (DirtFactory, dict(dirt_prop=dirt_props,
|
||||||
'item': (ItemFactory, dict(item_properties=item_props, **factory_kwargs)),
|
**factory_kwargs)),
|
||||||
'itemdirt': (DirtItemFactory, dict(dirt_properties=dirt_props, item_properties=item_props,
|
'item': (ItemFactory, dict(item_prop=item_props,
|
||||||
|
**factory_kwargs)),
|
||||||
|
'itemdirt': (DirtItemFactory, dict(dirt_prop=dirt_props,
|
||||||
|
item_prop=item_props,
|
||||||
**factory_kwargs))}
|
**factory_kwargs))}
|
||||||
env_names = list(env_map.keys())
|
env_names = list(env_map.keys())
|
||||||
|
|
||||||
# Define parameter versions according with #1,2[1,0,N],3
|
# Define parameter versions according with #1,2[1,0,N],3
|
||||||
observation_modes = {
|
observation_modes = {
|
||||||
# Fill-value = 0
|
# Fill-value = 0
|
||||||
# DEACTIVATED 'seperate_0': dict(additional_env_kwargs=dict(additional_agent_placeholder=0)),
|
# DEACTIVATED 'seperate_0': dict(additional_env_kwargs=dict(additional_agent_placeholder=0)),
|
||||||
# Fill-value = 1
|
# Fill-value = 1
|
||||||
# DEACTIVATED 'seperate_1': dict(additional_env_kwargs=dict(additional_agent_placeholder=1)),
|
# DEACTIVATED 'seperate_1': dict(additional_env_kwargs=dict(additional_agent_placeholder=1)),
|
||||||
# Fill-value = N(0, 1)
|
# Fill-value = N(0, 1)
|
||||||
'seperate_N': dict(additional_env_kwargs=dict(additional_agent_placeholder='N')),
|
'seperate_N': dict(
|
||||||
# Further Adjustments are done post-training
|
post_training_kwargs=
|
||||||
'in_lvl_obs': dict(post_training_kwargs=dict(other_agent_obs='in_lvl')),
|
dict(obs_prop=ObservationProperties(
|
||||||
|
render_agents=AgentRenderOptions.COMBINED,
|
||||||
|
additional_agent_placeholder=None,
|
||||||
|
omit_agent_self=True,
|
||||||
|
frames_to_stack=3,
|
||||||
|
pomdp_r=2)
|
||||||
|
),
|
||||||
|
additional_env_kwargs=
|
||||||
|
dict(obs_prop=ObservationProperties(
|
||||||
|
render_agents=AgentRenderOptions.NOT,
|
||||||
|
additional_agent_placeholder='N',
|
||||||
|
omit_agent_self=True,
|
||||||
|
frames_to_stack=3,
|
||||||
|
pomdp_r=2)
|
||||||
|
)
|
||||||
|
),
|
||||||
|
'in_lvl_obs': dict(
|
||||||
|
post_training_kwargs=
|
||||||
|
dict(obs_prop=ObservationProperties(
|
||||||
|
render_agents=AgentRenderOptions.LEVEL,
|
||||||
|
omit_agent_self=True,
|
||||||
|
frames_to_stack=3,
|
||||||
|
pomdp_r=2)
|
||||||
|
)
|
||||||
|
),
|
||||||
# No further adjustment needed
|
# No further adjustment needed
|
||||||
'no_obs': {}
|
'no_obs': dict(
|
||||||
|
post_training_kwargs=
|
||||||
|
dict(obs_prop=ObservationProperties(
|
||||||
|
render_agents=AgentRenderOptions.NOT,
|
||||||
|
omit_agent_self=True,
|
||||||
|
frames_to_stack=3,
|
||||||
|
pomdp_r=2)
|
||||||
|
)
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
# Train starts here ############################################################
|
# Train starts here ############################################################
|
||||||
@ -223,52 +342,27 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
# Evaluation starts here #####################################################
|
# Evaluation starts here #####################################################
|
||||||
# First Iterate over every model and monitor "as trained"
|
# First Iterate over every model and monitor "as trained"
|
||||||
baseline_monitor_file = 'e_1_baseline_monitor.pick'
|
|
||||||
if True:
|
if True:
|
||||||
render = False
|
|
||||||
for observation_mode in observation_modes:
|
for observation_mode in observation_modes:
|
||||||
obs_mode_path = next(x for x in study_root_path.iterdir() if x.is_dir() and x.name == observation_mode)
|
obs_mode_path = next(x for x in study_root_path.iterdir() if x.is_dir() and x.name == observation_mode)
|
||||||
# For trained policy in study_root_path / identifier
|
# For trained policy in study_root_path / identifier
|
||||||
for env_path in [x for x in obs_mode_path.iterdir() if x.is_dir()]:
|
for env_path in [x for x in obs_mode_path.iterdir() if x.is_dir()]:
|
||||||
for policy_path in [x for x in env_path.iterdir() if x. is_dir()]:
|
for policy_path in [x for x in env_path.iterdir() if x. is_dir()]:
|
||||||
# Iteration
|
# Iteration
|
||||||
for seed_path in (y for y in policy_path.iterdir() if y.is_dir()):
|
paths = list(y for y in policy_path.iterdir() if y.is_dir() \
|
||||||
# retrieve model class
|
and not (y / baseline_monitor_file).exists())
|
||||||
for model_cls in (val for key, val in h.MODEL_MAP.items() if key in policy_path.name):
|
import multiprocessing as mp
|
||||||
# Load both agents
|
import itertools as it
|
||||||
model = model_cls.load(seed_path / 'model.zip')
|
pool = mp.Pool(mp.cpu_count())
|
||||||
# Load old env kwargs
|
result = pool.starmap(load_model_run_baseline,
|
||||||
with next(seed_path.glob('*.json')).open('r') as f:
|
it.product(paths,
|
||||||
env_kwargs = simplejson.load(f)
|
(env_map[env_path.name][0],))
|
||||||
# Monitor Init
|
)
|
||||||
with MonitorCallback(filepath=seed_path / baseline_monitor_file) as monitor:
|
|
||||||
# Init Env
|
|
||||||
with env_map[env_path.name][0](**env_kwargs) as env_factory:
|
|
||||||
# Evaluation Loop for i in range(n Episodes)
|
|
||||||
for episode in range(100):
|
|
||||||
env_state = env_factory.reset()
|
|
||||||
rew, done_bool = 0, False
|
|
||||||
while not done_bool:
|
|
||||||
action = model.predict(env_state, deterministic=True)[0]
|
|
||||||
env_state, step_r, done_bool, info_obj = env_factory.step(action)
|
|
||||||
monitor.read_info(0, info_obj)
|
|
||||||
rew += step_r
|
|
||||||
if render:
|
|
||||||
env_factory.render()
|
|
||||||
if done_bool:
|
|
||||||
monitor.read_done(0, done_bool)
|
|
||||||
break
|
|
||||||
print(f'Factory run {episode} done, reward is:\n {rew}')
|
|
||||||
# Eval monitor outputs are automatically stored by the monitor object
|
|
||||||
del model, env_kwargs, env_factory
|
|
||||||
import gc
|
|
||||||
|
|
||||||
gc.collect()
|
# for seed_path in (y for y in policy_path.iterdir() if y.is_dir()):
|
||||||
|
# load_model_run_baseline(seed_path)
|
||||||
|
|
||||||
# Then iterate over every model and monitor "ood behavior" - "is it ood?"
|
# Then iterate over every model and monitor "ood behavior" - "is it ood?"
|
||||||
n_agents = 4
|
|
||||||
ood_monitor_file = f'e_1_monitor_{n_agents}_agents.pick'
|
|
||||||
|
|
||||||
if True:
|
if True:
|
||||||
for observation_mode in observation_modes:
|
for observation_mode in observation_modes:
|
||||||
obs_mode_path = next(x for x in study_root_path.iterdir() if x.is_dir() and x.name == observation_mode)
|
obs_mode_path = next(x for x in study_root_path.iterdir() if x.is_dir() and x.name == observation_mode)
|
||||||
@ -279,44 +373,18 @@ if __name__ == '__main__':
|
|||||||
# First seed path version
|
# First seed path version
|
||||||
# seed_path = next((y for y in policy_path.iterdir() if y.is_dir()))
|
# seed_path = next((y for y in policy_path.iterdir() if y.is_dir()))
|
||||||
# Iteration
|
# Iteration
|
||||||
for seed_path in (y for y in policy_path.iterdir() if y.is_dir()):
|
import multiprocessing as mp
|
||||||
if (seed_path / ood_monitor_file).exists():
|
import itertools as it
|
||||||
continue
|
pool = mp.Pool(mp.cpu_count())
|
||||||
# retrieve model class
|
paths = list(y for y in policy_path.iterdir() if y.is_dir() \
|
||||||
for model_cls in (val for key, val in h.MODEL_MAP.items() if key in policy_path.name):
|
and not (y / ood_monitor_file).exists())
|
||||||
# Load both agents
|
result = pool.starmap(load_model_run_study,
|
||||||
models = [model_cls.load(seed_path / 'model.zip') for _ in range(n_agents)]
|
it.product(paths,
|
||||||
# Load old env kwargs
|
(env_map[env_path.name][0],),
|
||||||
with next(seed_path.glob('*.json')).open('r') as f:
|
(observation_modes[observation_mode],))
|
||||||
env_kwargs = simplejson.load(f)
|
)
|
||||||
env_kwargs.update(
|
# for seed_path in (y for y in policy_path.iterdir() if y.is_dir()):
|
||||||
n_agents=n_agents, additional_agent_placeholder=None,
|
# load_model_run_study(seed_path)
|
||||||
**observation_modes[observation_mode].get('post_training_env_kwargs', {}))
|
|
||||||
|
|
||||||
# Monitor Init
|
|
||||||
with MonitorCallback(filepath=seed_path / ood_monitor_file) as monitor:
|
|
||||||
# Init Env
|
|
||||||
with env_map[env_path.name][0](**env_kwargs) as env_factory:
|
|
||||||
# Evaluation Loop for i in range(n Episodes)
|
|
||||||
for episode in range(50):
|
|
||||||
env_state = env_factory.reset()
|
|
||||||
rew, done_bool = 0, False
|
|
||||||
while not done_bool:
|
|
||||||
actions = [model.predict(
|
|
||||||
np.stack([env_state[i][j] for i in range(env_state.shape[0])]),
|
|
||||||
deterministic=False)[0] for j, model in enumerate(models)]
|
|
||||||
env_state, step_r, done_bool, info_obj = env_factory.step(actions)
|
|
||||||
monitor.read_info(0, info_obj)
|
|
||||||
rew += step_r
|
|
||||||
if done_bool:
|
|
||||||
monitor.read_done(0, done_bool)
|
|
||||||
break
|
|
||||||
print(f'Factory run {episode} done, reward is:\n {rew}')
|
|
||||||
# Eval monitor outputs are automatically stored by the monitor object
|
|
||||||
del models, env_kwargs, env_factory
|
|
||||||
import gc
|
|
||||||
|
|
||||||
gc.collect()
|
|
||||||
|
|
||||||
# Plotting
|
# Plotting
|
||||||
if True:
|
if True:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user