Multithreaded Operation
Debugging Doors, when no Doors are present Smaller Bugfixes
This commit is contained in:
BIN
environments/factory/assets/agents/move.png
Normal file
BIN
environments/factory/assets/agents/move.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 5.8 KiB |
Binary file not shown.
Before Width: | Height: | Size: 1.5 KiB After Width: | Height: | Size: 5.6 KiB |
@ -60,9 +60,12 @@ class BaseFactory(gym.Env):
|
|||||||
omit_agent_in_obs=False, done_at_collision=False, cast_shadows=True,
|
omit_agent_in_obs=False, done_at_collision=False, cast_shadows=True,
|
||||||
verbose=False, doors_have_area=True, env_seed=time.time_ns(), **kwargs):
|
verbose=False, doors_have_area=True, env_seed=time.time_ns(), **kwargs):
|
||||||
assert frames_to_stack != 1 and frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1."
|
assert frames_to_stack != 1 and frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1."
|
||||||
|
if kwargs:
|
||||||
|
print(f'Following kwargs were passed, but ignored: {kwargs}')
|
||||||
|
|
||||||
# Attribute Assignment
|
# Attribute Assignment
|
||||||
self.env_seed = env_seed
|
self.env_seed = env_seed
|
||||||
|
self.seed(env_seed)
|
||||||
self._base_rng = np.random.default_rng(self.env_seed)
|
self._base_rng = np.random.default_rng(self.env_seed)
|
||||||
self.movement_properties = movement_properties
|
self.movement_properties = movement_properties
|
||||||
self.level_name = level_name
|
self.level_name = level_name
|
||||||
@ -85,11 +88,6 @@ class BaseFactory(gym.Env):
|
|||||||
self.parse_doors = parse_doors
|
self.parse_doors = parse_doors
|
||||||
self.doors_have_area = doors_have_area
|
self.doors_have_area = doors_have_area
|
||||||
|
|
||||||
# Actions
|
|
||||||
self._actions = Actions(self.movement_properties, can_use_doors=self.parse_doors)
|
|
||||||
if additional_actions := self.additional_actions:
|
|
||||||
self._actions.register_additional_items(additional_actions)
|
|
||||||
|
|
||||||
# Reset
|
# Reset
|
||||||
self.reset()
|
self.reset()
|
||||||
|
|
||||||
@ -123,12 +121,18 @@ class BaseFactory(gym.Env):
|
|||||||
self.NO_POS_TILE = Tile(c.NO_POS.value)
|
self.NO_POS_TILE = Tile(c.NO_POS.value)
|
||||||
|
|
||||||
# Doors
|
# Doors
|
||||||
|
if self.parse_doors:
|
||||||
parsed_doors = h.one_hot_level(parsed_level, c.DOOR)
|
parsed_doors = h.one_hot_level(parsed_level, c.DOOR)
|
||||||
if np.any(parsed_doors):
|
if np.any(parsed_doors):
|
||||||
door_tiles = [floor.by_pos(pos) for pos in np.argwhere(parsed_doors == c.OCCUPIED_CELL.value)]
|
door_tiles = [floor.by_pos(pos) for pos in np.argwhere(parsed_doors == c.OCCUPIED_CELL.value)]
|
||||||
doors = Doors.from_tiles(door_tiles, self._level_shape, context=floor, is_blocking_light=True)
|
doors = Doors.from_tiles(door_tiles, self._level_shape, context=floor, is_blocking_light=True)
|
||||||
entities.update({c.DOORS: doors})
|
entities.update({c.DOORS: doors})
|
||||||
|
|
||||||
|
# Actions
|
||||||
|
self._actions = Actions(self.movement_properties, can_use_doors=self.parse_doors)
|
||||||
|
if additional_actions := self.additional_actions:
|
||||||
|
self._actions.register_additional_items(additional_actions)
|
||||||
|
|
||||||
# Agents
|
# Agents
|
||||||
agents = Agents.from_tiles(floor.empty_tiles[:self.n_agents], self._level_shape)
|
agents = Agents.from_tiles(floor.empty_tiles[:self.n_agents], self._level_shape)
|
||||||
entities.update({c.AGENT: agents})
|
entities.update({c.AGENT: agents})
|
||||||
@ -155,8 +159,8 @@ class BaseFactory(gym.Env):
|
|||||||
# Optionally Pad this obs cube for pomdp cases
|
# Optionally Pad this obs cube for pomdp cases
|
||||||
if r := self.pomdp_r:
|
if r := self.pomdp_r:
|
||||||
x, y = self._level_shape
|
x, y = self._level_shape
|
||||||
|
# was c.SHADOW
|
||||||
self._padded_obs_cube = np.full((obs_cube_z, x + r*2, y + r*2), c.SHADOWED_CELL.value, dtype=np.float32)
|
self._padded_obs_cube = np.full((obs_cube_z, x + r*2, y + r*2), c.SHADOWED_CELL.value, dtype=np.float32)
|
||||||
# self._padded_obs_cube[0] = c.OCCUPIED_CELL.value
|
|
||||||
self._padded_obs_cube[:, r:r+x, r:r+y] = self._obs_cube
|
self._padded_obs_cube[:, r:r+x, r:r+y] = self._obs_cube
|
||||||
|
|
||||||
def reset(self) -> (np.ndarray, int, bool, dict):
|
def reset(self) -> (np.ndarray, int, bool, dict):
|
||||||
@ -170,7 +174,10 @@ class BaseFactory(gym.Env):
|
|||||||
return obs
|
return obs
|
||||||
|
|
||||||
def step(self, actions):
|
def step(self, actions):
|
||||||
actions = [actions] if isinstance(actions, int) or np.isscalar(actions) else actions
|
|
||||||
|
if self.n_agents == 1:
|
||||||
|
actions = [int(actions)]
|
||||||
|
|
||||||
assert isinstance(actions, Iterable), f'"actions" has to be in [{int, list}]'
|
assert isinstance(actions, Iterable), f'"actions" has to be in [{int, list}]'
|
||||||
self._steps += 1
|
self._steps += 1
|
||||||
done = False
|
done = False
|
||||||
@ -180,9 +187,10 @@ class BaseFactory(gym.Env):
|
|||||||
|
|
||||||
# Move this in a seperate function?
|
# Move this in a seperate function?
|
||||||
for action, agent in zip(actions, self[c.AGENT]):
|
for action, agent in zip(actions, self[c.AGENT]):
|
||||||
agent.clear_temp_sate()
|
agent.clear_temp_state()
|
||||||
action_obj = self._actions[int(action)]
|
action_obj = self._actions[int(action)]
|
||||||
if self._actions.is_moving_action(action_obj):
|
self.print(f'Action #{action} has been resolved to: {action_obj}')
|
||||||
|
if h.MovingAction.is_member(action_obj):
|
||||||
valid = self._move_or_colide(agent, action_obj)
|
valid = self._move_or_colide(agent, action_obj)
|
||||||
elif h.EnvActions.NOOP == agent.temp_action:
|
elif h.EnvActions.NOOP == agent.temp_action:
|
||||||
valid = c.VALID
|
valid = c.VALID
|
||||||
@ -210,7 +218,8 @@ class BaseFactory(gym.Env):
|
|||||||
|
|
||||||
# Step the door close intervall
|
# Step the door close intervall
|
||||||
if self.parse_doors:
|
if self.parse_doors:
|
||||||
self[c.DOORS].tick_doors()
|
if doors := self[c.DOORS]:
|
||||||
|
doors.tick_doors()
|
||||||
|
|
||||||
# Finalize
|
# Finalize
|
||||||
reward, reward_info = self.calculate_reward()
|
reward, reward_info = self.calculate_reward()
|
||||||
@ -229,17 +238,20 @@ class BaseFactory(gym.Env):
|
|||||||
return obs, reward, done, info
|
return obs, reward, done, info
|
||||||
|
|
||||||
def _handle_door_interaction(self, agent) -> c:
|
def _handle_door_interaction(self, agent) -> c:
|
||||||
|
if doors := self[c.DOORS]:
|
||||||
# Check if agent really is standing on a door:
|
# Check if agent really is standing on a door:
|
||||||
if self.doors_have_area:
|
if self.doors_have_area:
|
||||||
door = self[c.DOORS].get_near_position(agent.pos)
|
door = doors.get_near_position(agent.pos)
|
||||||
else:
|
else:
|
||||||
door = self[c.DOORS].by_pos(agent.pos)
|
door = doors.by_pos(agent.pos)
|
||||||
if door is not None:
|
if door is not None:
|
||||||
door.use()
|
door.use()
|
||||||
return c.VALID
|
return c.VALID
|
||||||
# When he doesn't...
|
# When he doesn't...
|
||||||
else:
|
else:
|
||||||
return c.NOT_VALID
|
return c.NOT_VALID
|
||||||
|
else:
|
||||||
|
return c.NOT_VALID
|
||||||
|
|
||||||
def _get_observations(self) -> np.ndarray:
|
def _get_observations(self) -> np.ndarray:
|
||||||
state_array_dict = self._entities.arrays
|
state_array_dict = self._entities.arrays
|
||||||
@ -284,8 +296,9 @@ class BaseFactory(gym.Env):
|
|||||||
state_array_dict[c.AGENT][0, agent.x, agent.y] += agent.encoding
|
state_array_dict[c.AGENT][0, agent.x, agent.y] += agent.encoding
|
||||||
|
|
||||||
if r := self.pomdp_r:
|
if r := self.pomdp_r:
|
||||||
|
self._padded_obs_cube[:] = c.SHADOWED_CELL.value # Was c.SHADOW
|
||||||
|
# self._padded_obs_cube[0] = c.OCCUPIED_CELL.value
|
||||||
x, y = self._level_shape
|
x, y = self._level_shape
|
||||||
self._padded_obs_cube[:] = c.SHADOWED_CELL.value
|
|
||||||
self._padded_obs_cube[:, r:r + x, r:r + y] = self._obs_cube
|
self._padded_obs_cube[:, r:r + x, r:r + y] = self._obs_cube
|
||||||
global_x, global_y = map(sum, zip(agent.pos, (r, r)))
|
global_x, global_y = map(sum, zip(agent.pos, (r, r)))
|
||||||
x0, x1 = max(0, global_x - self.pomdp_r), global_x + self.pomdp_r + 1
|
x0, x1 = max(0, global_x - self.pomdp_r), global_x + self.pomdp_r + 1
|
||||||
@ -297,7 +310,9 @@ class BaseFactory(gym.Env):
|
|||||||
if self.cast_shadows:
|
if self.cast_shadows:
|
||||||
obs_block_light = [obs[idx] != c.OCCUPIED_CELL.value for idx in shadowing_idxs]
|
obs_block_light = [obs[idx] != c.OCCUPIED_CELL.value for idx in shadowing_idxs]
|
||||||
door_shadowing = False
|
door_shadowing = False
|
||||||
if door := self[c.DOORS].by_pos(agent.pos):
|
if self.parse_doors:
|
||||||
|
if doors := self[c.DOORS]:
|
||||||
|
if door := doors.by_pos(agent.pos):
|
||||||
if door.is_closed:
|
if door.is_closed:
|
||||||
for group in door.connectivity_subgroups:
|
for group in door.connectivity_subgroups:
|
||||||
if agent.last_pos not in group:
|
if agent.last_pos not in group:
|
||||||
@ -361,13 +376,15 @@ class BaseFactory(gym.Env):
|
|||||||
return tile, valid
|
return tile, valid
|
||||||
|
|
||||||
if self.parse_doors and agent.last_pos != c.NO_POS:
|
if self.parse_doors and agent.last_pos != c.NO_POS:
|
||||||
if door := self[c.DOORS].by_pos(new_tile.pos):
|
if doors := self[c.DOORS]:
|
||||||
|
if self.doors_have_area:
|
||||||
|
if door := doors.by_pos(new_tile.pos):
|
||||||
if door.can_collide:
|
if door.can_collide:
|
||||||
return agent.tile, c.NOT_VALID
|
return agent.tile, c.NOT_VALID
|
||||||
else: # door.is_closed:
|
else: # door.is_closed:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
if door := self[c.DOORS].by_pos(agent.pos):
|
if door := doors.by_pos(agent.pos):
|
||||||
if door.is_open:
|
if door.is_open:
|
||||||
pass
|
pass
|
||||||
else: # door.is_closed:
|
else: # door.is_closed:
|
||||||
@ -391,7 +408,9 @@ class BaseFactory(gym.Env):
|
|||||||
if self._actions.is_moving_action(agent.temp_action):
|
if self._actions.is_moving_action(agent.temp_action):
|
||||||
if agent.temp_valid:
|
if agent.temp_valid:
|
||||||
# info_dict.update(movement=1)
|
# info_dict.update(movement=1)
|
||||||
reward -= 0.00
|
# info_dict.update({f'{agent.name}_failed_action': 1})
|
||||||
|
# reward += 0.00
|
||||||
|
pass
|
||||||
else:
|
else:
|
||||||
# self.print('collision')
|
# self.print('collision')
|
||||||
reward -= 0.01
|
reward -= 0.01
|
||||||
@ -400,16 +419,17 @@ class BaseFactory(gym.Env):
|
|||||||
|
|
||||||
elif h.EnvActions.USE_DOOR == agent.temp_action:
|
elif h.EnvActions.USE_DOOR == agent.temp_action:
|
||||||
if agent.temp_valid:
|
if agent.temp_valid:
|
||||||
|
# reward += 0.00
|
||||||
self.print(f'{agent.name} did just use the door at {agent.pos}.')
|
self.print(f'{agent.name} did just use the door at {agent.pos}.')
|
||||||
info_dict.update(door_used=1)
|
info_dict.update(door_used=1)
|
||||||
else:
|
else:
|
||||||
reward -= 0.00
|
# reward -= 0.00
|
||||||
self.print(f'{agent.name} just tried to use a door at {agent.pos}, but failed.')
|
self.print(f'{agent.name} just tried to use a door at {agent.pos}, but failed.')
|
||||||
info_dict.update({f'{agent.name}_failed_action': 1})
|
info_dict.update({f'{agent.name}_failed_action': 1})
|
||||||
info_dict.update({f'{agent.name}_failed_door_open': 1})
|
info_dict.update({f'{agent.name}_failed_door_open': 1})
|
||||||
elif h.EnvActions.NOOP == agent.temp_action:
|
elif h.EnvActions.NOOP == agent.temp_action:
|
||||||
info_dict.update(no_op=1)
|
info_dict.update(no_op=1)
|
||||||
reward -= 0.00
|
# reward -= 0.00
|
||||||
|
|
||||||
additional_reward, additional_info_dict = self.calculate_additional_reward(agent)
|
additional_reward, additional_info_dict = self.calculate_additional_reward(agent)
|
||||||
reward += additional_reward
|
reward += additional_reward
|
||||||
|
@ -24,15 +24,27 @@ class Object:
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def identifier(self):
|
def identifier(self):
|
||||||
return self._enum_ident
|
|
||||||
|
|
||||||
def __init__(self, enum_ident: Union[Enum, None] = None, is_blocking_light=False, **kwargs):
|
|
||||||
self._enum_ident = enum_ident
|
|
||||||
if self._enum_ident is not None:
|
if self._enum_ident is not None:
|
||||||
self._name = f'{self.__class__.__name__}[{self._enum_ident.name}]'
|
return self._enum_ident
|
||||||
|
elif self._str_ident is not None:
|
||||||
|
return self._str_ident
|
||||||
else:
|
else:
|
||||||
|
return self._name
|
||||||
|
|
||||||
|
def __init__(self, str_ident: Union[str, None] = None, enum_ident: Union[Enum, None] = None, is_blocking_light=False, **kwargs):
|
||||||
|
self._str_ident = str_ident
|
||||||
|
self._enum_ident = enum_ident
|
||||||
|
|
||||||
|
if self._enum_ident is not None and self._str_ident is None:
|
||||||
|
self._name = f'{self.__class__.__name__}[{self._enum_ident.name}]'
|
||||||
|
elif self._str_ident is not None and self._enum_ident is None:
|
||||||
|
self._name = f'{self.__class__.__name__}[{self._str_ident}]'
|
||||||
|
elif self._str_ident is None and self._enum_ident is None:
|
||||||
self._name = f'{self.__class__.__name__}#{self._u_idx}'
|
self._name = f'{self.__class__.__name__}#{self._u_idx}'
|
||||||
Object._u_idx += 1
|
Object._u_idx += 1
|
||||||
|
else:
|
||||||
|
raise ValueError('Please use either of the idents.')
|
||||||
|
|
||||||
self._is_blocking_light = is_blocking_light
|
self._is_blocking_light = is_blocking_light
|
||||||
if kwargs:
|
if kwargs:
|
||||||
print(f'Following kwargs were passed, but ignored: {kwargs}')
|
print(f'Following kwargs were passed, but ignored: {kwargs}')
|
||||||
@ -166,7 +178,7 @@ class Door(Entity):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def encoding(self):
|
def encoding(self):
|
||||||
return 1 if self.is_closed else -1
|
return 1 if self.is_closed else 0.5
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def access_area(self):
|
def access_area(self):
|
||||||
@ -274,10 +286,10 @@ class Agent(MoveableEntity):
|
|||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super(Agent, self).__init__(*args, **kwargs)
|
super(Agent, self).__init__(*args, **kwargs)
|
||||||
self.clear_temp_sate()
|
self.clear_temp_state()
|
||||||
|
|
||||||
# noinspection PyAttributeOutsideInit
|
# noinspection PyAttributeOutsideInit
|
||||||
def clear_temp_sate(self):
|
def clear_temp_state(self):
|
||||||
# for attr in self.__dict__:
|
# for attr in self.__dict__:
|
||||||
# if attr.startswith('temp'):
|
# if attr.startswith('temp'):
|
||||||
self.temp_collisions = []
|
self.temp_collisions = []
|
||||||
|
@ -53,7 +53,10 @@ class Register:
|
|||||||
return next(v for i, v in enumerate(self._register.values()) if i == item)
|
return next(v for i, v in enumerate(self._register.values()) if i == item)
|
||||||
except StopIteration:
|
except StopIteration:
|
||||||
return None
|
return None
|
||||||
|
try:
|
||||||
return self._register[item]
|
return self._register[item]
|
||||||
|
except KeyError:
|
||||||
|
return None
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return f'{self.__class__.__name__}({self._register})'
|
return f'{self.__class__.__name__}({self._register})'
|
||||||
@ -84,8 +87,8 @@ class EntityObjectRegister(ObjectRegister, ABC):
|
|||||||
@classmethod
|
@classmethod
|
||||||
def from_tiles(cls, tiles, *args, **kwargs):
|
def from_tiles(cls, tiles, *args, **kwargs):
|
||||||
# objects_name = cls._accepted_objects.__name__
|
# objects_name = cls._accepted_objects.__name__
|
||||||
entities = [cls._accepted_objects(tile, **kwargs)
|
entities = [cls._accepted_objects(tile, str_ident=i, **kwargs)
|
||||||
for tile in tiles]
|
for i, tile in enumerate(tiles)]
|
||||||
register_obj = cls(*args)
|
register_obj = cls(*args)
|
||||||
register_obj.register_additional_items(entities)
|
register_obj.register_additional_items(entities)
|
||||||
return register_obj
|
return register_obj
|
||||||
@ -294,10 +297,10 @@ class Actions(Register):
|
|||||||
|
|
||||||
if self.allow_square_movement:
|
if self.allow_square_movement:
|
||||||
self.register_additional_items([self._accepted_objects(enum_ident=direction)
|
self.register_additional_items([self._accepted_objects(enum_ident=direction)
|
||||||
for direction in h.ManhattanMoves])
|
for direction in h.MovingAction.square()])
|
||||||
if self.allow_diagonal_movement:
|
if self.allow_diagonal_movement:
|
||||||
self.register_additional_items([self._accepted_objects(enum_ident=direction)
|
self.register_additional_items([self._accepted_objects(enum_ident=direction)
|
||||||
for direction in h.DiagonalMoves])
|
for direction in h.MovingAction.diagonal()])
|
||||||
self._movement_actions = self._register.copy()
|
self._movement_actions = self._register.copy()
|
||||||
if self.can_use_doors:
|
if self.can_use_doors:
|
||||||
self.register_additional_items([self._accepted_objects(enum_ident=h.EnvActions.USE_DOOR)])
|
self.register_additional_items([self._accepted_objects(enum_ident=h.EnvActions.USE_DOOR)])
|
||||||
|
@ -79,6 +79,7 @@ class Renderer:
|
|||||||
rects = []
|
rects = []
|
||||||
for i, j in product(range(-self.view_radius, self.view_radius+1),
|
for i, j in product(range(-self.view_radius, self.view_radius+1),
|
||||||
range(-self.view_radius, self.view_radius+1)):
|
range(-self.view_radius, self.view_radius+1)):
|
||||||
|
if view is not None:
|
||||||
if bool(view[self.view_radius+j, self.view_radius+i]):
|
if bool(view[self.view_radius+j, self.view_radius+i]):
|
||||||
visibility_rect = bp['dest'].copy()
|
visibility_rect = bp['dest'].copy()
|
||||||
visibility_rect.centerx += i*self.cell_size
|
visibility_rect.centerx += i*self.cell_size
|
||||||
|
@ -94,6 +94,10 @@ class DirtRegister(MovingEntityObjectRegister):
|
|||||||
return c.NOT_VALID
|
return c.NOT_VALID
|
||||||
return c.VALID
|
return c.VALID
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
s = super(DirtRegister, self).__repr__()
|
||||||
|
return f'{s[:-1]}, {self.amount})'
|
||||||
|
|
||||||
|
|
||||||
def softmax(x):
|
def softmax(x):
|
||||||
"""Compute softmax values for each sets of scores in x."""
|
"""Compute softmax values for each sets of scores in x."""
|
||||||
@ -149,7 +153,10 @@ class SimpleFactory(BaseFactory):
|
|||||||
return c.NOT_VALID
|
return c.NOT_VALID
|
||||||
|
|
||||||
def trigger_dirt_spawn(self):
|
def trigger_dirt_spawn(self):
|
||||||
free_for_dirt = self[c.FLOOR].empty_tiles
|
free_for_dirt = [x for x in self[c.FLOOR]
|
||||||
|
if len(x.guests) == 0 or (len(x.guests) == 1 and isinstance(next(y for y in x.guests), Dirt))
|
||||||
|
]
|
||||||
|
self._dirt_rng.shuffle(free_for_dirt)
|
||||||
new_spawn = self._dirt_rng.uniform(0, self.dirt_properties.max_spawn_ratio)
|
new_spawn = self._dirt_rng.uniform(0, self.dirt_properties.max_spawn_ratio)
|
||||||
n_dirt_tiles = max(0, int(new_spawn * len(free_for_dirt)))
|
n_dirt_tiles = max(0, int(new_spawn * len(free_for_dirt)))
|
||||||
self[c.DIRT].spawn_dirt(free_for_dirt[:n_dirt_tiles])
|
self[c.DIRT].spawn_dirt(free_for_dirt[:n_dirt_tiles])
|
||||||
@ -216,7 +223,7 @@ class SimpleFactory(BaseFactory):
|
|||||||
self.print(f'{agent.name} did just clean up some dirt at {agent.pos}.')
|
self.print(f'{agent.name} did just clean up some dirt at {agent.pos}.')
|
||||||
info_dict.update(dirt_cleaned=1)
|
info_dict.update(dirt_cleaned=1)
|
||||||
else:
|
else:
|
||||||
reward -= 0.00
|
reward -= 0.01
|
||||||
self.print(f'{agent.name} just tried to clean up some dirt at {agent.pos}, but failed.')
|
self.print(f'{agent.name} just tried to clean up some dirt at {agent.pos}, but failed.')
|
||||||
info_dict.update({f'{agent.name}_failed_action': 1})
|
info_dict.update({f'{agent.name}_failed_action': 1})
|
||||||
info_dict.update({f'{agent.name}_failed_action': 1})
|
info_dict.update({f'{agent.name}_failed_action': 1})
|
||||||
@ -235,8 +242,8 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
factory = SimpleFactory(n_agents=1, done_at_collision=False, frames_to_stack=0,
|
factory = SimpleFactory(n_agents=1, done_at_collision=False, frames_to_stack=0,
|
||||||
level_name='rooms', max_steps=400, combin_agent_obs=True,
|
level_name='rooms', max_steps=400, combin_agent_obs=True,
|
||||||
omit_agent_in_obs=True, parse_doors=True, pomdp_r=2,
|
omit_agent_in_obs=True, parse_doors=False, pomdp_r=2,
|
||||||
record_episodes=False, verbose=True
|
record_episodes=False, verbose=True, cast_shadows=False
|
||||||
)
|
)
|
||||||
|
|
||||||
# noinspection DuplicatedCode
|
# noinspection DuplicatedCode
|
||||||
|
@ -50,19 +50,28 @@ class Constants(Enum):
|
|||||||
return bool(self.value)
|
return bool(self.value)
|
||||||
|
|
||||||
|
|
||||||
class ManhattanMoves(Enum):
|
class MovingAction(Enum):
|
||||||
NORTH = 'north'
|
NORTH = 'north'
|
||||||
EAST = 'east'
|
EAST = 'east'
|
||||||
SOUTH = 'south'
|
SOUTH = 'south'
|
||||||
WEST = 'west'
|
WEST = 'west'
|
||||||
|
|
||||||
|
|
||||||
class DiagonalMoves(Enum):
|
|
||||||
NORTHEAST = 'north_east'
|
NORTHEAST = 'north_east'
|
||||||
SOUTHEAST = 'south_east'
|
SOUTHEAST = 'south_east'
|
||||||
SOUTHWEST = 'south_west'
|
SOUTHWEST = 'south_west'
|
||||||
NORTHWEST = 'north_west'
|
NORTHWEST = 'north_west'
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def is_member(cls, other):
|
||||||
|
return any([other == direction for direction in cls])
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def square(cls):
|
||||||
|
return [cls.NORTH, cls.EAST, cls.SOUTH, cls.WEST]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def diagonal(cls):
|
||||||
|
return [cls.NORTHEAST, cls.SOUTHEAST, cls.SOUTHWEST, cls.NORTHWEST]
|
||||||
|
|
||||||
|
|
||||||
class EnvActions(Enum):
|
class EnvActions(Enum):
|
||||||
NOOP = 'no_op'
|
NOOP = 'no_op'
|
||||||
@ -71,14 +80,13 @@ class EnvActions(Enum):
|
|||||||
ITEM_ACTION = 'item_action'
|
ITEM_ACTION = 'item_action'
|
||||||
|
|
||||||
|
|
||||||
d = DiagonalMoves
|
m = MovingAction
|
||||||
m = ManhattanMoves
|
|
||||||
c = Constants
|
c = Constants
|
||||||
|
|
||||||
ACTIONMAP = defaultdict(lambda: (0, 0), {m.NORTH: (-1, 0), d.NORTHEAST: (-1, +1),
|
ACTIONMAP = defaultdict(lambda: (0, 0), {m.NORTH: (-1, 0), m.NORTHEAST: (-1, +1),
|
||||||
m.EAST: (0, 1), d.SOUTHEAST: (1, 1),
|
m.EAST: (0, 1), m.SOUTHEAST: (1, 1),
|
||||||
m.SOUTH: (1, 0), d.SOUTHWEST: (+1, -1),
|
m.SOUTH: (1, 0), m.SOUTHWEST: (+1, -1),
|
||||||
m.WEST: (0, -1), d.NORTHWEST: (-1, -1)
|
m.WEST: (0, -1), m.NORTHWEST: (-1, -1)
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -126,8 +134,10 @@ def asset_str(agent):
|
|||||||
return 'agent_collision', 'blank'
|
return 'agent_collision', 'blank'
|
||||||
elif not agent.temp_valid or c.LEVEL.name in col_names or c.AGENT.name in col_names:
|
elif not agent.temp_valid or c.LEVEL.name in col_names or c.AGENT.name in col_names:
|
||||||
return c.AGENT.value, 'invalid'
|
return c.AGENT.value, 'invalid'
|
||||||
elif agent.temp_valid:
|
elif agent.temp_valid and not MovingAction.is_member(agent.temp_action):
|
||||||
return c.AGENT.value, 'valid'
|
return c.AGENT.value, 'valid'
|
||||||
|
elif agent.temp_valid and MovingAction.is_member(agent.temp_action):
|
||||||
|
return c.AGENT.value, 'move'
|
||||||
else:
|
else:
|
||||||
return c.AGENT.value, 'idle'
|
return c.AGENT.value, 'idle'
|
||||||
|
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
import pickle
|
import pickle
|
||||||
|
from collections import defaultdict
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List, Dict
|
from typing import List, Dict
|
||||||
|
|
||||||
@ -17,7 +18,7 @@ class MonitorCallback(BaseCallback):
|
|||||||
super(MonitorCallback, self).__init__()
|
super(MonitorCallback, self).__init__()
|
||||||
self.filepath = Path(filepath)
|
self.filepath = Path(filepath)
|
||||||
self._monitor_df = pd.DataFrame()
|
self._monitor_df = pd.DataFrame()
|
||||||
self._monitor_dict = dict()
|
self._monitor_dicts = defaultdict(dict)
|
||||||
self.plotting = plotting
|
self.plotting = plotting
|
||||||
self.started = False
|
self.started = False
|
||||||
self.closed = False
|
self.closed = False
|
||||||
@ -69,16 +70,22 @@ class MonitorCallback(BaseCallback):
|
|||||||
|
|
||||||
def _on_step(self, alt_infos: List[Dict] = None, alt_dones: List[bool] = None) -> bool:
|
def _on_step(self, alt_infos: List[Dict] = None, alt_dones: List[bool] = None) -> bool:
|
||||||
infos = alt_infos or self.locals.get('infos', [])
|
infos = alt_infos or self.locals.get('infos', [])
|
||||||
dones = alt_dones or self.locals.get('dones', None) or self.locals.get('done', [None])
|
if alt_dones is not None:
|
||||||
for _, info in enumerate(infos):
|
dones = alt_dones
|
||||||
self._monitor_dict[self.num_timesteps] = {key: val for key, val in info.items()
|
elif self.locals.get('dones', None) is not None:
|
||||||
|
dones =self.locals.get('dones', None)
|
||||||
|
elif self.locals.get('dones', None) is not None:
|
||||||
|
dones = self.locals.get('done', [None])
|
||||||
|
else:
|
||||||
|
dones = []
|
||||||
|
|
||||||
|
for env_idx, (info, done) in enumerate(zip(infos, dones)):
|
||||||
|
self._monitor_dicts[env_idx][self.num_timesteps - env_idx] = {key: val for key, val in info.items()
|
||||||
if key not in ['terminal_observation', 'episode']
|
if key not in ['terminal_observation', 'episode']
|
||||||
and not key.startswith('rec_')}
|
and not key.startswith('rec_')}
|
||||||
|
|
||||||
for env_idx, done in enumerate(dones):
|
|
||||||
if done:
|
if done:
|
||||||
env_monitor_df = pd.DataFrame.from_dict(self._monitor_dict, orient='index')
|
env_monitor_df = pd.DataFrame.from_dict(self._monitor_dicts[env_idx], orient='index')
|
||||||
self._monitor_dict = dict()
|
self._monitor_dicts[env_idx] = dict()
|
||||||
columns = [col for col in env_monitor_df.columns if col not in IGNORED_DF_COLUMNS]
|
columns = [col for col in env_monitor_df.columns if col not in IGNORED_DF_COLUMNS]
|
||||||
env_monitor_df = env_monitor_df.aggregate(
|
env_monitor_df = env_monitor_df.aggregate(
|
||||||
{col: 'mean' if col.endswith('ount') else 'sum' for col in columns}
|
{col: 'mean' if col.endswith('ount') else 'sum' for col in columns}
|
||||||
|
43
main.py
43
main.py
@ -8,6 +8,7 @@ import time
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
from stable_baselines3.common.callbacks import CallbackList
|
from stable_baselines3.common.callbacks import CallbackList
|
||||||
|
from stable_baselines3.common.vec_env import SubprocVecEnv
|
||||||
|
|
||||||
from environments.factory.double_task_factory import DoubleTaskFactory, ItemProperties
|
from environments.factory.double_task_factory import DoubleTaskFactory, ItemProperties
|
||||||
from environments.factory.simple_factory import DirtProperties, SimpleFactory
|
from environments.factory.simple_factory import DirtProperties, SimpleFactory
|
||||||
@ -84,8 +85,20 @@ def compare_runs(run_path: Path, run_identifier: int, parameter: Union[str, List
|
|||||||
print('Plotting done.')
|
print('Plotting done.')
|
||||||
|
|
||||||
|
|
||||||
|
def make_env(env_kwargs_dict):
|
||||||
|
|
||||||
|
def _init():
|
||||||
|
with SimpleFactory(**env_kwargs_dict) as init_env:
|
||||||
|
return init_env
|
||||||
|
|
||||||
|
return _init
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
|
# combine_runs(Path('debug_out') / 'A2C_1630314192')
|
||||||
|
# exit()
|
||||||
|
|
||||||
# compare_runs(Path('debug_out'), 1623052687, ['step_reward'])
|
# compare_runs(Path('debug_out'), 1623052687, ['step_reward'])
|
||||||
# exit()
|
# exit()
|
||||||
|
|
||||||
@ -93,31 +106,33 @@ if __name__ == '__main__':
|
|||||||
from algorithms.reg_dqn import RegDQN
|
from algorithms.reg_dqn import RegDQN
|
||||||
# from sb3_contrib import QRDQN
|
# from sb3_contrib import QRDQN
|
||||||
|
|
||||||
dirt_props = DirtProperties(clean_amount=1, gain_amount=0.1, max_global_amount=20,
|
dirt_props = DirtProperties(clean_amount=2, gain_amount=0.1, max_global_amount=20,
|
||||||
max_local_amount=1, spawn_frequency=5, max_spawn_ratio=0.05,
|
max_local_amount=1, spawn_frequency=3, max_spawn_ratio=0.05,
|
||||||
dirt_smear_amount=0.0, agent_can_interact=False)
|
dirt_smear_amount=0.0, agent_can_interact=True)
|
||||||
item_props = ItemProperties(n_items=5, agent_can_interact=True)
|
item_props = ItemProperties(n_items=5, agent_can_interact=True)
|
||||||
move_props = MovementProperties(allow_diagonal_movement=True,
|
move_props = MovementProperties(allow_diagonal_movement=False,
|
||||||
allow_square_movement=True,
|
allow_square_movement=True,
|
||||||
allow_no_op=False)
|
allow_no_op=False)
|
||||||
train_steps = 6e5
|
train_steps = 1e6
|
||||||
time_stamp = int(time.time())
|
time_stamp = int(time.time())
|
||||||
|
|
||||||
out_path = None
|
out_path = None
|
||||||
|
|
||||||
for modeL_type in [A2C, PPO, DQN]: # ,RegDQN, QRDQN]:
|
for modeL_type in [A2C, PPO, DQN]: # ,RegDQN, QRDQN]:
|
||||||
for seed in range(3):
|
for seed in range(3):
|
||||||
|
env_kwargs = dict(n_agents=1,
|
||||||
with SimpleFactory(n_agents=1,
|
|
||||||
# with_dirt=True,
|
# with_dirt=True,
|
||||||
# item_properties=item_props,
|
# item_properties=item_props,
|
||||||
dirt_properties=dirt_props,
|
dirt_properties=dirt_props,
|
||||||
movement_properties=move_props,
|
movement_properties=move_props,
|
||||||
pomdp_radius=2, max_steps=500, parse_doors=True,
|
pomdp_r=2, max_steps=400, parse_doors=True,
|
||||||
level_name='rooms', frames_to_stack=3,
|
level_name='simple', frames_to_stack=6,
|
||||||
omit_agent_in_obs=True, combin_agent_slices_in_obs=True, record_episodes=False,
|
omit_agent_in_obs=True, combin_agent_obs=True, record_episodes=False,
|
||||||
cast_shadows=True, doors_have_area=False, seed=seed, verbose=False,
|
cast_shadows=True, doors_have_area=False, env_seed=seed, verbose=False,
|
||||||
) as env:
|
)
|
||||||
|
|
||||||
|
# env = make_env(env_kwargs)()
|
||||||
|
env = SubprocVecEnv([make_env(env_kwargs) for _ in range(12)], start_method="spawn")
|
||||||
|
|
||||||
if modeL_type.__name__ in ["PPO", "A2C"]:
|
if modeL_type.__name__ in ["PPO", "A2C"]:
|
||||||
kwargs = dict(ent_coef=0.01)
|
kwargs = dict(ent_coef=0.01)
|
||||||
@ -129,7 +144,7 @@ if __name__ == '__main__':
|
|||||||
exploration_fraction=0.25,
|
exploration_fraction=0.25,
|
||||||
exploration_final_eps=0.025)
|
exploration_final_eps=0.025)
|
||||||
else:
|
else:
|
||||||
raise NameError(f'The model "{model.__name__}" has the wrong name.')
|
raise NameError(f'The model "{modeL_type.__name__}" has the wrong name.')
|
||||||
model = modeL_type("MlpPolicy", env, verbose=1, seed=seed, device='cpu', **kwargs)
|
model = modeL_type("MlpPolicy", env, verbose=1, seed=seed, device='cpu', **kwargs)
|
||||||
|
|
||||||
out_path = Path('debug_out') / f'{model.__class__.__name__}_{time_stamp}'
|
out_path = Path('debug_out') / f'{model.__class__.__name__}_{time_stamp}'
|
||||||
@ -150,7 +165,7 @@ if __name__ == '__main__':
|
|||||||
save_path = out_path / f'model_{identifier}.zip'
|
save_path = out_path / f'model_{identifier}.zip'
|
||||||
save_path.parent.mkdir(parents=True, exist_ok=True)
|
save_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
model.save(save_path)
|
model.save(save_path)
|
||||||
env.save_params(out_path.parent / f'env_{model.__class__.__name__}_{time_stamp}.yaml')
|
env.env_method('save_params', out_path.parent / f'env_{model.__class__.__name__}_{time_stamp}.yaml')
|
||||||
print("Model Trained and saved")
|
print("Model Trained and saved")
|
||||||
print("Model Group Done.. Plotting...")
|
print("Model Group Done.. Plotting...")
|
||||||
|
|
||||||
|
@ -3,7 +3,7 @@ from pathlib import Path
|
|||||||
|
|
||||||
import yaml
|
import yaml
|
||||||
from natsort import natsorted
|
from natsort import natsorted
|
||||||
from stable_baselines3 import PPO
|
from stable_baselines3 import PPO, DQN, A2C
|
||||||
from stable_baselines3.common.evaluation import evaluate_policy
|
from stable_baselines3.common.evaluation import evaluate_policy
|
||||||
|
|
||||||
from environments.factory.simple_factory import DirtProperties, SimpleFactory
|
from environments.factory.simple_factory import DirtProperties, SimpleFactory
|
||||||
@ -12,16 +12,19 @@ from environments.factory.double_task_factory import ItemProperties, DoubleTaskF
|
|||||||
warnings.filterwarnings('ignore', category=FutureWarning)
|
warnings.filterwarnings('ignore', category=FutureWarning)
|
||||||
warnings.filterwarnings('ignore', category=UserWarning)
|
warnings.filterwarnings('ignore', category=UserWarning)
|
||||||
|
|
||||||
|
model_map = dict(PPO=PPO, DQN=DQN, A2C=A2C)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
model_name = 'A2C_1630073286'
|
model_name = 'A2C_1630414444'
|
||||||
run_id = 0
|
run_id = 0
|
||||||
|
seed=69
|
||||||
out_path = Path(__file__).parent / 'debug_out'
|
out_path = Path(__file__).parent / 'debug_out'
|
||||||
model_path = out_path / model_name
|
model_path = out_path / model_name
|
||||||
|
|
||||||
with (model_path / f'env_{model_name}.yaml').open('r') as f:
|
with (model_path / f'env_{model_name}.yaml').open('r') as f:
|
||||||
env_kwargs = yaml.load(f, Loader=yaml.FullLoader)
|
env_kwargs = yaml.load(f, Loader=yaml.FullLoader)
|
||||||
|
env_kwargs.update(verbose=True, env_seed=seed)
|
||||||
if False:
|
if False:
|
||||||
env_kwargs.update(dirt_properties=DirtProperties(clean_amount=1, gain_amount=0.1, max_global_amount=20,
|
env_kwargs.update(dirt_properties=DirtProperties(clean_amount=1, gain_amount=0.1, max_global_amount=20,
|
||||||
max_local_amount=1, spawn_frequency=5, max_spawn_ratio=0.05,
|
max_local_amount=1, spawn_frequency=5, max_spawn_ratio=0.05,
|
||||||
@ -30,9 +33,10 @@ if __name__ == '__main__':
|
|||||||
with SimpleFactory(**env_kwargs) as env:
|
with SimpleFactory(**env_kwargs) as env:
|
||||||
|
|
||||||
# Edit THIS:
|
# Edit THIS:
|
||||||
|
env.seed(seed)
|
||||||
model_files = list(natsorted((model_path / f'{run_id}_{model_name}').rglob('model_*.zip')))
|
model_files = list(natsorted((model_path / f'{run_id}_{model_name}').rglob('model_*.zip')))
|
||||||
this_model = model_files[0]
|
this_model = model_files[0]
|
||||||
|
model_cls = next(val for key, val in model_map.items() if key in model_name)
|
||||||
model = PPO.load(this_model)
|
model = model_cls.load(this_model)
|
||||||
evaluation_result = evaluate_policy(model, env, n_eval_episodes=100, deterministic=False, render=True)
|
evaluation_result = evaluate_policy(model, env, n_eval_episodes=100, deterministic=True, render=True)
|
||||||
print(evaluation_result)
|
print(evaluation_result)
|
||||||
|
Reference in New Issue
Block a user