Debugging done, but he does not learn

This commit is contained in:
Steffen Illium
2021-08-27 16:36:19 +02:00
parent 4731f63ba6
commit 2bf9aaed15
6 changed files with 20 additions and 25 deletions

View File

@ -181,7 +181,7 @@ class BaseFactory(gym.Env):
# Move this into a separate function?
for action, agent in zip(actions, self[c.AGENT]):
agent.clear_temp_sate()
action_obj = self._actions[action]
action_obj = self._actions[int(action)]
if self._actions.is_moving_action(action_obj):
valid = self._move_or_colide(agent, action_obj)
elif h.EnvActions.NOOP == agent.temp_action:
@ -285,6 +285,7 @@ class BaseFactory(gym.Env):
if r := self.pomdp_r:
x, y = self._level_shape
self._padded_obs_cube[:] = c.SHADOWED_CELL.value
self._padded_obs_cube[:, r:r + x, r:r + y] = self._obs_cube
global_x, global_y = map(sum, zip(agent.pos, (r, r)))
x0, x1 = max(0, global_x - self.pomdp_r), global_x + self.pomdp_r + 1
@ -321,9 +322,7 @@ class BaseFactory(gym.Env):
light_block_map[xs, ys] = 0
agent.temp_light_map = light_block_map
for obs_idx in can_be_shadowed_idxs:
obs[obs_idx] = (obs[obs_idx] * light_block_map) - (
(1 - light_block_map) * obs[0]
)
obs[obs_idx] = ((obs[obs_idx] * light_block_map) + 0.) - (1 - light_block_map) # * obs[0])
return obs
else:
@ -404,7 +403,7 @@ class BaseFactory(gym.Env):
self.print(f'{agent.name} did just use the door at {agent.pos}.')
info_dict.update(door_used=1)
else:
reward -= 0.01
reward -= 0.00
self.print(f'{agent.name} just tried to use a door at {agent.pos}, but failed.')
info_dict.update({f'{agent.name}_failed_action': 1})
info_dict.update({f'{agent.name}_failed_door_open': 1})
@ -416,6 +415,9 @@ class BaseFactory(gym.Env):
reward += additional_reward
info_dict.update(additional_info_dict)
if agent.temp_collisions:
self.print(f't = {self._steps}\t{agent.name} has collisions with {agent.temp_collisions}')
for other_agent in agent.temp_collisions:
info_dict.update({f'{agent.name}_vs_{other_agent.name}': 1})

View File

@ -48,7 +48,7 @@ class Register:
return self._register.items()
def __getitem__(self, item):
if isinstance(item, int):
if isinstance(item, (int, np.int64, np.int32)):
try:
return next(v for i, v in enumerate(self._register.values()) if i == item)
except StopIteration:

View File

@ -291,7 +291,7 @@ if __name__ == '__main__':
factory = DoubleTaskFactory(item_props, n_agents=3, done_at_collision=False, frames_to_stack=0,
level_name='rooms', max_steps=4000,
omit_agent_slice_in_obs=True, parse_doors=True, pomdp_r=3,
omit_agent_in_obs=True, parse_doors=True, pomdp_r=3,
record_episodes=False, verbose=False
)

View File

@ -18,12 +18,6 @@ from environments.utility_classes import MovementProperties
CLEAN_UP_ACTION = h.EnvActions.CLEAN_UP
class ObsSlice(Enum):
OWN = -1
LEVEL = c.LEVEL.value
AGENT = c.AGENT.value
class DirtProperties(NamedTuple):
clean_amount: int = 1 # How much does the robot clean with one actions.
max_spawn_ratio: float = 0.2 # On max how much tiles does the dirt spawn in percent.
@ -33,7 +27,6 @@ class DirtProperties(NamedTuple):
max_global_amount: int = 20 # Max dirt amount in the whole environment.
dirt_smear_amount: float = 0.2 # Agents smear dirt, when not cleaning up in place.
agent_can_interact: bool = True # Whether the agents can interact with the dirt in this environment.
on_obs_slice: Enum = ObsSlice.LEVEL
class Dirt(Entity):
@ -217,16 +210,13 @@ class SimpleFactory(BaseFactory):
info_dict.update(dirty_tile_count=dirty_tile_count)
info_dict.update(dirt_distribution_score=dirt_distribution_score)
if agent.temp_collisions:
self.print(f't = {self._steps}\t{agent.name} has collisions with {agent.temp_collisions}')
if agent.temp_action == CLEAN_UP_ACTION:
if agent.temp_valid:
reward += 0.5
self.print(f'{agent.name} did just clean up some dirt at {agent.pos}.')
info_dict.update(dirt_cleaned=1)
else:
reward -= 0.01
reward -= 0.00
self.print(f'{agent.name} just tried to clean up some dirt at {agent.pos}, but failed.')
info_dict.update({f'{agent.name}_failed_action': 1})
info_dict.update({f'{agent.name}_failed_action': 1})
@ -244,9 +234,9 @@ if __name__ == '__main__':
move_props = MovementProperties(True, True, False)
factory = SimpleFactory(n_agents=1, done_at_collision=False, frames_to_stack=0,
level_name='rooms', max_steps=400,
omit_agent_slice_in_obs=True, parse_doors=True, pomdp_r=2,
record_episodes=False, verbose=False
level_name='rooms', max_steps=400, combin_agent_obs=True,
omit_agent_in_obs=True, parse_doors=True, pomdp_r=2,
record_episodes=False, verbose=True
)
# noinspection DuplicatedCode