Debugging

This commit is contained in:
Steffen Illium
2022-01-11 10:54:02 +01:00
parent 435056f373
commit 3150757347
6 changed files with 67 additions and 58 deletions

View File

@ -35,7 +35,7 @@ class BaseFactory(gym.Env):
@property
def named_action_space(self):
return {x.identifier.value: idx for idx, x in enumerate(self._actions.values())}
return {x.identifier: idx for idx, x in enumerate(self._actions.values())}
@property
def observation_space(self):
@ -287,7 +287,7 @@ class BaseFactory(gym.Env):
doors.tick_doors()
# Finalize
reward, reward_info = self.build_reward_result()
reward, reward_info = self.build_reward_result(rewards)
info.update(reward_info)
if self._steps >= self.max_steps:
@ -313,8 +313,8 @@ class BaseFactory(gym.Env):
if door is not None:
door.use()
valid = c.VALID
self.print(f'{agent.name} just used a door {door.name}')
info_dict = {f'{agent.name}_door_use_{door.name}': 1}
self.print(f'{agent.name} just used a {door.name} at {door.pos}')
info_dict = {f'{agent.name}_door_use': 1}
# When there is no door to use (door is None)...
else:
valid = c.NOT_VALID
@ -478,8 +478,7 @@ class BaseFactory(gym.Env):
return oobs
def get_all_tiles_with_collisions(self) -> List[Tile]:
tiles = [x.tile for y in self._entities for x in y if
y.can_collide and not isinstance(y, WallTiles) and x.can_collide and len(x.tile.guests) > 1]
tiles = [x for x in self[c.FLOOR] if len(x.guests_that_can_collide) > 1]
if False:
tiles_with_collisions = list()
for tile in self[c.FLOOR]:
@ -503,11 +502,11 @@ class BaseFactory(gym.Env):
else:
valid = c.NOT_VALID
self.print(f'{agent.name} just hit the wall at {agent.pos}.')
info_dict.update({f'{agent.pos}_wall_collide': 1})
info_dict.update({f'{agent.name}_wall_collide': 1})
else:
# Agent seems to be trying to leave the level
self.print(f'{agent.name} tried to leave the level {agent.pos}.')
info_dict.update({f'{agent.pos}_wall_collide': 1})
info_dict.update({f'{agent.name}_wall_collide': 1})
reward_value = r.MOVEMENTS_VALID if valid else r.MOVEMENTS_FAIL
reward = {'value': reward_value, 'reason': action.identifier, 'info': info_dict}
return valid, reward
@ -554,7 +553,7 @@ class BaseFactory(gym.Env):
def additional_per_agent_rewards(self, agent) -> List[dict]:
return []
def build_reward_result(self) -> (int, dict):
def build_reward_result(self, global_env_rewards: list) -> (int, dict):
# Returns: Reward, Info
info = defaultdict(lambda: 0.0)
@ -584,12 +583,14 @@ class BaseFactory(gym.Env):
combined_info_dict = dict(combined_info_dict)
combined_info_dict.update(info)
global_reward_sum = sum(global_env_rewards)
if self.individual_rewards:
self.print(f"rewards are {comb_rewards}")
reward = list(comb_rewards.values())
reward = [x + global_reward_sum for x in reward]
return reward, combined_info_dict
else:
reward = sum(comb_rewards.values())
reward = sum(comb_rewards.values()) + global_reward_sum
self.print(f"reward is {reward}")
return reward, combined_info_dict

View File

@ -268,7 +268,7 @@ class DirtFactory(BaseFactory):
if __name__ == '__main__':
from environments.utility_classes import AgentRenderOptions as aro
render = False
render = True
dirt_props = DirtProperties(
initial_dirt_ratio=0.35,
@ -293,11 +293,11 @@ if __name__ == '__main__':
global_timings = []
for i in range(10):
factory = DirtFactory(n_agents=2, done_at_collision=False,
factory = DirtFactory(n_agents=4, done_at_collision=False,
level_name='rooms', max_steps=1000,
doors_have_area=False,
obs_prop=obs_props, parse_doors=True,
verbose=False,
verbose=True,
mv_prop=move_props, dirt_prop=dirt_props,
# inject_agents=[TSPDirtAgent],
)
@ -307,6 +307,7 @@ if __name__ == '__main__':
_ = factory.observation_space
obs_space = factory.observation_space
obs_space_named = factory.named_observation_space
action_space_named = factory.named_action_space
times = []
for epoch in range(10):
start_time = time.time()

View File

@ -78,12 +78,12 @@ class EnvActions:
class Rewards:
MOVEMENTS_VALID = -0.001
MOVEMENTS_FAIL = -0.001
NOOP = -0.1
USE_DOOR_VALID = -0.001
USE_DOOR_FAIL = -0.001
COLLISION = -1
MOVEMENTS_VALID = -0.01
MOVEMENTS_FAIL = -0.1
NOOP = -0.01
USE_DOOR_VALID = -0.01
USE_DOOR_FAIL = -0.1
COLLISION = -0.5
m = EnvActions
@ -120,7 +120,7 @@ class ObservationTranslator:
def translate_observation(self, agent_idx: int, obs: np.ndarray):
target_obs_space = self._per_agent_named_obs_space[agent_idx]
translation = [idx_space_dict['explained_idxs'] for name, idx_space_dict in target_obs_space.items()]
translation = [idx_space_dict for name, idx_space_dict in target_obs_space.items()]
flat_translation = [x for y in translation for x in y]
return np.take(obs, flat_translation, axis=1 if obs.ndim == 4 else 0)