Debugging
This commit is contained in:
@ -35,7 +35,7 @@ class BaseFactory(gym.Env):
|
||||
|
||||
@property
|
||||
def named_action_space(self):
|
||||
return {x.identifier.value: idx for idx, x in enumerate(self._actions.values())}
|
||||
return {x.identifier: idx for idx, x in enumerate(self._actions.values())}
|
||||
|
||||
@property
|
||||
def observation_space(self):
|
||||
@ -287,7 +287,7 @@ class BaseFactory(gym.Env):
|
||||
doors.tick_doors()
|
||||
|
||||
# Finalize
|
||||
reward, reward_info = self.build_reward_result()
|
||||
reward, reward_info = self.build_reward_result(rewards)
|
||||
|
||||
info.update(reward_info)
|
||||
if self._steps >= self.max_steps:
|
||||
@ -313,8 +313,8 @@ class BaseFactory(gym.Env):
|
||||
if door is not None:
|
||||
door.use()
|
||||
valid = c.VALID
|
||||
self.print(f'{agent.name} just used a door {door.name}')
|
||||
info_dict = {f'{agent.name}_door_use_{door.name}': 1}
|
||||
self.print(f'{agent.name} just used a {door.name} at {door.pos}')
|
||||
info_dict = {f'{agent.name}_door_use': 1}
|
||||
# When he doesn't...
|
||||
else:
|
||||
valid = c.NOT_VALID
|
||||
@ -478,8 +478,7 @@ class BaseFactory(gym.Env):
|
||||
return oobs
|
||||
|
||||
def get_all_tiles_with_collisions(self) -> List[Tile]:
|
||||
tiles = [x.tile for y in self._entities for x in y if
|
||||
y.can_collide and not isinstance(y, WallTiles) and x.can_collide and len(x.tile.guests) > 1]
|
||||
tiles = [x for x in self[c.FLOOR] if len(x.guests_that_can_collide) > 1]
|
||||
if False:
|
||||
tiles_with_collisions = list()
|
||||
for tile in self[c.FLOOR]:
|
||||
@ -503,11 +502,11 @@ class BaseFactory(gym.Env):
|
||||
else:
|
||||
valid = c.NOT_VALID
|
||||
self.print(f'{agent.name} just hit the wall at {agent.pos}.')
|
||||
info_dict.update({f'{agent.pos}_wall_collide': 1})
|
||||
info_dict.update({f'{agent.name}_wall_collide': 1})
|
||||
else:
|
||||
# Agent seems to be trying to Leave the level
|
||||
self.print(f'{agent.name} tried to leave the level {agent.pos}.')
|
||||
info_dict.update({f'{agent.pos}_wall_collide': 1})
|
||||
info_dict.update({f'{agent.name}_wall_collide': 1})
|
||||
reward_value = r.MOVEMENTS_VALID if valid else r.MOVEMENTS_FAIL
|
||||
reward = {'value': reward_value, 'reason': action.identifier, 'info': info_dict}
|
||||
return valid, reward
|
||||
@ -554,7 +553,7 @@ class BaseFactory(gym.Env):
|
||||
def additional_per_agent_rewards(self, agent) -> List[dict]:
|
||||
return []
|
||||
|
||||
def build_reward_result(self) -> (int, dict):
|
||||
def build_reward_result(self, global_env_rewards: list) -> (int, dict):
|
||||
# Returns: Reward, Info
|
||||
info = defaultdict(lambda: 0.0)
|
||||
|
||||
@ -584,12 +583,14 @@ class BaseFactory(gym.Env):
|
||||
combined_info_dict = dict(combined_info_dict)
|
||||
combined_info_dict.update(info)
|
||||
|
||||
global_reward_sum = sum(global_env_rewards)
|
||||
if self.individual_rewards:
|
||||
self.print(f"rewards are {comb_rewards}")
|
||||
reward = list(comb_rewards.values())
|
||||
reward = [x + global_reward_sum for x in reward]
|
||||
return reward, combined_info_dict
|
||||
else:
|
||||
reward = sum(comb_rewards.values())
|
||||
reward = sum(comb_rewards.values()) + global_reward_sum
|
||||
self.print(f"reward is {reward}")
|
||||
return reward, combined_info_dict
|
||||
|
||||
|
@ -268,7 +268,7 @@ class DirtFactory(BaseFactory):
|
||||
|
||||
if __name__ == '__main__':
|
||||
from environments.utility_classes import AgentRenderOptions as aro
|
||||
render = False
|
||||
render = True
|
||||
|
||||
dirt_props = DirtProperties(
|
||||
initial_dirt_ratio=0.35,
|
||||
@ -293,11 +293,11 @@ if __name__ == '__main__':
|
||||
global_timings = []
|
||||
for i in range(10):
|
||||
|
||||
factory = DirtFactory(n_agents=2, done_at_collision=False,
|
||||
factory = DirtFactory(n_agents=4, done_at_collision=False,
|
||||
level_name='rooms', max_steps=1000,
|
||||
doors_have_area=False,
|
||||
obs_prop=obs_props, parse_doors=True,
|
||||
verbose=False,
|
||||
verbose=True,
|
||||
mv_prop=move_props, dirt_prop=dirt_props,
|
||||
# inject_agents=[TSPDirtAgent],
|
||||
)
|
||||
@ -307,6 +307,7 @@ if __name__ == '__main__':
|
||||
_ = factory.observation_space
|
||||
obs_space = factory.observation_space
|
||||
obs_space_named = factory.named_observation_space
|
||||
action_space_named = factory.named_action_space
|
||||
times = []
|
||||
for epoch in range(10):
|
||||
start_time = time.time()
|
||||
|
@ -78,12 +78,12 @@ class EnvActions:
|
||||
|
||||
class Rewards:
|
||||
|
||||
MOVEMENTS_VALID = -0.001
|
||||
MOVEMENTS_FAIL = -0.001
|
||||
NOOP = -0.1
|
||||
USE_DOOR_VALID = -0.001
|
||||
USE_DOOR_FAIL = -0.001
|
||||
COLLISION = -1
|
||||
MOVEMENTS_VALID = -0.01
|
||||
MOVEMENTS_FAIL = -0.1
|
||||
NOOP = -0.01
|
||||
USE_DOOR_VALID = -0.01
|
||||
USE_DOOR_FAIL = -0.1
|
||||
COLLISION = -0.5
|
||||
|
||||
|
||||
m = EnvActions
|
||||
@ -120,7 +120,7 @@ class ObservationTranslator:
|
||||
|
||||
def translate_observation(self, agent_idx: int, obs: np.ndarray):
|
||||
target_obs_space = self._per_agent_named_obs_space[agent_idx]
|
||||
translation = [idx_space_dict['explained_idxs'] for name, idx_space_dict in target_obs_space.items()]
|
||||
translation = [idx_space_dict for name, idx_space_dict in target_obs_space.items()]
|
||||
flat_translation = [x for y in translation for x in y]
|
||||
return np.take(obs, flat_translation, axis=1 if obs.ndim == 4 else 0)
|
||||
|
||||
|
Reference in New Issue
Block a user