Individual Rewards

This commit is contained in:
Steffen Illium
2021-11-16 12:14:11 +01:00
parent b6bda84033
commit 0fe90f3ac0
11 changed files with 130 additions and 108 deletions

View File

@ -61,7 +61,8 @@ class BaseFactory(gym.Env):
mv_prop: MovementProperties = MovementProperties(),
obs_prop: ObservationProperties = ObservationProperties(),
parse_doors=False, record_episodes=False, done_at_collision=False,
verbose=False, doors_have_area=True, env_seed=time.time_ns(), **kwargs):
verbose=False, doors_have_area=True, env_seed=time.time_ns(), individual_rewards=False,
**kwargs):
if isinstance(mv_prop, dict):
mv_prop = MovementProperties(**mv_prop)
@ -94,6 +95,7 @@ class BaseFactory(gym.Env):
self.record_episodes = record_episodes
self.parse_doors = parse_doors
self.doors_have_area = doors_have_area
self.individual_rewards = individual_rewards
# Reset
self.reset()
@ -487,31 +489,32 @@ class BaseFactory(gym.Env):
def calculate_reward(self) -> (int, dict):
# Returns: Reward, Info
per_agent_info_dict = defaultdict(dict)
reward = 0
reward = {}
for agent in self[c.AGENT]:
per_agent_reward = 0
if self._actions.is_moving_action(agent.temp_action):
if agent.temp_valid:
# info_dict.update(movement=1)
reward -= 0.01
per_agent_reward -= 0.01
pass
else:
reward -= 0.05
per_agent_reward -= 0.05
self.print(f'{agent.name} just hit the wall at {agent.pos}.')
per_agent_info_dict[agent.name].update({f'{agent.name}_vs_LEVEL': 1})
elif h.EnvActions.USE_DOOR == agent.temp_action:
if agent.temp_valid:
# reward += 0.00
# per_agent_reward += 0.00
self.print(f'{agent.name} did just use the door at {agent.pos}.')
per_agent_info_dict[agent.name].update(door_used=1)
else:
# reward -= 0.00
# per_agent_reward -= 0.00
self.print(f'{agent.name} just tried to use a door at {agent.pos}, but failed.')
per_agent_info_dict[agent.name].update({f'{agent.name}_failed_door_open': 1})
elif h.EnvActions.NOOP == agent.temp_action:
per_agent_info_dict[agent.name].update(no_op=1)
# reward -= 0.00
# per_agent_reward -= 0.00
# Monitor Notes
if agent.temp_valid:
@ -522,7 +525,7 @@ class BaseFactory(gym.Env):
per_agent_info_dict[agent.name].update({f'{agent.name}_failed_action': 1})
additional_reward, additional_info_dict = self.calculate_additional_reward(agent)
reward += additional_reward
per_agent_reward += additional_reward
per_agent_info_dict[agent.name].update(additional_info_dict)
if agent.temp_collisions:
@ -531,6 +534,7 @@ class BaseFactory(gym.Env):
for other_agent in agent.temp_collisions:
per_agent_info_dict[agent.name].update({f'{agent.name}_vs_{other_agent.name}': 1})
reward[agent.name] = per_agent_reward
# Combine the per_agent_info_dict:
combined_info_dict = defaultdict(lambda: 0)
@ -539,7 +543,13 @@ class BaseFactory(gym.Env):
combined_info_dict[key] += value
combined_info_dict = dict(combined_info_dict)
self.print(f"reward is {reward}")
if self.individual_rewards:
self.print(f"rewards are {reward}")
reward = list(reward.values())
return reward, combined_info_dict
else:
reward = sum(reward.values())
self.print(f"reward is {reward}")
return reward, combined_info_dict
def render(self, mode='human'):