pomdp
This commit is contained in:
@ -99,7 +99,8 @@ class SimpleFactory(BaseFactory):
|
||||
self.next_dirt_spawn = self._dirt_properties.spawn_frequency
|
||||
else:
|
||||
self.next_dirt_spawn -= 1
|
||||
return self.state, r, done, info
|
||||
obs = self._return_state()
|
||||
return obs, r, done, info
|
||||
|
||||
def do_additional_actions(self, agent_i: int, action: int) -> ((int, int), bool):
|
||||
if action != self._is_moving_action(action):
|
||||
@ -118,12 +119,14 @@ class SimpleFactory(BaseFactory):
|
||||
self.state = np.concatenate((self.state, dirt_slice)) # dirt is now the last slice
|
||||
self.spawn_dirt()
|
||||
self.next_dirt_spawn = self._dirt_properties.spawn_frequency
|
||||
return self.state
|
||||
obs = self._return_state()
|
||||
return obs
|
||||
|
||||
def calculate_reward(self, agent_states: List[AgentState]) -> (int, dict):
|
||||
# TODO: What reward to use?
|
||||
current_dirt_amount = self.state[DIRT_INDEX].sum()
|
||||
dirty_tiles = np.argwhere(self.state[DIRT_INDEX] != h.IS_FREE_CELL).shape[0]
|
||||
info_dict = dict()
|
||||
|
||||
try:
|
||||
# penalty = current_dirt_amount
|
||||
@ -143,33 +146,35 @@ class SimpleFactory(BaseFactory):
|
||||
if agent_state.action_valid:
|
||||
reward += 1
|
||||
self.print(f'Agent {agent_state.i} did just clean up some dirt at {agent_state.pos}.')
|
||||
self.monitor.set('dirt_cleaned', 1)
|
||||
info_dict.update(dirt_cleaned=1)
|
||||
else:
|
||||
reward -= 0.5
|
||||
reward -= 0.0
|
||||
self.print(f'Agent {agent_state.i} just tried to clean up some dirt '
|
||||
f'at {agent_state.pos}, but was unsucsessfull.')
|
||||
self.monitor.set('failed_cleanup_attempt', 1)
|
||||
info_dict.update(failed_cleanup_attempt=1)
|
||||
|
||||
elif self._is_moving_action(agent_state.action):
|
||||
if agent_state.action_valid:
|
||||
info_dict.update(movement=1)
|
||||
reward -= 0.00
|
||||
else:
|
||||
reward -= 0.5
|
||||
info_dict.update(collision=1)
|
||||
reward -= 0.00
|
||||
|
||||
else:
|
||||
self.monitor.set('no_op', 1)
|
||||
reward -= 0.1
|
||||
info_dict.update(collision=1)
|
||||
reward -= 0.00
|
||||
|
||||
for entity in cols:
|
||||
if entity != self.state_slices.by_name("dirt"):
|
||||
self.monitor.set(f'agent_{agent_state.i}_vs_{self.state_slices[entity]}', 1)
|
||||
info_dict.update({f'agent_{agent_state.i}_vs_{self.state_slices[entity]}': 1})
|
||||
|
||||
self.monitor.set('dirt_amount', current_dirt_amount)
|
||||
self.monitor.set('dirty_tile_count', dirty_tiles)
|
||||
info_dict.update(dirt_amount=current_dirt_amount)
|
||||
info_dict.update(dirty_tile_count=dirty_tiles)
|
||||
self.print(f"reward is {reward}")
|
||||
# Potential based rewards ->
|
||||
# track the last reward , minus the current reward = potential
|
||||
return reward, {}
|
||||
return reward, info_dict
|
||||
|
||||
def print(self, string):
|
||||
if self.verbose:
|
||||
|
Reference in New Issue
Block a user