Stable Baseline Running

This commit is contained in:
steffen-illium
2021-05-19 18:27:22 +02:00
parent 134f06b3d7
commit c1cb7a4ffc
3 changed files with 15 additions and 13 deletions

View File

@@ -120,8 +120,8 @@ class SimpleFactory(BaseFactory):
dirty_tiles = len(np.nonzero(self.state[DIRT_INDEX]))
try:
this_step_reward = -(dirty_tiles / current_dirt_amount)
except ZeroDivisionError:
this_step_reward = (dirty_tiles / current_dirt_amount)
except (ZeroDivisionError, RuntimeWarning):
this_step_reward = 0
for agent_state in agent_states:
@@ -136,6 +136,7 @@ class SimpleFactory(BaseFactory):
self.monitor.add(f'agent_{agent_state.i}_vs_{self.slice_strings[entity]}', 1)
self.monitor.set('dirt_amount', current_dirt_amount)
self.monitor.set('dirty_tiles', dirty_tiles)
print(f"reward is {this_step_reward}")
return this_step_reward, {}