From 3114cdffc36f10a8de56b4dd9025692b2bd8782c Mon Sep 17 00:00:00 2001 From: steffen-illium Date: Thu, 20 May 2021 09:58:10 +0200 Subject: [PATCH] reward, done -> fixed --- environments/factory/base_factory.py | 9 ++++----- environments/factory/simple_factory.py | 5 +++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/environments/factory/base_factory.py b/environments/factory/base_factory.py index 89dae4d..07ed3ea 100644 --- a/environments/factory/base_factory.py +++ b/environments/factory/base_factory.py @@ -68,9 +68,7 @@ class BaseFactory(gym.Env): raise NotImplementedError('Please register additional actions ') def reset(self) -> (np.ndarray, int, bool, dict): - self.done = False self.steps = 0 - self.cumulative_reward = 0 self.monitor = FactoryMonitor(self) self.agent_states = [] # Agent placement ... @@ -95,6 +93,7 @@ class BaseFactory(gym.Env): actions = [actions] if isinstance(actions, int) or np.isscalar(actions) else actions assert isinstance(actions, Iterable), f'"actions" has to be in [{int, list}]' self.steps += 1 + done = False # Move this in a seperate function? agent_states = list() @@ -113,15 +112,15 @@ class BaseFactory(gym.Env): for i, collision_vec in enumerate(self.check_all_collisions(agent_states, self.state.shape[0])): agent_states[i].update(collision_vector=collision_vec) if self.done_at_collision and collision_vec.any(): - self.done = True + done = True self.agent_states = agent_states reward, info = self.calculate_reward(agent_states) if self.steps >= self.max_steps: - self.done = True + done = True self.monitor.add('step_reward', reward) - return self.state, reward, self.done, info + return self.state, reward, done, info def _is_moving_action(self, action): return action < self.movement_actions diff --git a/environments/factory/simple_factory.py b/environments/factory/simple_factory.py index 1d2a2e0..bb0e207 100644 --- a/environments/factory/simple_factory.py +++ b/environments/factory/simple_factory.py @@ -82,16 +82,17 @@ class SimpleFactory(BaseFactory): return pos, cleanup_was_sucessfull def step(self, actions): + # TODO: For debugging only!!!! Remove at times..... if self.state[h.LEVEL_IDX][self.agent_i_position(0)] == h.IS_OCCUPIED_CELL: print(f'fAgent placed on wall!!!!, step is :{self.steps}') raise Exception('Agent placed on wall!!!!') - _, _, _, info = super(SimpleFactory, self).step(actions) + _, r, done, info = super(SimpleFactory, self).step(actions) if not self.next_dirt_spawn: self.spawn_dirt() self.next_dirt_spawn = self._dirt_properties.spawn_frequency else: self.next_dirt_spawn -= 1 - return self.state, self.cumulative_reward, self.done, info + return self.state, r, done, info def additional_actions(self, agent_i: int, action: int) -> ((int, int), bool): if action != self._is_moving_action(action):