diff --git a/algorithms/_base.py b/algorithms/_base.py index 767087b..e8639fe 100644 --- a/algorithms/_base.py +++ b/algorithms/_base.py @@ -209,7 +209,7 @@ if __name__ == '__main__': allow_square_movement=True, allow_no_op=False) env = SimpleFactory(dirt_properties=dirt_props, movement_properties=move_props, n_agents=N_AGENTS, pomdp_radius=2, max_steps=400, omit_agent_slice_in_obs=False) - env = DummyVecEnv([lambda: env]) + # env = DummyVecEnv([lambda: env]) print(env) from stable_baselines3.dqn import DQN diff --git a/environments/factory/base_factory.py b/environments/factory/base_factory.py index c10ea68..09968a4 100644 --- a/environments/factory/base_factory.py +++ b/environments/factory/base_factory.py @@ -210,12 +210,13 @@ class BaseFactory(gym.Env): # Step the door close intervall agents_pos = [agent.pos for agent in self._agent_states] - for door_i, door in enumerate(self._door_states): - if door.is_open and door.time_to_close and door.pos not in agents_pos: - door.time_to_close -= 1 - elif door.is_open and not door.time_to_close and door.pos not in agents_pos: - door.use() - self._state[self._state_slices.by_name(h.DOORS)] = 1 if door.is_closed else -1 + if self.has_doors: + for door_i, door in enumerate(self._door_states): + if door.is_open and door.time_to_close and door.pos not in agents_pos: + door.time_to_close -= 1 + elif door.is_open and not door.time_to_close and door.pos not in agents_pos: + door.use() + self._state[self._state_slices.by_name(h.DOORS)] = 1 if door.is_closed else -1 reward, info = self.calculate_reward(self._agent_states)