Agents now smear Dirt
This commit is contained in:
@ -10,7 +10,7 @@ from gym.wrappers import FrameStack
|
|||||||
|
|
||||||
from environments.helpers import Constants as c, Constants
|
from environments.helpers import Constants as c, Constants
|
||||||
from environments import helpers as h
|
from environments import helpers as h
|
||||||
from environments.factory.base.objects import Slice, Agent, Tile, Action, MoveableEntity
|
from environments.factory.base.objects import Slice, Agent, Tile, Action
|
||||||
from environments.factory.base.registers import StateSlices, Actions, Entities, Agents, Doors, FloorTiles
|
from environments.factory.base.registers import StateSlices, Actions, Entities, Agents, Doors, FloorTiles
|
||||||
from environments.utility_classes import MovementProperties
|
from environments.utility_classes import MovementProperties
|
||||||
|
|
||||||
@ -85,9 +85,6 @@ class BaseFactory(gym.Env):
|
|||||||
movement_properties: MovementProperties = MovementProperties(), parse_doors=False,
|
movement_properties: MovementProperties = MovementProperties(), parse_doors=False,
|
||||||
combin_agent_slices_in_obs: bool = False, frames_to_stack=0, record_episodes=False,
|
combin_agent_slices_in_obs: bool = False, frames_to_stack=0, record_episodes=False,
|
||||||
omit_agent_slice_in_obs=False, done_at_collision=False, **kwargs):
|
omit_agent_slice_in_obs=False, done_at_collision=False, **kwargs):
|
||||||
assert (combin_agent_slices_in_obs != omit_agent_slice_in_obs) or \
|
|
||||||
(not combin_agent_slices_in_obs and not omit_agent_slice_in_obs), \
|
|
||||||
'Both options are exclusive'
|
|
||||||
assert frames_to_stack != 1 and frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1."
|
assert frames_to_stack != 1 and frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1."
|
||||||
|
|
||||||
# Attribute Assignment
|
# Attribute Assignment
|
||||||
@ -125,7 +122,7 @@ class BaseFactory(gym.Env):
|
|||||||
|
|
||||||
# Doors
|
# Doors
|
||||||
parsed_doors = h.one_hot_level(parsed_level, c.DOOR)
|
parsed_doors = h.one_hot_level(parsed_level, c.DOOR)
|
||||||
doors = [Slice(c.DOORS.value, parsed_doors)] if parsed_doors.any() and self.parse_doors else []
|
doors = [Slice(c.DOORS.name, parsed_doors)] if parsed_doors.any() and self.parse_doors else []
|
||||||
|
|
||||||
# Agents
|
# Agents
|
||||||
agents = []
|
agents = []
|
||||||
@ -283,15 +280,17 @@ class BaseFactory(gym.Env):
|
|||||||
obs = self._padded_obs_cube[:, x0:x1, y0:y1]
|
obs = self._padded_obs_cube[:, x0:x1, y0:y1]
|
||||||
else:
|
else:
|
||||||
obs = self._obs_cube
|
obs = self._obs_cube
|
||||||
if self.omit_agent_slice_in_obs:
|
|
||||||
obs_new = obs[[key for key, val in self._slices.items() if c.AGENT.value not in val]]
|
if self.combin_agent_slices_in_obs and self.n_agents >= 1:
|
||||||
return obs_new
|
agent_obs = np.sum(obs[[key for key, slice in self._slices.items() if c.AGENT.name in slice.name and
|
||||||
else:
|
(not self.omit_agent_slice_in_obs and slice.name != agent.name)]],
|
||||||
if self.combin_agent_slices_in_obs:
|
|
||||||
agent_obs = np.sum(obs[[key for key, slice in self._slices.items() if c.AGENT.name in slice.name]],
|
|
||||||
axis=0, keepdims=True)
|
axis=0, keepdims=True)
|
||||||
obs = np.concatenate((obs[:first_agent_slice], agent_obs, obs[first_agent_slice+self.n_agents:]))
|
obs = np.concatenate((obs[:first_agent_slice], agent_obs, obs[first_agent_slice+self.n_agents:]))
|
||||||
return obs
|
return obs
|
||||||
|
else:
|
||||||
|
if self.omit_agent_slice_in_obs:
|
||||||
|
obs_new = obs[[key for key, val in self._slices.items() if c.AGENT.value not in val.name]]
|
||||||
|
return obs_new
|
||||||
else:
|
else:
|
||||||
return obs
|
return obs
|
||||||
|
|
||||||
|
@ -196,7 +196,7 @@ class Door(Entity):
|
|||||||
def encoding(self):
|
def encoding(self):
|
||||||
return 1 if self.is_closed else -1
|
return 1 if self.is_closed else -1
|
||||||
|
|
||||||
def __init__(self, *args, context, closed_on_init=True, auto_close_interval=500):
|
def __init__(self, *args, context, closed_on_init=True, auto_close_interval=10):
|
||||||
super(Door, self).__init__(*args)
|
super(Door, self).__init__(*args)
|
||||||
self._state = c.IS_CLOSED_DOOR
|
self._state = c.IS_CLOSED_DOOR
|
||||||
self.auto_close_interval = auto_close_interval
|
self.auto_close_interval = auto_close_interval
|
||||||
|
@ -39,7 +39,7 @@ class Renderer:
|
|||||||
|
|
||||||
now = time.time()
|
now = time.time()
|
||||||
self.font = pygame.font.Font(None, 20)
|
self.font = pygame.font.Font(None, 20)
|
||||||
self.font.set_bold(1.0)
|
self.font.set_bold(1)
|
||||||
print('Loading System font with pygame.font.Font took', time.time() - now)
|
print('Loading System font with pygame.font.Font took', time.time() - now)
|
||||||
|
|
||||||
def fill_bg(self):
|
def fill_bg(self):
|
||||||
|
@ -3,9 +3,8 @@ import random
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
from environments import helpers as h
|
|
||||||
from environments.helpers import Constants as c
|
from environments.helpers import Constants as c
|
||||||
|
from environments import helpers as h
|
||||||
from environments.factory.base.base_factory import BaseFactory
|
from environments.factory.base.base_factory import BaseFactory
|
||||||
from environments.factory.base.objects import Agent, Action, Object, Slice
|
from environments.factory.base.objects import Agent, Action, Object, Slice
|
||||||
from environments.factory.base.registers import Entities
|
from environments.factory.base.registers import Entities
|
||||||
@ -18,12 +17,13 @@ CLEAN_UP_ACTION = 'clean_up'
|
|||||||
|
|
||||||
|
|
||||||
class DirtProperties(NamedTuple):
|
class DirtProperties(NamedTuple):
|
||||||
clean_amount: int = 2 # How much does the robot clean with one actions.
|
clean_amount: int = 1 # How much does the robot clean with one actions.
|
||||||
max_spawn_ratio: float = 0.2 # On max how much tiles does the dirt spawn in percent.
|
max_spawn_ratio: float = 0.2 # On max how much tiles does the dirt spawn in percent.
|
||||||
gain_amount: float = 0.5 # How much dirt does spawn per tile
|
gain_amount: float = 0.3 # How much dirt does spawn per tile
|
||||||
spawn_frequency: int = 5 # Spawn Frequency in Steps
|
spawn_frequency: int = 5 # Spawn Frequency in Steps
|
||||||
max_local_amount: int = 1 # Max dirt amount per tile.
|
max_local_amount: int = 2 # Max dirt amount per tile.
|
||||||
max_global_amount: int = 20 # Max dirt amount in the whole environment.
|
max_global_amount: int = 20 # Max dirt amount in the whole environment.
|
||||||
|
dirt_smear_amount: float = 0.2 # Agents smear dirt, when not cleaning up in place
|
||||||
|
|
||||||
|
|
||||||
# noinspection PyAttributeOutsideInit
|
# noinspection PyAttributeOutsideInit
|
||||||
@ -116,6 +116,17 @@ class SimpleFactory(BaseFactory):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
def post_step(self) -> dict:
|
def post_step(self) -> dict:
|
||||||
|
if smear_amount := self.dirt_properties.dirt_smear_amount:
|
||||||
|
dirt_slice = self._slices.by_name(DIRT).slice
|
||||||
|
for agent in self._agents:
|
||||||
|
if agent.temp_valid and agent.last_pos != h.NO_POS:
|
||||||
|
if dirt := dirt_slice[agent.last_pos]:
|
||||||
|
if smeared_dirt := round(dirt * smear_amount, 2):
|
||||||
|
dirt_slice[agent.last_pos] = max(0, dirt_slice[agent.last_pos]-smeared_dirt)
|
||||||
|
dirt_slice[agent.pos] = min((self.dirt_properties.max_local_amount,
|
||||||
|
dirt_slice[agent.pos] + smeared_dirt)
|
||||||
|
)
|
||||||
|
|
||||||
if not self._next_dirt_spawn:
|
if not self._next_dirt_spawn:
|
||||||
self.spawn_dirt()
|
self.spawn_dirt()
|
||||||
self._next_dirt_spawn = self.dirt_properties.spawn_frequency
|
self._next_dirt_spawn = self.dirt_properties.spawn_frequency
|
||||||
@ -170,6 +181,7 @@ class SimpleFactory(BaseFactory):
|
|||||||
reward -= 0.01
|
reward -= 0.01
|
||||||
self.print(f'{agent.name} just tried to clean up some dirt at {agent.pos}, but failed.')
|
self.print(f'{agent.name} just tried to clean up some dirt at {agent.pos}, but failed.')
|
||||||
info_dict.update({f'{agent.name}_failed_action': 1})
|
info_dict.update({f'{agent.name}_failed_action': 1})
|
||||||
|
info_dict.update({f'{agent.name}_failed_action': 1})
|
||||||
info_dict.update({f'{agent.name}_failed_dirt_cleanup': 1})
|
info_dict.update({f'{agent.name}_failed_dirt_cleanup': 1})
|
||||||
|
|
||||||
elif self._actions.is_moving_action(agent.temp_action):
|
elif self._actions.is_moving_action(agent.temp_action):
|
||||||
@ -210,11 +222,11 @@ class SimpleFactory(BaseFactory):
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
render = False
|
render = True
|
||||||
|
|
||||||
move_props = MovementProperties(allow_diagonal_movement=True, allow_square_movement=True)
|
move_props = MovementProperties(allow_diagonal_movement=True, allow_square_movement=True)
|
||||||
dirt_props = DirtProperties()
|
dirt_props = DirtProperties(dirt_smear_amount=0.2)
|
||||||
factory = SimpleFactory(movement_properties=move_props, dirt_properties=dirt_props, n_agents=10,
|
factory = SimpleFactory(movement_properties=move_props, dirt_properties=dirt_props, n_agents=1,
|
||||||
combin_agent_slices_in_obs=False, level_name='rooms', parse_doors=True,
|
combin_agent_slices_in_obs=False, level_name='rooms', parse_doors=True,
|
||||||
pomdp_radius=3)
|
pomdp_radius=3)
|
||||||
|
|
||||||
|
7
main.py
7
main.py
@ -92,8 +92,9 @@ if __name__ == '__main__':
|
|||||||
from algorithms.reg_dqn import RegDQN
|
from algorithms.reg_dqn import RegDQN
|
||||||
# from sb3_contrib import QRDQN
|
# from sb3_contrib import QRDQN
|
||||||
|
|
||||||
dirt_props = DirtProperties(clean_amount=3, gain_amount=1, max_global_amount=30,
|
dirt_props = DirtProperties(clean_amount=1, gain_amount=0.3, max_global_amount=20,
|
||||||
max_local_amount=5, spawn_frequency=3, max_spawn_ratio=0.05)
|
max_local_amount=2, spawn_frequency=3, max_spawn_ratio=0.05,
|
||||||
|
dirt_smear_amount=0.2)
|
||||||
move_props = MovementProperties(allow_diagonal_movement=True,
|
move_props = MovementProperties(allow_diagonal_movement=True,
|
||||||
allow_square_movement=True,
|
allow_square_movement=True,
|
||||||
allow_no_op=False)
|
allow_no_op=False)
|
||||||
@ -106,7 +107,7 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
with SimpleFactory(n_agents=1, dirt_properties=dirt_props, pomdp_radius=2, max_steps=400, parse_doors=False,
|
with SimpleFactory(n_agents=1, dirt_properties=dirt_props, pomdp_radius=2, max_steps=400, parse_doors=False,
|
||||||
movement_properties=move_props, level_name='rooms', frames_to_stack=4,
|
movement_properties=move_props, level_name='rooms', frames_to_stack=4,
|
||||||
omit_agent_slice_in_obs=False, combin_agent_slices_in_obs=True, record_episodes=False
|
omit_agent_slice_in_obs=True, combin_agent_slices_in_obs=True, record_episodes=False
|
||||||
) as env:
|
) as env:
|
||||||
|
|
||||||
if modeL_type.__name__ in ["PPO", "A2C"]:
|
if modeL_type.__name__ in ["PPO", "A2C"]:
|
||||||
|
@ -14,13 +14,17 @@ warnings.filterwarnings('ignore', category=UserWarning)
|
|||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
model_name = 'PPO_1626075586'
|
model_name = 'A2C_1626103200'
|
||||||
run_id = 0
|
run_id = 0
|
||||||
out_path = Path(__file__).parent / 'debug_out'
|
out_path = Path(__file__).parent / 'debug_out'
|
||||||
model_path = out_path / model_name
|
model_path = out_path / model_name
|
||||||
|
|
||||||
with (model_path / f'env_{model_name}.yaml').open('r') as f:
|
with (model_path / f'env_{model_name}.yaml').open('r') as f:
|
||||||
env_kwargs = yaml.load(f, Loader=yaml.FullLoader)
|
env_kwargs = yaml.load(f, Loader=yaml.FullLoader)
|
||||||
|
env_kwargs.update(dirt_properties=DirtProperties(clean_amount=1, gain_amount=0.3, max_global_amount=20,
|
||||||
|
max_local_amount=2, spawn_frequency=5, max_spawn_ratio=0.05,
|
||||||
|
dirt_smear_amount=0.2),
|
||||||
|
combin_agent_slices_in_obs=True, omit_agent_slice_in_obs=True)
|
||||||
with SimpleFactory(**env_kwargs) as env:
|
with SimpleFactory(**env_kwargs) as env:
|
||||||
|
|
||||||
# Edit THIS:
|
# Edit THIS:
|
||||||
@ -28,5 +32,5 @@ if __name__ == '__main__':
|
|||||||
this_model = model_files[0]
|
this_model = model_files[0]
|
||||||
|
|
||||||
model = PPO.load(this_model)
|
model = PPO.load(this_model)
|
||||||
evaluation_result = evaluate_policy(model, env, n_eval_episodes=100, deterministic=False, render=True)
|
evaluation_result = evaluate_policy(model, env, n_eval_episodes=100, deterministic=True, render=True)
|
||||||
print(evaluation_result)
|
print(evaluation_result)
|
||||||
|
Reference in New Issue
Block a user