Agents now smear Dirt

This commit is contained in:
steffen-illium
2021-07-13 17:12:01 +02:00
parent 01e7b752b8
commit 4841336e31
6 changed files with 47 additions and 31 deletions

View File

@ -10,7 +10,7 @@ from gym.wrappers import FrameStack
from environments.helpers import Constants as c, Constants from environments.helpers import Constants as c, Constants
from environments import helpers as h from environments import helpers as h
from environments.factory.base.objects import Slice, Agent, Tile, Action, MoveableEntity from environments.factory.base.objects import Slice, Agent, Tile, Action
from environments.factory.base.registers import StateSlices, Actions, Entities, Agents, Doors, FloorTiles from environments.factory.base.registers import StateSlices, Actions, Entities, Agents, Doors, FloorTiles
from environments.utility_classes import MovementProperties from environments.utility_classes import MovementProperties
@ -85,9 +85,6 @@ class BaseFactory(gym.Env):
movement_properties: MovementProperties = MovementProperties(), parse_doors=False, movement_properties: MovementProperties = MovementProperties(), parse_doors=False,
combin_agent_slices_in_obs: bool = False, frames_to_stack=0, record_episodes=False, combin_agent_slices_in_obs: bool = False, frames_to_stack=0, record_episodes=False,
omit_agent_slice_in_obs=False, done_at_collision=False, **kwargs): omit_agent_slice_in_obs=False, done_at_collision=False, **kwargs):
assert (combin_agent_slices_in_obs != omit_agent_slice_in_obs) or \
(not combin_agent_slices_in_obs and not omit_agent_slice_in_obs), \
'Both options are exclusive'
assert frames_to_stack != 1 and frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1." assert frames_to_stack != 1 and frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1."
# Attribute Assignment # Attribute Assignment
@ -125,7 +122,7 @@ class BaseFactory(gym.Env):
# Doors # Doors
parsed_doors = h.one_hot_level(parsed_level, c.DOOR) parsed_doors = h.one_hot_level(parsed_level, c.DOOR)
doors = [Slice(c.DOORS.value, parsed_doors)] if parsed_doors.any() and self.parse_doors else [] doors = [Slice(c.DOORS.name, parsed_doors)] if parsed_doors.any() and self.parse_doors else []
# Agents # Agents
agents = [] agents = []
@ -283,15 +280,17 @@ class BaseFactory(gym.Env):
obs = self._padded_obs_cube[:, x0:x1, y0:y1] obs = self._padded_obs_cube[:, x0:x1, y0:y1]
else: else:
obs = self._obs_cube obs = self._obs_cube
if self.omit_agent_slice_in_obs:
obs_new = obs[[key for key, val in self._slices.items() if c.AGENT.value not in val]] if self.combin_agent_slices_in_obs and self.n_agents >= 1:
return obs_new agent_obs = np.sum(obs[[key for key, slice in self._slices.items() if c.AGENT.name in slice.name and
(not self.omit_agent_slice_in_obs and slice.name != agent.name)]],
axis=0, keepdims=True)
obs = np.concatenate((obs[:first_agent_slice], agent_obs, obs[first_agent_slice+self.n_agents:]))
return obs
else: else:
if self.combin_agent_slices_in_obs: if self.omit_agent_slice_in_obs:
agent_obs = np.sum(obs[[key for key, slice in self._slices.items() if c.AGENT.name in slice.name]], obs_new = obs[[key for key, val in self._slices.items() if c.AGENT.value not in val.name]]
axis=0, keepdims=True) return obs_new
obs = np.concatenate((obs[:first_agent_slice], agent_obs, obs[first_agent_slice+self.n_agents:]))
return obs
else: else:
return obs return obs

View File

@ -196,7 +196,7 @@ class Door(Entity):
def encoding(self): def encoding(self):
return 1 if self.is_closed else -1 return 1 if self.is_closed else -1
def __init__(self, *args, context, closed_on_init=True, auto_close_interval=500): def __init__(self, *args, context, closed_on_init=True, auto_close_interval=10):
super(Door, self).__init__(*args) super(Door, self).__init__(*args)
self._state = c.IS_CLOSED_DOOR self._state = c.IS_CLOSED_DOOR
self.auto_close_interval = auto_close_interval self.auto_close_interval = auto_close_interval

View File

@ -39,7 +39,7 @@ class Renderer:
now = time.time() now = time.time()
self.font = pygame.font.Font(None, 20) self.font = pygame.font.Font(None, 20)
self.font.set_bold(1.0) self.font.set_bold(1)
print('Loading System font with pygame.font.Font took', time.time() - now) print('Loading System font with pygame.font.Font took', time.time() - now)
def fill_bg(self): def fill_bg(self):

View File

@ -3,9 +3,8 @@ import random
import numpy as np import numpy as np
from environments import helpers as h
from environments.helpers import Constants as c from environments.helpers import Constants as c
from environments import helpers as h
from environments.factory.base.base_factory import BaseFactory from environments.factory.base.base_factory import BaseFactory
from environments.factory.base.objects import Agent, Action, Object, Slice from environments.factory.base.objects import Agent, Action, Object, Slice
from environments.factory.base.registers import Entities from environments.factory.base.registers import Entities
@ -18,12 +17,13 @@ CLEAN_UP_ACTION = 'clean_up'
class DirtProperties(NamedTuple): class DirtProperties(NamedTuple):
clean_amount: int = 2 # How much does the robot clean with one actions. clean_amount: int = 1 # How much does the robot clean with one actions.
max_spawn_ratio: float = 0.2 # On max how much tiles does the dirt spawn in percent. max_spawn_ratio: float = 0.2 # On max how much tiles does the dirt spawn in percent.
gain_amount: float = 0.5 # How much dirt does spawn per tile gain_amount: float = 0.3 # How much dirt does spawn per tile
spawn_frequency: int = 5 # Spawn Frequency in Steps spawn_frequency: int = 5 # Spawn Frequency in Steps
max_local_amount: int = 1 # Max dirt amount per tile. max_local_amount: int = 2 # Max dirt amount per tile.
max_global_amount: int = 20 # Max dirt amount in the whole environment. max_global_amount: int = 20 # Max dirt amount in the whole environment.
dirt_smear_amount: float = 0.2 # Agents smear dirt, when not cleaning up in place
# noinspection PyAttributeOutsideInit # noinspection PyAttributeOutsideInit
@ -116,6 +116,17 @@ class SimpleFactory(BaseFactory):
return False return False
def post_step(self) -> dict: def post_step(self) -> dict:
if smear_amount := self.dirt_properties.dirt_smear_amount:
dirt_slice = self._slices.by_name(DIRT).slice
for agent in self._agents:
if agent.temp_valid and agent.last_pos != h.NO_POS:
if dirt := dirt_slice[agent.last_pos]:
if smeared_dirt := round(dirt * smear_amount, 2):
dirt_slice[agent.last_pos] = max(0, dirt_slice[agent.last_pos]-smeared_dirt)
dirt_slice[agent.pos] = min((self.dirt_properties.max_local_amount,
dirt_slice[agent.pos] + smeared_dirt)
)
if not self._next_dirt_spawn: if not self._next_dirt_spawn:
self.spawn_dirt() self.spawn_dirt()
self._next_dirt_spawn = self.dirt_properties.spawn_frequency self._next_dirt_spawn = self.dirt_properties.spawn_frequency
@ -170,6 +181,7 @@ class SimpleFactory(BaseFactory):
reward -= 0.01 reward -= 0.01
self.print(f'{agent.name} just tried to clean up some dirt at {agent.pos}, but failed.') self.print(f'{agent.name} just tried to clean up some dirt at {agent.pos}, but failed.')
info_dict.update({f'{agent.name}_failed_action': 1}) info_dict.update({f'{agent.name}_failed_action': 1})
info_dict.update({f'{agent.name}_failed_action': 1})
info_dict.update({f'{agent.name}_failed_dirt_cleanup': 1}) info_dict.update({f'{agent.name}_failed_dirt_cleanup': 1})
elif self._actions.is_moving_action(agent.temp_action): elif self._actions.is_moving_action(agent.temp_action):
@ -210,11 +222,11 @@ class SimpleFactory(BaseFactory):
if __name__ == '__main__': if __name__ == '__main__':
render = False render = True
move_props = MovementProperties(allow_diagonal_movement=True, allow_square_movement=True) move_props = MovementProperties(allow_diagonal_movement=True, allow_square_movement=True)
dirt_props = DirtProperties() dirt_props = DirtProperties(dirt_smear_amount=0.2)
factory = SimpleFactory(movement_properties=move_props, dirt_properties=dirt_props, n_agents=10, factory = SimpleFactory(movement_properties=move_props, dirt_properties=dirt_props, n_agents=1,
combin_agent_slices_in_obs=False, level_name='rooms', parse_doors=True, combin_agent_slices_in_obs=False, level_name='rooms', parse_doors=True,
pomdp_radius=3) pomdp_radius=3)

View File

@ -92,8 +92,9 @@ if __name__ == '__main__':
from algorithms.reg_dqn import RegDQN from algorithms.reg_dqn import RegDQN
# from sb3_contrib import QRDQN # from sb3_contrib import QRDQN
dirt_props = DirtProperties(clean_amount=3, gain_amount=1, max_global_amount=30, dirt_props = DirtProperties(clean_amount=1, gain_amount=0.3, max_global_amount=20,
max_local_amount=5, spawn_frequency=3, max_spawn_ratio=0.05) max_local_amount=2, spawn_frequency=3, max_spawn_ratio=0.05,
dirt_smear_amount=0.2)
move_props = MovementProperties(allow_diagonal_movement=True, move_props = MovementProperties(allow_diagonal_movement=True,
allow_square_movement=True, allow_square_movement=True,
allow_no_op=False) allow_no_op=False)
@ -106,7 +107,7 @@ if __name__ == '__main__':
with SimpleFactory(n_agents=1, dirt_properties=dirt_props, pomdp_radius=2, max_steps=400, parse_doors=False, with SimpleFactory(n_agents=1, dirt_properties=dirt_props, pomdp_radius=2, max_steps=400, parse_doors=False,
movement_properties=move_props, level_name='rooms', frames_to_stack=4, movement_properties=move_props, level_name='rooms', frames_to_stack=4,
omit_agent_slice_in_obs=False, combin_agent_slices_in_obs=True, record_episodes=False omit_agent_slice_in_obs=True, combin_agent_slices_in_obs=True, record_episodes=False
) as env: ) as env:
if modeL_type.__name__ in ["PPO", "A2C"]: if modeL_type.__name__ in ["PPO", "A2C"]:

View File

@ -14,13 +14,17 @@ warnings.filterwarnings('ignore', category=UserWarning)
if __name__ == '__main__': if __name__ == '__main__':
model_name = 'PPO_1626075586' model_name = 'A2C_1626103200'
run_id = 0 run_id = 0
out_path = Path(__file__).parent / 'debug_out' out_path = Path(__file__).parent / 'debug_out'
model_path = out_path / model_name model_path = out_path / model_name
with (model_path / f'env_{model_name}.yaml').open('r') as f: with (model_path / f'env_{model_name}.yaml').open('r') as f:
env_kwargs = yaml.load(f, Loader=yaml.FullLoader) env_kwargs = yaml.load(f, Loader=yaml.FullLoader)
env_kwargs.update(dirt_properties=DirtProperties(clean_amount=1, gain_amount=0.3, max_global_amount=20,
max_local_amount=2, spawn_frequency=5, max_spawn_ratio=0.05,
dirt_smear_amount=0.2),
combin_agent_slices_in_obs=True, omit_agent_slice_in_obs=True)
with SimpleFactory(**env_kwargs) as env: with SimpleFactory(**env_kwargs) as env:
# Edit THIS: # Edit THIS:
@ -28,5 +32,5 @@ if __name__ == '__main__':
this_model = model_files[0] this_model = model_files[0]
model = PPO.load(this_model) model = PPO.load(this_model)
evaluation_result = evaluate_policy(model, env, n_eval_episodes=100, deterministic=False, render=True) evaluation_result = evaluate_policy(model, env, n_eval_episodes=100, deterministic=True, render=True)
print(evaluation_result) print(evaluation_result)