Agents now smear Dirt

This commit is contained in:
steffen-illium
2021-07-13 17:12:01 +02:00
parent 01e7b752b8
commit 4841336e31
6 changed files with 47 additions and 31 deletions

View File

@ -10,7 +10,7 @@ from gym.wrappers import FrameStack
from environments.helpers import Constants as c, Constants
from environments import helpers as h
from environments.factory.base.objects import Slice, Agent, Tile, Action, MoveableEntity
from environments.factory.base.objects import Slice, Agent, Tile, Action
from environments.factory.base.registers import StateSlices, Actions, Entities, Agents, Doors, FloorTiles
from environments.utility_classes import MovementProperties
@ -85,9 +85,6 @@ class BaseFactory(gym.Env):
movement_properties: MovementProperties = MovementProperties(), parse_doors=False,
combin_agent_slices_in_obs: bool = False, frames_to_stack=0, record_episodes=False,
omit_agent_slice_in_obs=False, done_at_collision=False, **kwargs):
assert (combin_agent_slices_in_obs != omit_agent_slice_in_obs) or \
(not combin_agent_slices_in_obs and not omit_agent_slice_in_obs), \
'Both options are exclusive'
assert frames_to_stack != 1 and frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1."
# Attribute Assignment
@ -125,7 +122,7 @@ class BaseFactory(gym.Env):
# Doors
parsed_doors = h.one_hot_level(parsed_level, c.DOOR)
doors = [Slice(c.DOORS.value, parsed_doors)] if parsed_doors.any() and self.parse_doors else []
doors = [Slice(c.DOORS.name, parsed_doors)] if parsed_doors.any() and self.parse_doors else []
# Agents
agents = []
@ -283,15 +280,17 @@ class BaseFactory(gym.Env):
obs = self._padded_obs_cube[:, x0:x1, y0:y1]
else:
obs = self._obs_cube
if self.omit_agent_slice_in_obs:
obs_new = obs[[key for key, val in self._slices.items() if c.AGENT.value not in val]]
return obs_new
if self.combin_agent_slices_in_obs and self.n_agents >= 1:
agent_obs = np.sum(obs[[key for key, slice in self._slices.items() if c.AGENT.name in slice.name and
(not self.omit_agent_slice_in_obs and slice.name != agent.name)]],
axis=0, keepdims=True)
obs = np.concatenate((obs[:first_agent_slice], agent_obs, obs[first_agent_slice+self.n_agents:]))
return obs
else:
if self.combin_agent_slices_in_obs:
agent_obs = np.sum(obs[[key for key, slice in self._slices.items() if c.AGENT.name in slice.name]],
axis=0, keepdims=True)
obs = np.concatenate((obs[:first_agent_slice], agent_obs, obs[first_agent_slice+self.n_agents:]))
return obs
if self.omit_agent_slice_in_obs:
obs_new = obs[[key for key, val in self._slices.items() if c.AGENT.value not in val.name]]
return obs_new
else:
return obs

View File

@ -196,7 +196,7 @@ class Door(Entity):
def encoding(self):
return 1 if self.is_closed else -1
def __init__(self, *args, context, closed_on_init=True, auto_close_interval=500):
def __init__(self, *args, context, closed_on_init=True, auto_close_interval=10):
super(Door, self).__init__(*args)
self._state = c.IS_CLOSED_DOOR
self.auto_close_interval = auto_close_interval

View File

@ -39,7 +39,7 @@ class Renderer:
now = time.time()
self.font = pygame.font.Font(None, 20)
self.font.set_bold(1.0)
self.font.set_bold(1)
print('Loading System font with pygame.font.Font took', time.time() - now)
def fill_bg(self):

View File

@ -3,9 +3,8 @@ import random
import numpy as np
from environments import helpers as h
from environments.helpers import Constants as c
from environments import helpers as h
from environments.factory.base.base_factory import BaseFactory
from environments.factory.base.objects import Agent, Action, Object, Slice
from environments.factory.base.registers import Entities
@ -18,12 +17,13 @@ CLEAN_UP_ACTION = 'clean_up'
class DirtProperties(NamedTuple):
clean_amount: int = 2 # How much does the robot clean with one actions.
max_spawn_ratio: float = 0.2 # On max how much tiles does the dirt spawn in percent.
gain_amount: float = 0.5 # How much dirt does spawn per tile
spawn_frequency: int = 5 # Spawn Frequency in Steps
max_local_amount: int = 1 # Max dirt amount per tile.
max_global_amount: int = 20 # Max dirt amount in the whole environment.
clean_amount: int = 1 # How much does the robot clean with one actions.
max_spawn_ratio: float = 0.2 # On max how much tiles does the dirt spawn in percent.
gain_amount: float = 0.3 # How much dirt does spawn per tile
spawn_frequency: int = 5 # Spawn Frequency in Steps
max_local_amount: int = 2 # Max dirt amount per tile.
max_global_amount: int = 20 # Max dirt amount in the whole environment.
dirt_smear_amount: float = 0.2 # Agents smear dirt, when not cleaning up in place
# noinspection PyAttributeOutsideInit
@ -116,6 +116,17 @@ class SimpleFactory(BaseFactory):
return False
def post_step(self) -> dict:
if smear_amount := self.dirt_properties.dirt_smear_amount:
dirt_slice = self._slices.by_name(DIRT).slice
for agent in self._agents:
if agent.temp_valid and agent.last_pos != h.NO_POS:
if dirt := dirt_slice[agent.last_pos]:
if smeared_dirt := round(dirt * smear_amount, 2):
dirt_slice[agent.last_pos] = max(0, dirt_slice[agent.last_pos]-smeared_dirt)
dirt_slice[agent.pos] = min((self.dirt_properties.max_local_amount,
dirt_slice[agent.pos] + smeared_dirt)
)
if not self._next_dirt_spawn:
self.spawn_dirt()
self._next_dirt_spawn = self.dirt_properties.spawn_frequency
@ -170,6 +181,7 @@ class SimpleFactory(BaseFactory):
reward -= 0.01
self.print(f'{agent.name} just tried to clean up some dirt at {agent.pos}, but failed.')
info_dict.update({f'{agent.name}_failed_action': 1})
info_dict.update({f'{agent.name}_failed_action': 1})
info_dict.update({f'{agent.name}_failed_dirt_cleanup': 1})
elif self._actions.is_moving_action(agent.temp_action):
@ -210,11 +222,11 @@ class SimpleFactory(BaseFactory):
if __name__ == '__main__':
render = False
render = True
move_props = MovementProperties(allow_diagonal_movement=True, allow_square_movement=True)
dirt_props = DirtProperties()
factory = SimpleFactory(movement_properties=move_props, dirt_properties=dirt_props, n_agents=10,
dirt_props = DirtProperties(dirt_smear_amount=0.2)
factory = SimpleFactory(movement_properties=move_props, dirt_properties=dirt_props, n_agents=1,
combin_agent_slices_in_obs=False, level_name='rooms', parse_doors=True,
pomdp_radius=3)

View File

@ -92,8 +92,9 @@ if __name__ == '__main__':
from algorithms.reg_dqn import RegDQN
# from sb3_contrib import QRDQN
dirt_props = DirtProperties(clean_amount=3, gain_amount=1, max_global_amount=30,
max_local_amount=5, spawn_frequency=3, max_spawn_ratio=0.05)
dirt_props = DirtProperties(clean_amount=1, gain_amount=0.3, max_global_amount=20,
max_local_amount=2, spawn_frequency=3, max_spawn_ratio=0.05,
dirt_smear_amount=0.2)
move_props = MovementProperties(allow_diagonal_movement=True,
allow_square_movement=True,
allow_no_op=False)
@ -106,7 +107,7 @@ if __name__ == '__main__':
with SimpleFactory(n_agents=1, dirt_properties=dirt_props, pomdp_radius=2, max_steps=400, parse_doors=False,
movement_properties=move_props, level_name='rooms', frames_to_stack=4,
omit_agent_slice_in_obs=False, combin_agent_slices_in_obs=True, record_episodes=False
omit_agent_slice_in_obs=True, combin_agent_slices_in_obs=True, record_episodes=False
) as env:
if modeL_type.__name__ in ["PPO", "A2C"]:

View File

@ -14,13 +14,17 @@ warnings.filterwarnings('ignore', category=UserWarning)
if __name__ == '__main__':
model_name = 'PPO_1626075586'
model_name = 'A2C_1626103200'
run_id = 0
out_path = Path(__file__).parent / 'debug_out'
model_path = out_path / model_name
with (model_path / f'env_{model_name}.yaml').open('r') as f:
env_kwargs = yaml.load(f, Loader=yaml.FullLoader)
env_kwargs.update(dirt_properties=DirtProperties(clean_amount=1, gain_amount=0.3, max_global_amount=20,
max_local_amount=2, spawn_frequency=5, max_spawn_ratio=0.05,
dirt_smear_amount=0.2),
combin_agent_slices_in_obs=True, omit_agent_slice_in_obs=True)
with SimpleFactory(**env_kwargs) as env:
# Edit THIS:
@ -28,5 +32,5 @@ if __name__ == '__main__':
this_model = model_files[0]
model = PPO.load(this_model)
evaluation_result = evaluate_policy(model, env, n_eval_episodes=100, deterministic=False, render=True)
evaluation_result = evaluate_policy(model, env, n_eval_episodes=100, deterministic=True, render=True)
print(evaluation_result)