Agents now smear Dirt
This commit is contained in:
@ -10,7 +10,7 @@ from gym.wrappers import FrameStack
|
||||
|
||||
from environments.helpers import Constants as c, Constants
|
||||
from environments import helpers as h
|
||||
from environments.factory.base.objects import Slice, Agent, Tile, Action, MoveableEntity
|
||||
from environments.factory.base.objects import Slice, Agent, Tile, Action
|
||||
from environments.factory.base.registers import StateSlices, Actions, Entities, Agents, Doors, FloorTiles
|
||||
from environments.utility_classes import MovementProperties
|
||||
|
||||
@ -85,9 +85,6 @@ class BaseFactory(gym.Env):
|
||||
movement_properties: MovementProperties = MovementProperties(), parse_doors=False,
|
||||
combin_agent_slices_in_obs: bool = False, frames_to_stack=0, record_episodes=False,
|
||||
omit_agent_slice_in_obs=False, done_at_collision=False, **kwargs):
|
||||
assert (combin_agent_slices_in_obs != omit_agent_slice_in_obs) or \
|
||||
(not combin_agent_slices_in_obs and not omit_agent_slice_in_obs), \
|
||||
'Both options are exclusive'
|
||||
assert frames_to_stack != 1 and frames_to_stack >= 0, "'frames_to_stack' cannot be negative or 1."
|
||||
|
||||
# Attribute Assignment
|
||||
@ -125,7 +122,7 @@ class BaseFactory(gym.Env):
|
||||
|
||||
# Doors
|
||||
parsed_doors = h.one_hot_level(parsed_level, c.DOOR)
|
||||
doors = [Slice(c.DOORS.value, parsed_doors)] if parsed_doors.any() and self.parse_doors else []
|
||||
doors = [Slice(c.DOORS.name, parsed_doors)] if parsed_doors.any() and self.parse_doors else []
|
||||
|
||||
# Agents
|
||||
agents = []
|
||||
@ -283,15 +280,17 @@ class BaseFactory(gym.Env):
|
||||
obs = self._padded_obs_cube[:, x0:x1, y0:y1]
|
||||
else:
|
||||
obs = self._obs_cube
|
||||
if self.omit_agent_slice_in_obs:
|
||||
obs_new = obs[[key for key, val in self._slices.items() if c.AGENT.value not in val]]
|
||||
return obs_new
|
||||
else:
|
||||
if self.combin_agent_slices_in_obs:
|
||||
agent_obs = np.sum(obs[[key for key, slice in self._slices.items() if c.AGENT.name in slice.name]],
|
||||
|
||||
if self.combin_agent_slices_in_obs and self.n_agents >= 1:
|
||||
agent_obs = np.sum(obs[[key for key, slice in self._slices.items() if c.AGENT.name in slice.name and
|
||||
(not self.omit_agent_slice_in_obs and slice.name != agent.name)]],
|
||||
axis=0, keepdims=True)
|
||||
obs = np.concatenate((obs[:first_agent_slice], agent_obs, obs[first_agent_slice+self.n_agents:]))
|
||||
return obs
|
||||
else:
|
||||
if self.omit_agent_slice_in_obs:
|
||||
obs_new = obs[[key for key, val in self._slices.items() if c.AGENT.value not in val.name]]
|
||||
return obs_new
|
||||
else:
|
||||
return obs
|
||||
|
||||
|
@ -196,7 +196,7 @@ class Door(Entity):
|
||||
def encoding(self):
|
||||
return 1 if self.is_closed else -1
|
||||
|
||||
def __init__(self, *args, context, closed_on_init=True, auto_close_interval=500):
|
||||
def __init__(self, *args, context, closed_on_init=True, auto_close_interval=10):
|
||||
super(Door, self).__init__(*args)
|
||||
self._state = c.IS_CLOSED_DOOR
|
||||
self.auto_close_interval = auto_close_interval
|
||||
|
@ -39,7 +39,7 @@ class Renderer:
|
||||
|
||||
now = time.time()
|
||||
self.font = pygame.font.Font(None, 20)
|
||||
self.font.set_bold(1.0)
|
||||
self.font.set_bold(1)
|
||||
print('Loading System font with pygame.font.Font took', time.time() - now)
|
||||
|
||||
def fill_bg(self):
|
||||
|
@ -3,9 +3,8 @@ import random
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
from environments import helpers as h
|
||||
from environments.helpers import Constants as c
|
||||
from environments import helpers as h
|
||||
from environments.factory.base.base_factory import BaseFactory
|
||||
from environments.factory.base.objects import Agent, Action, Object, Slice
|
||||
from environments.factory.base.registers import Entities
|
||||
@ -18,12 +17,13 @@ CLEAN_UP_ACTION = 'clean_up'
|
||||
|
||||
|
||||
class DirtProperties(NamedTuple):
|
||||
clean_amount: int = 2 # How much does the robot clean with one actions.
|
||||
clean_amount: int = 1 # How much does the robot clean with one actions.
|
||||
max_spawn_ratio: float = 0.2 # On max how much tiles does the dirt spawn in percent.
|
||||
gain_amount: float = 0.5 # How much dirt does spawn per tile
|
||||
gain_amount: float = 0.3 # How much dirt does spawn per tile
|
||||
spawn_frequency: int = 5 # Spawn Frequency in Steps
|
||||
max_local_amount: int = 1 # Max dirt amount per tile.
|
||||
max_local_amount: int = 2 # Max dirt amount per tile.
|
||||
max_global_amount: int = 20 # Max dirt amount in the whole environment.
|
||||
dirt_smear_amount: float = 0.2 # Agents smear dirt, when not cleaning up in place
|
||||
|
||||
|
||||
# noinspection PyAttributeOutsideInit
|
||||
@ -116,6 +116,17 @@ class SimpleFactory(BaseFactory):
|
||||
return False
|
||||
|
||||
def post_step(self) -> dict:
|
||||
if smear_amount := self.dirt_properties.dirt_smear_amount:
|
||||
dirt_slice = self._slices.by_name(DIRT).slice
|
||||
for agent in self._agents:
|
||||
if agent.temp_valid and agent.last_pos != h.NO_POS:
|
||||
if dirt := dirt_slice[agent.last_pos]:
|
||||
if smeared_dirt := round(dirt * smear_amount, 2):
|
||||
dirt_slice[agent.last_pos] = max(0, dirt_slice[agent.last_pos]-smeared_dirt)
|
||||
dirt_slice[agent.pos] = min((self.dirt_properties.max_local_amount,
|
||||
dirt_slice[agent.pos] + smeared_dirt)
|
||||
)
|
||||
|
||||
if not self._next_dirt_spawn:
|
||||
self.spawn_dirt()
|
||||
self._next_dirt_spawn = self.dirt_properties.spawn_frequency
|
||||
@ -170,6 +181,7 @@ class SimpleFactory(BaseFactory):
|
||||
reward -= 0.01
|
||||
self.print(f'{agent.name} just tried to clean up some dirt at {agent.pos}, but failed.')
|
||||
info_dict.update({f'{agent.name}_failed_action': 1})
|
||||
info_dict.update({f'{agent.name}_failed_action': 1})
|
||||
info_dict.update({f'{agent.name}_failed_dirt_cleanup': 1})
|
||||
|
||||
elif self._actions.is_moving_action(agent.temp_action):
|
||||
@ -210,11 +222,11 @@ class SimpleFactory(BaseFactory):
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
render = False
|
||||
render = True
|
||||
|
||||
move_props = MovementProperties(allow_diagonal_movement=True, allow_square_movement=True)
|
||||
dirt_props = DirtProperties()
|
||||
factory = SimpleFactory(movement_properties=move_props, dirt_properties=dirt_props, n_agents=10,
|
||||
dirt_props = DirtProperties(dirt_smear_amount=0.2)
|
||||
factory = SimpleFactory(movement_properties=move_props, dirt_properties=dirt_props, n_agents=1,
|
||||
combin_agent_slices_in_obs=False, level_name='rooms', parse_doors=True,
|
||||
pomdp_radius=3)
|
||||
|
||||
|
7
main.py
7
main.py
@ -92,8 +92,9 @@ if __name__ == '__main__':
|
||||
from algorithms.reg_dqn import RegDQN
|
||||
# from sb3_contrib import QRDQN
|
||||
|
||||
dirt_props = DirtProperties(clean_amount=3, gain_amount=1, max_global_amount=30,
|
||||
max_local_amount=5, spawn_frequency=3, max_spawn_ratio=0.05)
|
||||
dirt_props = DirtProperties(clean_amount=1, gain_amount=0.3, max_global_amount=20,
|
||||
max_local_amount=2, spawn_frequency=3, max_spawn_ratio=0.05,
|
||||
dirt_smear_amount=0.2)
|
||||
move_props = MovementProperties(allow_diagonal_movement=True,
|
||||
allow_square_movement=True,
|
||||
allow_no_op=False)
|
||||
@ -106,7 +107,7 @@ if __name__ == '__main__':
|
||||
|
||||
with SimpleFactory(n_agents=1, dirt_properties=dirt_props, pomdp_radius=2, max_steps=400, parse_doors=False,
|
||||
movement_properties=move_props, level_name='rooms', frames_to_stack=4,
|
||||
omit_agent_slice_in_obs=False, combin_agent_slices_in_obs=True, record_episodes=False
|
||||
omit_agent_slice_in_obs=True, combin_agent_slices_in_obs=True, record_episodes=False
|
||||
) as env:
|
||||
|
||||
if modeL_type.__name__ in ["PPO", "A2C"]:
|
||||
|
@ -14,13 +14,17 @@ warnings.filterwarnings('ignore', category=UserWarning)
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
model_name = 'PPO_1626075586'
|
||||
model_name = 'A2C_1626103200'
|
||||
run_id = 0
|
||||
out_path = Path(__file__).parent / 'debug_out'
|
||||
model_path = out_path / model_name
|
||||
|
||||
with (model_path / f'env_{model_name}.yaml').open('r') as f:
|
||||
env_kwargs = yaml.load(f, Loader=yaml.FullLoader)
|
||||
env_kwargs.update(dirt_properties=DirtProperties(clean_amount=1, gain_amount=0.3, max_global_amount=20,
|
||||
max_local_amount=2, spawn_frequency=5, max_spawn_ratio=0.05,
|
||||
dirt_smear_amount=0.2),
|
||||
combin_agent_slices_in_obs=True, omit_agent_slice_in_obs=True)
|
||||
with SimpleFactory(**env_kwargs) as env:
|
||||
|
||||
# Edit THIS:
|
||||
@ -28,5 +32,5 @@ if __name__ == '__main__':
|
||||
this_model = model_files[0]
|
||||
|
||||
model = PPO.load(this_model)
|
||||
evaluation_result = evaluate_policy(model, env, n_eval_episodes=100, deterministic=False, render=True)
|
||||
evaluation_result = evaluate_policy(model, env, n_eval_episodes=100, deterministic=True, render=True)
|
||||
print(evaluation_result)
|
||||
|
Reference in New Issue
Block a user