Smaller fixes, now running.

parent 84eb381307
commit d0e1175ff1
@@ -3,23 +3,24 @@ import time
from enum import Enum
from pathlib import Path
from typing import List, Union, Iterable, Dict
import numpy as np

import gym
import numpy as np
from gym import spaces

import yaml
from gym.wrappers import FrameStack

from environments.factory.base.shadow_casting import Map
from environments.factory.renderer import Renderer, RenderEntity
from environments.helpers import Constants as c, Constants
from environments import helpers as h
from environments.factory.base.objects import Agent, Tile, Action
from environments.factory.base.objects import Agent, Tile, Action, Wall
from environments.factory.base.registers import Actions, Entities, Agents, Doors, FloorTiles, WallTiles
from environments.utility_classes import MovementProperties

REC_TAC = 'rec'
import simplejson


REC_TAC = 'rec_'


# noinspection PyAttributeOutsideInit
@@ -67,6 +68,8 @@ class BaseFactory(gym.Env):
        self.env_seed = env_seed
        self.seed(env_seed)
        self._base_rng = np.random.default_rng(self.env_seed)
        if isinstance(movement_properties, dict):
            movement_properties = MovementProperties(**movement_properties)
        self.movement_properties = movement_properties
        self.level_name = level_name
        self._level_shape = None
@@ -118,7 +121,7 @@ class BaseFactory(gym.Env):
        entities.update({c.FLOOR: floor})

        # NOPOS
        self.NO_POS_TILE = Tile(c.NO_POS.value)
        self._NO_POS_TILE = Tile(c.NO_POS.value)

        # Doors
        if self.parse_doors:
@@ -175,7 +178,7 @@ class BaseFactory(gym.Env):

    def step(self, actions):

        if self.n_agents == 1:
        if self.n_agents == 1 and not isinstance(actions, list):
            actions = [int(actions)]

        assert isinstance(actions, Iterable), f'"actions" has to be in [{int, list}]'
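Illustrative note (not part of the commit): the widened guard in step() lets a single-agent caller pass either a bare action index or an already wrapped list. A minimal sketch of that normalization, with made-up values:

# Sketch of the normalization added to step(); names and values are illustrative only.
def normalize_actions(actions, n_agents=1):
    # A lone agent may hand in a scalar (e.g. 4) or an already wrapped list (e.g. [4]);
    # both become a list of ints, while multi-agent input passes through untouched.
    if n_agents == 1 and not isinstance(actions, list):
        actions = [int(actions)]
    return actions

assert normalize_actions(4) == [4]
assert normalize_actions([4]) == [4]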
@@ -470,16 +473,16 @@ class BaseFactory(gym.Env):
        d = {key: val for key, val in self.__dict__.items() if not key.startswith('_') and not key.startswith('__')}
        filepath.parent.mkdir(parents=True, exist_ok=True)
        with filepath.open('w') as f:
            yaml.dump(d, f)
            # pickle.dump(d, f, protocol=pickle.HIGHEST_PROTOCOL)
            simplejson.dump(d, f, indent=4, namedtuple_as_object=True)

    def _summarize_state(self):
        summary = {f'{REC_TAC}_step': self._steps}
        summary = {f'{REC_TAC}step': self._steps}

        self[c.WALLS].summarize_state()
        for entity in self._entities:
            if hasattr(entity, 'summarize_state'):
                summary.update({f'{REC_TAC}_{entity.name}': entity.summarize_state()})
        if self._steps == 0:
            summary.update({f'{REC_TAC}{self[c.WALLS].name}': {self[c.WALLS].summarize_states()}})
        for entity_group in self._entities:
            if not isinstance(entity_group, WallTiles):
                summary.update({f'{REC_TAC}{entity_group.name}': entity_group.summarize_states()})
        return summary

    def print(self, string):
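For orientation (illustrative, not part of the commit): the rec_ prefix written by _summarize_state is exactly what RecorderCallback.read_info strips off again on the other side. A rough sketch of that round trip with invented values:

REC_TAC = 'rec_'
# A per-step summary as _summarize_state would expose it through the info dict (values invented).
summary = {f'{REC_TAC}step': 3, f'{REC_TAC}Agents': [{'name': 'Agent[0]', 'x': 1, 'y': 2}]}
# read_info keeps only the prefixed keys and drops the prefix again:
info_dict = {key.replace(REC_TAC, ''): val for key, val in summary.items() if key.startswith(REC_TAC)}
assert info_dict == {'step': 3, 'Agents': [{'name': 'Agent[0]', 'x': 1, 'y': 2}]}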
@@ -124,6 +124,9 @@ class Tile(Object):
    def __repr__(self):
        return f'{self.name}(@{self.pos})'

    def summarize_state(self):
        return dict(name=self.name, x=self.x, y=self.y)


class Wall(Tile):
    pass
@@ -160,8 +163,9 @@ class Entity(Object):
        self._tile = tile
        tile.enter(self)

    def summarize_state(self):
        return self.__dict__.copy()
    def summarize_state(self) -> dict:
        return dict(name=str(self.name), x=int(self.x), y=int(self.y),
                    tile=str(self.tile.name), can_collide=bool(self.can_collide))

    def __repr__(self):
        return f'{self.name}(@{self.pos})'
@@ -180,6 +184,10 @@ class Door(Entity):
    def encoding(self):
        return 1 if self.is_closed else 0.5

    @property
    def str_state(self):
        return 'open' if self.is_open else 'closed'

    @property
    def access_area(self):
        return [node for node in self.connectivity.nodes
@@ -206,6 +214,11 @@ class Door(Entity):
        if not closed_on_init:
            self._open()

    def summarize_state(self):
        state_dict = super().summarize_state()
        state_dict.update(state=str(self.str_state), time_to_close=int(self.time_to_close))
        return state_dict

    @property
    def is_closed(self):
        return self._state == c.CLOSED_DOOR
@@ -296,3 +309,8 @@ class Agent(MoveableEntity):
        self.temp_valid = None
        self.temp_action = None
        self.temp_light_map = None

    def summarize_state(self):
        state_dict = super().summarize_state()
        state_dict.update(valid=bool(self.temp_valid), action=str(self.temp_action))
        return state_dict
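As an aside (illustrative, not part of the commit): each subclass only appends its own fields to the base Entity dict via super().summarize_state(), so an agent record could end up looking roughly like this (field names from the diff, concrete values invented):

# Hypothetical output of Agent.summarize_state(); the tile string depends on the Tile __repr__.
example = dict(name='Agent[0]', x=3, y=5, tile='Floor(@(3, 5))', can_collide=True,
               valid=True, action='move_north')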
@@ -15,7 +15,7 @@ class Register:

    @property
    def name(self):
        return self.__class__.__name__
        return f'{self.__class__.__name__}'

    def __init__(self, *args, **kwargs):
        self._register = dict()
@@ -78,6 +78,9 @@ class ObjectRegister(Register):
        if self.individual_slices:
            self._array = np.concatenate((self._array, np.zeros((1, *self._level_shape))))

    def summarize_states(self):
        return [val.summarize_state() for val in self.values()]


class EntityObjectRegister(ObjectRegister, ABC):

@@ -154,8 +157,8 @@ class Entities(Register):
    def __init__(self):
        super(Entities, self).__init__()

    def __iter__(self):
        return iter([x for sublist in self.values() for x in sublist])
    def iter_individual_entitites(self):
        return iter((x for sublist in self.values() for x in sublist))

    def register_item(self, other: dict):
        assert not any([key for key in other.keys() if key in self.keys()]), \
@@ -167,6 +167,8 @@ class ItemProperties(NamedTuple):
class DoubleTaskFactory(SimpleFactory):
    # noinspection PyMissingConstructor
    def __init__(self, item_properties: ItemProperties, *args, env_seed=time.time_ns(), **kwargs):
        if isinstance(item_properties, dict):
            item_properties = ItemProperties(**item_properties)
        self.item_properties = item_properties
        kwargs.update(env_seed=env_seed)
        self._item_rng = np.random.default_rng(env_seed)
@@ -210,7 +212,7 @@ class DoubleTaskFactory(SimpleFactory):
        elif item := self[c.ITEM].by_pos(agent.pos):
            try:
                inventory.append(item)
                item.move(self.NO_POS_TILE)
                item.move(self._NO_POS_TILE)
                return c.VALID
            except RuntimeError:
                return c.NOT_VALID
@@ -1,5 +1,6 @@
import time
from enum import Enum
from pathlib import Path
from typing import List, Union, NamedTuple, Dict
import random

@@ -12,7 +13,7 @@ from environments.factory.base.objects import Agent, Action, Entity, Tile
from environments.factory.base.registers import Entities, MovingEntityObjectRegister

from environments.factory.renderer import RenderEntity
from environments.utility_classes import MovementProperties
from environments.logging.recorder import RecorderCallback


CLEAN_UP_ACTION = h.EnvActions.CLEAN_UP
@@ -50,6 +51,11 @@ class Dirt(Entity):
    def set_new_amount(self, amount):
        self._amount = amount

    def summarize_state(self):
        state_dict = super().summarize_state()
        state_dict.update(amount=float(self.amount))
        return state_dict


class DirtRegister(MovingEntityObjectRegister):

@@ -127,6 +133,8 @@ class SimpleFactory(BaseFactory):
        return super_entities

    def __init__(self, *args, dirt_properties: DirtProperties = DirtProperties(), env_seed=time.time_ns(), **kwargs):
        if isinstance(dirt_properties, dict):
            dirt_properties = DirtProperties(**dirt_properties)
        self.dirt_properties = dirt_properties
        self._dirt_rng = np.random.default_rng(env_seed)
        self._dirt: DirtRegister
@@ -235,30 +243,41 @@ class SimpleFactory(BaseFactory):


if __name__ == '__main__':
    render = True
    render = False

    dirt_props = DirtProperties(1, 0.05, 0.1, 3, 1, 20, 0.0)
    move_props = MovementProperties(True, True, False)
    move_props = {'allow_square_movement': True,
                  'allow_diagonal_movement': False,
                  'allow_no_op': False} #MovementProperties(True, True, False)

    factory = SimpleFactory(n_agents=1, done_at_collision=False, frames_to_stack=0,
                            level_name='rooms', max_steps=400, combin_agent_obs=True,
                            omit_agent_in_obs=True, parse_doors=False, pomdp_r=2,
                            record_episodes=False, verbose=True, cast_shadows=False
                            )
    with RecorderCallback(filepath=Path('debug_out') / f'recorder_xxxx.json', occupation_map=False,
                          trajectory_map=False) as recorder:

    # noinspection DuplicatedCode
    n_actions = factory.action_space.n - 1
    _ = factory.observation_space
        factory = SimpleFactory(n_agents=1, done_at_collision=False, frames_to_stack=0,
                                level_name='rooms', max_steps=400, combin_agent_obs=True,
                                omit_agent_in_obs=True, parse_doors=True, pomdp_r=3,
                                record_episodes=True, verbose=True, cast_shadows=True,
                                movement_properties=move_props, dirt_properties=dirt_props
                                )

    for epoch in range(100):
        random_actions = [[random.randint(0, n_actions) for _ in range(factory.n_agents)] for _ in range(200)]
        env_state = factory.reset()
        r = 0
        for agent_i_action in random_actions:
            env_state, step_r, done_bool, info_obj = factory.step(agent_i_action)
            r += step_r
            if render:
                factory.render()
            if done_bool:
                break
        print(f'Factory run {epoch} done, reward is:\n {r}')
        # noinspection DuplicatedCode
        n_actions = factory.action_space.n - 1
        _ = factory.observation_space

        for epoch in range(4):
            random_actions = [[random.randint(0, n_actions) for _
                               in range(factory.n_agents)] for _
                               in range(factory.max_steps+1)]
            env_state = factory.reset()
            r = 0
            for agent_i_action in random_actions:
                env_state, step_r, done_bool, info_obj = factory.step(agent_i_action)
                recorder.read_info(0, info_obj)
                r += step_r
                if render:
                    factory.render()
                if done_bool:
                    recorder.read_done(0, done_bool)
                    break
            print(f'Factory run {epoch} done, reward is:\n {r}')
        pass
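Side note (illustrative, not part of the commit): passing move_props as a plain dict works because the factory converts dicts back into the NamedTuple, as the BaseFactory.__init__ hunk above shows. A stand-in sketch, with field names taken from the diff but the field order assumed:

from typing import NamedTuple

class MovementProperties(NamedTuple):  # stand-in for environments.utility_classes.MovementProperties
    allow_square_movement: bool = True
    allow_diagonal_movement: bool = False
    allow_no_op: bool = False

as_dict = {'allow_square_movement': True, 'allow_diagonal_movement': False, 'allow_no_op': False}
# This mirrors: movement_properties = MovementProperties(**movement_properties)
assert MovementProperties(**as_dict) == MovementProperties(True, False, False)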
@@ -1,4 +1,5 @@
import json
from collections import defaultdict
from pathlib import Path
from typing import Union

@@ -9,6 +10,7 @@ from environments.factory.base.base_factory import REC_TAC
from environments.helpers import IGNORED_DF_COLUMNS


# noinspection PyAttributeOutsideInit
class RecorderCallback(BaseCallback):

    def __init__(self, filepath: Union[str, Path], occupation_map: bool = False, trajectory_map: bool = False):
@@ -16,74 +18,82 @@ class RecorderCallback(BaseCallback):
        self.trajectory_map = trajectory_map
        self.occupation_map = occupation_map
        self.filepath = Path(filepath)
        self._recorder_dict = dict()
        self._recorder_df = pd.DataFrame()
        self._recorder_dict = defaultdict(dict)
        self._recorder_json_list = list()
        self.do_record: bool
        self.started = False
        self.closed = False

    def read_info(self, env_idx, info: dict):
        if info_dict := {key.replace(REC_TAC, ''): val for key, val in info.items() if key.startswith(f'{REC_TAC}')}:
            info_dict.update(episode=(self.num_timesteps + env_idx))
            self._recorder_dict[env_idx][len(self._recorder_dict[env_idx])] = info_dict
        else:
            pass
        return

    def read_done(self, env_idx, done):
        if done:
            self._recorder_json_list.append(json.dumps(self._recorder_dict[env_idx]))
            self._recorder_dict[env_idx] = dict()
        else:
            pass

    def start(self, force=False):
        if (hasattr(self.training_env, 'record_episodes') and self.training_env.record_episodes) or force:
            self.do_record = True
            self.filepath.parent.mkdir(exist_ok=True, parents=True)
            self.started = True
        else:
            self.do_record = False

    def stop(self):
        if self.do_record and self.started:
            # self.out_file.unlink(missing_ok=True)
            with self.filepath.open('w') as f:
                json_list = self._recorder_json_list
                json.dump(json_list, f, indent=4)

            if self.occupation_map:
                print('Recorder files were dumped to disk, now plotting the occupation map...')

            if self.trajectory_map:
                print('Recorder files were dumped to disk, now plotting the occupation map...')

            self.closed = True
            self.started = False
        else:
            pass

    def _on_step(self) -> bool:
        if self.do_record and self.started:
            for _, info in enumerate(self.locals.get('infos', [])):
                self._recorder_dict[self.num_timesteps] = {key: val for key, val in info.items()
                                                           if not key.startswith(f'{REC_TAC}_')}
            for env_idx, info in enumerate(self.locals.get('infos', [])):
                self.read_info(env_idx, info)

            for env_idx, done in list(enumerate(self.locals.get('dones', []))) + \
                                 list(enumerate(self.locals.get('done', []))):
                if done:
                    env_monitor_df = pd.DataFrame.from_dict(self._recorder_dict, orient='index')
                    self._recorder_dict = dict()
                    columns = [col for col in env_monitor_df.columns if col not in IGNORED_DF_COLUMNS]
                    env_monitor_df = env_monitor_df.aggregate(
                        {col: 'mean' if col.endswith('ount') else 'sum' for col in columns}
                    )
                    env_monitor_df['episode'] = len(self._recorder_df)
                    self._recorder_df = self._recorder_df.append([env_monitor_df])
                else:
                    pass
            for env_idx, done in list(
                    enumerate(self.locals.get('dones', []))) + list(
                    enumerate(self.locals.get('done', []))):
                self.read_done(env_idx, done)
        else:
            pass
        return True

    def __enter__(self):
        self._on_training_start()
        self.start(force=True)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self._on_training_end()
        self.stop()

    def _on_training_start(self) -> None:
        if self.started:
            pass
        else:
            if hasattr(self.training_env, 'record_episodes'):
                if self.training_env.record_episodes:
                    self.do_record = True
                    self.filepath.parent.mkdir(exist_ok=True, parents=True)
                    self.started = True
                else:
                    self.do_record = False
            else:
                self.do_record = False
            self.start()
        pass

    def _on_training_end(self) -> None:
        if self.closed:
            pass
        else:
            if self.do_record and self.started:
                # self.out_file.unlink(missing_ok=True)
                with self.filepath.open('w') as f:
                    json_df = self._recorder_df.to_json(orient="table")
                    parsed = json.loads(json_df)
                    json.dump(parsed, f, indent=4)

                if self.occupation_map:
                    print('Recorder files were dumped to disk, now plotting the occupation map...')

                if self.trajectory_map:
                    print('Recorder files were dumped to disk, now plotting the occupation map...')

                self.closed = True
                self.started = False
            else:
                pass
            self.stop()
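For context (illustrative, not part of the commit): read_done stores each finished episode as a JSON string and stop() dumps the whole list again, so the written file is a JSON array whose entries are themselves JSON-encoded episode dicts. A hypothetical reader, using the path from the debug run above:

import json
from pathlib import Path

with (Path('debug_out') / 'recorder_xxxx.json').open() as f:
    episodes = [json.loads(entry) for entry in json.load(f)]
# episodes[0] maps the step index (as a string) to that step's recorded info dict.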
main.py
@@ -45,8 +45,8 @@ def combine_runs(run_path: Union[str, PathLike]):
                                                                value_vars=columns, var_name="Measurement",
                                                                value_name="Score")

    if df_melted['Episode'].max() > 100:
        skip_n = round(df_melted['Episode'].max() * 0.01)
    if df_melted['Episode'].max() > 80:
        skip_n = round(df_melted['Episode'].max() * 0.02, 2)
        df_melted = df_melted[df_melted['Episode'] % skip_n == 0]

    prepare_plot(run_path / f'{run_path.name}_monitor_lineplot.png', df_melted)
@@ -72,15 +72,18 @@ def compare_runs(run_path: Path, run_identifier: int, parameter: Union[str, List
    df = df.fillna(0).rename(columns={'episode': 'Episode', 'run': 'Run', 'model': 'Model'})
    columns = [col for col in df.columns if col in parameter]

    roll_n = 40
    last_episode_to_report = min(df.groupby(['Model'])['Episode'].max())
    df = df[df['Episode'] < last_episode_to_report]

    roll_n = 40
    non_overlapp_window = df.groupby(['Model', 'Run', 'Episode']).rolling(roll_n, min_periods=1).mean()

    df_melted = non_overlapp_window[columns].reset_index().melt(id_vars=['Episode', 'Run', 'Model'],
                                                                value_vars=columns, var_name="Measurement",
                                                                value_name="Score")

    if df_melted['Episode'].max() > 100:
        skip_n = round(df_melted['Episode'].max() * 0.01)
        skip_n = round(df_melted['Episode'].max() * 0.02)
        df_melted = df_melted[df_melted['Episode'] % skip_n == 0]

    style = 'Measurement' if len(columns) > 1 else None
@@ -113,10 +116,10 @@ if __name__ == '__main__':
                                max_local_amount=1, spawn_frequency=3, max_spawn_ratio=0.05,
                                dirt_smear_amount=0.0, agent_can_interact=True)
    item_props = ItemProperties(n_items=5, agent_can_interact=True)
    move_props = MovementProperties(allow_diagonal_movement=False,
    move_props = MovementProperties(allow_diagonal_movement=True,
                                    allow_square_movement=True,
                                    allow_no_op=False)
    train_steps = 1e5
    train_steps = 8e5
    time_stamp = int(time.time())

    out_path = None
@@ -129,14 +132,14 @@ if __name__ == '__main__':
                      dirt_properties=dirt_props,
                      movement_properties=move_props,
                      pomdp_r=2, max_steps=400, parse_doors=True,
                      level_name='simple', frames_to_stack=6,
                      level_name='rooms', frames_to_stack=3,
                      omit_agent_in_obs=True, combin_agent_obs=True, record_episodes=False,
                      cast_shadows=True, doors_have_area=False, env_seed=seed, verbose=False,
                      )

    if modeL_type.__name__ in ["PPO", "A2C"]:
        kwargs = dict(ent_coef=0.01)
        env = SubprocVecEnv([make_env(env_kwargs) for _ in range(6)], start_method="spawn")
        env = SubprocVecEnv([make_env(env_kwargs) for _ in range(10)], start_method="spawn")
    elif modeL_type.__name__ in ["RegDQN", "DQN", "QRDQN"]:
        env = make_env(env_kwargs)()
        kwargs = dict(buffer_size=50000,