study e_1 corpus

Steffen Illium 2021-09-08 16:24:14 +02:00
parent b09055d95d
commit 4c21a0af7c
8 changed files with 246 additions and 87 deletions

View File

@@ -195,7 +195,7 @@ class BaseFactory(gym.Env):
for action, agent in zip(actions, self[c.AGENT]):
agent.clear_temp_state()
action_obj = self._actions[int(action)]
- self.print(f'Action #{action} has been resolved to: {action_obj}')
+ # self.print(f'Action #{action} has been resolved to: {action_obj}')
if h.MovingAction.is_member(action_obj):
valid = self._move_or_colide(agent, action_obj)
elif h.EnvActions.NOOP == agent.temp_action:

View File

@@ -66,12 +66,90 @@ class Object:
return other.name == self.name
class Entity(Object):
@property
def can_collide(self):
return True
@property
def encoding(self):
return c.OCCUPIED_CELL.value
@property
def x(self):
return self.pos[0]
@property
def y(self):
return self.pos[1]
@property
def pos(self):
return self._tile.pos
@property
def tile(self):
return self._tile
def __init__(self, tile, **kwargs):
super(Entity, self).__init__(**kwargs)
self._tile = tile
tile.enter(self)
def summarize_state(self) -> dict:
return dict(name=str(self.name), x=int(self.x), y=int(self.y),
tile=str(self.tile.name), can_collide=bool(self.can_collide))
def __repr__(self):
return f'{self.name}(@{self.pos})'
class MoveableEntity(Entity):
@property
def last_tile(self):
return self._last_tile
@property
def last_pos(self):
if self._last_tile:
return self._last_tile.pos
else:
return c.NO_POS
@property
def direction_of_view(self):
last_x, last_y = self.last_pos
curr_x, curr_y = self.pos
return last_x-curr_x, last_y-curr_y
def __init__(self, *args, **kwargs):
super(MoveableEntity, self).__init__(*args, **kwargs)
self._last_tile = None
def move(self, next_tile):
curr_tile = self.tile
if curr_tile != next_tile:
next_tile.enter(self)
curr_tile.leave(self)
self._tile = next_tile
self._last_tile = curr_tile
return True
else:
return False
class Action(Object):
def __init__(self, *args, **kwargs):
super(Action, self).__init__(*args, **kwargs)
class PlaceHolder(MoveableEntity):
pass
class Tile(Object):
@property
@@ -133,45 +211,6 @@ class Wall(Tile):
pass
class Entity(Object):
@property
def can_collide(self):
return True
@property
def encoding(self):
return c.OCCUPIED_CELL.value
@property
def x(self):
return self.pos[0]
@property
def y(self):
return self.pos[1]
@property
def pos(self):
return self._tile.pos
@property
def tile(self):
return self._tile
def __init__(self, tile: Tile, **kwargs):
super(Entity, self).__init__(**kwargs)
self._tile = tile
tile.enter(self)
def summarize_state(self) -> dict:
return dict(name=str(self.name), x=int(self.x), y=int(self.y),
tile=str(self.tile.name), can_collide=bool(self.can_collide))
def __repr__(self):
return f'{self.name}(@{self.pos})'
class Door(Entity):
@property
@@ -261,41 +300,6 @@ class Door(Entity):
return False
class MoveableEntity(Entity):
@property
def last_tile(self):
return self._last_tile
@property
def last_pos(self):
if self._last_tile:
return self._last_tile.pos
else:
return c.NO_POS
@property
def direction_of_view(self):
last_x, last_y = self.last_pos
curr_x, curr_y = self.pos
return last_x-curr_x, last_y-curr_y
def __init__(self, *args, **kwargs):
super(MoveableEntity, self).__init__(*args, **kwargs)
self._last_tile = None
def move(self, next_tile):
curr_tile = self.tile
if curr_tile != next_tile:
next_tile.enter(self)
curr_tile.leave(self)
self._tile = next_tile
self._last_tile = curr_tile
return True
else:
return False
class Agent(MoveableEntity):
def __init__(self, *args, **kwargs):

View File

@@ -4,7 +4,7 @@ from typing import List, Union, Dict
import numpy as np
- from environments.factory.base.objects import Entity, Tile, Agent, Door, Action, Wall, Object
+ from environments.factory.base.objects import Entity, Tile, Agent, Door, Action, Wall, Object, PlaceHolder
from environments.utility_classes import MovementProperties
from environments import helpers as h
from environments.helpers import Constants as c
@@ -156,6 +156,25 @@ class MovingEntityObjectRegister(EntityObjectRegister, ABC):
del self[name]
class PlaceHolderRegister(MovingEntityObjectRegister):
_accepted_objects = PlaceHolder
# noinspection DuplicatedCode
def as_array(self):
self._array[:] = c.FREE_CELL.value
# noinspection PyTupleAssignmentBalance
for z, x, y, v in zip(range(len(self)), *zip(*[x.pos for x in self]), [x.encoding for x in self]):
if self.individual_slices:
self._array[z, x, y] += v
else:
self._array[0, x, y] += v
if self.individual_slices:
return self._array
else:
return self._array.sum(axis=0, keepdims=True)
class Entities(Register):
_accepted_objects = EntityObjectRegister
@@ -256,6 +275,9 @@ class FloorTiles(WallTiles):
class Agents(MovingEntityObjectRegister):
+ _accepted_objects = Agent
+ # noinspection DuplicatedCode
def as_array(self):
self._array[:] = c.FREE_CELL.value
# noinspection PyTupleAssignmentBalance
@@ -269,8 +291,6 @@ class Agents(MovingEntityObjectRegister):
else:
return self._array.sum(axis=0, keepdims=True)
- _accepted_objects = Agent
@property
def positions(self):
return [agent.pos for agent in self]

View File

@@ -311,15 +311,17 @@ class ItemFactory(BaseFactory):
reward, info_dict = super().calculate_additional_reward(agent)
if h.EnvActions.ITEM_ACTION == agent.temp_action:
if agent.temp_valid:
- if self[c.DROP_OFF].by_pos(agent.pos):
+ if drop_off := self[c.DROP_OFF].by_pos(agent.pos):
info_dict.update({f'{agent.name}_item_dropoff': 1})
+ self.print(f'{agent.name} just dropped off an item at {drop_off.pos}.')
reward += 0.5
else:
info_dict.update({f'{agent.name}_item_pickup': 1})
+ self.print(f'{agent.name} just picked up an item at {agent.pos}')
reward += 0.1
else:
info_dict.update({f'{agent.name}_failed_item_action': 1})
+ self.print(f'{agent.name} just tried to pick up an item at {agent.pos}, but failed.')
reward -= 0.1
return reward, info_dict

View File

@@ -5,6 +5,8 @@ from typing import Tuple, Union
import numpy as np
from pathlib import Path
+ from stable_baselines3 import PPO, DQN, A2C
LEVELS_DIR = 'levels'
TO_BE_AVERAGED = ['dirt_amount', 'dirty_tiles']
@@ -142,6 +144,8 @@ def asset_str(agent):
return c.AGENT.value, 'idle'
+ model_map = dict(PPO=PPO, DQN=DQN, A2C=A2C)
if __name__ == '__main__':
parsed_level = parse_level(Path(__file__).parent / 'factory' / 'levels' / 'simple.txt')
y = one_hot_level(parsed_level)

View File

@@ -139,7 +139,7 @@ if __name__ == '__main__':
if modeL_type.__name__ in ["PPO", "A2C"]:
kwargs = dict(ent_coef=0.01)
- env = SubprocVecEnv([make_env(env_kwargs) for _ in range(10)], start_method="spawn")
+ env = SubprocVecEnv([make_env(env_kwargs) for _ in range(1)], start_method="spawn")
elif modeL_type.__name__ in ["RegDQN", "DQN", "QRDQN"]:
env = make_env(env_kwargs)()
kwargs = dict(buffer_size=50000,
@@ -147,7 +147,8 @@ if __name__ == '__main__':
batch_size=64,
target_update_interval=5000,
exploration_fraction=0.25,
- exploration_final_eps=0.025)
+ exploration_final_eps=0.025
+ )
else:
raise NameError(f'The model "{modeL_type.__name__}" has the wrong name.')

View File

@@ -3,7 +3,6 @@ from pathlib import Path
import yaml
from natsort import natsorted
- from stable_baselines3 import PPO, DQN, A2C
from stable_baselines3.common.evaluation import evaluate_policy
from environments.factory.factory_dirt import DirtProperties, DirtFactory
@@ -12,13 +11,12 @@ from environments.factory.factory_item import ItemProperties, ItemFactory
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning)
- model_map = dict(PPO=PPO, DQN=DQN, A2C=A2C)
if __name__ == '__main__':
- model_name = 'PPO_1631029150'
+ model_name = 'DQN_1631092016'
run_id = 0
- seed=69
+ seed = 69
out_path = Path(__file__).parent / 'debug_out'
model_path = out_path / model_name
@@ -38,5 +36,5 @@ if __name__ == '__main__':
this_model = model_files[0]
model_cls = next(val for key, val in model_map.items() if key in model_name)
model = model_cls.load(this_model)
- evaluation_result = evaluate_policy(model, env, n_eval_episodes=100, deterministic=True, render=True)
+ evaluation_result = evaluate_policy(model, env, n_eval_episodes=100, deterministic=False, render=True)
print(evaluation_result)

studies/e_1.py (new file, 130 lines added)
View File

@@ -0,0 +1,130 @@
import itertools
import random
from pathlib import Path
import simplejson
from stable_baselines3 import DQN, PPO, A2C
from environments.factory.factory_dirt import DirtProperties, DirtFactory
from environments.factory.factory_item import ItemProperties, ItemFactory
if __name__ == '__main__':
"""
In this studie, we want to explore the macro behaviour of multi agents which are trained on the same task,
but never saw each other in training.
Those agents learned
We start with training a single policy on a single task (dirt cleanup / item pickup).
Then multiple agent equipped with the same policy are deployed in the same environment.
There are further distinctions to be made:
1. No Observation - ['no_obs']:
- Agent do not see each other but their consequences of their combined actions
- Agents can collide
2. Observation in seperate slice - [['seperate_0'], ['seperate_1'], ['seperate_N']]:
- Agents see other entitys on a seperate slice
- This slice has been filled with $0 | 1 | \mathbb{N}(0, 1)$
-- Depending ob the fill value, agents will react diffently
-> TODO: Test this!
3. Observation in level slice - ['in_lvl_obs']:
- This tells the agent to treat other agents as obstacle.
- However, the state space is altered since moving obstacles are not part the original agent observation.
- We are out of distribution.
"""
def bundle_model(model_class):
if model_class.__name__ in ["PPO", "A2C"]:
kwargs = dict(ent_coef=0.01)
elif model_class.__name__ in ["RegDQN", "DQN", "QRDQN"]:
kwargs = dict(buffer_size=50000,
learning_starts=64,
batch_size=64,
target_update_interval=5000,
exploration_fraction=0.25,
exploration_final_eps=0.025
)
return lambda: model_class(**kwargs)
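# Hypothetical usage of the bundle above (not part of this commit): bundle_model(DQN)
# returns a zero-argument factory, so model = bundle_model(DQN)() would build the model.
# A real stable-baselines3 call additionally needs a policy and an env,
# e.g. DQN('MlpPolicy', env, **kwargs), which the TODOs below still have to wire in.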
if __name__ == '__main__':
# Define a global study save path
study_root_path = Path(Path(__file__).stem) / 'out'
# TODO: Define Global Env Parameters
factory_kwargs = {
}
# TODO: Define global model parameters
# TODO: Define parameters for both envs
dirt_props = DirtProperties()
item_props = ItemProperties()
# Bundle both environments with global kwargs and parameters
env_bundles = [lambda: ('dirt', DirtFactory(**factory_kwargs, dirt_properties=dirt_props)),
lambda: ('item', ItemFactory(**factory_kwargs, item_properties=item_props))]
# Define parameter versions according to #1, 2[1,0,N], 3
observation_modes = ['no_obs', 'seperate_0', 'seperate_1', 'seperate_N', 'in_lvl_obs']
# Define RL-Models
model_bundles = [bundle_model(model) for model in [A2C, PPO, DQN]]
# Zip parameters, parameter versions, Env Classes and models
combinations = itertools.product(model_bundles, env_bundles)
# Train starts here ############################################################
# Build Major Loop
for model, (env_identifier, env_bundle) in combinations:
for observation_mode in observation_modes:
# TODO: Create an identifier, which is unique for every combination and easy to read in filesystem
identifier = f'{model.name}_{observation_mode}_{env_identifier}'
# Train each combination per seed
for seed in range(3):
# TODO: Output folder
# TODO: Monitor Init
# TODO: Env Init
# TODO: Model Init
# TODO: Model train
# TODO: Model save
pass
# TODO: Seed Compare Plot
# Train ends here ############################################################
# Evaluation starts here #####################################################
# Iterate Observation Modes
for observation_mode in observation_modes:
# TODO: For trained policy in study_root_path / identifier
for policy_group in (x for x in study_root_path.iterdir() if x.is_dir()):
# TODO: Pick random seed or iterate over available seeds
policy_seed = next((y for y in policy_group.iterdir() if y.is_dir()))
# TODO: retrieve model class
# TODO: Load both agents
models = []
# TODO: Evaluation Loop for i in range(100) Episodes
for episode in range(100):
with next(policy_seed.glob('*.yaml')).open('r') as f:
env_kwargs = simplejson.load(f)
# TODO: Monitor Init
env = None # TODO: Init Env
for step in range(400):
random_actions = [[random.randint(0, env.n_actions - 1) for _ in range(len(models))] for _ in range(200)]
env_state = env.reset()
rew = 0
for agent_i_action in random_actions:
env_state, step_r, done_bool, info_obj = env.step(agent_i_action)
rew += step_r
if done_bool:
break
print(f'Factory run {episode} done, reward is:\n {rew}')
# TODO: Plotting
pass
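For reference, a minimal sketch of how the evaluation loop above might look once the TODOs are filled in and trained models are actually loaded. It assumes one stable-baselines3 model per agent, a per-agent indexable observation, and the multi-agent step API used in the scaffold (one action per agent per step); all names are placeholders.

def evaluate_episode(env, models, max_steps=400):
    # One evaluation episode: every agent acts according to its own trained policy.
    env_state = env.reset()
    episode_reward = 0
    for _ in range(max_steps):
        actions = [model.predict(env_state[i], deterministic=True)[0]
                   for i, model in enumerate(models)]
        env_state, step_r, done_bool, info_obj = env.step(actions)
        episode_reward += step_r
        if done_bool:
            break
    return episode_reward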