mirror of
https://github.com/illiumst/marl-factory-grid.git
synced 2025-05-22 14:56:43 +02:00
Debugging
This commit is contained in:
parent
3150757347
commit
b6c8cbd2e3
@ -5,11 +5,25 @@ from networkx.algorithms.approximation import traveling_salesman as tsp
|
||||
from environments.factory.base.objects import Agent
|
||||
from environments.helpers import points_to_graph
|
||||
from environments import helpers as h
|
||||
from environments.helpers import Constants as c
|
||||
|
||||
from environments.helpers import Constants as BaseConstants
|
||||
from environments.helpers import EnvActions as BaseActions
|
||||
|
||||
|
||||
class Constants(BaseConstants):
|
||||
DIRT = 'Dirt'
|
||||
|
||||
|
||||
class Actions(BaseActions):
|
||||
CLEAN_UP = 'do_cleanup_action'
|
||||
|
||||
|
||||
a = Actions
|
||||
c = Constants
|
||||
|
||||
future_planning = 7
|
||||
|
||||
|
||||
class TSPDirtAgent(Agent):
|
||||
|
||||
def __init__(self, env, *args,
|
||||
@ -26,7 +40,7 @@ class TSPDirtAgent(Agent):
|
||||
def predict(self, *_, **__):
|
||||
if self._env[c.DIRT].by_pos(self.pos) is not None:
|
||||
# Translate the action_object to an integer to have the same output as any other model
|
||||
action = h.EnvActions.CLEAN_UP
|
||||
action = a.CLEAN_UP
|
||||
elif any('door' in x.name.lower() for x in self.tile.guests):
|
||||
door = next(x for x in self.tile.guests if 'door' in x.name.lower())
|
||||
if door.is_closed:
|
||||
@ -37,7 +51,7 @@ class TSPDirtAgent(Agent):
|
||||
else:
|
||||
action = self._predict_move()
|
||||
# Translate the action_object to an integer to have the same output as any other model
|
||||
action_obj = next(action_i for action_i, action_obj in enumerate(self._env._actions) if action_obj == action)
|
||||
action_obj = next(action_i for action_name, action_i in self._env.named_action_space.items() if action_name == action)
|
||||
return action_obj
|
||||
|
||||
def _predict_move(self):
|
||||
|
@ -181,11 +181,11 @@ class BaseFactory(gym.Env):
|
||||
if agents_to_spawn:
|
||||
agents = Agents.from_tiles(floor.empty_tiles[:agents_to_spawn], self._level_shape, **agents_kwargs)
|
||||
else:
|
||||
agents = Agents(**agents_kwargs)
|
||||
agents = Agents(self._level_shape, **agents_kwargs)
|
||||
if self._injected_agents:
|
||||
initialized_injections = list()
|
||||
for i, injection in enumerate(self._injected_agents):
|
||||
agents.register_item(injection(self, floor.empty_tiles[agents_to_spawn+i+1], static_problem=False))
|
||||
agents.register_item(injection(self, floor.empty_tiles[0], agents, static_problem=False))
|
||||
initialized_injections.append(agents[-1])
|
||||
self._initialized_injections = initialized_injections
|
||||
self._entities.register_additional_items({c.AGENT: agents})
|
||||
@ -335,7 +335,12 @@ class BaseFactory(gym.Env):
|
||||
# General Observations
|
||||
lvl_obs = self[c.WALLS].as_array()
|
||||
door_obs = self[c.DOORS].as_array()
|
||||
global_agent_obs = self[c.AGENT].as_array() if self.obs_prop.render_agents != a_obs.NOT else None
|
||||
if self.obs_prop.render_agents == a_obs.NOT:
|
||||
global_agent_obs = None
|
||||
elif self.obs_prop.omit_agent_self and self.n_agents == 1:
|
||||
global_agent_obs = None
|
||||
else:
|
||||
global_agent_obs = self[c.AGENT].as_array().copy()
|
||||
placeholder_obs = self[c.AGENT_PLACEHOLDER].as_array() if self[c.AGENT_PLACEHOLDER] else None
|
||||
add_obs_dict = self._additional_observations()
|
||||
|
||||
@ -343,7 +348,7 @@ class BaseFactory(gym.Env):
|
||||
obs_dict = dict()
|
||||
# Build Agent Observations
|
||||
if self.obs_prop.render_agents != a_obs.NOT:
|
||||
if self.obs_prop.omit_agent_self:
|
||||
if self.obs_prop.omit_agent_self and self.n_agents >= 2:
|
||||
if self.obs_prop.render_agents == a_obs.SEPERATE:
|
||||
other_agent_obs_idx = [x for x in range(self.n_agents) if x != agent_idx]
|
||||
agent_obs = np.take(global_agent_obs, other_agent_obs_idx, axis=0)
|
||||
@ -361,11 +366,12 @@ class BaseFactory(gym.Env):
|
||||
lvl_obs += global_agent_obs
|
||||
|
||||
obs_dict[c.WALLS] = lvl_obs
|
||||
if self.obs_prop.render_agents in [a_obs.SEPERATE, a_obs.COMBINED]:
|
||||
if self.obs_prop.render_agents in [a_obs.SEPERATE, a_obs.COMBINED] and agent_obs is not None:
|
||||
obs_dict[c.AGENT] = agent_obs
|
||||
if self[c.AGENT_PLACEHOLDER]:
|
||||
if self[c.AGENT_PLACEHOLDER] and placeholder_obs is not None:
|
||||
obs_dict[c.AGENT_PLACEHOLDER] = placeholder_obs
|
||||
obs_dict[c.DOORS] = door_obs
|
||||
if self.parse_doors and door_obs is not None:
|
||||
obs_dict[c.DOORS] = door_obs
|
||||
obs_dict.update(add_obs_dict)
|
||||
obsn = np.vstack(list(obs_dict.values()))
|
||||
if self.obs_prop.pomdp_r:
|
||||
@ -381,20 +387,21 @@ class BaseFactory(gym.Env):
|
||||
zip(keys, idxs, list(idxs[1:]) + [idxs[-1]+1, ])}
|
||||
|
||||
# Shadow Casting
|
||||
try:
|
||||
light_block_obs = [obs_idx for key, obs_idx in per_agent_expl_idx[agent.name].items()
|
||||
if self[key].is_blocking_light]
|
||||
# Flatten
|
||||
light_block_obs = [x for y in light_block_obs for x in y]
|
||||
shadowed_obs = [obs_idx for key, obs_idx in per_agent_expl_idx[agent.name].items()
|
||||
if self[key].can_be_shadowed]
|
||||
# Flatten
|
||||
shadowed_obs = [x for y in shadowed_obs for x in y]
|
||||
except AttributeError as e:
|
||||
print('Check your Keys! Only use Constants as Keys!')
|
||||
print(e)
|
||||
raise e
|
||||
if self.obs_prop.cast_shadows:
|
||||
try:
|
||||
light_block_obs = [obs_idx for key, obs_idx in per_agent_expl_idx[agent.name].items()
|
||||
if self[key].is_blocking_light]
|
||||
# Flatten
|
||||
light_block_obs = [x for y in light_block_obs for x in y]
|
||||
shadowed_obs = [obs_idx for key, obs_idx in per_agent_expl_idx[agent.name].items()
|
||||
if self[key].can_be_shadowed]
|
||||
# Flatten
|
||||
shadowed_obs = [x for y in shadowed_obs for x in y]
|
||||
except AttributeError as e:
|
||||
print('Check your Keys! Only use Constants as Keys!')
|
||||
print(e)
|
||||
raise e
|
||||
|
||||
obs_block_light = obsn[light_block_obs] != c.OCCUPIED_CELL
|
||||
door_shadowing = False
|
||||
if self.parse_doors:
|
||||
|
@ -6,6 +6,7 @@ import random
|
||||
import numpy as np
|
||||
|
||||
# from algorithms.TSP_dirt_agent import TSPDirtAgent
|
||||
from algorithms.TSP_dirt_agent import TSPDirtAgent
|
||||
from environments.helpers import Constants as BaseConstants
|
||||
from environments.helpers import EnvActions as BaseActions
|
||||
from environments.helpers import Rewards as BaseRewards
|
||||
@ -27,9 +28,9 @@ class Actions(BaseActions):
|
||||
|
||||
|
||||
class Rewards(BaseRewards):
|
||||
CLEAN_UP_VALID = 0.5
|
||||
CLEAN_UP_FAIL = -0.1
|
||||
CLEAN_UP_LAST_PIECE = 4.5
|
||||
CLEAN_UP_VALID = 1
|
||||
CLEAN_UP_FAIL = -0.1
|
||||
CLEAN_UP_LAST_PIECE = 4
|
||||
|
||||
|
||||
class DirtProperties(NamedTuple):
|
||||
@ -293,13 +294,13 @@ if __name__ == '__main__':
|
||||
global_timings = []
|
||||
for i in range(10):
|
||||
|
||||
factory = DirtFactory(n_agents=4, done_at_collision=False,
|
||||
factory = DirtFactory(n_agents=1, done_at_collision=False,
|
||||
level_name='rooms', max_steps=1000,
|
||||
doors_have_area=False,
|
||||
obs_prop=obs_props, parse_doors=True,
|
||||
verbose=True,
|
||||
mv_prop=move_props, dirt_prop=dirt_props,
|
||||
# inject_agents=[TSPDirtAgent],
|
||||
inject_agents=[TSPDirtAgent],
|
||||
)
|
||||
|
||||
# noinspection DuplicatedCode
|
||||
@ -317,10 +318,11 @@ if __name__ == '__main__':
|
||||
env_state = factory.reset()
|
||||
if render:
|
||||
factory.render()
|
||||
# tsp_agent = factory.get_injected_agents()[0]
|
||||
tsp_agent = factory.get_injected_agents()[0]
|
||||
|
||||
rwrd = 0
|
||||
for agent_i_action in random_actions:
|
||||
agent_i_action = tsp_agent.predict()
|
||||
env_state, step_rwrd, done_bool, info_obj = factory.step(agent_i_action)
|
||||
rwrd += step_rwrd
|
||||
if render:
|
||||
|
@ -1,14 +1,10 @@
|
||||
import warnings
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import yaml
|
||||
from stable_baselines3 import A2C
|
||||
from stable_baselines3 import A2C, PPO, DQN
|
||||
|
||||
from environments import helpers as h
|
||||
from environments.helpers import Constants as c
|
||||
from environments.factory.factory_dirt import DirtFactory
|
||||
from environments.factory.combined_factories import DirtItemFactory
|
||||
from environments.logging.recorder import EnvRecorder
|
||||
|
||||
warnings.filterwarnings('ignore', category=FutureWarning)
|
||||
@ -17,7 +13,7 @@ warnings.filterwarnings('ignore', category=UserWarning)
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
determin = True
|
||||
determin = False
|
||||
render = True
|
||||
record = False
|
||||
seed = 67
|
||||
@ -37,7 +33,7 @@ if __name__ == '__main__':
|
||||
|
||||
this_model = out_path / 'model.zip'
|
||||
|
||||
model_cls = A2C # next(val for key, val in h.MODEL_MAP.items() if key in out_path.parent.name)
|
||||
model_cls = PPO # next(val for key, val in h.MODEL_MAP.items() if key in out_path.parent.name)
|
||||
models = [model_cls.load(this_model)]
|
||||
|
||||
# Init Env
|
||||
|
@ -114,6 +114,7 @@ if __name__ == '__main__':
|
||||
train = True
|
||||
individual_run = True
|
||||
combined_run = True
|
||||
multi_env = False
|
||||
|
||||
train_steps = 2e5
|
||||
frames_to_stack = 3
|
||||
@ -122,7 +123,7 @@ if __name__ == '__main__':
|
||||
study_root_path = Path(__file__).parent.parent / 'study_out' / f'{Path(__file__).stem}'
|
||||
|
||||
def policy_model_kwargs():
|
||||
return dict(learning_rate=0.0003, n_steps=10, gamma=0.95, gae_lambda=0.0, ent_coef=0.01, vf_coef=0.5)
|
||||
return dict()
|
||||
|
||||
# Define Global Env Parameters
|
||||
# Define properties object parameters
|
||||
@ -142,22 +143,22 @@ if __name__ == '__main__':
|
||||
item_props = ItemProperties(n_items=10, spawn_frequency=30, n_drop_off_locations=2,
|
||||
max_agent_inventory_capacity=15)
|
||||
dest_props = DestProperties(n_dests=4, spawn_mode=DestModeOptions.GROUPED, spawn_frequency=1)
|
||||
factory_kwargs = dict(n_agents=1, max_steps=400, parse_doors=True,
|
||||
level_name='rooms', doors_have_area=False,
|
||||
factory_kwargs = dict(n_agents=1, max_steps=500, parse_doors=True,
|
||||
level_name='rooms', doors_have_area=True,
|
||||
verbose=False,
|
||||
mv_prop=move_props,
|
||||
obs_prop=obs_props,
|
||||
done_at_collision=True
|
||||
done_at_collision=False
|
||||
)
|
||||
|
||||
# Bundle both environments with global kwargs and parameters
|
||||
env_map = {}
|
||||
env_map.update({'dirt': (DirtFactory, dict(dirt_prop=dirt_props,
|
||||
**factory_kwargs.copy()))})
|
||||
env_map.update({'item': (ItemFactory, dict(item_prop=item_props,
|
||||
**factory_kwargs.copy()))})
|
||||
env_map.update({'dest': (DestFactory, dict(dest_prop=dest_props,
|
||||
**factory_kwargs.copy()))})
|
||||
# env_map.update({'item': (ItemFactory, dict(item_prop=item_props,
|
||||
# **factory_kwargs.copy()))})
|
||||
# env_map.update({'dest': (DestFactory, dict(dest_prop=dest_props,
|
||||
# **factory_kwargs.copy()))})
|
||||
env_map.update({'combined': (DirtDestItemFactory, dict(dest_prop=dest_props,
|
||||
item_prop=item_props,
|
||||
dirt_prop=dirt_props,
|
||||
@ -168,7 +169,7 @@ if __name__ == '__main__':
|
||||
# Build Major Loop parameters, parameter versions, Env Classes and models
|
||||
if train:
|
||||
for env_key in (env_key for env_key in env_map if 'combined' != env_key):
|
||||
model_cls = h.MODEL_MAP['A2C']
|
||||
model_cls = h.MODEL_MAP['PPO']
|
||||
combination_path = study_root_path / env_key
|
||||
env_class, env_kwargs = env_map[env_key]
|
||||
|
||||
@ -177,8 +178,11 @@ if __name__ == '__main__':
|
||||
continue
|
||||
combination_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
env_factory = SubprocVecEnv([encapsule_env_factory(env_class, env_kwargs)
|
||||
for _ in range(6)], start_method="spawn")
|
||||
if not multi_env:
|
||||
env_factory = encapsule_env_factory(env_class, env_kwargs)()
|
||||
else:
|
||||
env_factory = SubprocVecEnv([encapsule_env_factory(env_class, env_kwargs)
|
||||
for _ in range(6)], start_method="spawn")
|
||||
|
||||
param_path = combination_path / f'env_params.json'
|
||||
try:
|
||||
|
Loading…
x
Reference in New Issue
Block a user