mirror of
https://github.com/illiumst/marl-factory-grid.git
synced 2025-06-19 10:52:54 +02:00
Debugging done, but he does not learn
This commit is contained in:
@ -181,7 +181,7 @@ class BaseFactory(gym.Env):
|
|||||||
# Move this in a seperate function?
|
# Move this in a seperate function?
|
||||||
for action, agent in zip(actions, self[c.AGENT]):
|
for action, agent in zip(actions, self[c.AGENT]):
|
||||||
agent.clear_temp_sate()
|
agent.clear_temp_sate()
|
||||||
action_obj = self._actions[action]
|
action_obj = self._actions[int(action)]
|
||||||
if self._actions.is_moving_action(action_obj):
|
if self._actions.is_moving_action(action_obj):
|
||||||
valid = self._move_or_colide(agent, action_obj)
|
valid = self._move_or_colide(agent, action_obj)
|
||||||
elif h.EnvActions.NOOP == agent.temp_action:
|
elif h.EnvActions.NOOP == agent.temp_action:
|
||||||
@ -285,6 +285,7 @@ class BaseFactory(gym.Env):
|
|||||||
|
|
||||||
if r := self.pomdp_r:
|
if r := self.pomdp_r:
|
||||||
x, y = self._level_shape
|
x, y = self._level_shape
|
||||||
|
self._padded_obs_cube[:] = c.SHADOWED_CELL.value
|
||||||
self._padded_obs_cube[:, r:r + x, r:r + y] = self._obs_cube
|
self._padded_obs_cube[:, r:r + x, r:r + y] = self._obs_cube
|
||||||
global_x, global_y = map(sum, zip(agent.pos, (r, r)))
|
global_x, global_y = map(sum, zip(agent.pos, (r, r)))
|
||||||
x0, x1 = max(0, global_x - self.pomdp_r), global_x + self.pomdp_r + 1
|
x0, x1 = max(0, global_x - self.pomdp_r), global_x + self.pomdp_r + 1
|
||||||
@ -321,9 +322,7 @@ class BaseFactory(gym.Env):
|
|||||||
light_block_map[xs, ys] = 0
|
light_block_map[xs, ys] = 0
|
||||||
agent.temp_light_map = light_block_map
|
agent.temp_light_map = light_block_map
|
||||||
for obs_idx in can_be_shadowed_idxs:
|
for obs_idx in can_be_shadowed_idxs:
|
||||||
obs[obs_idx] = (obs[obs_idx] * light_block_map) - (
|
obs[obs_idx] = ((obs[obs_idx] * light_block_map) + 0.) - (1 - light_block_map) # * obs[0])
|
||||||
(1 - light_block_map) * obs[0]
|
|
||||||
)
|
|
||||||
|
|
||||||
return obs
|
return obs
|
||||||
else:
|
else:
|
||||||
@ -404,7 +403,7 @@ class BaseFactory(gym.Env):
|
|||||||
self.print(f'{agent.name} did just use the door at {agent.pos}.')
|
self.print(f'{agent.name} did just use the door at {agent.pos}.')
|
||||||
info_dict.update(door_used=1)
|
info_dict.update(door_used=1)
|
||||||
else:
|
else:
|
||||||
reward -= 0.01
|
reward -= 0.00
|
||||||
self.print(f'{agent.name} just tried to use a door at {agent.pos}, but failed.')
|
self.print(f'{agent.name} just tried to use a door at {agent.pos}, but failed.')
|
||||||
info_dict.update({f'{agent.name}_failed_action': 1})
|
info_dict.update({f'{agent.name}_failed_action': 1})
|
||||||
info_dict.update({f'{agent.name}_failed_door_open': 1})
|
info_dict.update({f'{agent.name}_failed_door_open': 1})
|
||||||
@ -416,6 +415,9 @@ class BaseFactory(gym.Env):
|
|||||||
reward += additional_reward
|
reward += additional_reward
|
||||||
info_dict.update(additional_info_dict)
|
info_dict.update(additional_info_dict)
|
||||||
|
|
||||||
|
if agent.temp_collisions:
|
||||||
|
self.print(f't = {self._steps}\t{agent.name} has collisions with {agent.temp_collisions}')
|
||||||
|
|
||||||
for other_agent in agent.temp_collisions:
|
for other_agent in agent.temp_collisions:
|
||||||
info_dict.update({f'{agent.name}_vs_{other_agent.name}': 1})
|
info_dict.update({f'{agent.name}_vs_{other_agent.name}': 1})
|
||||||
|
|
||||||
|
@ -48,7 +48,7 @@ class Register:
|
|||||||
return self._register.items()
|
return self._register.items()
|
||||||
|
|
||||||
def __getitem__(self, item):
|
def __getitem__(self, item):
|
||||||
if isinstance(item, int):
|
if isinstance(item, (int, np.int64, np.int32)):
|
||||||
try:
|
try:
|
||||||
return next(v for i, v in enumerate(self._register.values()) if i == item)
|
return next(v for i, v in enumerate(self._register.values()) if i == item)
|
||||||
except StopIteration:
|
except StopIteration:
|
||||||
|
@ -291,7 +291,7 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
factory = DoubleTaskFactory(item_props, n_agents=3, done_at_collision=False, frames_to_stack=0,
|
factory = DoubleTaskFactory(item_props, n_agents=3, done_at_collision=False, frames_to_stack=0,
|
||||||
level_name='rooms', max_steps=4000,
|
level_name='rooms', max_steps=4000,
|
||||||
omit_agent_slice_in_obs=True, parse_doors=True, pomdp_r=3,
|
omit_agent_in_obs=True, parse_doors=True, pomdp_r=3,
|
||||||
record_episodes=False, verbose=False
|
record_episodes=False, verbose=False
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -18,12 +18,6 @@ from environments.utility_classes import MovementProperties
|
|||||||
CLEAN_UP_ACTION = h.EnvActions.CLEAN_UP
|
CLEAN_UP_ACTION = h.EnvActions.CLEAN_UP
|
||||||
|
|
||||||
|
|
||||||
class ObsSlice(Enum):
|
|
||||||
OWN = -1
|
|
||||||
LEVEL = c.LEVEL.value
|
|
||||||
AGENT = c.AGENT.value
|
|
||||||
|
|
||||||
|
|
||||||
class DirtProperties(NamedTuple):
|
class DirtProperties(NamedTuple):
|
||||||
clean_amount: int = 1 # How much does the robot clean with one actions.
|
clean_amount: int = 1 # How much does the robot clean with one actions.
|
||||||
max_spawn_ratio: float = 0.2 # On max how much tiles does the dirt spawn in percent.
|
max_spawn_ratio: float = 0.2 # On max how much tiles does the dirt spawn in percent.
|
||||||
@ -33,7 +27,6 @@ class DirtProperties(NamedTuple):
|
|||||||
max_global_amount: int = 20 # Max dirt amount in the whole environment.
|
max_global_amount: int = 20 # Max dirt amount in the whole environment.
|
||||||
dirt_smear_amount: float = 0.2 # Agents smear dirt, when not cleaning up in place.
|
dirt_smear_amount: float = 0.2 # Agents smear dirt, when not cleaning up in place.
|
||||||
agent_can_interact: bool = True # Whether the agents can interact with the dirt in this environment.
|
agent_can_interact: bool = True # Whether the agents can interact with the dirt in this environment.
|
||||||
on_obs_slice: Enum = ObsSlice.LEVEL
|
|
||||||
|
|
||||||
|
|
||||||
class Dirt(Entity):
|
class Dirt(Entity):
|
||||||
@ -217,16 +210,13 @@ class SimpleFactory(BaseFactory):
|
|||||||
info_dict.update(dirty_tile_count=dirty_tile_count)
|
info_dict.update(dirty_tile_count=dirty_tile_count)
|
||||||
info_dict.update(dirt_distribution_score=dirt_distribution_score)
|
info_dict.update(dirt_distribution_score=dirt_distribution_score)
|
||||||
|
|
||||||
if agent.temp_collisions:
|
|
||||||
self.print(f't = {self._steps}\t{agent.name} has collisions with {agent.temp_collisions}')
|
|
||||||
|
|
||||||
if agent.temp_action == CLEAN_UP_ACTION:
|
if agent.temp_action == CLEAN_UP_ACTION:
|
||||||
if agent.temp_valid:
|
if agent.temp_valid:
|
||||||
reward += 0.5
|
reward += 0.5
|
||||||
self.print(f'{agent.name} did just clean up some dirt at {agent.pos}.')
|
self.print(f'{agent.name} did just clean up some dirt at {agent.pos}.')
|
||||||
info_dict.update(dirt_cleaned=1)
|
info_dict.update(dirt_cleaned=1)
|
||||||
else:
|
else:
|
||||||
reward -= 0.01
|
reward -= 0.00
|
||||||
self.print(f'{agent.name} just tried to clean up some dirt at {agent.pos}, but failed.')
|
self.print(f'{agent.name} just tried to clean up some dirt at {agent.pos}, but failed.')
|
||||||
info_dict.update({f'{agent.name}_failed_action': 1})
|
info_dict.update({f'{agent.name}_failed_action': 1})
|
||||||
info_dict.update({f'{agent.name}_failed_action': 1})
|
info_dict.update({f'{agent.name}_failed_action': 1})
|
||||||
@ -244,9 +234,9 @@ if __name__ == '__main__':
|
|||||||
move_props = MovementProperties(True, True, False)
|
move_props = MovementProperties(True, True, False)
|
||||||
|
|
||||||
factory = SimpleFactory(n_agents=1, done_at_collision=False, frames_to_stack=0,
|
factory = SimpleFactory(n_agents=1, done_at_collision=False, frames_to_stack=0,
|
||||||
level_name='rooms', max_steps=400,
|
level_name='rooms', max_steps=400, combin_agent_obs=True,
|
||||||
omit_agent_slice_in_obs=True, parse_doors=True, pomdp_r=2,
|
omit_agent_in_obs=True, parse_doors=True, pomdp_r=2,
|
||||||
record_episodes=False, verbose=False
|
record_episodes=False, verbose=True
|
||||||
)
|
)
|
||||||
|
|
||||||
# noinspection DuplicatedCode
|
# noinspection DuplicatedCode
|
||||||
|
7
main.py
7
main.py
@ -115,8 +115,8 @@ if __name__ == '__main__':
|
|||||||
movement_properties=move_props,
|
movement_properties=move_props,
|
||||||
pomdp_radius=2, max_steps=500, parse_doors=True,
|
pomdp_radius=2, max_steps=500, parse_doors=True,
|
||||||
level_name='rooms', frames_to_stack=3,
|
level_name='rooms', frames_to_stack=3,
|
||||||
omit_agent_slice_in_obs=True, combin_agent_slices_in_obs=True, record_episodes=False,
|
omit_agent_in_obs=True, combin_agent_slices_in_obs=True, record_episodes=False,
|
||||||
cast_shadows=True, doors_have_area=False, seed=seed, verbose=True,
|
cast_shadows=True, doors_have_area=False, seed=seed, verbose=False,
|
||||||
) as env:
|
) as env:
|
||||||
|
|
||||||
if modeL_type.__name__ in ["PPO", "A2C"]:
|
if modeL_type.__name__ in ["PPO", "A2C"]:
|
||||||
@ -151,8 +151,11 @@ if __name__ == '__main__':
|
|||||||
save_path.parent.mkdir(parents=True, exist_ok=True)
|
save_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
model.save(save_path)
|
model.save(save_path)
|
||||||
env.save_params(out_path.parent / f'env_{model.__class__.__name__}_{time_stamp}.yaml')
|
env.save_params(out_path.parent / f'env_{model.__class__.__name__}_{time_stamp}.yaml')
|
||||||
|
print("Model Trained and saved")
|
||||||
|
print("Model Group Done.. Plotting...")
|
||||||
|
|
||||||
if out_path:
|
if out_path:
|
||||||
combine_runs(out_path.parent)
|
combine_runs(out_path.parent)
|
||||||
|
print("All Models Done... Evaluating")
|
||||||
if out_path:
|
if out_path:
|
||||||
compare_runs(Path('debug_out'), time_stamp, 'step_reward')
|
compare_runs(Path('debug_out'), time_stamp, 'step_reward')
|
||||||
|
@ -15,7 +15,7 @@ warnings.filterwarnings('ignore', category=UserWarning)
|
|||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
model_name = 'A2C_1629467677'
|
model_name = 'A2C_1630073286'
|
||||||
run_id = 0
|
run_id = 0
|
||||||
out_path = Path(__file__).parent / 'debug_out'
|
out_path = Path(__file__).parent / 'debug_out'
|
||||||
model_path = out_path / model_name
|
model_path = out_path / model_name
|
||||||
@ -27,7 +27,7 @@ if __name__ == '__main__':
|
|||||||
max_local_amount=1, spawn_frequency=5, max_spawn_ratio=0.05,
|
max_local_amount=1, spawn_frequency=5, max_spawn_ratio=0.05,
|
||||||
dirt_smear_amount=0.5),
|
dirt_smear_amount=0.5),
|
||||||
combin_agent_slices_in_obs=True, omit_agent_slice_in_obs=True)
|
combin_agent_slices_in_obs=True, omit_agent_slice_in_obs=True)
|
||||||
with DoubleTaskFactory(**env_kwargs) as env:
|
with SimpleFactory(**env_kwargs) as env:
|
||||||
|
|
||||||
# Edit THIS:
|
# Edit THIS:
|
||||||
model_files = list(natsorted((model_path / f'{run_id}_{model_name}').rglob('model_*.zip')))
|
model_files = list(natsorted((model_path / f'{run_id}_{model_name}').rglob('model_*.zip')))
|
||||||
|
Reference in New Issue
Block a user