Results resolved. Small adjustments

This commit is contained in:
Steffen Illium 2023-11-16 15:29:07 +01:00
parent cb76972a5f
commit c3c434a97e
17 changed files with 236 additions and 93 deletions

View File

@ -46,7 +46,7 @@ class LoopMAPPO(LoopSNAC):
# monte carlo returns # monte carlo returns
mc_returns = self.monte_carlo_returns(batch[nms.REWARD], batch[nms.DONE], gamma) mc_returns = self.monte_carlo_returns(batch[nms.REWARD], batch[nms.DONE], gamma)
mc_returns = (mc_returns - mc_returns.mean()) / (mc_returns.std() + 1e-8) # todo: norm across agent ok? mc_returns = (mc_returns - mc_returns.mean()) / (mc_returns.std() + 1e-8) # todo: norm across agent ok?
advantages = mc_returns - out[nms.CRITIC][:, :-1] advantages = mc_returns - out[nms.CRITIC][:, :-1]
# policy loss # policy loss
log_ap = torch.log_softmax(logits, -1) log_ap = torch.log_softmax(logits, -1)

View File

@ -0,0 +1,66 @@
General:
env_seed: 69
individual_rewards: true
level_name: obs_test_map
pomdp_r: 0
verbose: True
tests: false
Agents:
Wolfgang:
Actions:
- Noop
Observations:
- Walls
- Doors
- Other
- DirtPiles
Positions:
- (1, 3)
Soeren:
Actions:
- Noop
Observations:
- Walls
- Doors
- Other
- DirtPiles
Positions:
- (1, 1)
Juergen:
Actions:
- Noop
Observations:
- Walls
- Doors
- Other
- DirtPiles
Positions:
- (1, 2)
Walter:
Actions:
- Noop
Observations:
- Walls
- Doors
- Other
- DirtPiles
Positions:
- (1, 4)
Entities:
DirtPiles:
Doors:
Rules:
# Utilities
WatchCollisions:
done_at_collisions: false
# Done Conditions
DoneAtMaxStepsReached:
max_steps: 500

View File

@ -2,7 +2,7 @@ Agents:
Wolfgang: Wolfgang:
Actions: Actions:
- Noop - Noop
- BtryCharge - Charge
- CleanUp - CleanUp
- DestAction - DestAction
- DoorUse - DoorUse

View File

@ -1,8 +1,20 @@
General:
env_seed: 69
individual_rewards: true
level_name: eight_puzzle
pomdp_r: 0
verbose: True
tests: false
Agents: Agents:
Wolfgang: Wolfgang:
Actions: Actions:
- Noop Noop:
- Move4 fail_reward: -0
valid_reward: 0
Move4:
  # Rewards applied to every movement action of this agent.
  fail_reward: -0.1
  # Keep the leading zero: PyYAML's float resolver does not match "-.01"
  # (sign without an integer digit) and would load it as a string.
  valid_reward: -0.01
Observations: Observations:
- Other - Other
- Walls - Walls
@ -35,13 +47,6 @@ Entities:
Walter: 1 Walter: 1
Siggi: 1 Siggi: 1
Dennis: 1 Dennis: 1
General:
env_seed: 69
individual_rewards: true
level_name: eight_puzzle
pomdp_r: 3
verbose: True
tests: false
Rules: Rules:
# Utilities # Utilities

View File

@ -81,9 +81,10 @@ Rules:
reward_at_done: -1 reward_at_done: -1
done_at_collisions: false done_at_collisions: false
# Done Conditions # Done Conditions
# Load any of the rules, to check for done conditions. # Load any of the rules, to check for done conditions.
# DoneAtDestinationReachAny: DoneAtDestinationReach:
DoneAtDestinationReachAll: reward_at_done: 1
# reward_at_done: 1 # We want to give rewards only, when all targets have been reached.
condition: "all"
DoneAtMaxStepsReached: DoneAtMaxStepsReached:
max_steps: 200 max_steps: 200

View File

@ -1,4 +1,5 @@
import abc import abc
import random
from typing import Union from typing import Union
from marl_factory_grid.environment import rewards as r, constants as c from marl_factory_grid.environment import rewards as r, constants as c
@ -13,45 +14,50 @@ class Action(abc.ABC):
return self._identifier return self._identifier
@abc.abstractmethod @abc.abstractmethod
def __init__(self, identifier: str): def __init__(self, identifier: str, default_valid_reward: float, default_fail_reward: float,
valid_reward: float | None = None, fail_reward: float | None = None):
self.fail_reward = fail_reward if fail_reward is not None else default_fail_reward
self.valid_reward = valid_reward if valid_reward is not None else default_valid_reward
self._identifier = identifier self._identifier = identifier
@abc.abstractmethod @abc.abstractmethod
def do(self, entity, state) -> Union[None, ActionResult]: def do(self, entity, state) -> Union[None, ActionResult]:
print() validity = bool(random.choice([0, 1]))
return return self.get_result(validity, entity)
def __repr__(self): def __repr__(self):
return f'Action[{self._identifier}]' return f'Action[{self._identifier}]'
def get_result(self, validity, entity):
    """
    Wrap the outcome of this action in an ActionResult.

    :param validity: Truthy when the action succeeded; selects which reward is used.
    :param entity: The entity that performed the action.
    :return: ActionResult labelled with this action's class name, carrying ``valid_reward``
             on success and ``fail_reward`` otherwise.
    """
    if validity:
        outcome_reward = self.valid_reward
    else:
        outcome_reward = self.fail_reward
    action_name = self.__class__.__name__
    return ActionResult(action_name, validity, reward=outcome_reward, entity=entity)
class Noop(Action): class Noop(Action):
def __init__(self): def __init__(self, **kwargs):
super().__init__(c.NOOP) super().__init__(c.NOOP, r.NOOP, r.NOOP, **kwargs)
def do(self, entity, *_) -> Union[None, ActionResult]: def do(self, entity, *_) -> Union[None, ActionResult]:
return ActionResult(identifier=self._identifier, validity=c.VALID, return self.get_result(c.VALID, entity)
reward=r.NOOP, entity=entity)
class Move(Action, abc.ABC): class Move(Action, abc.ABC):
@abc.abstractmethod @abc.abstractmethod
def __init__(self, *args, **kwargs): def __init__(self, identifier, **kwargs):
super().__init__(*args, **kwargs) super().__init__(identifier, r.MOVEMENTS_VALID, r.MOVEMENTS_FAIL, **kwargs)
def do(self, entity, state): def do(self, entity, state):
new_pos = self._calc_new_pos(entity.pos) new_pos = self._calc_new_pos(entity.pos)
if state.check_move_validity(entity, new_pos): if state.check_move_validity(entity, new_pos):
# noinspection PyUnresolvedReferences # noinspection PyUnresolvedReferences
move_validity = entity.move(new_pos, state) move_really_was_valid = entity.move(new_pos, state)
reward = r.MOVEMENTS_VALID if move_validity else r.MOVEMENTS_FAIL return self.get_result(move_really_was_valid, entity)
return ActionResult(entity=entity, identifier=self._identifier, validity=move_validity, reward=reward)
else: # There is no place to go, propably collision else: # There is no place to go, propably collision
# This is currently handeld by the WatchCollisions rule, so that it can be switched on and off by conf.yml # This is currently handeld by the WatchCollisions rule, so that it can be switched on and off by conf.yml
# return ActionResult(entity=entity, identifier=self._identifier, validity=c.NOT_VALID, reward=r.COLLISION) # return ActionResult(entity=entity, identifier=self._identifier, validity=c.NOT_VALID, reward=r.COLLISION)
return ActionResult(entity=entity, identifier=self._identifier, validity=c.NOT_VALID) return self.get_result(c.NOT_VALID, entity)
def _calc_new_pos(self, pos): def _calc_new_pos(self, pos):
x_diff, y_diff = MOVEMAP[self._identifier] x_diff, y_diff = MOVEMAP[self._identifier]
@ -59,43 +65,43 @@ class Move(Action, abc.ABC):
class North(Move): class North(Move):
def __init__(self, *args, **kwargs): def __init__(self, **kwargs):
super().__init__(c.NORTH, *args, **kwargs) super().__init__(c.NORTH, **kwargs)
class NorthEast(Move): class NorthEast(Move):
def __init__(self, *args, **kwargs): def __init__(self, **kwargs):
super().__init__(c.NORTHEAST, *args, **kwargs) super().__init__(c.NORTHEAST, **kwargs)
class East(Move): class East(Move):
def __init__(self, *args, **kwargs): def __init__(self, **kwargs):
super().__init__(c.EAST, *args, **kwargs) super().__init__(c.EAST, **kwargs)
class SouthEast(Move): class SouthEast(Move):
def __init__(self, *args, **kwargs): def __init__(self, **kwargs):
super().__init__(c.SOUTHEAST, *args, **kwargs) super().__init__(c.SOUTHEAST, **kwargs)
class South(Move): class South(Move):
def __init__(self, *args, **kwargs): def __init__(self, **kwargs):
super().__init__(c.SOUTH, *args, **kwargs) super().__init__(c.SOUTH, **kwargs)
class SouthWest(Move): class SouthWest(Move):
def __init__(self, *args, **kwargs): def __init__(self, **kwargs):
super().__init__(c.SOUTHWEST, *args, **kwargs) super().__init__(c.SOUTHWEST, **kwargs)
class West(Move): class West(Move):
def __init__(self, *args, **kwargs): def __init__(self, **kwargs):
super().__init__(c.WEST, *args, **kwargs) super().__init__(c.WEST, **kwargs)
class NorthWest(Move): class NorthWest(Move):
def __init__(self, *args, **kwargs): def __init__(self, **kwargs):
super().__init__(c.NORTHWEST, *args, **kwargs) super().__init__(c.NORTHWEST, **kwargs)
Move4 = [North, East, South, West] Move4 = [North, East, South, West]

View File

@ -0,0 +1,12 @@
############
#----------#
#-#######--#
#-#-----D--#
#-#######--#
#-D-----D--#
#-#-#-#-#-##
#----------#
#----------#
#----------#
#----------#
############

View File

@ -1,4 +1,4 @@
from .actions import BtryCharge from .actions import Charge
from .entitites import ChargePod, Battery from .entitites import ChargePod, Battery
from .groups import ChargePods, Batteries from .groups import ChargePods, Batteries
from .rules import DoneAtBatteryDischarge, BatteryDecharge from .rules import DoneAtBatteryDischarge, BatteryDecharge

View File

@ -8,10 +8,10 @@ from marl_factory_grid.environment import constants as c
from marl_factory_grid.utils import helpers as h from marl_factory_grid.utils import helpers as h
class BtryCharge(Action): class Charge(Action):
def __init__(self): def __init__(self):
super().__init__(b.ACTION_CHARGE) super().__init__(b.ACTION_CHARGE, b.REWARD_CHARGE_VALID, b.Reward_CHARGE_FAIL)
def do(self, entity, state) -> Union[None, ActionResult]: def do(self, entity, state) -> Union[None, ActionResult]:
if charge_pod := h.get_first(state[b.CHARGE_PODS].by_pos(entity.pos)): if charge_pod := h.get_first(state[b.CHARGE_PODS].by_pos(entity.pos)):
@ -24,5 +24,4 @@ class BtryCharge(Action):
valid = c.NOT_VALID valid = c.NOT_VALID
state.print(f'{entity.name} failed to charged batteries at {entity.pos}.') state.print(f'{entity.name} failed to charged batteries at {entity.pos}.')
return ActionResult(entity=entity, identifier=self._identifier, validity=valid, return self.get_result(valid, entity)
reward=b.REWARD_CHARGE_VALID if valid else b.Reward_CHARGE_FAIL)

View File

@ -11,7 +11,7 @@ from marl_factory_grid.environment import constants as c
class CleanUp(Action): class CleanUp(Action):
def __init__(self): def __init__(self):
super().__init__(d.CLEAN_UP) super().__init__(d.CLEAN_UP, r.CLEAN_UP_VALID, r.CLEAN_UP_FAIL)
def do(self, entity, state) -> Union[None, ActionResult]: def do(self, entity, state) -> Union[None, ActionResult]:
if dirt := next((x for x in state.entities.pos_dict[entity.pos] if "dirt" in x.name.lower()), None): if dirt := next((x for x in state.entities.pos_dict[entity.pos] if "dirt" in x.name.lower()), None):
@ -24,13 +24,10 @@ class CleanUp(Action):
valid = c.VALID valid = c.VALID
print_str = f'{entity.name} did just clean up some dirt at {entity.pos}.' print_str = f'{entity.name} did just clean up some dirt at {entity.pos}.'
state.print(print_str) state.print(print_str)
reward = r.CLEAN_UP_VALID
identifier = d.CLEAN_UP
else: else:
valid = c.NOT_VALID valid = c.NOT_VALID
print_str = f'{entity.name} just tried to clean up some dirt at {entity.pos}, but failed.' print_str = f'{entity.name} just tried to clean up some dirt at {entity.pos}, but failed.'
state.print(print_str) state.print(print_str)
reward = r.CLEAN_UP_FAIL
identifier = d.CLEAN_UP_FAIL
return ActionResult(identifier=identifier, validity=valid, reward=reward, entity=entity) return self.get_result(valid, entity)

View File

@ -11,7 +11,7 @@ from marl_factory_grid.environment import constants as c
class DestAction(Action): class DestAction(Action):
def __init__(self): def __init__(self):
super().__init__(d.DESTINATION) super().__init__(d.DESTINATION, d.REWARD_WAIT_VALID, d.REWARD_WAIT_FAIL)
def do(self, entity, state) -> Union[None, ActionResult]: def do(self, entity, state) -> Union[None, ActionResult]:
if destination := state[d.DESTINATION].by_pos(entity.pos): if destination := state[d.DESTINATION].by_pos(entity.pos):
@ -20,5 +20,4 @@ class DestAction(Action):
else: else:
valid = c.NOT_VALID valid = c.NOT_VALID
state.print(f'{entity.name} just tried to do_wait_action do_wait_action at {entity.pos} but failed') state.print(f'{entity.name} just tried to do_wait_action do_wait_action at {entity.pos} but failed')
return ActionResult(entity=entity, identifier=self._identifier, validity=valid, return self.get_result(valid, entity)
reward=d.REWARD_WAIT_VALID if valid else d.REWARD_WAIT_FAIL)

View File

@ -1,6 +1,7 @@
from typing import Union from typing import Union
from marl_factory_grid.environment.actions import Action from marl_factory_grid.environment.actions import Action
from marl_factory_grid.modules.doors.entitites import Door
from marl_factory_grid.modules.doors import constants as d, rewards as r from marl_factory_grid.modules.doors import constants as d, rewards as r
from marl_factory_grid.environment import constants as c from marl_factory_grid.environment import constants as c
from marl_factory_grid.utils.results import ActionResult from marl_factory_grid.utils.results import ActionResult
@ -8,21 +9,23 @@ from marl_factory_grid.utils.results import ActionResult
class DoorUse(Action): class DoorUse(Action):
def __init__(self): def __init__(self, **kwargs):
super().__init__(d.ACTION_DOOR_USE) super().__init__(d.ACTION_DOOR_USE, r.USE_DOOR_VALID, r.USE_DOOR_FAIL, **kwargs)
def do(self, entity, state) -> Union[None, ActionResult]: def do(self, entity, state) -> Union[None, ActionResult]:
# Check if agent really is standing on a door: # Check if agent really is standing on a door:
e = state.entities.get_entities_near_pos(entity.pos) entities_close = state.entities.get_entities_near_pos(entity.pos)
try:
# Only one door opens TODO introduce loop
door = next(x for x in e if x.name.startswith(d.DOOR))
valid = door.use()
state.print(f'{entity.name} just used a {door.name} at {door.pos}')
return ActionResult(entity=entity, identifier=self._identifier, validity=valid, reward=r.USE_DOOR_VALID)
except StopIteration: valid = False
# When he doesn't... for door in [e for e in entities_close if isinstance(e, Door)]:
try:
# Will always be true, when there is at least a single door.
valid = door.use()
state.print(f'{entity.name} just used a {door.name} at {door.pos}')
except AttributeError:
pass
if not valid:
# When the agent does not stand next to a door, report it.
state.print(f'{entity.name} just tried to use a door at {entity.pos}, but there is none.') state.print(f'{entity.name} just tried to use a door at {entity.pos}, but there is none.')
return ActionResult(entity=entity, identifier=self._identifier, return self.get_result(valid, entity)
validity=c.NOT_VALID, reward=r.USE_DOOR_FAIL)

View File

@ -9,8 +9,14 @@ from marl_factory_grid.environment import constants as c
class ItemAction(Action): class ItemAction(Action):
def __init__(self, failed_dropoff_reward: float | None = None, valid_dropoff_reward: float | None = None, **kwargs):
    """
    Set up the item action with separate pick-up and drop-off rewards.

    :param failed_dropoff_reward: Reward for a failed drop-off; ``r.DROP_OFF_FAIL`` is used when ``None``.
    :param valid_dropoff_reward: Reward for a successful drop-off; ``r.DROP_OFF_VALID`` is used when ``None``.
    :param kwargs: Forwarded to ``Action.__init__`` (``valid_reward`` / ``fail_reward`` override the
                   pick-up rewards).
    """
    # Action.__init__ takes (identifier, default_valid_reward, default_fail_reward): valid comes first.
    super().__init__(i.ITEM_ACTION, r.PICK_UP_VALID, r.PICK_UP_FAIL, **kwargs)
    self.failed_drop_off_reward = failed_dropoff_reward if failed_dropoff_reward is not None else r.DROP_OFF_FAIL
    # A successful drop-off must default to the VALID reward, not the FAIL one.
    self.valid_drop_off_reward = valid_dropoff_reward if valid_dropoff_reward is not None else r.DROP_OFF_VALID
def get_dropoff_result(self, validity, entity):
    """
    Wrap the outcome of a drop-off attempt in an ActionResult.

    :param validity: Truthy when the drop-off succeeded; selects which drop-off reward is used.
    :param entity: The entity that attempted the drop-off.
    :return: ActionResult labelled with this action's class name, carrying ``valid_drop_off_reward``
             on success and ``failed_drop_off_reward`` otherwise.
    """
    reward = self.valid_drop_off_reward if validity else self.failed_drop_off_reward
    # Instances have no ``__name__`` attribute (only the class does), so ``self.__name__`` raises
    # AttributeError; use the class name, as Action.get_result does.
    return ActionResult(self.__class__.__name__, validity, reward=reward, entity=entity)
def do(self, entity, state) -> Union[None, ActionResult]: def do(self, entity, state) -> Union[None, ActionResult]:
inventory = state[i.INVENTORY].by_entity(entity) inventory = state[i.INVENTORY].by_entity(entity)
@ -23,16 +29,15 @@ class ItemAction(Action):
state.print(f'{entity.name} just dropped of an item at {drop_off.pos}.') state.print(f'{entity.name} just dropped of an item at {drop_off.pos}.')
else: else:
state.print(f'{entity.name} just tried to drop off at {entity.pos}, but failed.') state.print(f'{entity.name} just tried to drop off at {entity.pos}, but failed.')
reward = r.DROP_OFF_VALID if valid else r.DROP_OFF_FAIL return self.get_dropoff_result(valid, entity)
return ActionResult(entity=entity, identifier=self._identifier, validity=valid, reward=reward)
elif items := state[i.ITEM].by_pos(entity.pos): elif items := state[i.ITEM].by_pos(entity.pos):
item = items[0] item = items[0]
item.change_parent_collection(inventory) item.change_parent_collection(inventory)
item.set_pos(c.VALUE_NO_POS) item.set_pos(c.VALUE_NO_POS)
state.print(f'{entity.name} just picked up an item at {entity.pos}') state.print(f'{entity.name} just picked up an item at {entity.pos}')
return ActionResult(entity=entity, identifier=self._identifier, validity=c.VALID, reward=r.PICK_UP_VALID) return self.get_result(c.VALID, entity)
else: else:
state.print(f'{entity.name} just tried to pick up an item at {entity.pos}, but failed.') state.print(f'{entity.name} just tried to pick up an item at {entity.pos}, but failed.')
return ActionResult(entity=entity, identifier=self._identifier, validity=c.NOT_VALID, reward=r.PICK_UP_FAIL) return self.get_result(c.NOT_VALID, entity)

View File

@ -12,15 +12,12 @@ from marl_factory_grid.utils import helpers as h
class MachineAction(Action): class MachineAction(Action):
def __init__(self): def __init__(self):
super().__init__(m.MACHINE_ACTION) super().__init__(m.MACHINE_ACTION, m.MAINTAIN_VALID, m.MAINTAIN_FAIL)
def do(self, entity, state) -> Union[None, ActionResult]: def do(self, entity, state) -> Union[None, ActionResult]:
if machine := h.get_first(state[m.MACHINES].by_pos(entity.pos)): if machine := h.get_first(state[m.MACHINES].by_pos(entity.pos)):
if valid := machine.maintain(): valid = machine.maintain()
return ActionResult(entity=entity, identifier=self._identifier, validity=valid, reward=marl_factory_grid.modules.machines.constants.MAINTAIN_VALID) return self.get_result(valid, entity)
else:
return ActionResult(entity=entity, identifier=self._identifier, validity=valid, reward=marl_factory_grid.modules.machines.constants.MAINTAIN_FAIL)
else: else:
return ActionResult(entity=entity, identifier=self._identifier, return self.get_result(c.NOT_VALID, entity)
validity=c.NOT_VALID, reward=marl_factory_grid.modules.machines.constants.MAINTAIN_FAIL
)

View File

@ -122,16 +122,28 @@ class FactoryConfigParser(object):
def parse_agents_conf(self): def parse_agents_conf(self):
parsed_agents_conf = dict() parsed_agents_conf = dict()
base_env_actions = self.default_actions.copy() + [c.MOVE4]
for name in self.agents: for name in self.agents:
# Actions # Actions
conf_actions = self.agents[name]['Actions']
actions = list() actions = list()
if c.DEFAULTS in self.agents[name]['Actions']:
actions.extend(self.default_actions) if isinstance(conf_actions, dict):
actions.extend(x for x in self.agents[name]['Actions'] if x != c.DEFAULTS) conf_kwargs = conf_actions.copy()
conf_actions = list(conf_actions.keys())
elif isinstance(conf_actions, list):
conf_kwargs = {}
if isinstance(conf_actions, dict):
raise ValueError
pass
for action in conf_actions:
if action == c.DEFAULTS:
actions.extend(self.default_actions)
else:
actions.append(action)
parsed_actions = list() parsed_actions = list()
for action in actions: for action in actions:
folder_path = MODULE_PATH if action not in base_env_actions else DEFAULT_PATH folder_path = MODULE_PATH if action not in [c.MOVE8, c.NOOP, c.MOVE4] else DEFAULT_PATH
folder_path = Path(__file__).parent.parent / folder_path folder_path = Path(__file__).parent.parent / folder_path
try: try:
class_or_classes = locate_and_import_class(action, folder_path) class_or_classes = locate_and_import_class(action, folder_path)
@ -142,7 +154,7 @@ class FactoryConfigParser(object):
except TypeError: except TypeError:
parsed_actions.append(class_or_classes) parsed_actions.append(class_or_classes)
parsed_actions = [x() for x in parsed_actions] parsed_actions = [x(**conf_kwargs.get(x, {})) for x in parsed_actions]
# Observation # Observation
observations = list() observations = list()

View File

@ -224,8 +224,8 @@ def locate_and_import_class(class_name, folder_path: Union[str, PurePath] = ''):
and x not in ['Entity', 'NamedTuple', 'List', 'Rule', 'Union', and x not in ['Entity', 'NamedTuple', 'List', 'Rule', 'Union',
'TickResult', 'ActionResult', 'Action', 'Agent', 'TickResult', 'ActionResult', 'Action', 'Agent',
'RenderEntity', 'TemplateRule', 'Objects', 'PositionMixin', 'RenderEntity', 'TemplateRule', 'Objects', 'PositionMixin',
'IsBoundMixin', 'EnvObject', 'EnvObjects', 'Dict', 'Any' 'IsBoundMixin', 'EnvObject', 'EnvObjects', 'Dict', 'Any', 'Factory',
]]) 'Move8']])
try: try:
model_class = mod.__getattribute__(class_name) model_class = mod.__getattribute__(class_name)
return model_class return model_class

41
test_observations.py Normal file
View File

@ -0,0 +1,41 @@
from pathlib import Path
from random import randint
from tqdm import trange
from marl_factory_grid.environment.factory import Factory
from marl_factory_grid.utils.logging.envmonitor import EnvMonitor
from marl_factory_grid.utils.logging.recorder import EnvRecorder
from marl_factory_grid.utils.plotting.plot_single_runs import plot_single_run
from marl_factory_grid.utils.tools import ConfigExplainer
if __name__ == '__main__':
    # Toggle rendering of the environment after every step.
    render = True

    run_path = Path('study_out')

    # Location of the environment configuration file.
    path = Path('marl_factory_grid/configs/_obs_test.yaml')

    # Build the environment from the configuration.
    factory = Factory(path)

    # Roll out episodes with uniformly random actions.
    for episode in trange(10):
        _ = factory.reset()
        done = False
        if render:
            factory.render()
        action_spaces = factory.action_space
        # ``done`` starts out False, so the loop body always runs at least once.
        while True:
            # One random action index per agent, drawn from that agent's action space.
            a = [randint(0, space.n - 1) for space in action_spaces]
            obs_type, _, _, done, info = factory.step(a)
            if render:
                factory.render()
            if done:
                print(f'Episode {episode} done...')
                break
    print('Done!!! Goodbye....')