Results resolved. Small adjustments

This commit is contained in:
Steffen Illium 2023-11-16 15:29:07 +01:00
parent cb76972a5f
commit c3c434a97e
17 changed files with 236 additions and 93 deletions

View File

@ -46,7 +46,7 @@ class LoopMAPPO(LoopSNAC):
# monte carlo returns
mc_returns = self.monte_carlo_returns(batch[nms.REWARD], batch[nms.DONE], gamma)
mc_returns = (mc_returns - mc_returns.mean()) / (mc_returns.std() + 1e-8) # todo: norm across agent ok?
advantages = mc_returns - out[nms.CRITIC][:, :-1]
advantages = mc_returns - out[nms.CRITIC][:, :-1]
# policy loss
log_ap = torch.log_softmax(logits, -1)

View File

@ -0,0 +1,66 @@
General:
env_seed: 69
individual_rewards: true
level_name: obs_test_map
pomdp_r: 0
verbose: True
tests: false
Agents:
Wolfgang:
Actions:
- Noop
Observations:
- Walls
- Doors
- Other
- DirtPiles
Positions:
- (1, 3)
Soeren:
Actions:
- Noop
Observations:
- Walls
- Doors
- Other
- DirtPiles
Positions:
- (1, 1)
Juergen:
Actions:
- Noop
Observations:
- Walls
- Doors
- Other
- DirtPiles
Positions:
- (1, 2)
Walter:
Actions:
- Noop
Observations:
- Walls
- Doors
- Other
- DirtPiles
Positions:
- (1, 4)
Entities:
DirtPiles:
Doors:
Rules:
# Utilities
WatchCollisions:
done_at_collisions: false
# Done Conditions
DoneAtMaxStepsReached:
max_steps: 500

View File

@ -2,7 +2,7 @@ Agents:
Wolfgang:
Actions:
- Noop
- BtryCharge
- Charge
- CleanUp
- DestAction
- DoorUse

View File

@ -1,8 +1,20 @@
General:
env_seed: 69
individual_rewards: true
level_name: eight_puzzle
pomdp_r: 0
verbose: True
tests: false
Agents:
Wolfgang:
Actions:
- Noop
- Move4
Noop:
fail_reward: -0
valid_reward: 0
Move4:
fail_reward: -0.1
valid_reward: -.01
Observations:
- Other
- Walls
@ -35,13 +47,6 @@ Entities:
Walter: 1
Siggi: 1
Dennis: 1
General:
env_seed: 69
individual_rewards: true
level_name: eight_puzzle
pomdp_r: 3
verbose: True
tests: false
Rules:
# Utilities

View File

@ -81,9 +81,10 @@ Rules:
reward_at_done: -1
done_at_collisions: false
# Done Conditions
# Load any of the rules, to check for done conditions.
# DoneAtDestinationReachAny:
DoneAtDestinationReachAll:
# reward_at_done: 1
# Load any of the rules, to check for done conditions.
DoneAtDestinationReach:
reward_at_done: 1
# We want to give rewards only when all targets have been reached.
condition: "all"
DoneAtMaxStepsReached:
max_steps: 200

View File

@ -1,4 +1,5 @@
import abc
import random
from typing import Union
from marl_factory_grid.environment import rewards as r, constants as c
@ -13,45 +14,50 @@ class Action(abc.ABC):
return self._identifier
@abc.abstractmethod
def __init__(self, identifier: str):
def __init__(self, identifier: str, default_valid_reward: float, default_fail_reward: float,
             valid_reward: float | None = None, fail_reward: float | None = None):
    """Store the action identifier and resolve its rewards.

    :param identifier: Name stored as ``self._identifier``.
    :param default_valid_reward: Reward used for a valid action when ``valid_reward`` is None.
    :param default_fail_reward: Reward used for a failed action when ``fail_reward`` is None.
    :param valid_reward: Optional override for the valid-action reward.
    :param fail_reward: Optional override for the failed-action reward.
    """
    self._identifier = identifier
    # An explicit override of 0 must win over the default, hence the ``is None`` test.
    self.valid_reward = default_valid_reward if valid_reward is None else valid_reward
    self.fail_reward = default_fail_reward if fail_reward is None else fail_reward
@abc.abstractmethod
def do(self, entity, state) -> Union[None, ActionResult]:
print()
return
validity = bool(random.choice([0, 1]))
return self.get_result(validity, entity)
def __repr__(self):
return f'Action[{self._identifier}]'
def get_result(self, validity, entity):
    """Wrap the outcome of this action in an ``ActionResult``.

    The reward is ``self.valid_reward`` when ``validity`` is truthy and ``self.fail_reward``
    otherwise. The result is identified by the name of the concrete action class.
    """
    if validity:
        reward = self.valid_reward
    else:
        reward = self.fail_reward
    action_name = self.__class__.__name__
    return ActionResult(action_name, validity, reward=reward, entity=entity)
class Noop(Action):
def __init__(self):
super().__init__(c.NOOP)
def __init__(self, **kwargs):
super().__init__(c.NOOP, r.NOOP, r.NOOP, **kwargs)
def do(self, entity, *_) -> Union[None, ActionResult]:
return ActionResult(identifier=self._identifier, validity=c.VALID,
reward=r.NOOP, entity=entity)
return self.get_result(c.VALID, entity)
class Move(Action, abc.ABC):
@abc.abstractmethod
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def __init__(self, identifier, **kwargs):
super().__init__(identifier, r.MOVEMENTS_VALID, r.MOVEMENTS_FAIL, **kwargs)
def do(self, entity, state):
new_pos = self._calc_new_pos(entity.pos)
if state.check_move_validity(entity, new_pos):
# noinspection PyUnresolvedReferences
move_validity = entity.move(new_pos, state)
reward = r.MOVEMENTS_VALID if move_validity else r.MOVEMENTS_FAIL
return ActionResult(entity=entity, identifier=self._identifier, validity=move_validity, reward=reward)
move_really_was_valid = entity.move(new_pos, state)
return self.get_result(move_really_was_valid, entity)
else: # There is no place to go, propably collision
# This is currently handled by the WatchCollisions rule, so that it can be switched on and off by conf.yml
# return ActionResult(entity=entity, identifier=self._identifier, validity=c.NOT_VALID, reward=r.COLLISION)
return ActionResult(entity=entity, identifier=self._identifier, validity=c.NOT_VALID)
return self.get_result(c.NOT_VALID, entity)
def _calc_new_pos(self, pos):
x_diff, y_diff = MOVEMAP[self._identifier]
@ -59,43 +65,43 @@ class Move(Action, abc.ABC):
class North(Move):
def __init__(self, *args, **kwargs):
super().__init__(c.NORTH, *args, **kwargs)
def __init__(self, **kwargs):
super().__init__(c.NORTH, **kwargs)
class NorthEast(Move):
def __init__(self, *args, **kwargs):
super().__init__(c.NORTHEAST, *args, **kwargs)
def __init__(self, **kwargs):
super().__init__(c.NORTHEAST, **kwargs)
class East(Move):
def __init__(self, *args, **kwargs):
super().__init__(c.EAST, *args, **kwargs)
def __init__(self, **kwargs):
super().__init__(c.EAST, **kwargs)
class SouthEast(Move):
def __init__(self, *args, **kwargs):
super().__init__(c.SOUTHEAST, *args, **kwargs)
def __init__(self, **kwargs):
super().__init__(c.SOUTHEAST, **kwargs)
class South(Move):
def __init__(self, *args, **kwargs):
super().__init__(c.SOUTH, *args, **kwargs)
def __init__(self, **kwargs):
super().__init__(c.SOUTH, **kwargs)
class SouthWest(Move):
def __init__(self, *args, **kwargs):
super().__init__(c.SOUTHWEST, *args, **kwargs)
def __init__(self, **kwargs):
super().__init__(c.SOUTHWEST, **kwargs)
class West(Move):
def __init__(self, *args, **kwargs):
super().__init__(c.WEST, *args, **kwargs)
def __init__(self, **kwargs):
super().__init__(c.WEST, **kwargs)
class NorthWest(Move):
def __init__(self, *args, **kwargs):
super().__init__(c.NORTHWEST, *args, **kwargs)
def __init__(self, **kwargs):
super().__init__(c.NORTHWEST, **kwargs)
Move4 = [North, East, South, West]

View File

@ -0,0 +1,12 @@
############
#----------#
#-#######--#
#-#-----D--#
#-#######--#
#-D-----D--#
#-#-#-#-#-##
#----------#
#----------#
#----------#
#----------#
############

View File

@ -1,4 +1,4 @@
from .actions import BtryCharge
from .actions import Charge
from .entitites import ChargePod, Battery
from .groups import ChargePods, Batteries
from .rules import DoneAtBatteryDischarge, BatteryDecharge

View File

@ -8,10 +8,10 @@ from marl_factory_grid.environment import constants as c
from marl_factory_grid.utils import helpers as h
class BtryCharge(Action):
class Charge(Action):
def __init__(self):
super().__init__(b.ACTION_CHARGE)
super().__init__(b.ACTION_CHARGE, b.REWARD_CHARGE_VALID, b.Reward_CHARGE_FAIL)
def do(self, entity, state) -> Union[None, ActionResult]:
if charge_pod := h.get_first(state[b.CHARGE_PODS].by_pos(entity.pos)):
@ -24,5 +24,4 @@ class BtryCharge(Action):
valid = c.NOT_VALID
state.print(f'{entity.name} failed to charged batteries at {entity.pos}.')
return ActionResult(entity=entity, identifier=self._identifier, validity=valid,
reward=b.REWARD_CHARGE_VALID if valid else b.Reward_CHARGE_FAIL)
return self.get_result(valid, entity)

View File

@ -11,7 +11,7 @@ from marl_factory_grid.environment import constants as c
class CleanUp(Action):
def __init__(self):
super().__init__(d.CLEAN_UP)
super().__init__(d.CLEAN_UP, r.CLEAN_UP_VALID, r.CLEAN_UP_FAIL)
def do(self, entity, state) -> Union[None, ActionResult]:
if dirt := next((x for x in state.entities.pos_dict[entity.pos] if "dirt" in x.name.lower()), None):
@ -24,13 +24,10 @@ class CleanUp(Action):
valid = c.VALID
print_str = f'{entity.name} did just clean up some dirt at {entity.pos}.'
state.print(print_str)
reward = r.CLEAN_UP_VALID
identifier = d.CLEAN_UP
else:
valid = c.NOT_VALID
print_str = f'{entity.name} just tried to clean up some dirt at {entity.pos}, but failed.'
state.print(print_str)
reward = r.CLEAN_UP_FAIL
identifier = d.CLEAN_UP_FAIL
return ActionResult(identifier=identifier, validity=valid, reward=reward, entity=entity)
return self.get_result(valid, entity)

View File

@ -11,7 +11,7 @@ from marl_factory_grid.environment import constants as c
class DestAction(Action):
def __init__(self):
super().__init__(d.DESTINATION)
super().__init__(d.DESTINATION, d.REWARD_WAIT_VALID, d.REWARD_WAIT_FAIL)
def do(self, entity, state) -> Union[None, ActionResult]:
if destination := state[d.DESTINATION].by_pos(entity.pos):
@ -20,5 +20,4 @@ class DestAction(Action):
else:
valid = c.NOT_VALID
state.print(f'{entity.name} just tried to do_wait_action do_wait_action at {entity.pos} but failed')
return ActionResult(entity=entity, identifier=self._identifier, validity=valid,
reward=d.REWARD_WAIT_VALID if valid else d.REWARD_WAIT_FAIL)
return self.get_result(valid, entity)

View File

@ -1,6 +1,7 @@
from typing import Union
from marl_factory_grid.environment.actions import Action
from marl_factory_grid.modules.doors.entitites import Door
from marl_factory_grid.modules.doors import constants as d, rewards as r
from marl_factory_grid.environment import constants as c
from marl_factory_grid.utils.results import ActionResult
@ -8,21 +9,23 @@ from marl_factory_grid.utils.results import ActionResult
class DoorUse(Action):
def __init__(self):
super().__init__(d.ACTION_DOOR_USE)
def __init__(self, **kwargs):
super().__init__(d.ACTION_DOOR_USE, r.USE_DOOR_VALID, r.USE_DOOR_FAIL, **kwargs)
def do(self, entity, state) -> Union[None, ActionResult]:
# Check if agent really is standing on a door:
e = state.entities.get_entities_near_pos(entity.pos)
try:
# Only one door opens TODO introduce loop
door = next(x for x in e if x.name.startswith(d.DOOR))
valid = door.use()
state.print(f'{entity.name} just used a {door.name} at {door.pos}')
return ActionResult(entity=entity, identifier=self._identifier, validity=valid, reward=r.USE_DOOR_VALID)
entities_close = state.entities.get_entities_near_pos(entity.pos)
except StopIteration:
# When he doesn't...
valid = False
for door in [e for e in entities_close if isinstance(e, Door)]:
try:
# Will always be true, when there is at least a single door.
valid = door.use()
state.print(f'{entity.name} just used a {door.name} at {door.pos}')
except AttributeError:
pass
if not valid:
# When he doesn't stand next to a door, tell me.
state.print(f'{entity.name} just tried to use a door at {entity.pos}, but there is none.')
return ActionResult(entity=entity, identifier=self._identifier,
validity=c.NOT_VALID, reward=r.USE_DOOR_FAIL)
return self.get_result(valid, entity)

View File

@ -9,8 +9,14 @@ from marl_factory_grid.environment import constants as c
class ItemAction(Action):
def __init__(self):
super().__init__(i.ITEM_ACTION)
def __init__(self, failed_dropoff_reward: float | None = None, valid_dropoff_reward: float | None = None, **kwargs):
    """Set up the item action with its pick-up and drop-off rewards.

    :param failed_dropoff_reward: Reward for a failed drop-off; ``r.DROP_OFF_FAIL`` when None.
    :param valid_dropoff_reward: Reward for a successful drop-off; ``r.DROP_OFF_VALID`` when None.
    :param kwargs: Forwarded to ``Action.__init__`` (``valid_reward`` / ``fail_reward`` overrides,
                   which apply to the pick-up case).
    """
    # Action.__init__ takes (identifier, default_valid_reward, default_fail_reward) in this order;
    # passing FAIL before VALID would reward failed pick-ups as if they had succeeded.
    super().__init__(i.ITEM_ACTION, r.PICK_UP_VALID, r.PICK_UP_FAIL, **kwargs)
    self.failed_drop_off_reward = failed_dropoff_reward if failed_dropoff_reward is not None else r.DROP_OFF_FAIL
    # A successful drop-off must fall back to the VALID reward, not the FAIL one.
    self.valid_drop_off_reward = valid_dropoff_reward if valid_dropoff_reward is not None else r.DROP_OFF_VALID
def get_dropoff_result(self, validity, entity):
    """Build the ``ActionResult`` for a drop-off attempt.

    The reward is ``self.valid_drop_off_reward`` when ``validity`` is truthy and
    ``self.failed_drop_off_reward`` otherwise.
    """
    reward = self.valid_drop_off_reward if validity else self.failed_drop_off_reward
    # Instances do not have ``__name__`` (only the class does), so ``self.__name__`` raised
    # AttributeError; use the class name instead.
    return ActionResult(self.__class__.__name__, validity, reward=reward, entity=entity)
def do(self, entity, state) -> Union[None, ActionResult]:
inventory = state[i.INVENTORY].by_entity(entity)
@ -23,16 +29,15 @@ class ItemAction(Action):
state.print(f'{entity.name} just dropped of an item at {drop_off.pos}.')
else:
state.print(f'{entity.name} just tried to drop off at {entity.pos}, but failed.')
reward = r.DROP_OFF_VALID if valid else r.DROP_OFF_FAIL
return ActionResult(entity=entity, identifier=self._identifier, validity=valid, reward=reward)
return self.get_dropoff_result(valid, entity)
elif items := state[i.ITEM].by_pos(entity.pos):
item = items[0]
item.change_parent_collection(inventory)
item.set_pos(c.VALUE_NO_POS)
state.print(f'{entity.name} just picked up an item at {entity.pos}')
return ActionResult(entity=entity, identifier=self._identifier, validity=c.VALID, reward=r.PICK_UP_VALID)
return self.get_result(c.VALID, entity)
else:
state.print(f'{entity.name} just tried to pick up an item at {entity.pos}, but failed.')
return ActionResult(entity=entity, identifier=self._identifier, validity=c.NOT_VALID, reward=r.PICK_UP_FAIL)
return self.get_result(c.NOT_VALID, entity)

View File

@ -12,15 +12,12 @@ from marl_factory_grid.utils import helpers as h
class MachineAction(Action):
def __init__(self):
super().__init__(m.MACHINE_ACTION)
super().__init__(m.MACHINE_ACTION, m.MAINTAIN_VALID, m.MAINTAIN_FAIL)
def do(self, entity, state) -> Union[None, ActionResult]:
if machine := h.get_first(state[m.MACHINES].by_pos(entity.pos)):
if valid := machine.maintain():
return ActionResult(entity=entity, identifier=self._identifier, validity=valid, reward=marl_factory_grid.modules.machines.constants.MAINTAIN_VALID)
else:
return ActionResult(entity=entity, identifier=self._identifier, validity=valid, reward=marl_factory_grid.modules.machines.constants.MAINTAIN_FAIL)
valid = machine.maintain()
return self.get_result(valid, entity)
else:
return ActionResult(entity=entity, identifier=self._identifier,
validity=c.NOT_VALID, reward=marl_factory_grid.modules.machines.constants.MAINTAIN_FAIL
)
return self.get_result(c.NOT_VALID, entity)

View File

@ -122,16 +122,28 @@ class FactoryConfigParser(object):
def parse_agents_conf(self):
parsed_agents_conf = dict()
base_env_actions = self.default_actions.copy() + [c.MOVE4]
for name in self.agents:
# Actions
conf_actions = self.agents[name]['Actions']
actions = list()
if c.DEFAULTS in self.agents[name]['Actions']:
actions.extend(self.default_actions)
actions.extend(x for x in self.agents[name]['Actions'] if x != c.DEFAULTS)
if isinstance(conf_actions, dict):
conf_kwargs = conf_actions.copy()
conf_actions = list(conf_actions.keys())
elif isinstance(conf_actions, list):
conf_kwargs = {}
if isinstance(conf_actions, dict):
raise ValueError
pass
for action in conf_actions:
if action == c.DEFAULTS:
actions.extend(self.default_actions)
else:
actions.append(action)
parsed_actions = list()
for action in actions:
folder_path = MODULE_PATH if action not in base_env_actions else DEFAULT_PATH
folder_path = MODULE_PATH if action not in [c.MOVE8, c.NOOP, c.MOVE4] else DEFAULT_PATH
folder_path = Path(__file__).parent.parent / folder_path
try:
class_or_classes = locate_and_import_class(action, folder_path)
@ -142,7 +154,7 @@ class FactoryConfigParser(object):
except TypeError:
parsed_actions.append(class_or_classes)
parsed_actions = [x() for x in parsed_actions]
parsed_actions = [x(**conf_kwargs.get(x, {})) for x in parsed_actions]
# Observation
observations = list()

View File

@ -224,8 +224,8 @@ def locate_and_import_class(class_name, folder_path: Union[str, PurePath] = ''):
and x not in ['Entity', 'NamedTuple', 'List', 'Rule', 'Union',
'TickResult', 'ActionResult', 'Action', 'Agent',
'RenderEntity', 'TemplateRule', 'Objects', 'PositionMixin',
'IsBoundMixin', 'EnvObject', 'EnvObjects', 'Dict', 'Any'
]])
'IsBoundMixin', 'EnvObject', 'EnvObjects', 'Dict', 'Any', 'Factory',
'Move8']])
try:
model_class = mod.__getattribute__(class_name)
return model_class

41
test_observations.py Normal file
View File

@ -0,0 +1,41 @@
from pathlib import Path
from random import randint
from tqdm import trange
from marl_factory_grid.environment.factory import Factory
from marl_factory_grid.utils.logging.envmonitor import EnvMonitor
from marl_factory_grid.utils.logging.recorder import EnvRecorder
from marl_factory_grid.utils.plotting.plot_single_runs import plot_single_run
from marl_factory_grid.utils.tools import ConfigExplainer
if __name__ == '__main__':
    # Render the environment after every reset and step?
    render = True

    run_path = Path('study_out')

    # Path to the observation-test config file
    path = Path('marl_factory_grid/configs/_obs_test.yaml')

    # Build the environment from the config
    factory = Factory(path)

    # Run ten episodes, sampling a random action for every agent at each step
    for episode in trange(10):
        _ = factory.reset()
        done = False
        if render:
            factory.render()
        action_spaces = factory.action_space
        # The loop is entered at least once (done starts False) and only ends once the
        # environment reports done, so the episode message is printed right after it.
        while not done:
            actions = [randint(0, space.n - 1) for space in action_spaces]
            _, _, _, done, _ = factory.step(actions)
            if render:
                factory.render()
        print(f'Episode {episode} done...')

    print('Done!!! Goodbye....')