mirror of
https://github.com/illiumst/marl-factory-grid.git
synced 2025-09-18 00:21:58 +02:00
major redesign of observations and entities
@@ -1,100 +0,0 @@
import numpy as np

from networkx.algorithms.approximation import traveling_salesman as tsp

from environments.factory.base.objects import Agent
from environments.helpers import points_to_graph
from environments import helpers as h

from environments.helpers import Constants as BaseConstants
from environments.helpers import EnvActions as BaseActions


class Constants(BaseConstants):
    DIRT = 'DirtPile'


class Actions(BaseActions):
    CLEAN_UP = 'do_cleanup_action'


a = Actions
c = Constants

future_planning = 7


class TSPDirtAgent(Agent):

    def __init__(self, env, *args,
                 static_problem: bool = True, **kwargs):
        super().__init__(*args, **kwargs)
        self.static_problem = static_problem
        self.local_optimization = True
        self._env = env
        self._floortile_graph = points_to_graph(self._env[c.FLOOR].positions,
                                                allow_euclidean_connections=self._env._actions.allow_diagonal_movement,
                                                allow_manhattan_connections=self._env._actions.allow_square_movement)
        self._static_route = None

    def predict(self, *_, **__):
        if self._env[c.DIRT].by_pos(self.pos) is not None:
            # Translate the action_object to an integer to have the same output as any other model
            action = a.CLEAN_UP
        elif any('door' in x.name.lower() for x in self.tile.guests):
            door = next(x for x in self.tile.guests if 'door' in x.name.lower())
            if door.is_closed:
                # Translate the action_object to an integer to have the same output as any other model
                action = h.EnvActions.USE_DOOR
            else:
                action = self._predict_move()
        else:
            action = self._predict_move()
        # Translate the action_object to an integer to have the same output as any other model
        action_obj = next(action_i for action_name, action_i in self._env.named_action_space.items()
                          if action_name == action)
        return action_obj

    def _predict_move(self):
        if len(self._env[c.DIRT]) >= 1:
            if self.static_problem:
                if not self._static_route:
                    self._static_route = self.calculate_tsp_route()
                else:
                    pass
                next_pos = self._static_route.pop(0)
                while next_pos == self.pos:
                    next_pos = self._static_route.pop(0)
            else:
                if not self._static_route:
                    self._static_route = self.calculate_tsp_route()[:7]
                next_pos = self._static_route.pop(0)
                while next_pos == self.pos:
                    next_pos = self._static_route.pop(0)

            diff = np.subtract(next_pos, self.pos)
            # Retrieve the action based on the position diff (as in: what do I have to do to get there?)
            try:
                action = next(action for action, pos_diff in h.ACTIONMAP.items()
                              if (diff == pos_diff).all())
            except StopIteration:
                print('This should not happen!')
        else:
            action = int(np.random.randint(self._env.action_space.n))
        return action

    def calculate_tsp_route(self):
        if self.local_optimization:
            nodes = \
                [self.pos] + \
                [x for x in self._env[c.DIRT].positions if max(abs(np.subtract(x, self.pos))) < 3]
            try:
                while len(nodes) < 7:
                    nodes += [next(x for x in self._env[c.DIRT].positions if x not in nodes)]
            except StopIteration:
                nodes = [self.pos] + self._env[c.DIRT].positions
        else:
            nodes = [self.pos] + self._env[c.DIRT].positions
        route = tsp.traveling_salesman_problem(self._floortile_graph,
                                               nodes=nodes, cycle=True, method=tsp.greedy_tsp)
        return route
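Note: the route planning above reduces to a greedy TSP approximation over the floor-tile graph. A minimal self-contained sketch of the same idea, assuming only numpy and networkx (the 4-connected grid below is a hypothetical stand-in for the project's points_to_graph helper):

    import networkx as nx
    from networkx.algorithms.approximation import traveling_salesman as tsp

    # Build a small 4-connected "floor tile" graph (Manhattan connections only).
    positions = [(x, y) for x in range(4) for y in range(4)]
    graph = nx.Graph()
    graph.add_nodes_from(positions)
    for x, y in positions:
        for neighbor in ((x + 1, y), (x, y + 1)):
            if neighbor in graph:
                graph.add_edge((x, y), neighbor)

    # Visit the agent position plus all dirt positions; greedy_tsp approximates
    # the tour, and the returned route already contains the intermediate tiles.
    agent_pos, dirt_positions = (0, 0), [(3, 1), (1, 3), (2, 2)]
    route = tsp.traveling_salesman_problem(graph, nodes=[agent_pos] + dirt_positions,
                                           cycle=True, method=tsp.greedy_tsp)
    print(route)  # e.g. [(0, 0), (1, 0), (2, 0), (3, 0), (3, 1), ...]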
@@ -1,5 +1,5 @@
 import torch
-from typing import Union, List
+from typing import Union, List, Dict
 import numpy as np
 from torch.distributions import Categorical
 from algorithms.marl.memory import MARLActorCriticMemory
@@ -74,7 +74,7 @@ class BaseActorCritic:
         actions = [Categorical(logits=logits).sample().item() for logits in out[nms.LOGITS]]
         return actions
 
-    def init_hidden(self) -> dict[ListOrTensor]:
+    def init_hidden(self) -> Dict[str, ListOrTensor]:
         pass
 
     def forward(self,
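Note: this annotation fix matters beyond style: dict[ListOrTensor] leaves the key type unspecified, and subscripting the builtin dict in an annotation raises a TypeError at definition time on Python < 3.9, while typing.Dict works everywhere. A minimal sketch (names assumed from the surrounding class):

    from typing import Dict, List, Union
    import torch

    ListOrTensor = Union[List, torch.Tensor]

    # The annotation is evaluated when the function is defined;
    # dict[ListOrTensor] would crash right here on Python 3.8.
    def init_hidden() -> Dict[str, ListOrTensor]:
        return {'hidden_actor': torch.zeros(1, 3), 'hidden_critic': torch.zeros(1, 3)}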
@@ -82,7 +82,7 @@ class BaseActorCritic:
                 actions: ListOrTensor,
                 hidden_actor: ListOrTensor,
                 hidden_critic: ListOrTensor
-                ) -> dict[ListOrTensor]:
+                ) -> Dict[str, ListOrTensor]:
         pass
 
     @torch.no_grad()
@@ -39,7 +39,7 @@ class LoopIAC(BaseActorCritic):
     def forward(self, observations, actions, hidden_actor, hidden_critic):
         outputs = [
             net(
-                self._as_torch(observations[ag_i]).unsqueeze(0).unsqueeze(0),  # agents x time
+                self._as_torch(observations[ag_i]).unsqueeze(0).unsqueeze(0),  # agent x time
                 self._as_torch(actions[ag_i]).unsqueeze(0),
                 hidden_actor[ag_i],
                 hidden_critic[ag_i]
@@ -46,7 +46,7 @@ class LoopMAPPO(LoopSNAC):
 
         # monte carlo returns
         mc_returns = self.monte_carlo_returns(batch[nms.REWARD], batch[nms.DONE], gamma)
-        mc_returns = (mc_returns - mc_returns.mean()) / (mc_returns.std() + 1e-8)  # todo: norm across agents ok?
+        mc_returns = (mc_returns - mc_returns.mean()) / (mc_returns.std() + 1e-8)  # todo: norm across agent ok?
         advantages = mc_returns - out[nms.CRITIC][:, :-1]
 
         # policy loss
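Note: the normalized returns form the advantage baseline, and the in-code todo asks whether a single mean/std across all agents is appropriate. A sketch of the computation under that assumption (monte_carlo_returns below is an illustrative reimplementation, not the repository's):

    import torch

    def monte_carlo_returns(rewards, dones, gamma):
        # rewards, dones: [agents x time]; G_t = r_t + gamma * G_{t+1},
        # with the running return reset at episode boundaries (done == 1).
        returns = torch.zeros_like(rewards)
        running = torch.zeros(rewards.shape[0])
        for t in reversed(range(rewards.shape[1])):
            running = rewards[:, t] + gamma * (1 - dones[:, t]) * running
            returns[:, t] = running
        return returns

    rewards = torch.tensor([[0., 0., 1.], [0., 1., 0.]])
    dones = torch.tensor([[0., 0., 1.], [0., 1., 1.]])
    mc_returns = monte_carlo_returns(rewards, dones, gamma=0.99)
    # One mean/std across ALL agents and timesteps, exactly as in the diff:
    mc_returns = (mc_returns - mc_returns.mean()) / (mc_returns.std() + 1e-8)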
@@ -120,7 +120,7 @@ class MARLActorCriticMemory(object):
 
     def __getattr__(self, attr):
         all_attrs = [getattr(mem, attr) for mem in self.memories]
-        return torch.cat(all_attrs, 0)  # agents x time ...
+        return torch.cat(all_attrs, 0)  # agent x time ...
 
     def chunk_dataloader(self, chunk_len, k):
         datasets = [ExperienceChunks(mem, chunk_len, k) for mem in self.memories]
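Note: __getattr__ here implements attribute delegation: any field looked up on the stacked memory is fetched from every per-agent memory and concatenated along dim 0. A minimal sketch of the pattern (class names are illustrative):

    import torch

    class PerAgentMemory:
        def __init__(self, rewards):
            self.rewards = rewards  # [1 x time]

    class StackedMemory:
        def __init__(self, memories):
            self.memories = memories

        def __getattr__(self, attr):
            # Only called for attributes missing on the instance itself,
            # so self.memories never recurses into this method.
            return torch.cat([getattr(mem, attr) for mem in self.memories], 0)

    stacked = StackedMemory([PerAgentMemory(torch.ones(1, 4)),
                             PerAgentMemory(torch.zeros(1, 4))])
    print(stacked.rewards.shape)  # torch.Size([2, 4]) -> agents x time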
95  algorithms/static/TSP_base_agent.py  Normal file
@@ -0,0 +1,95 @@
from random import choice

import numpy as np

from networkx.algorithms.approximation import traveling_salesman as tsp

from environment.utils.helpers import points_to_graph

from modules.doors import constants as do
from environment import constants as c
from environment.utils.helpers import MOVEMAP

from abc import abstractmethod, ABC

future_planning = 7


class TSPBaseAgent(ABC):

    def __init__(self, state, agent_i, static_problem: bool = True):
        self.static_problem = static_problem
        self.local_optimization = True
        self._env = state
        self.state = self._env.state[c.AGENT][agent_i]
        self._floortile_graph = points_to_graph(self._env[c.FLOOR].positions)
        self._static_route = None

    @abstractmethod
    def predict(self, *_, **__) -> int:
        return 0

    def _use_door_or_move(self, door, target):
        if door.is_closed:
            # Translate the action_object to an integer to have the same output as any other model
            action = do.ACTION_DOOR_USE
        else:
            action = self._predict_move(target)
        return action

    def calculate_tsp_route(self, target_identifier):
        positions = [x for x in self._env.state[target_identifier].positions if x != c.VALUE_NO_POS]
        if self.local_optimization:
            nodes = \
                [self.state.pos] + \
                [x for x in positions if max(abs(np.subtract(x, self.state.pos))) < 3]
            try:
                while len(nodes) < 7:
                    nodes += [next(x for x in positions if x not in nodes)]
            except StopIteration:
                nodes = [self.state.pos] + positions
        else:
            nodes = [self.state.pos] + positions
        route = tsp.traveling_salesman_problem(self._floortile_graph,
                                               nodes=nodes, cycle=True, method=tsp.greedy_tsp)
        return route

    def _door_is_close(self):
        try:
            return next(y for x in self.state.tile.neighboring_floor for y in x.guests if do.DOOR in y.name)
        except StopIteration:
            return None

    def _has_targets(self, target_identifier):
        return bool(len([x for x in self._env.state[target_identifier] if x.pos != c.VALUE_NO_POS]) >= 1)

    def _predict_move(self, target_identifier):
        if self._has_targets(target_identifier):
            if self.static_problem:
                if not self._static_route:
                    self._static_route = self.calculate_tsp_route(target_identifier)
                else:
                    pass
                next_pos = self._static_route.pop(0)
                while next_pos == self.state.pos:
                    next_pos = self._static_route.pop(0)
            else:
                if not self._static_route:
                    self._static_route = self.calculate_tsp_route(target_identifier)[:7]
                next_pos = self._static_route.pop(0)
                while next_pos == self.state.pos:
                    next_pos = self._static_route.pop(0)

            diff = np.subtract(next_pos, self.state.pos)
            # Retrieve the action based on the position diff (as in: what do I have to do to get there?)
            try:
                action = next(action for action, pos_diff in MOVEMAP.items() if np.all(diff == pos_diff))
            except StopIteration:
                print(f'diff: {diff}')
                print('This should not happen!')
                action = choice(self.state.actions).name
        else:
            action = choice(self.state.actions).name
        # noinspection PyUnboundLocalVariable
        return action
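Note: _predict_move inverts a position difference into a movement action via MOVEMAP. A sketch with hypothetical map contents (the real MOVEMAP lives in environment.utils.helpers and also covers diagonal moves):

    import numpy as np

    MOVEMAP = {'north': (-1, 0), 'south': (1, 0), 'west': (0, -1), 'east': (0, 1)}

    def action_for_step(pos, next_pos):
        # "What do I have to do to get there?" -- match the offset to an action.
        diff = np.subtract(next_pos, pos)
        return next(action for action, pos_diff in MOVEMAP.items() if np.all(diff == pos_diff))

    print(action_for_step((2, 2), (2, 3)))  # east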
27  algorithms/static/TSP_dirt_agent.py  Normal file
@@ -0,0 +1,27 @@
from algorithms.static.TSP_base_agent import TSPBaseAgent

from modules.clean_up import constants as di

future_planning = 7


class TSPDirtAgent(TSPBaseAgent):

    def __init__(self, *args, **kwargs):
        super(TSPDirtAgent, self).__init__(*args, **kwargs)

    def predict(self, *_, **__):
        if self._env.state[di.DIRT].by_pos(self.state.pos) is not None:
            # Translate the action_object to an integer to have the same output as any other model
            action = di.CLEAN_UP
        elif door := self._door_is_close():
            action = self._use_door_or_move(door, di.DIRT)
        else:
            action = self._predict_move(di.DIRT)
        # Translate the action_object to an integer to have the same output as any other model
        try:
            action_obj = next(action_i for action_i, a in enumerate(self.state.actions) if a.name == action)
        except (StopIteration, UnboundLocalError):
            print('Will not happen')
            raise EnvironmentError
        return action_obj
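Note: every agent ends predict() with the same name-to-index translation so that it returns plain integers like any learned policy would. A self-contained sketch of that lookup (the Action dataclass stands in for the environment's action objects):

    from dataclasses import dataclass

    @dataclass
    class Action:
        name: str

    actions = [Action('north'), Action('use_door'), Action('do_cleanup_action')]
    chosen = 'do_cleanup_action'
    action_obj = next(i for i, a in enumerate(actions) if a.name == chosen)
    print(action_obj)  # 2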
59  algorithms/static/TSP_item_agent.py  Normal file
@@ -0,0 +1,59 @@
import numpy as np

from algorithms.static.TSP_base_agent import TSPBaseAgent

from modules.items import constants as i

future_planning = 7
inventory_size = 3

MODE_GET = 'Mode_Get'
MODE_BRING = 'Mode_Bring'


class TSPItemAgent(TSPBaseAgent):

    def __init__(self, *args, mode=MODE_GET, **kwargs):
        super(TSPItemAgent, self).__init__(*args, **kwargs)
        self.mode = mode

    def predict(self, *_, **__):
        if self._env.state[i.ITEM].by_pos(self.state.pos) is not None:
            # Translate the action_object to an integer to have the same output as any other model
            action = i.ITEM_ACTION
        elif self._env.state[i.DROP_OFF].by_pos(self.state.pos) is not None:
            # Translate the action_object to an integer to have the same output as any other model
            action = i.ITEM_ACTION
        elif door := self._door_is_close():
            action = self._use_door_or_move(door, i.DROP_OFF if self.mode == MODE_BRING else i.ITEM)
        else:
            action = self._choose()
        # Translate the action_object to an integer to have the same output as any other model
        try:
            action_obj = next(action_i for action_i, a in enumerate(self.state.actions) if a.name == action)
        except (StopIteration, UnboundLocalError):
            print('Will not happen')
            raise EnvironmentError
        # noinspection PyUnboundLocalVariable
        if self.mode == MODE_BRING and len(self._env[i.INVENTORY].by_entity(self.state)):
            pass
        elif self.mode == MODE_BRING and not len(self._env[i.INVENTORY].by_entity(self.state)):
            self.mode = MODE_GET
        elif self.mode == MODE_GET and len(self._env[i.INVENTORY].by_entity(self.state)) > inventory_size:
            self.mode = MODE_BRING
        else:
            pass
        return action_obj

    def _choose(self):
        target = i.DROP_OFF if self.mode == MODE_BRING else i.ITEM
        if len(self._env.state[i.ITEM]) >= 1:
            action = self._predict_move(target)
        elif len(self._env[i.INVENTORY].by_entity(self.state)):
            self.mode = MODE_BRING
            action = self._predict_move(target)
        else:
            action = int(np.random.randint(self._env.action_space.n))
        # noinspection PyUnboundLocalVariable
        return action
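Note: the mode bookkeeping at the end of predict() is a two-state machine driven by inventory load. Sketched in isolation (the carried-item count stands in for self._env[i.INVENTORY].by_entity(...)):

    MODE_GET, MODE_BRING, inventory_size = 'Mode_Get', 'Mode_Bring', 3

    def next_mode(mode, n_carried):
        if mode == MODE_BRING and n_carried == 0:
            return MODE_GET    # everything dropped off -> collect again
        if mode == MODE_GET and n_carried > inventory_size:
            return MODE_BRING  # over capacity -> head for the drop-off
        return mode

    assert next_mode(MODE_GET, 4) == MODE_BRING
    assert next_mode(MODE_BRING, 0) == MODE_GET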
32  algorithms/static/TSP_target_agent.py  Normal file
@@ -0,0 +1,32 @@
from algorithms.static.TSP_base_agent import TSPBaseAgent

from modules.destinations import constants as d
from modules.doors import constants as do

future_planning = 7


class TSPTargetAgent(TSPBaseAgent):

    def __init__(self, *args, **kwargs):
        super(TSPTargetAgent, self).__init__(*args, **kwargs)

    def _handle_doors(self):
        try:
            return next(y for x in self.state.tile.neighboring_floor for y in x.guests if do.DOOR in y.name)
        except StopIteration:
            return None

    def predict(self, *_, **__):
        if door := self._door_is_close():
            action = self._use_door_or_move(door, d.DESTINATION)
        else:
            action = self._predict_move(d.DESTINATION)
        # Translate the action_object to an integer to have the same output as any other model
        try:
            action_obj = next(action_i for action_i, a in enumerate(self.state.actions) if a.name == action)
        except (StopIteration, UnboundLocalError):
            print('Will not happen')
            raise EnvironmentError  # keep behaviour consistent with the other TSP agents
        return action_obj
0  algorithms/static/__init__.py  Normal file
15  algorithms/static/random_agent.py  Normal file
@@ -0,0 +1,15 @@
from random import randint

from algorithms.static.TSP_base_agent import TSPBaseAgent

future_planning = 7


class TSPRandomAgent(TSPBaseAgent):

    def __init__(self, n_actions, *args, **kwargs):
        super(TSPRandomAgent, self).__init__(*args, **kwargs)
        self.n_action = n_actions

    def predict(self, *_, **__):
        return randint(0, self.n_action - 1)
@@ -1,4 +1,3 @@
-import re
 import torch
 import numpy as np
 import yaml