added documentation for agents

2026-05-28 13:33:23 +02:00 · 2024-01-04 17:48:32 +01:00
parent a8f86e1c9c
commit 3cd5caed93
5 changed files with 106 additions and 0 deletions
@@ -17,6 +17,16 @@ future_planning = 7
 class TSPBaseAgent(ABC):
    def __init__(self, state, agent_i, static_problem: bool = True):
        """
        Abstract base class for agents in the environment.
        :param state: The environment state
        :type state:
        :param agent_i: Index of the agent
        :type agent_i: int
        :param static_problem: Indicates whether the TSP is a static problem. (Default: True)
        :type static_problem: bool
        """
        self.static_problem = static_problem
        self.local_optimization = True
        self._env = state
@@ -26,9 +36,25 @@ class TSPBaseAgent(ABC):
    @abstractmethod
    def predict(self, *_, **__) -> int:
        """
        Predicts the next action based on the environment state.
        :return: Predicted action.
        :rtype: int
        """
        return 0
    def _use_door_or_move(self, door, target):
        """
        Helper method to decide whether to use a door or move towards a target.
        :param door: Door entity.
        :type door: Door
        :param target: Target type. For example 'Dirt', 'Dropoff' or 'Destination'
        :type target: str
        :return: Action to perform (use door or move).
        """
        if door.is_closed:
            # Translate the action_object to an integer to have the same output as any other model
            action = do.ACTION_DOOR_USE
@@ -37,6 +63,15 @@ class TSPBaseAgent(ABC):
        return action
    def calculate_tsp_route(self, target_identifier):
        """
        Calculate the TSP route to reach a target.
        :param target_identifier: Identifier of the target entity
        :type target_identifier: str
        :return: TSP route
        :rtype: List[int]
        """
        positions = [x for x in self._env.state[target_identifier].positions if x != c.VALUE_NO_POS]
        if self.local_optimization:
            nodes = \
@@ -55,6 +90,15 @@ class TSPBaseAgent(ABC):
        return route
    def _door_is_close(self, state):
        """
        Check if a door is close to the agent's position.
        :param state: Current environment state.
        :type state: Gamestate
        :return: Closest door entity or None if no door is close.
        :rtype: Door | None
        """
        try:
            return next(y for x in state.entities.neighboring_positions(self.state.pos)
                        for y in state.entities.pos_dict[x] if do.DOOR in y.name)
@@ -62,9 +106,27 @@ class TSPBaseAgent(ABC):
            return None
    def _has_targets(self, target_identifier):
        """
        Check if there are targets available in the environment.
        :param target_identifier: Identifier of the target entity.
        :type target_identifier: str
        :return: True if there are targets, False otherwise.
        :rtype: bool
        """
        return bool(len([x for x in self._env.state[target_identifier] if x.pos != c.VALUE_NO_POS]) >= 1)
    def _predict_move(self, target_identifier):
        """
           Predict the next move based on the given target.
           :param target_identifier: Identifier of the target entity.
           :type target_identifier: str
           :return: Predicted action.
           :rtype: int
           """
        if self._has_targets(target_identifier):
            if self.static_problem:
                if not self._static_route:
@@ -8,9 +8,18 @@ future_planning = 7
 class TSPDirtAgent(TSPBaseAgent):
    def __init__(self, *args, **kwargs):
        """
        Initializes a TSPDirtAgent that aims to clean dirt in the environment.
        """
        super(TSPDirtAgent, self).__init__(*args, **kwargs)
    def predict(self, *_, **__):
        """
        Predicts the next action based on the presence of dirt in the environment.
        :return: Predicted action.
        :rtype: int
        """
        if self._env.state[di.DIRT].by_pos(self.state.pos) is not None:
            # Translate the action_object to an integer to have the same output as any other model
            action = di.CLEAN_UP
@@ -14,6 +14,12 @@ MODE_BRING      = 'Mode_Bring'
 class TSPItemAgent(TSPBaseAgent):
    def __init__(self, *args, mode=MODE_GET, **kwargs):
        """
        Initializes a TSPItemAgent that colects items in the environment, stores them in his inventory and drops them off
        at a drop-off location.
        :param mode: Mode of the agent, either MODE_GET or MODE_BRING.
        """
        super(TSPItemAgent, self).__init__(*args, **kwargs)
        self.mode = mode
@@ -46,6 +52,12 @@ class TSPItemAgent(TSPBaseAgent):
        return action_obj
    def _choose(self):
        """
        Internal Usage. Chooses the action based on the agent's mode and the environment state.
        :return: Chosen action.
        :rtype: int
        """
        target = i.DROP_OFF if self.mode == MODE_BRING else i.ITEM
        if len(self._env.state[i.ITEM]) >= 1:
            action = self._predict_move(target)
@@ -9,9 +9,20 @@ future_planning = 7
 class TSPTargetAgent(TSPBaseAgent):
    def __init__(self, *args, **kwargs):
        """
        Initializes a TSPTargetAgent that aims to reach destinations.
        """
        super(TSPTargetAgent, self).__init__(*args, **kwargs)
    def _handle_doors(self, state):
        """
        Internal Usage. Handles the doors in the environment.
        :param state: The current environment state.
        :type state: marl_factory_grid.utils.states.Gamestate
        :return: Closest door entity or None if no doors are close.
        :rtype: marl_factory_grid.environment.entity.object.Entity or None
        """
        try:
            return next(y for x in state.entities.neighboring_positions(self.state.pos)
@@ -8,8 +8,20 @@ future_planning = 7
 class TSPRandomAgent(TSPBaseAgent):
    def __init__(self, n_actions, *args, **kwargs):
        """
        Initializes a TSPRandomAgent that performs random actions from within his action space.
        :param n_actions: Number of possible actions.
        :type n_actions: int
        """
        super(TSPRandomAgent, self).__init__(*args, **kwargs)
        self.n_action = n_actions
    def predict(self, *_, **__):
        """
        Predicts the next action randomly.
        :return: Predicted action.
        :rtype: int
        """
        return randint(0, self.n_action - 1)