added documentation for agents

2025-06-18 18:52:52 +02:00 · 2024-01-04 17:48:32 +01:00
parent a8f86e1c9c
commit 3cd5caed93
5 changed files with 106 additions and 0 deletions
--- a/marl_factory_grid/algorithms/static/TSP_base_agent.py
+++ b/marl_factory_grid/algorithms/static/TSP_base_agent.py
@ -17,6 +17,16 @@ future_planning = 7
 class TSPBaseAgent(ABC):

    def __init__(self, state, agent_i, static_problem: bool = True):
+        """
+        Abstract base class for agents in the environment.
+
+        :param state: The environment state
+        :type state:
+        :param agent_i: Index of the agent
+        :type agent_i: int
+        :param static_problem: Indicates whether the TSP is a static problem. (Default: True)
+        :type static_problem: bool
+        """
        self.static_problem = static_problem
        self.local_optimization = True
        self._env = state
@ -26,9 +36,25 @@ class TSPBaseAgent(ABC):

    @abstractmethod
    def predict(self, *_, **__) -> int:
+        """
+        Predicts the next action based on the environment state.
+
+        :return: Predicted action.
+        :rtype: int
+        """
        return 0

    def _use_door_or_move(self, door, target):
+        """
+        Helper method to decide whether to use a door or move towards a target.
+
+        :param door: Door entity.
+        :type door: Door
+        :param target: Target type. For example 'Dirt', 'Dropoff' or 'Destination'
+        :type target: str
+
+        :return: Action to perform (use door or move).
+        """
        if door.is_closed:
            # Translate the action_object to an integer to have the same output as any other model
            action = do.ACTION_DOOR_USE
@ -37,6 +63,15 @@ class TSPBaseAgent(ABC):
        return action

    def calculate_tsp_route(self, target_identifier):
+        """
+        Calculate the TSP route to reach a target.
+
+        :param target_identifier: Identifier of the target entity
+        :type target_identifier: str
+
+        :return: TSP route
+        :rtype: List[int]
+        """
        positions = [x for x in self._env.state[target_identifier].positions if x != c.VALUE_NO_POS]
        if self.local_optimization:
            nodes = \
@ -55,6 +90,15 @@ class TSPBaseAgent(ABC):
        return route

    def _door_is_close(self, state):
+        """
+        Check if a door is close to the agent's position.
+
+        :param state: Current environment state.
+        :type state: Gamestate
+
+        :return: Closest door entity or None if no door is close.
+        :rtype: Door | None
+        """
        try:
            return next(y for x in state.entities.neighboring_positions(self.state.pos)
                        for y in state.entities.pos_dict[x] if do.DOOR in y.name)
@ -62,9 +106,27 @@ class TSPBaseAgent(ABC):
            return None

    def _has_targets(self, target_identifier):
+        """
+        Check if there are targets available in the environment.
+
+        :param target_identifier: Identifier of the target entity.
+        :type target_identifier: str
+
+        :return: True if there are targets, False otherwise.
+        :rtype: bool
+        """
        return bool(len([x for x in self._env.state[target_identifier] if x.pos != c.VALUE_NO_POS]) >= 1)

    def _predict_move(self, target_identifier):
+        """
+           Predict the next move based on the given target.
+
+           :param target_identifier: Identifier of the target entity.
+           :type target_identifier: str
+
+           :return: Predicted action.
+           :rtype: int
+           """
        if self._has_targets(target_identifier):
            if self.static_problem:
                if not self._static_route:
--- a/marl_factory_grid/algorithms/static/TSP_dirt_agent.py
+++ b/marl_factory_grid/algorithms/static/TSP_dirt_agent.py
@ -8,9 +8,18 @@ future_planning = 7
 class TSPDirtAgent(TSPBaseAgent):

    def __init__(self, *args, **kwargs):
+        """
+        Initializes a TSPDirtAgent that aims to clean dirt in the environment.
+        """
        super(TSPDirtAgent, self).__init__(*args, **kwargs)

    def predict(self, *_, **__):
+        """
+        Predicts the next action based on the presence of dirt in the environment.
+
+        :return: Predicted action.
+        :rtype: int
+        """
        if self._env.state[di.DIRT].by_pos(self.state.pos) is not None:
            # Translate the action_object to an integer to have the same output as any other model
            action = di.CLEAN_UP
--- a/marl_factory_grid/algorithms/static/TSP_item_agent.py
+++ b/marl_factory_grid/algorithms/static/TSP_item_agent.py
@ -14,6 +14,12 @@ MODE_BRING      = 'Mode_Bring'
 class TSPItemAgent(TSPBaseAgent):

    def __init__(self, *args, mode=MODE_GET, **kwargs):
+        """
+        Initializes a TSPItemAgent that colects items in the environment, stores them in his inventory and drops them off
+        at a drop-off location.
+
+        :param mode: Mode of the agent, either MODE_GET or MODE_BRING.
+        """
        super(TSPItemAgent, self).__init__(*args, **kwargs)
        self.mode = mode

@ -46,6 +52,12 @@ class TSPItemAgent(TSPBaseAgent):
        return action_obj

    def _choose(self):
+        """
+        Internal Usage. Chooses the action based on the agent's mode and the environment state.
+
+        :return: Chosen action.
+        :rtype: int
+        """
        target = i.DROP_OFF if self.mode == MODE_BRING else i.ITEM
        if len(self._env.state[i.ITEM]) >= 1:
            action = self._predict_move(target)
--- a/marl_factory_grid/algorithms/static/TSP_target_agent.py
+++ b/marl_factory_grid/algorithms/static/TSP_target_agent.py
@ -9,9 +9,20 @@ future_planning = 7
 class TSPTargetAgent(TSPBaseAgent):

    def __init__(self, *args, **kwargs):
+        """
+        Initializes a TSPTargetAgent that aims to reach destinations.
+        """
        super(TSPTargetAgent, self).__init__(*args, **kwargs)

    def _handle_doors(self, state):
+        """
+        Internal Usage. Handles the doors in the environment.
+
+        :param state: The current environment state.
+        :type state: marl_factory_grid.utils.states.Gamestate
+        :return: Closest door entity or None if no doors are close.
+        :rtype: marl_factory_grid.environment.entity.object.Entity or None
+        """

        try:
            return next(y for x in state.entities.neighboring_positions(self.state.pos)
--- a/marl_factory_grid/algorithms/static/random_agent.py
+++ b/marl_factory_grid/algorithms/static/random_agent.py
@ -8,8 +8,20 @@ future_planning = 7
 class TSPRandomAgent(TSPBaseAgent):

    def __init__(self, n_actions, *args, **kwargs):
+        """
+        Initializes a TSPRandomAgent that performs random actions from within his action space.
+
+        :param n_actions: Number of possible actions.
+        :type n_actions: int
+        """
        super(TSPRandomAgent, self).__init__(*args, **kwargs)
        self.n_action = n_actions

    def predict(self, *_, **__):
+        """
+        Predicts the next action randomly.
+
+        :return: Predicted action.
+        :rtype: int
+        """
        return randint(0, self.n_action - 1)