mirror of
				https://github.com/illiumst/marl-factory-grid.git
				synced 2025-11-01 04:57:26 +01:00 
			
		
		
		
	Stable Baseline Running
This commit is contained in:
		| @@ -1,3 +1,4 @@ | ||||
| import abc | ||||
| from typing import List, Union, Iterable | ||||
|  | ||||
| import gym | ||||
| @@ -61,17 +62,14 @@ class BaseFactory(gym.Env): | ||||
|         self.allow_horizontal_movement = True | ||||
|         self.allow_no_OP = True | ||||
|         self._monitor_list = list() | ||||
|         self._registered_actions = self.movement_actions + int(self.allow_no_OP) | ||||
|         self._registered_actions = self.movement_actions + int(self.allow_no_OP) + self.register_additional_actions() | ||||
|         self.level = h.one_hot_level( | ||||
|             h.parse_level(Path(__file__).parent / h.LEVELS_DIR / f'{level}.txt') | ||||
|         ) | ||||
|         self.slice_strings = {0: 'level', **{i: f'agent#{i}' for i in range(1, self.n_agents+1)}} | ||||
|         self.reset() | ||||
|  | ||||
|     def __init_subclass__(cls): | ||||
|         print(cls) | ||||
|  | ||||
|     def register_additional_actions(self): | ||||
|     def register_additional_actions(self) -> int: | ||||
|         raise NotImplementedError('Please register additional actions ') | ||||
|  | ||||
|     def reset(self) -> (np.ndarray, int, bool, dict): | ||||
| @@ -111,6 +109,8 @@ class BaseFactory(gym.Env): | ||||
|             agent_i_state = AgentState(agent_i, action) | ||||
|             if self._is_moving_action(action): | ||||
|                 pos, valid = self.move_or_colide(agent_i, action) | ||||
|             elif self._is_no_op(action): | ||||
|                 pos, valid = self.agent_i_position(agent_i), True | ||||
|             else: | ||||
|                 pos, valid = self.additional_actions(agent_i, action) | ||||
|             # Update state accordingly | ||||
| @@ -129,10 +129,10 @@ class BaseFactory(gym.Env): | ||||
|         return self.state, self.cumulative_reward, self.done, info | ||||
|  | ||||
|     def _is_moving_action(self, action): | ||||
|         if action < self.movement_actions: | ||||
|             return True | ||||
|         else: | ||||
|             return False | ||||
|         return action < self.movement_actions | ||||
|  | ||||
|     def _is_no_op(self, action): | ||||
|         return self.allow_no_OP and (action - self.movement_actions) == 0 | ||||
|  | ||||
|     def check_all_collisions(self, agent_states: List[AgentState], collisions: int) -> np.ndarray: | ||||
|         collision_vecs = np.zeros((len(agent_states), collisions))  # n_agents x n_slices | ||||
|   | ||||
| @@ -1,49 +1,157 @@ | ||||
| from collections import OrderedDict | ||||
| from dataclasses import dataclass | ||||
| from typing import List | ||||
| import random | ||||
|  | ||||
| import numpy as np | ||||
| from environments.factory.base_factory import BaseFactory, FactoryMonitor | ||||
|  | ||||
| from environments.factory.base_factory import BaseFactory, AgentState | ||||
| from environments import helpers as h | ||||
|  | ||||
| from environments.factory.renderer import Renderer | ||||
| from environments.factory.renderer import Entity | ||||
| from environments.logging.monitor import MonitorCallback | ||||
|  | ||||
| DIRT_INDEX = -1 | ||||
|  | ||||
|  | ||||
| @dataclass | ||||
| class DirtProperties: | ||||
|     clean_amount = 0.25 | ||||
|     max_spawn_ratio = 0.1 | ||||
|     gain_amount = 0.1 | ||||
|     spawn_frequency = 5 | ||||
|  | ||||
|  | ||||
| class SimpleFactory(BaseFactory): | ||||
|     def __init__(self, *args, max_dirt=5, **kwargs): | ||||
|         self.max_dirt = max_dirt | ||||
|  | ||||
|     def register_additional_actions(self): | ||||
|         return 1 | ||||
|  | ||||
|     def _is_clean_up_action(self, action): | ||||
|         return self.action_space.n - 1 == action | ||||
|  | ||||
|     def __init__(self, *args, dirt_properties: DirtProperties, **kwargs): | ||||
|         self._dirt_properties = dirt_properties | ||||
|         super(SimpleFactory, self).__init__(*args, **kwargs) | ||||
|         self.slice_strings.update({self.state.shape[0]-1: 'dirt'}) | ||||
|         self.renderer = None  # expensive - dont use it when not required ! | ||||
|  | ||||
|     def spawn_dirt(self): | ||||
|         free_for_dirt = self.free_cells | ||||
|         for x, y in free_for_dirt[:self.max_dirt]:  # randomly distribute dirt across the grid | ||||
|             self.state[-1, x, y] = 1 | ||||
|     def render(self): | ||||
|         if not self.renderer:  # lazy init | ||||
|             height, width = self.state.shape[1:] | ||||
|             self.renderer = Renderer(width, height, view_radius=2) | ||||
|  | ||||
|     def reset(self): | ||||
|         state, r, done, _ = super().reset() | ||||
|         dirt      = [Entity('dirt', [x, y], min(0.15+self.state[DIRT_INDEX, x, y], 1.5), 'scale') | ||||
|                      for x, y in np.argwhere(self.state[DIRT_INDEX] > h.IS_FREE_CELL)] | ||||
|         walls     = [Entity('wall', pos) for pos in np.argwhere(self.state[h.LEVEL_IDX] > h.IS_FREE_CELL)] | ||||
|  | ||||
|         def asset_str(agent): | ||||
|             cols = ' '.join([self.slice_strings[j] for j in agent.collisions]) | ||||
|             if 'agent' in cols: | ||||
|                 return 'agent_collision' | ||||
|             elif not agent.action_valid or 'level' in cols or 'agent' in cols: | ||||
|                 return f'agent{agent.i + 1}violation' | ||||
|             elif self._is_clean_up_action(agent.action): | ||||
|                 return f'agent{agent.i + 1}valid' | ||||
|             else: | ||||
|                 return f'agent{agent.i + 1}' | ||||
|  | ||||
|         agents = {f'agent{i+1}': [Entity(asset_str(agent), agent.pos)] | ||||
|                   for i, agent in enumerate(self.agent_states)} | ||||
|         self.renderer.render(OrderedDict(dirt=dirt, wall=walls, **agents)) | ||||
|  | ||||
|     def spawn_dirt(self) -> None: | ||||
|         free_for_dirt = self.free_cells(excluded_slices=DIRT_INDEX) | ||||
|  | ||||
|         # randomly distribute dirt across the grid | ||||
|         n_dirt_tiles = int(random.uniform(0, self._dirt_properties.max_spawn_ratio) * len(free_for_dirt)) | ||||
|         for x, y in free_for_dirt[:n_dirt_tiles]: | ||||
|             self.state[DIRT_INDEX, x, y] += self._dirt_properties.gain_amount | ||||
|  | ||||
|     def clean_up(self, pos: (int, int)) -> ((int, int), bool): | ||||
|         new_dirt_amount = self.state[DIRT_INDEX][pos] - self._dirt_properties.clean_amount | ||||
|         cleanup_was_sucessfull: bool | ||||
|         if self.state[DIRT_INDEX][pos] == h.IS_FREE_CELL: | ||||
|             cleanup_was_sucessfull = False | ||||
|             return pos, cleanup_was_sucessfull | ||||
|         else: | ||||
|             cleanup_was_sucessfull = True | ||||
|             self.state[DIRT_INDEX][pos] = max(new_dirt_amount, h.IS_FREE_CELL) | ||||
|             return pos, cleanup_was_sucessfull | ||||
|  | ||||
|     def step(self, actions): | ||||
|         _, _, _, info = super(SimpleFactory, self).step(actions) | ||||
|         if not self.next_dirt_spawn: | ||||
|             self.spawn_dirt() | ||||
|             self.next_dirt_spawn = self._dirt_properties.spawn_frequency | ||||
|         else: | ||||
|             self.next_dirt_spawn -= 1 | ||||
|         return self.state, self.cumulative_reward, self.done, info | ||||
|  | ||||
|     def additional_actions(self, agent_i: int, action: int) -> ((int, int), bool): | ||||
|         if action != self._is_moving_action(action): | ||||
|             if self._is_clean_up_action(action): | ||||
|                 agent_i_pos = self.agent_i_position(agent_i) | ||||
|                 _, valid = self.clean_up(agent_i_pos) | ||||
|                 if valid: | ||||
|                     print(f'Agent {agent_i} did just clean up some dirt at {agent_i_pos}.') | ||||
|                     self.monitor.add('dirt_cleaned', self._dirt_properties.clean_amount) | ||||
|                 else: | ||||
|                     print(f'Agent {agent_i} just tried to clean up some dirt at {agent_i_pos}, but was unsucsessfull.') | ||||
|                     self.monitor.add('failed_cleanup_attempt', 1) | ||||
|                 return agent_i_pos, valid | ||||
|             else: | ||||
|                 raise RuntimeError('This should not happen!!!') | ||||
|         else: | ||||
|             raise RuntimeError('This should not happen!!!') | ||||
|  | ||||
|     def reset(self) -> (np.ndarray, int, bool, dict): | ||||
|         _ = super().reset()  # state, reward, done, info ... = | ||||
|         dirt_slice = np.zeros((1, *self.state.shape[1:])) | ||||
|         self.state = np.concatenate((self.state, dirt_slice))  # dirt is now the last slice | ||||
|         self.spawn_dirt() | ||||
|         # Always: This should return state | ||||
|         self.next_dirt_spawn = self._dirt_properties.spawn_frequency | ||||
|         return self.state | ||||
|  | ||||
|     def calculate_reward(self, agent_states): | ||||
|     def calculate_reward(self, agent_states: List[AgentState]) -> (int, dict): | ||||
|         # TODO: What reward to use? | ||||
|         current_dirt_amount = self.state[DIRT_INDEX].sum() | ||||
|         dirty_tiles = len(np.nonzero(self.state[DIRT_INDEX])) | ||||
|  | ||||
|         try: | ||||
|             this_step_reward = -(dirty_tiles / current_dirt_amount) | ||||
|         except ZeroDivisionError: | ||||
|             this_step_reward = 0 | ||||
|  | ||||
|         for agent_state in agent_states: | ||||
|             collisions = agent_state.collisions | ||||
|             entities = [self.slice_strings[entity] for entity in collisions] | ||||
|             if entities: | ||||
|                 for entity in entities: | ||||
|                     self.monitor.add(f'agent_{agent_state.i}_collision_{entity}', 1) | ||||
|             print(f't = {self.steps}\tAgent {agent_state.i} has collisions with ' | ||||
|                       f'{entities}') | ||||
|         return 0, {} | ||||
|                   f'{[self.slice_strings[entity] for entity in collisions if entity != self.string_slices["dirt"]]}') | ||||
|             if self._is_clean_up_action(agent_state.action) and agent_state.action_valid: | ||||
|                 this_step_reward += 1 | ||||
|  | ||||
|             for entity in collisions: | ||||
|                 if entity != self.string_slices["dirt"]: | ||||
|                     self.monitor.add(f'agent_{agent_state.i}_vs_{self.slice_strings[entity]}', 1) | ||||
|         self.monitor.set('dirt_amount', current_dirt_amount) | ||||
|         self.monitor.set('dirty_tiles', dirty_tiles) | ||||
|         return this_step_reward, {} | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     import random | ||||
|     factory = SimpleFactory(n_agents=1, max_dirt=8) | ||||
|     monitor_list = list() | ||||
|     for epoch in range(5): | ||||
|         random_actions = [random.randint(0, 7) for _ in range(200)] | ||||
|         state, r, done, _ = factory.reset() | ||||
|         for action in random_actions: | ||||
|             state, r, done, info = factory.step(action) | ||||
|         monitor_list.append(factory.monitor) | ||||
|  | ||||
|         print(f'Factory run done, reward is:\n    {r}') | ||||
|         print(f'There have been the following collisions: \n {dict(factory.monitor)}') | ||||
|     render = True | ||||
|  | ||||
|     dirt_props = DirtProperties() | ||||
|     factory = SimpleFactory(n_agents=2, dirt_properties=dirt_props) | ||||
|     with MonitorCallback(factory): | ||||
|         for epoch in range(100): | ||||
|             random_actions = [(random.randint(0, 8), random.randint(0, 8)) for _ in range(200)] | ||||
|             env_state, reward, done_bool, _ = factory.reset() | ||||
|             for agent_i_action in random_actions: | ||||
|                 env_state, reward, done_bool, info_obj = factory.step(agent_i_action) | ||||
|                 if render: | ||||
|                     factory.render() | ||||
|                 if done_bool: | ||||
|                     break | ||||
|             print(f'Factory run {epoch} done, reward is:\n    {reward}') | ||||
|   | ||||
| @@ -1,158 +0,0 @@ | ||||
| from collections import OrderedDict | ||||
| from dataclasses import dataclass | ||||
| from typing import List | ||||
| import random | ||||
|  | ||||
| import numpy as np | ||||
|  | ||||
| from environments.factory.base_factory import BaseFactory, AgentState | ||||
| from environments import helpers as h | ||||
|  | ||||
| from environments.factory.renderer import Renderer | ||||
| from environments.factory.renderer import Entity | ||||
| from environments.logging.monitor import MonitorCallback | ||||
|  | ||||
| DIRT_INDEX = -1 | ||||
|  | ||||
|  | ||||
| @dataclass | ||||
| class DirtProperties: | ||||
|     clean_amount = 0.25 | ||||
|     max_spawn_ratio = 0.1 | ||||
|     gain_amount = 0.1 | ||||
|     spawn_frequency = 5 | ||||
|  | ||||
|  | ||||
| class GettingDirty(BaseFactory): | ||||
|  | ||||
|     def register_additional_actions(self): | ||||
|         self._registered_actions += 1 | ||||
|         return True | ||||
|  | ||||
|     def _is_clean_up_action(self, action): | ||||
|         return self.action_space.n - 1 == action | ||||
|  | ||||
|     def __init__(self, *args, dirt_properties: DirtProperties, **kwargs): | ||||
|         self._dirt_properties = dirt_properties | ||||
|         super(GettingDirty, self).__init__(*args, **kwargs) | ||||
|         self.slice_strings.update({self.state.shape[0]-1: 'dirt'}) | ||||
|         self.renderer = None  # expensive - dont use it when not required ! | ||||
|  | ||||
|     def render(self): | ||||
|         if not self.renderer:  # lazy init | ||||
|             height, width = self.state.shape[1:] | ||||
|             self.renderer = Renderer(width, height, view_radius=2) | ||||
|  | ||||
|         dirt      = [Entity('dirt', [x, y], min(0.15+self.state[DIRT_INDEX, x, y], 1.5), 'scale') | ||||
|                      for x, y in np.argwhere(self.state[DIRT_INDEX] > h.IS_FREE_CELL)] | ||||
|         walls     = [Entity('wall', pos) for pos in np.argwhere(self.state[h.LEVEL_IDX] > h.IS_FREE_CELL)] | ||||
|  | ||||
|         def asset_str(agent): | ||||
|             cols = ' '.join([self.slice_strings[j] for j in agent.collisions]) | ||||
|             if 'agent' in cols: | ||||
|                 return 'agent_collision' | ||||
|             elif not agent.action_valid or 'level' in cols or 'agent' in cols: | ||||
|                 return f'agent{agent.i + 1}violation' | ||||
|             elif self._is_clean_up_action(agent.action): | ||||
|                 return f'agent{agent.i + 1}valid' | ||||
|             else: | ||||
|                 return f'agent{agent.i + 1}' | ||||
|  | ||||
|         agents = {f'agent{i+1}': [Entity(asset_str(agent), agent.pos)] | ||||
|                   for i, agent in enumerate(self.agent_states)} | ||||
|         self.renderer.render(OrderedDict(dirt=dirt, wall=walls, **agents)) | ||||
|  | ||||
|     def spawn_dirt(self) -> None: | ||||
|         free_for_dirt = self.free_cells(excluded_slices=DIRT_INDEX) | ||||
|  | ||||
|         # randomly distribute dirt across the grid | ||||
|         n_dirt_tiles = int(random.uniform(0, self._dirt_properties.max_spawn_ratio) * len(free_for_dirt)) | ||||
|         for x, y in free_for_dirt[:n_dirt_tiles]: | ||||
|             self.state[DIRT_INDEX, x, y] += self._dirt_properties.gain_amount | ||||
|  | ||||
|     def clean_up(self, pos: (int, int)) -> ((int, int), bool): | ||||
|         new_dirt_amount = self.state[DIRT_INDEX][pos] - self._dirt_properties.clean_amount | ||||
|         cleanup_was_sucessfull: bool | ||||
|         if self.state[DIRT_INDEX][pos] == h.IS_FREE_CELL: | ||||
|             cleanup_was_sucessfull = False | ||||
|             return pos, cleanup_was_sucessfull | ||||
|         else: | ||||
|             cleanup_was_sucessfull = True | ||||
|             self.state[DIRT_INDEX][pos] = max(new_dirt_amount, h.IS_FREE_CELL) | ||||
|             return pos, cleanup_was_sucessfull | ||||
|  | ||||
|     def step(self, actions): | ||||
|         _, _, _, info = super(GettingDirty, self).step(actions) | ||||
|         if not self.next_dirt_spawn: | ||||
|             self.spawn_dirt() | ||||
|             self.next_dirt_spawn = self._dirt_properties.spawn_frequency | ||||
|         else: | ||||
|             self.next_dirt_spawn -= 1 | ||||
|         return self.state, self.cumulative_reward, self.done, info | ||||
|  | ||||
|     def additional_actions(self, agent_i: int, action: int) -> ((int, int), bool): | ||||
|         if action != self._is_moving_action(action): | ||||
|             if self._is_clean_up_action(action): | ||||
|                 agent_i_pos = self.agent_i_position(agent_i) | ||||
|                 _, valid = self.clean_up(agent_i_pos) | ||||
|                 if valid: | ||||
|                     print(f'Agent {agent_i} did just clean up some dirt at {agent_i_pos}.') | ||||
|                     self.monitor.add('dirt_cleaned', self._dirt_properties.clean_amount) | ||||
|                 else: | ||||
|                     print(f'Agent {agent_i} just tried to clean up some dirt at {agent_i_pos}, but was unsucsessfull.') | ||||
|                     self.monitor.add('failed_cleanup_attempt', 1) | ||||
|                 return agent_i_pos, valid | ||||
|             else: | ||||
|                 raise RuntimeError('This should not happen!!!') | ||||
|         else: | ||||
|             raise RuntimeError('This should not happen!!!') | ||||
|  | ||||
|     def reset(self) -> (np.ndarray, int, bool, dict): | ||||
|         _ = super().reset()  # state, reward, done, info ... = | ||||
|         dirt_slice = np.zeros((1, *self.state.shape[1:])) | ||||
|         self.state = np.concatenate((self.state, dirt_slice))  # dirt is now the last slice | ||||
|         self.spawn_dirt() | ||||
|         self.next_dirt_spawn = self._dirt_properties.spawn_frequency | ||||
|         return self.state | ||||
|  | ||||
|     def calculate_reward(self, agent_states: List[AgentState]) -> (int, dict): | ||||
|         # TODO: What reward to use? | ||||
|         current_dirt_amount = self.state[DIRT_INDEX].sum() | ||||
|         dirty_tiles = len(np.nonzero(self.state[DIRT_INDEX])) | ||||
|  | ||||
|         try: | ||||
|             this_step_reward = -(dirty_tiles / current_dirt_amount) | ||||
|         except ZeroDivisionError: | ||||
|             this_step_reward = 0 | ||||
|  | ||||
|         for agent_state in agent_states: | ||||
|             collisions = agent_state.collisions | ||||
|             print(f't = {self.steps}\tAgent {agent_state.i} has collisions with ' | ||||
|                   f'{[self.slice_strings[entity] for entity in collisions if entity != self.string_slices["dirt"]]}') | ||||
|             if self._is_clean_up_action(agent_state.action) and agent_state.action_valid: | ||||
|                 this_step_reward += 1 | ||||
|  | ||||
|             for entity in collisions: | ||||
|                 if entity != self.string_slices["dirt"]: | ||||
|                     self.monitor.add(f'agent_{agent_state.i}_vs_{self.slice_strings[entity]}', 1) | ||||
|         self.monitor.set('dirt_amount', current_dirt_amount) | ||||
|         self.monitor.set('dirty_tiles', dirty_tiles) | ||||
|         return this_step_reward, {} | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     render = True | ||||
|  | ||||
|     dirt_props = DirtProperties() | ||||
|     factory = GettingDirty(n_agents=2, dirt_properties=dirt_props) | ||||
|     with MonitorCallback(factory): | ||||
|         for epoch in range(100): | ||||
|             random_actions = [(random.randint(0, 8), random.randint(0, 8)) for _ in range(200)] | ||||
|             env_state, reward, done_bool, _ = factory.reset() | ||||
|             for agent_i_action in random_actions: | ||||
|                 env_state, reward, done_bool, info_obj = factory.step(agent_i_action) | ||||
|                 if render: | ||||
|                     factory.render() | ||||
|                 if done_bool: | ||||
|                     break | ||||
|             print(f'Factory run {epoch} done, reward is:\n    {reward}') | ||||
		Reference in New Issue
	
	Block a user
	 steffen-illium
					steffen-illium