Monitor Class

steffen-illium 2021-05-14 11:54:59 +02:00
parent 86204a6266
commit 18be35dc19
3 changed files with 89 additions and 30 deletions

View File

@@ -1,22 +1,21 @@
-import random
-from typing import Tuple, List, Union, Iterable
+from collections import defaultdict
+from typing import List

 import numpy as np
 from pathlib import Path
-from attr import dataclass

 from environments import helpers as h


-@dataclass
 class AgentState:
-    i: int
-    action: int
-
-    pos = None
-    collision_vector = None
-    action_valid = None
+    def __init__(self, i: int, action: int):
+        self.i = i
+        self.action = action
+
+        self.pos = None
+        self.collision_vector = None
+        self.action_valid = None

     @property
     def collisions(self):
@@ -30,12 +29,51 @@ class AgentState:
         raise AttributeError(f'"{key}" cannot be updated, this attr is not a part of {self.__class__.__name__}')


+class FactoryMonitor:
+
+    def __init__(self, env):
+        self._env = env
+        self._monitor = defaultdict(lambda: defaultdict(lambda: 0))
+
+    def __iter__(self):
+        for key, value in self._monitor.items():
+            yield key, dict(value)
+
+    def add(self, key, value, step=None):
+        assert step is None or step >= 1  # Is this good practice?
+        step = step or self._env.steps
+        self._monitor[key][step] = list(self._monitor[key].values())[-1] + value
+        return self._monitor[key][step]
+
+    def set(self, key, value, step=None):
+        assert step is None or step >= 1  # Is this good practice?
+        step = step or self._env.steps
+        self._monitor[key][step] = value
+        return self._monitor[key][step]
+
+    def reduce(self, key, value, step=None):
+        assert step is None or step >= 1  # Is this good practice?
+        step = step or self._env.steps
+        self._monitor[key][step] = list(self._monitor[key].values())[-1] - value
+
+    def to_dict(self):
+        return dict(self)
+
+    def to_pd_dataframe(self):
+        import pandas as pd
+        return pd.DataFrame.from_dict(self.to_dict())
+
+
 class BaseFactory:

     @property
     def movement_actions(self):
         return (int(self.allow_vertical_movement) + int(self.allow_horizontal_movement)) * 4

+    @property
+    def string_slices(self):
+        return {value: key for key, value in self.slice_strings.items()}
+
     def __init__(self, level='simple', n_agents=1, max_steps=1e3):
         self.n_agents = n_agents
         self.max_steps = max_steps
@@ -45,11 +83,13 @@ class BaseFactory:
             h.parse_level(Path(__file__).parent / h.LEVELS_DIR / f'{level}.txt')
         )
         self.slice_strings = {0: 'level', **{i: f'agent#{i}' for i in range(1, self.n_agents+1)}}
+        self.monitor = FactoryMonitor(self)
         self.reset()

     def reset(self):
         self.done = False
         self.steps = 0
+        self.cumulative_reward = 0
         # Agent placement ...
         agents = np.zeros((self.n_agents, *self.level.shape), dtype=np.int8)
         floor_tiles = np.argwhere(self.level == h.IS_FREE_CELL)
@@ -62,7 +102,7 @@
         # Returns State, Reward, Done, Info
         return self.state, 0, self.done, {}

-    def additional_actions(self, agent_i, action) -> ((int, int), bool):
+    def additional_actions(self, agent_i: int, action: int) -> ((int, int), bool):
         raise NotImplementedError

     def step(self, actions):
@@ -86,10 +126,11 @@
                 states[i].update(collision_vector=collision_vec)

         reward, info = self.calculate_reward(states)
+        self.cumulative_reward += reward

         if self.steps >= self.max_steps:
             self.done = True
-        return self.state, reward, self.done, info
+        return self.state, self.cumulative_reward, self.done, info

     def _is_moving_action(self, action):
         if action < self.movement_actions:
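
A minimal sketch of how the new FactoryMonitor is meant to be driven, for orientation. The stub environment below is hypothetical (the monitor only reads env.steps); note that add() and reduce() extend the last value recorded under a key, so a key should be seeded via set() before its first add():

    from environments.factory.base_factory import FactoryMonitor

    class StubEnv:
        steps = 1  # hypothetical stand-in; FactoryMonitor only reads this counter

    env = StubEnv()
    monitor = FactoryMonitor(env)

    monitor.set('dirt_amount', 5.0)     # absolute value recorded at step 1
    env.steps = 2
    monitor.add('dirt_amount', 0.5)     # last value + 0.5 -> 5.5 at step 2
    env.steps = 3
    monitor.reduce('dirt_amount', 2.0)  # last value - 2.0 -> 3.5 at step 3

    print(monitor.to_dict())  # {'dirt_amount': {1: 5.0, 2: 5.5, 3: 3.5}}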

View File

@@ -22,8 +22,11 @@ class SimpleFactory(BaseFactory):
     def calculate_reward(self, agent_states):
         for agent_state in agent_states:
             collisions = agent_state.collisions
+            entities = [self.slice_strings[entity] for entity in collisions]
+            for entity in entities:
+                self.monitor.add(f'{entity}_collisions', 1)
             print(f't = {self.steps}\tAgent {agent_state.i} has collisions with '
-                  f'{[self.slice_strings[entity] for entity in collisions]}')
+                  f'{entities}')
         return 0, {}
@@ -33,3 +36,6 @@ if __name__ == '__main__':
     random_actions = [random.randint(0, 7) for _ in range(200)]
     for action in random_actions:
         state, r, done, _ = factory.step(action)
+
+    print(f'Factory run done, reward is:\n    {r}')
+    print(f'There have been the following collisions: \n {dict(factory.monitor)}')
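
With the counters above, dict(factory.monitor) maps each metric name to a step-indexed series; since add() builds on the last recorded value, the inner dicts hold running totals at the steps where collisions occurred. The numbers below are purely illustrative, not output from a real run:

    # illustrative shape of dict(factory.monitor) after a run:
    # {'level_collisions':   {4: 1, 9: 2},   # wall contacts at steps 4 and 9
    #  'agent#1_collisions': {7: 1}}         # one agent/agent collision at step 7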

View File

@@ -1,9 +1,10 @@
+from collections import defaultdict
+from typing import List
+
 import numpy as np
 from attr import dataclass

-from environments.factory.base_factory import BaseFactory
-from collections import namedtuple
-from typing import Iterable
+from environments.factory.base_factory import BaseFactory, AgentState
 from environments import helpers as h

 DIRT_INDEX = -1
@@ -16,9 +17,8 @@ class DirtProperties:
 class GettingDirty(BaseFactory):

-    @property
-    def _clean_up_action(self):
-        return self.movement_actions + 1 - 1
+    def _is_clean_up_action(self, action):
+        return self.movement_actions + 1 - 1 == action

     def __init__(self, *args, dirt_properties: DirtProperties, **kwargs):
         self._dirt_properties = dirt_properties
@@ -43,16 +43,20 @@ class GettingDirty(BaseFactory):
         self.state[DIRT_INDEX][pos] = max(new_dirt_amount, h.IS_FREE_CELL)
         return pos, cleanup_was_sucessfull

-    def additional_actions(self, agent_i, action) -> ((int, int), bool):
+    def additional_actions(self, agent_i: int, action: int) -> ((int, int), bool):
         if action != self._is_moving_action(action):
-            if action == self._clean_up_action:
+            if self._is_clean_up_action(action):
                 agent_i_pos = self.agent_i_position(agent_i)
                 _, valid = self.clean_up(agent_i_pos)
                 if valid:
                     print(f'Agent {agent_i} did just clean up some dirt at {agent_i_pos}.')
+                    self.monitor.add('dirt_cleaned', self._dirt_properties.clean_amount)
                 else:
                     print(f'Agent {agent_i} just tried to clean up some dirt at {agent_i_pos}, but was unsucsessfull.')
+                    self.monitor.add('failed_attempts', 1)
                 return agent_i_pos, valid
+            else:
+                raise RuntimeError('This should not happen!!!')
         else:
             raise RuntimeError('This should not happen!!!')
@@ -63,18 +67,26 @@ class GettingDirty(BaseFactory):
         self.state = np.concatenate((self.state, dirt_slice))  # dirt is now the last slice
         self.spawn_dirt()

-    def calculate_reward(self, collisions_vecs: np.ndarray, actions: Iterable[int]) -> (int, dict):
-        for agent_i, cols in enumerate(collisions_vecs):
-            cols = np.argwhere(cols != 0).flatten()
-            print(f't = {self.steps}\tAgent {agent_i} has collisions with '
-                  f'{[self.slice_strings[entity] for entity in cols if entity != self.state.shape[0]]}')
-        return 0, {}
+    def calculate_reward(self, agent_states: List[AgentState]) -> (int, dict):
+        this_step_reward = 0
+        for agent_state in agent_states:
+            collisions = agent_state.collisions
+            print(f't = {self.steps}\tAgent {agent_state.i} has collisions with '
+                  f'{[self.slice_strings[entity] for entity in collisions if entity != self.string_slices["dirt"]]}')
+            if self._is_clean_up_action(agent_state.action) and agent_state.action_valid:
+                this_step_reward += 1
+        self.monitor.set('dirt_amount', self.state[DIRT_INDEX].sum())
+        self.monitor.set('dirty_tiles', len(np.nonzero(self.state[DIRT_INDEX])))
+        return this_step_reward, {}


 if __name__ == '__main__':
     import random
     dirt_props = DirtProperties()
     factory = GettingDirty(n_agents=1, dirt_properties=dirt_props)
-    random_actions = [random.randint(0, 8) for _ in range(200)]
-    for action in random_actions:
-        state, r, done, _ = factory.step(action)
+    random_actions = [random.randint(0, 8) for _ in range(2000)]
+    for random_action in random_actions:
+        state, r, done, _ = factory.step(random_action)
+    print(f'Factory run done, reward is:\n    {r}')
+    print(f'The following running stats have been recorded:\n{dict(factory.monitor)}')
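
Since the monitor stores a metric -> {step: value} mapping, the new to_pd_dataframe() hook turns a finished run into a step-indexed DataFrame (pandas fills steps without a recorded value with NaN). A short usage sketch, assuming a completed run like the one above:

    df = factory.monitor.to_pd_dataframe()
    # columns are metric names ('dirt_amount', 'dirty_tiles', 'dirt_cleaned', ...);
    # the index holds the steps at which values were recorded
    print(df.tail())
    df['dirt_amount'].plot()  # needs matplotlib: remaining dirt over time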