Monitor Class

This commit is contained in:
steffen-illium 2021-05-14 11:54:59 +02:00
parent 86204a6266
commit 18be35dc19
3 changed files with 89 additions and 30 deletions

View File

@ -1,22 +1,21 @@
import random
from typing import Tuple, List, Union, Iterable
from collections import defaultdict
from typing import List
import numpy as np
from pathlib import Path
from attr import dataclass
from environments import helpers as h
@dataclass
class AgentState:
i: int
action: int
pos = None
collision_vector = None
action_valid = None
def __init__(self, i: int, action: int):
self.i = i
self.action = action
self.pos = None
self.collision_vector = None
self.action_valid = None
@property
def collisions(self):
@ -30,12 +29,51 @@ class AgentState:
raise AttributeError(f'"{key}" cannot be updated, this attr is not a part of {self.__class__.__name__}')
class FactoryMonitor:
    """Record running statistics of a factory environment, indexed by step.

    Every key owns a series ``{step: value}``. ``add`` and ``reduce``
    continue a series from its most recently inserted value (``0`` for a
    series that has no entries yet), ``set`` stores an absolute value.
    When no explicit ``step`` is given, ``env.steps`` is used.
    """

    def __init__(self, env):
        """Bind the monitor to ``env``, whose ``steps`` attribute is the default step."""
        self._env = env
        # key -> {step -> value}; unseen keys and steps read as 0.
        self._monitor = defaultdict(lambda: defaultdict(lambda: 0))

    def __iter__(self):
        """Yield ``(key, {step: value})`` pairs, which makes ``dict(monitor)`` work."""
        for key, value in self._monitor.items():
            yield key, dict(value)

    def _resolve_step(self, step):
        """Return the step to write to; ``None`` falls back to ``env.steps``.

        Raises:
            ValueError: if an explicit ``step`` is smaller than 1. This is a
                real exception rather than an ``assert`` so the check is not
                stripped when Python runs with ``-O``.
        """
        if step is None:
            return self._env.steps
        if step < 1:
            raise ValueError(f'step must be None or >= 1, got {step!r}')
        return step

    def _latest(self, key):
        """Return the most recently inserted value of ``key``'s series, or 0 if empty."""
        series = self._monitor[key]
        # An empty series used to raise IndexError on the very first add/reduce.
        return list(series.values())[-1] if series else 0

    def add(self, key, value, step=None):
        """Store ``latest + value`` at ``step`` and return the stored value."""
        step = self._resolve_step(step)
        self._monitor[key][step] = self._latest(key) + value
        return self._monitor[key][step]

    def set(self, key, value, step=None):
        """Store ``value`` at ``step`` and return the stored value."""
        step = self._resolve_step(step)
        self._monitor[key][step] = value
        return self._monitor[key][step]

    def reduce(self, key, value, step=None):
        """Store ``latest - value`` at ``step`` and return the stored value."""
        step = self._resolve_step(step)
        self._monitor[key][step] = self._latest(key) - value
        # Returned for symmetry with ``add`` and ``set``.
        return self._monitor[key][step]

    def to_dict(self):
        """Return all series as a plain ``{key: {step: value}}`` dict."""
        return dict(self)

    def to_pd_dataframe(self):
        """Return all series as a pandas DataFrame built from ``to_dict()``."""
        # Imported lazily so pandas is only required when this export is used.
        import pandas as pd
        return pd.DataFrame.from_dict(self.to_dict())
class BaseFactory:
@property
def movement_actions(self):
    """Return four actions for every enabled movement flag.

    The two flags read are ``allow_vertical_movement`` and
    ``allow_horizontal_movement``; each is converted with ``int``.
    """
    movement_flags = (self.allow_vertical_movement, self.allow_horizontal_movement)
    enabled = sum(int(flag) for flag in movement_flags)
    return enabled * 4
@property
def string_slices(self):
    """Return the inverse of ``slice_strings``: each value mapped back to its key."""
    inverted = {}
    for slice_index, slice_name in self.slice_strings.items():
        inverted[slice_name] = slice_index
    return inverted
def __init__(self, level='simple', n_agents=1, max_steps=1e3):
self.n_agents = n_agents
self.max_steps = max_steps
@ -45,11 +83,13 @@ class BaseFactory:
h.parse_level(Path(__file__).parent / h.LEVELS_DIR / f'{level}.txt')
)
self.slice_strings = {0: 'level', **{i: f'agent#{i}' for i in range(1, self.n_agents+1)}}
self.monitor = FactoryMonitor(self)
self.reset()
def reset(self):
self.done = False
self.steps = 0
self.cumulative_reward = 0
# Agent placement ...
agents = np.zeros((self.n_agents, *self.level.shape), dtype=np.int8)
floor_tiles = np.argwhere(self.level == h.IS_FREE_CELL)
@ -62,7 +102,7 @@ class BaseFactory:
# Returns State, Reward, Done, Info
return self.state, 0, self.done, {}
def additional_actions(self, agent_i, action) -> ((int, int), bool):
def additional_actions(self, agent_i: int, action: int) -> ((int, int), bool):
raise NotImplementedError
def step(self, actions):
@ -86,10 +126,11 @@ class BaseFactory:
states[i].update(collision_vector=collision_vec)
reward, info = self.calculate_reward(states)
self.cumulative_reward += reward
if self.steps >= self.max_steps:
self.done = True
return self.state, reward, self.done, info
return self.state, self.cumulative_reward, self.done, info
def _is_moving_action(self, action):
if action < self.movement_actions:

View File

@ -22,8 +22,11 @@ class SimpleFactory(BaseFactory):
def calculate_reward(self, agent_states):
for agent_state in agent_states:
collisions = agent_state.collisions
entities = [self.slice_strings[entity] for entity in collisions]
for entity in entities:
self.monitor.add(f'{entity}_collisions', 1)
print(f't = {self.steps}\tAgent {agent_state.i} has collisions with '
f'{[self.slice_strings[entity] for entity in collisions]}')
f'{entities}')
return 0, {}
@ -33,3 +36,6 @@ if __name__ == '__main__':
random_actions = [random.randint(0, 7) for _ in range(200)]
for action in random_actions:
state, r, done, _ = factory.step(action)
print(f'Factory run done, reward is:\n {r}')
print(f'There have been the following collisions: \n {dict(factory.monitor)}')

View File

@ -1,9 +1,10 @@
from collections import defaultdict
from typing import List
import numpy as np
from attr import dataclass
from environments.factory.base_factory import BaseFactory
from collections import namedtuple
from typing import Iterable
from environments.factory.base_factory import BaseFactory, AgentState
from environments import helpers as h
DIRT_INDEX = -1
@ -16,9 +17,8 @@ class DirtProperties:
class GettingDirty(BaseFactory):
@property
def _clean_up_action(self):
return self.movement_actions + 1 - 1
def _is_clean_up_action(self, action):
    """Return whether ``action`` equals the clean-up action index."""
    # The original spelled this index as ``movement_actions + 1 - 1``,
    # which is the same number as ``movement_actions`` itself.
    clean_up_action = self.movement_actions
    return clean_up_action == action
def __init__(self, *args, dirt_properties: DirtProperties, **kwargs):
self._dirt_properties = dirt_properties
@ -43,16 +43,20 @@ class GettingDirty(BaseFactory):
self.state[DIRT_INDEX][pos] = max(new_dirt_amount, h.IS_FREE_CELL)
return pos, cleanup_was_sucessfull
def additional_actions(self, agent_i, action) -> ((int, int), bool):
def additional_actions(self, agent_i: int, action: int) -> ((int, int), bool):
if action != self._is_moving_action(action):
if action == self._clean_up_action:
if self._is_clean_up_action(action):
agent_i_pos = self.agent_i_position(agent_i)
_, valid = self.clean_up(agent_i_pos)
if valid:
print(f'Agent {agent_i} did just clean up some dirt at {agent_i_pos}.')
self.monitor.add('dirt_cleaned', self._dirt_properties.clean_amount)
else:
print(f'Agent {agent_i} just tried to clean up some dirt at {agent_i_pos}, but was unsucsessfull.')
self.monitor.add('failed_attempts', 1)
return agent_i_pos, valid
else:
raise RuntimeError('This should not happen!!!')
else:
raise RuntimeError('This should not happen!!!')
@ -63,18 +67,26 @@ class GettingDirty(BaseFactory):
self.state = np.concatenate((self.state, dirt_slice)) # dirt is now the last slice
self.spawn_dirt()
def calculate_reward(self, collisions_vecs: np.ndarray, actions: Iterable[int]) -> (int, dict):
for agent_i, cols in enumerate(collisions_vecs):
cols = np.argwhere(cols != 0).flatten()
print(f't = {self.steps}\tAgent {agent_i} has collisions with '
f'{[self.slice_strings[entity] for entity in cols if entity != self.state.shape[0]]}')
return 0, {}
def calculate_reward(self, agent_states: List[AgentState]) -> (int, dict):
    """Compute this step's reward and record dirt statistics on the monitor.

    For every agent state the collisions are printed, leaving out the entry
    that ``string_slices`` maps to ``"dirt"``. Each agent whose action is the
    clean-up action and whose ``action_valid`` is truthy adds 1 to the reward.

    Args:
        agent_states: the agent states of the current step.

    Returns:
        A ``(reward, info)`` tuple; ``info`` is always an empty dict.
    """
    this_step_reward = 0
    for agent_state in agent_states:
        collisions = agent_state.collisions
        print(f't = {self.steps}\tAgent {agent_state.i} has collisions with '
              f'{[self.slice_strings[entity] for entity in collisions if entity != self.string_slices["dirt"]]}')
        if self._is_clean_up_action(agent_state.action) and agent_state.action_valid:
            this_step_reward += 1

    # NOTE(review): indentation was lost in the source view; the statistics
    # are recorded once per call here rather than once per agent - confirm.
    dirt = self.state[DIRT_INDEX]
    self.monitor.set('dirt_amount', dirt.sum())
    # np.nonzero returns a tuple with one index array per dimension, so
    # len(np.nonzero(dirt)) is the number of dimensions, not of dirty tiles.
    self.monitor.set('dirty_tiles', np.count_nonzero(dirt))
    return this_step_reward, {}
if __name__ == '__main__':
import random
dirt_props = DirtProperties()
factory = GettingDirty(n_agents=1, dirt_properties=dirt_props)
random_actions = [random.randint(0, 8) for _ in range(200)]
for action in random_actions:
state, r, done, _ = factory.step(action)
random_actions = [random.randint(0, 8) for _ in range(2000)]
for random_action in random_actions:
state, r, done, _ = factory.step(random_action)
print(f'Factory run done, reward is:\n {r}')
print(f'The following running stats have been recorded:\n{dict(factory.monitor)}')