initial commit

Si11ium
2020-04-08 14:59:42 +02:00
parent c15ee64688
commit f0262e1895
19 changed files with 1021 additions and 0 deletions

utils/__init__.py Normal file

utils/config.py Normal file

@@ -0,0 +1,135 @@
import ast
from argparse import Namespace
from collections import defaultdict
from configparser import ConfigParser
from pathlib import Path
from ml_lib.models.generators.cnn import CNNRouteGeneratorModel
from ml_lib.models.generators.cnn_discriminated import CNNRouteGeneratorDiscriminated
from ml_lib.models.homotopy_classification.cnn_based import ConvHomDetector
from ml_lib.utils.model_io import ModelParameters
from ml_lib.utils.transforms import AsArray
def is_jsonable(x):
import json
try:
json.dumps(x)
return True
except TypeError:
return False
class Config(ConfigParser):
# TODO: Do this programmatically; This did not work:
# Initialize Default Sections
# for section in self.default_sections:
# self.__setattr__(section, property(lambda x :x._get_namespace_for_section(section))
@property
def model_class(self):
model_dict = dict(ConvHomDetector=ConvHomDetector,
CNNRouteGenerator=CNNRouteGeneratorModel,
CNNRouteGeneratorDiscriminated=CNNRouteGeneratorDiscriminated
)
try:
return model_dict[self.get('model', 'type')]
except KeyError as e:
            raise KeyError(f'The model alias you provided ("{self.get("model", "type")}") does not exist!\n'
                           f'Try one of these:\n{list(model_dict.keys())}') from e
@property
def main(self):
return self._get_namespace_for_section('main')
@property
def model(self):
return self._get_namespace_for_section('model')
@property
def train(self):
return self._get_namespace_for_section('train')
@property
def data(self):
return self._get_namespace_for_section('data')
@property
def project(self):
return self._get_namespace_for_section('project')
###################################################
@property
    def model_parameters(self):
return ModelParameters(self.model, self.train, self.data)
@property
    def tags(self):
return [f'{key}: {val}' for key, val in self.serializable.items()]
@property
def serializable(self):
return {f'{section}_{key}': val for section, params in self._sections.items()
for key, val in params.items() if is_jsonable(val)}
@property
def as_dict(self):
return self._sections
def _get_namespace_for_section(self, item):
return Namespace(**{key: self.get(item, key) for key in self[item]})
def __init__(self, **kwargs):
super(Config, self).__init__(**kwargs)
@staticmethod
def _sort_combined_section_key_mapping(dict_obj):
sorted_dict = defaultdict(dict)
for key in dict_obj:
section, *attr_name = key.split('_')
attr_name = '_'.join(attr_name)
value = str(dict_obj[key])
sorted_dict[section][attr_name] = value
# noinspection PyTypeChecker
return dict(sorted_dict)
@classmethod
def read_namespace(cls, namespace: Namespace):
sorted_dict = cls._sort_combined_section_key_mapping(namespace.__dict__)
new_config = cls()
new_config.read_dict(sorted_dict)
return new_config
def update(self, mapping):
sorted_dict = self._sort_combined_section_key_mapping(mapping)
for section in sorted_dict:
            if not self.has_section(section):
                self.add_section(section)
for option, value in sorted_dict[section].items():
self.set(section, option, value)
return self
def get(self, *args, **kwargs):
item = super(Config, self).get(*args, **kwargs)
try:
return ast.literal_eval(item)
        except (SyntaxError, ValueError):
            return item
def write(self, filepath, **kwargs):
        path = Path(filepath)
path.parent.mkdir(parents=True, exist_ok=True)
with path.open('w') as configfile:
super().write(configfile)
return True
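
A minimal usage sketch for Config, assuming the repo is importable as ml_lib (its own modules import from ml_lib.utils.*); the namespace keys and the output path are illustrative:

from argparse import Namespace
from ml_lib.utils.config import Config

# Flat 'section_key' names are split on the first underscore into sections
args = Namespace(main_debug='True', model_type='ConvHomDetector', train_outpath='output')
config = Config.read_namespace(args)

assert config.main.debug is True      # get() runs ast.literal_eval on the raw strings
print(config.model_class)             # resolves the 'type' alias to a model class
config.write('output/config.ini')     # persists the config for later runs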

utils/logging.py Normal file

@@ -0,0 +1,108 @@
from pathlib import Path
from pytorch_lightning.loggers.base import LightningLoggerBase
from pytorch_lightning.loggers.neptune import NeptuneLogger
from pytorch_lightning.loggers.test_tube import TestTubeLogger
from ml_lib.utils.config import Config
import numpy as np
class Logger(LightningLoggerBase):
media_dir = 'media'
@property
def experiment(self):
if self.debug:
return self.testtubelogger.experiment
else:
return self.neptunelogger.experiment
@property
def log_dir(self):
return Path(self.testtubelogger.experiment.get_logdir()).parent
@property
def name(self):
return self.config.model.type
@property
def project_name(self):
return f"{self.config.project.owner}/{self.config.project.name}"
@property
def version(self):
return self.config.get('main', 'seed')
@property
def outpath(self):
# ToDo: Add further path modification such as dataset config etc.
return Path(self.config.train.outpath) / self.config.data.mode
def __init__(self, config: Config):
"""
params (dict|None): Optional. Parameters of the experiment. After experiment creation params are read-only.
Parameters are displayed in the experiments Parameters section and each key-value pair can be
viewed in experiments view as a column.
properties (dict|None): Optional default is {}. Properties of the experiment.
They are editable after experiment is created. Properties are displayed in the experiments Details and
each key-value pair can be viewed in experiments view as a column.
tags (list|None): Optional default []. Must be list of str. Tags of the experiment.
They are editable after experiment is created (see: append_tag() and remove_tag()).
Tags are displayed in the experiments Details and can be viewed in experiments view as a column.
"""
super(Logger, self).__init__()
self.config = config
self.debug = self.config.main.debug
self._testtube_kwargs = dict(save_dir=self.outpath, version=self.version, name=self.name)
self._neptune_kwargs = dict(offline_mode=self.debug,
api_key=self.config.project.neptune_key,
project_name=self.project_name,
upload_source_files=list())
self.neptunelogger = NeptuneLogger(**self._neptune_kwargs)
self.testtubelogger = TestTubeLogger(**self._testtube_kwargs)
def log_hyperparams(self, params):
self.neptunelogger.log_hyperparams(params)
self.testtubelogger.log_hyperparams(params)
def log_metrics(self, metrics, step=None):
self.neptunelogger.log_metrics(metrics, step=step)
self.testtubelogger.log_metrics(metrics, step=step)
def close(self):
self.testtubelogger.close()
self.neptunelogger.close()
def log_config_as_ini(self):
self.config.write(self.log_dir / 'config.ini')
def log_metric(self, metric_name, metric_value, **kwargs):
        self.testtubelogger.log_metrics({metric_name: metric_value})
self.neptunelogger.log_metric(metric_name, metric_value, **kwargs)
def log_image(self, name, image, **kwargs):
self.neptunelogger.log_image(name, image, **kwargs)
step = kwargs.get('step', None)
name = f'{step}_{name}' if step is not None else name
        # Ensure the media directory exists before writing the figure to disk
        (self.log_dir / self.media_dir).mkdir(parents=True, exist_ok=True)
        image.savefig(self.log_dir / self.media_dir / name)
def save(self):
self.testtubelogger.save()
self.neptunelogger.save()
def finalize(self, status):
self.testtubelogger.finalize(status)
self.neptunelogger.finalize(status)
self.log_config_as_ini()
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
self.finalize('success')
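
A usage sketch for Logger; it assumes a config.ini providing the sections read above (main, model, train, data, project), including project.neptune_key, and that the repo is importable as ml_lib:

from ml_lib.utils.config import Config
from ml_lib.utils.logging import Logger

config = Config()
config.read('config.ini')  # hypothetical path

with Logger(config) as logger:
    logger.log_hyperparams(config.model_parameters)
    logger.log_metrics({'loss': 0.5}, step=0)
# __exit__ calls finalize('success'), which also writes config.ini to the log dir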

utils/model_io.py Normal file

@@ -0,0 +1,61 @@
from argparse import Namespace
from pathlib import Path
import torch
from natsort import natsorted
from torch import nn
# Hyperparameter Object
class ModelParameters(Namespace):
_activations = dict(
leaky_relu=nn.LeakyReLU,
relu=nn.ReLU,
sigmoid=nn.Sigmoid,
tanh=nn.Tanh
)
def __init__(self, model_param, train_param, data_param):
self.model_param = model_param
self.train_param = train_param
self.data_param = data_param
        # Copy so that updating the merged kwargs does not mutate model_param itself
        kwargs = dict(vars(model_param))
kwargs.update(vars(train_param))
kwargs.update(vars(data_param))
super(ModelParameters, self).__init__(**kwargs)
    def __getattribute__(self, item):
        if item == 'activation':
            # Map the configured activation *name* (a string) to its nn.Module class,
            # falling back to ReLU when it is missing or unknown.
            try:
                activation_name = super(ModelParameters, self).__getattribute__(item)
                return self._activations[activation_name]
            except (AttributeError, KeyError):
                return nn.ReLU
        return super(ModelParameters, self).__getattribute__(item)
class SavedLightningModels(object):
@classmethod
    def load_checkpoint(cls, models_root_path, model=None, n=-1, tags_file_path=''):
        # Accept both str and Path inputs
        models_root_path = Path(models_root_path)
        assert models_root_path.exists(), f'The path {models_root_path.absolute()} does not exist!'
        found_checkpoints = list(models_root_path.rglob('*.ckpt'))
found_checkpoints = natsorted(found_checkpoints, key=lambda y: y.name)
if model is None:
model = torch.load(models_root_path / 'model_class.obj')
assert model is not None
return cls(weights=found_checkpoints[n], model=model)
def __init__(self, **kwargs):
self.weights: str = kwargs.get('weights', '')
self.model = kwargs.get('model', None)
assert self.model is not None
def restore(self):
pretrained_model = self.model.load_from_checkpoint(self.weights)
pretrained_model.eval()
pretrained_model.freeze()
return pretrained_model
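
A usage sketch for SavedLightningModels; the checkpoint directory is hypothetical and must contain *.ckpt files plus a pickled model_class.obj, as load_checkpoint expects:

from pathlib import Path
from ml_lib.utils.model_io import SavedLightningModels

loaded = SavedLightningModels.load_checkpoint(Path('output/checkpoints'))
model = loaded.restore()  # eval() + freeze(): the model is ready for inference only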

utils/parallel.py Normal file

@@ -0,0 +1,25 @@
import multiprocessing as mp


def run_n_in_parallel(f, n, **kwargs):
    # `f` must accept an `output` kwarg and put exactly one result into that queue.
    output = mp.Queue()
    kwargs.update(output=output)
    # Setup a list of processes that we want to run
    processes = [mp.Process(target=f, kwargs=kwargs) for _ in range(n)]
    # Run processes
    for p in processes:
        p.start()
    # Get one result per process from the output queue; get() blocks until one is available
    results = [output.get() for _ in processes]
    # Exit the completed processes
    for p in processes:
        p.join()
    return results
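
A usage sketch for run_n_in_parallel; the worker and its base kwarg are illustrative. The worker must accept the injected output queue and put exactly one result into it:

from ml_lib.utils.parallel import run_n_in_parallel

def square(output, base=2):
    output.put(base ** 2)

if __name__ == '__main__':
    # Spawns 4 processes, each running square(output=..., base=3)
    print(run_n_in_parallel(square, n=4, base=3))  # [9, 9, 9, 9]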

utils/tools.py Normal file

@@ -0,0 +1,23 @@
import pickle
import shelve
from pathlib import Path
def write_to_shelve(file_path, value):
check_path(file_path)
file_path.parent.mkdir(exist_ok=True, parents=True)
    # The context manager closes the shelf on exit; no explicit close() is needed
    with shelve.open(str(file_path), protocol=pickle.HIGHEST_PROTOCOL) as f:
        new_key = str(len(f))
        f[new_key] = value
def load_from_shelve(file_path, key):
check_path(file_path)
with shelve.open(str(file_path)) as d:
return d[key]
def check_path(file_path):
assert isinstance(file_path, Path)
assert str(file_path).endswith('.pik')
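
A usage sketch for the shelve helpers; the .pik suffix is enforced by check_path, and the path is illustrative:

from pathlib import Path
from ml_lib.utils.tools import write_to_shelve, load_from_shelve

db = Path('output/results.pik')
write_to_shelve(db, {'loss': 0.5})  # stored under the next integer key, '0' here
print(load_from_shelve(db, '0'))    # {'loss': 0.5}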

utils/transforms.py Normal file

@@ -0,0 +1,11 @@
import numpy as np
class AsArray(object):
def __init__(self, width, height):
self.width = width
self.height = height
    def __call__(self, x):
        # Assumption: the transform is meant to broadcast/copy the input into a
        # fixed-size (width x height) array; returning np.zeros alone would discard x.
        array = np.zeros((self.width, self.height))
        array[:] = x
        return array
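
A usage sketch for AsArray, under the broadcast-into-fixed-shape reading of __call__ above:

from ml_lib.utils.transforms import AsArray

to_array = AsArray(width=4, height=4)
result = to_array(1.0)
print(result.shape, result.max())  # (4, 4) 1.0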