diff --git a/audio_toolset/audio_augmentation.py b/audio_toolset/audio_augmentation.py
new file mode 100644
index 0000000..c858984
--- /dev/null
+++ b/audio_toolset/audio_augmentation.py
@@ -0,0 +1,58 @@
+import numpy as np
+
+
+class NoiseInjection(object):
+
+    def __init__(self, noise_factor: float, sigma=0.5, mu=0.5):
+        assert noise_factor > 0, f'noise_factor has to be greater than 0, but was: {noise_factor}.'
+        self.mu = mu
+        self.sigma = sigma
+        self.noise_factor = noise_factor
+
+    def __call__(self, x: np.ndarray):
+        noise = np.random.normal(loc=self.mu, scale=self.sigma, size=x.shape)
+        augmented_data = x + self.noise_factor * noise
+        # Cast back to the input's data type
+        augmented_data = augmented_data.astype(x.dtype)
+        return augmented_data
+
+
+class LoudnessManipulator(object):
+
+    def __init__(self, max_factor: float):
+        assert 1 > max_factor > 0, f'max_factor has to be between [0,1], but was: {max_factor}.'
+
+        self.max_factor = max_factor
+
+    def __call__(self, x: np.ndarray):
+        augmented_data = x + x * (np.random.random() * self.max_factor)
+        # Cast back to the input's data type
+        augmented_data = augmented_data.astype(x.dtype)
+        return augmented_data
+
+
+class ShiftTime(object):
+
+    valid_shifts = ['right', 'left', 'any']
+
+    def __init__(self, max_shift_ratio: float, shift_direction: str = 'any'):
+        assert 1 > max_shift_ratio > 0, f'max_shift_ratio has to be between [0,1], but was: {max_shift_ratio}.'
+        assert shift_direction.lower() in self.valid_shifts, f'shift_direction has to be one of: {self.valid_shifts}'
+        self.max_shift_ratio = max_shift_ratio
+        self.shift_direction = shift_direction.lower()
+
+    def __call__(self, x: np.ndarray):
+        shift = np.random.randint(max(int(self.max_shift_ratio * x.shape[-1]), 1))
+        if self.shift_direction == 'right':
+            shift = -1 * shift
+        elif self.shift_direction == 'any':
+            direction = np.random.choice([1, -1])
+            shift = direction * shift
+        augmented_data = np.roll(x, shift)
+        # Set the wrapped-around head/tail samples to silence
+        shift = int(shift)
+        if shift > 0:
+            augmented_data[:shift] = 0
+        elif shift < 0:
+            augmented_data[shift:] = 0
+        return augmented_data
diff --git a/audio_toolset/audio_io.py b/audio_toolset/audio_io.py
index 82f1575..c34f4c9 100644
--- a/audio_toolset/audio_io.py
+++ b/audio_toolset/audio_io.py
@@ -1,5 +1,4 @@
 import librosa
-import torch
 from scipy.signal import butter, lfilter
 import numpy as np
 
@@ -36,31 +35,34 @@ class MFCC(object):
 
 class NormalizeLocal(object):
     def __init__(self):
-        self.cache: torch.Tensor
+        self.cache: np.ndarray
         pass
 
-    def __call__(self, x: torch.Tensor):
+    def __call__(self, x: np.ndarray):
         mean = x.mean()
-        std = x.std()
+        std = x.std() + 0.0001
 
-        x = x.__sub__(mean).__div__(std)
-        x[torch.isnan(x)] = 0
-        x[torch.isinf(x)] = 0
+        # PyTorch version:
+        # x = x.__sub__(mean).__div__(std)
+        # NumPy version:
+        x = (x - mean) / std
+        x[np.isnan(x)] = 0
+        x[np.isinf(x)] = 0
         return x
 
 
 class NormalizeMelband(object):
     def __init__(self):
-        self.cache: torch.Tensor
+        self.cache: np.ndarray
         pass
 
-    def __call__(self, x: torch.Tensor):
-        mean = x.mean(-1).unsqueeze(-1)
-        std = x.std(-1).unsqueeze(-1)
-        x = x.__sub__(mean).__div__(std)
-        x[torch.isnan(x)] = 0
-        x[torch.isinf(x)] = 0
+    def __call__(self, x: np.ndarray):
+        mean = x.mean(-1, keepdims=True)
+        std = x.std(-1, keepdims=True)
+        x = (x - mean) / std
+        x[np.isnan(x)] = 0
+        x[np.isinf(x)] = 0
         return x
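
Review aside: the three new augmentation classes follow the torchvision callable-transform convention, so they chain naturally. A minimal usage sketch; the 1-D mono waveform and the use of torchvision's Compose are illustrative assumptions, not part of this patch:

    import numpy as np
    from torchvision.transforms import Compose

    from audio_toolset.audio_augmentation import NoiseInjection, LoudnessManipulator, ShiftTime

    augment = Compose([
        NoiseInjection(noise_factor=0.01, sigma=1.0, mu=0.0),    # additive Gaussian noise
        LoudnessManipulator(max_factor=0.3),                     # random gain of up to +30 %
        ShiftTime(max_shift_ratio=0.2, shift_direction='any'),   # roll in time, silence the wrap-around
    ])

    x = np.random.randn(16000).astype(np.float32)  # stand-in for one second of 16 kHz audio
    x_aug = augment(x)
    assert x_aug.shape == x.shape and x_aug.dtype == x.dtype  # all three transforms preserve shape and dtype
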
diff --git a/modules/utils.py b/modules/utils.py
index ad0560f..1a18b3d 100644
--- a/modules/utils.py
+++ b/modules/utils.py
@@ -24,27 +24,21 @@ class F_x(object):
 
 class ShapeMixin:
 
-    @property
-    def shape(self):
-        x = torch.randn(self.in_shape).unsqueeze(0)
-        output: torch.Tensor = self(x)
-        return output.shape[1:]
-
-
-# Utility - Modules
-###################
-class Flatten(nn.Module):
-
     @property
     def shape(self):
         try:
             x = torch.randn(self.in_shape).unsqueeze(0)
             output: torch.Tensor = self(x)
             return output.shape[1:]
         except Exception as e:
             print(e)
             return -1
 
+
+# Utility - Modules
+###################
+class Flatten(ShapeMixin, nn.Module):
+
     def __init__(self, in_shape, to=-1):
         assert isinstance(to, int) or isinstance(to, tuple)
         super(Flatten, self).__init__()
@@ -172,29 +166,6 @@ class LightningBaseModule(pl.LightningModule, ABC):
         self.apply(weight_initializer)
 
 
-class BaseModuleMixin_Dataloaders(ABC):
-
-    # Dataloaders
-    # ================================================================================
-    # Train Dataloader
-    def train_dataloader(self):
-        return DataLoader(dataset=self.dataset.train_dataset, shuffle=True,
-                          batch_size=self.params.batch_size,
-                          num_workers=self.params.worker)
-
-    # Test Dataloader
-    def test_dataloader(self):
-        return DataLoader(dataset=self.dataset.test_dataset, shuffle=True,
-                          batch_size=self.params.batch_size,
-                          num_workers=self.params.worker)
-
-    # Validation Dataloader
-    def val_dataloader(self):
-        return DataLoader(dataset=self.dataset.val_dataset, shuffle=True,
-                          batch_size=self.params.batch_size,
-                          num_workers=self.params.worker)
-
-
 class FilterLayer(nn.Module):
 
     def __init__(self):
@@ -253,7 +224,7 @@ class HorizontalSplitter(nn.Module):
         self.in_shape = in_shape
         self.channel, self.height, self.width = self.in_shape
 
-        self.new_height = (self.height // self.n) + 1 if self.height % self.n != 0 else 0
+        self.new_height = (self.height // self.n) + (1 if self.height % self.n != 0 else 0)
 
         self.shape = (self.channel, self.new_height, self.width)
         self.autopad = AutoPadToShape(self.shape)
diff --git a/utils/config.py b/utils/config.py
index 20bbc2a..5892604 100644
--- a/utils/config.py
+++ b/utils/config.py
@@ -7,8 +7,7 @@ from argparse import Namespace, ArgumentParser
 from collections import defaultdict
 from configparser import ConfigParser
 from pathlib import Path
-
-from ml_lib.utils.model_io import ModelParameters
+import hashlib
 
 
 def is_jsonable(x):
@@ -22,6 +21,30 @@ def is_jsonable(x):
 
 class Config(ConfigParser, ABC):
 
+    @property
+    def name(self):
+        short_name = "".join(c for c in self.model.type if c.isupper())
+        return f'{short_name}_{self.fingerprint}'
+
+    @property
+    def version(self):
+        return f'version_{self.main.seed}'
+
+    @property
+    def exp_path(self):
+        return Path(self.train.outpath) / self.model.type / self.name
+
+    @property
+    def fingerprint(self):
+        h = hashlib.md5()
+        params = deepcopy(self.as_dict)
+        del params['model']['type']
+        del params['data']['worker']
+        del params['main']
+        h.update(str(params).encode())
+        fingerprint = h.hexdigest()
+        return fingerprint
+
     @property
     def _model_weight_init(self):
         mod = __import__('torch.nn.init', fromlist=[self.model.weight_init])
@@ -33,8 +56,8 @@ class Config(ConfigParser, ABC):
-        This is function is supposed to return a dict, which holds a mapping from string model names to model classes
+        This function is supposed to return a dict, which holds a mapping from string model names to model classes.
 
         Example:
-            from models.binary_classifier import BinaryClassifier
-            return dict(BinaryClassifier=BinaryClassifier,
+            from models.binary_classifier import ConvClassifier
+            return dict(ConvClassifier=ConvClassifier,
                         )
         :return:
         """
@@ -46,8 +69,7 @@ class Config(ConfigParser, ABC):
         try:
             return self._model_map[self.model.type]
         except KeyError:
-            raise KeyError(rf'The model alias you provided ("{self.get("model", "type")}") does not exist! \n'
-                           f'Try one of these:\n{list(self._model_map.keys())}')
+            raise KeyError(f'The model alias you provided ("{self.get("model", "type")}") does not exist! Try one of these: {list(self._model_map.keys())}')
 
     # TODO: Do this programmatically; This did not work:
     # Initialize Default Sections as Property
@@ -83,6 +105,7 @@ class Config(ConfigParser, ABC):
         params.update(self.train.__dict__)
         assert all(key not in list(params.keys()) for key in self.data.__dict__)
         params.update(self.data.__dict__)
+        params.update(exp_path=str(self.exp_path), exp_fingerprint=str(self.fingerprint))
         return params
 
     @property
@@ -134,7 +157,6 @@ class Config(ConfigParser, ABC):
         new_config.read_dict(sorted_dict)
         return new_config
 
-
     def build_model(self):
         return self.model_class(self.model_paramters)
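
Review aside on the new fingerprint property: it hashes the config contents minus the model alias, the worker count, and the [main] section, so identical hyperparameters always map to the same experiment directory, while the seed only changes `version`. Two things worth double-checking: `fingerprint` calls `deepcopy`, which this hunk does not import (it may already be imported elsewhere in config.py), and `str(params)` relies on stable dict ordering, which holds for insertion-ordered dicts on Python 3.7+. A standalone sketch of the idea, with a made-up stand-in for `Config.as_dict`:

    import hashlib
    from copy import deepcopy

    # Hypothetical stand-in for Config.as_dict; keys mirror the sections used above
    params = {'main': {'seed': 42},
              'model': {'type': 'ConvClassifier', 'weight_init': 'xavier_normal_'},
              'data': {'worker': 4, 'n_mels': 64},
              'train': {'batch_size': 32, 'outpath': 'output'}}

    hashable = deepcopy(params)
    del hashable['model']['type']   # already encoded in the short name
    del hashable['data']['worker']  # machine-dependent, must not change the hash
    del hashable['main']            # the seed goes into `version` instead
    fingerprint = hashlib.md5(str(hashable).encode()).hexdigest()
    print(fingerprint)  # identical settings -> identical hash -> same exp_path
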
diff --git a/utils/logging.py b/utils/logging.py
index 84c1949..11c2a5b 100644
--- a/utils/logging.py
+++ b/utils/logging.py
@@ -25,7 +25,7 @@ class Logger(LightningLoggerBase, ABC):
 
     @property
     def name(self):
-        return self.config.model.type
+        return self.config.name
 
     @property
    def project_name(self):
@@ -37,7 +37,11 @@ class Logger(LightningLoggerBase, ABC):
 
     @property
     def outpath(self):
-        raise NotImplementedError
+        return Path(self.config.train.outpath) / self.config.model.type
+
+    @property
+    def exp_path(self):
+        return Path(self.outpath) / self.name
 
     def __init__(self, config: Config):
         """
@@ -58,10 +62,12 @@ class Logger(LightningLoggerBase, ABC):
         self._testtube_kwargs = dict(save_dir=self.outpath, version=self.version, name=self.name)
         self._neptune_kwargs = dict(offline_mode=self.debug,
                                     api_key=self.config.project.neptune_key,
+                                    experiment_name=self.name,
                                     project_name=self.project_name,
                                     upload_source_files=list())
         self.neptunelogger = NeptuneLogger(**self._neptune_kwargs)
         self.testtubelogger = TestTubeLogger(**self._testtube_kwargs)
+        self.log_config_as_ini()
 
     def log_hyperparams(self, params):
         self.neptunelogger.log_hyperparams(params)
@@ -80,6 +86,10 @@ class Logger(LightningLoggerBase, ABC):
     def log_config_as_ini(self):
         self.config.write(self.log_dir / 'config.ini')
 
+    def log_text(self, name, text, step_nb=0, **kwargs):
+        # TODO: Implement an offline variant.
+        self.neptunelogger.log_text(name, text, step_nb)
+
     def log_metric(self, metric_name, metric_value, **kwargs):
-        self.testtubelogger.log_metrics(dict(metric_name=metric_value))
+        self.testtubelogger.log_metrics({metric_name: metric_value})
         self.neptunelogger.log_metric(metric_name, metric_value, **kwargs)
@@ -97,7 +107,6 @@ class Logger(LightningLoggerBase, ABC):
     def finalize(self, status):
         self.testtubelogger.finalize(status)
         self.neptunelogger.finalize(status)
-        self.log_config_as_ini()
 
     def __enter__(self):
         return self
diff --git a/utils/transforms.py b/utils/transforms.py
new file mode 100644
index 0000000..945bca1
--- /dev/null
+++ b/utils/transforms.py
@@ -0,0 +1,8 @@
+from torchvision.transforms import ToTensor as TorchvisionToTensor
+
+
+class ToTensor(TorchvisionToTensor):
+
+    def __call__(self, pic):
+        tensor = super(ToTensor, self).__call__(pic).float()
+        return tensor
\ No newline at end of file
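
Review aside on the ToTensor override: torchvision's ToTensor only rescales and converts uint8 image data, so a float64 spectrogram would pass through as a float64 tensor; the trailing .float() forces float32 for the model. A quick illustrative check; the spectrogram shape and H x W x C layout are assumptions, not part of this patch:

    import numpy as np
    from utils.transforms import ToTensor

    spec = np.random.rand(64, 128, 1)   # float64 stand-in for a mel spectrogram, H x W x C
    tensor = ToTensor()(spec)
    print(tensor.shape, tensor.dtype)   # torch.Size([1, 64, 128]) torch.float32
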