From c6fdaa24aacfe2b7b10e6fcc531ef48610225d99 Mon Sep 17 00:00:00 2001 From: Si11ium Date: Tue, 1 Dec 2020 16:37:15 +0100 Subject: [PATCH] Audio Dataset --- audio_toolset/audio_augmentation.py | 3 +++ audio_toolset/audio_io.py | 18 +++++++++++++++++ audio_toolset/mel_dataset.py | 30 +++++++++++++++++++++++++++++ audio_toolset/mel_transforms.py | 6 ++++++ modules/util.py | 13 +++++++++++-- utils/config.py | 11 ++++++++++- utils/tools.py | 6 +++--- 7 files changed, 81 insertions(+), 6 deletions(-) create mode 100644 audio_toolset/mel_dataset.py diff --git a/audio_toolset/audio_augmentation.py b/audio_toolset/audio_augmentation.py index 17ea0ef..be2296f 100644 --- a/audio_toolset/audio_augmentation.py +++ b/audio_toolset/audio_augmentation.py @@ -15,6 +15,9 @@ class Speed(object): # noinspection PyTypeChecker self.max_amount = min(max(0, max_amount), 1) + def __repr__(self): + return f'{self.__class__.__name__}({self.__dict__})' + def __call__(self, x): if self.speed_min == 1 and self.speed_max == 1: return x diff --git a/audio_toolset/audio_io.py b/audio_toolset/audio_io.py index b57038c..12acefb 100644 --- a/audio_toolset/audio_io.py +++ b/audio_toolset/audio_io.py @@ -37,6 +37,9 @@ class MFCC(object): def __init__(self, **kwargs): self.__dict__.update(kwargs) + def __repr__(self): + return f'{self.__class__.__name__}({self.__dict__})' + def __call__(self, y): mfcc = librosa.feature.mfcc(y, **self.__dict__) return mfcc @@ -47,6 +50,9 @@ class NormalizeLocal(object): self.cache: np.ndarray pass + def __repr__(self): + return f'{self.__class__.__name__}({self.__dict__})' + def __call__(self, x: np.ndarray): mean = x.mean() std = x.std() + 0.0001 @@ -65,6 +71,9 @@ class NormalizeMelband(object): self.cache: np.ndarray pass + def __repr__(self): + return f'{self.__class__.__name__}({self.__dict__})' + def __call__(self, x: np.ndarray): mean = x.mean(-1).unsqueeze(-1) std = x.std(-1).unsqueeze(-1) @@ -98,6 +107,9 @@ class PowerToDB(object): def __init__(self, running_max=False): self.running_max = 0 if running_max else None + def __repr__(self): + return f'{self.__class__.__name__}({self.__dict__})' + def __call__(self, x): if self.running_max is not None: self.running_max = max(np.max(x), self.running_max) @@ -109,6 +121,9 @@ class LowPass(object): def __init__(self, sr=16000): self.sr = sr + def __repr__(self): + return f'{self.__class__.__name__}({self.__dict__})' + def __call__(self, x): return butter_lowpass_filter(x, 1000, 1) @@ -117,6 +132,9 @@ class MelToImage(object): def __init__(self): pass + def __repr__(self): + return f'{self.__class__.__name__}({self.__dict__})' + def __call__(self, x): # Source to Solution: https://stackoverflow.com/a/57204349 mels = np.log(x + 1e-9) # add small number to avoid log(0) diff --git a/audio_toolset/mel_dataset.py b/audio_toolset/mel_dataset.py new file mode 100644 index 0000000..d2b4536 --- /dev/null +++ b/audio_toolset/mel_dataset.py @@ -0,0 +1,30 @@ +from pathlib import Path + +import numpy as np +from torch.utils.data import Dataset + + +class TorchMelDataset(Dataset): + def __init__(self, identifier, mel_path, segment_len, hop_len, label, padding=0, transform=None): + self.padding = padding + self.path = next(iter(Path(mel_path).glob(f'{identifier}_*'))) + self.segment_len = segment_len + self.m, self.n = str(self.path).split('_')[-2:] # get spectrogram dimensions + self.n = int(self.n.split('.', 1)[0]) # remove .npy + self.m, self.n = (int(i) for i in (self.m, self.n)) + self.offsets = list(range(0, self.n - segment_len, hop_len)) + self.label = label + self.transform = transform + + def __getitem__(self, item): + start = self.offsets[item] + mel_spec = np.load(str(self.path), allow_pickle=True) + if self.padding > 0: + mel_spec = np.pad(mel_spec, pad_width=[(0, 0), (self.padding // 2, self.padding // 2)], mode='mean') + snippet = mel_spec[:, start: start + self.segment_len] + if self.transform: + snippet = self.transform(snippet) + return snippet, self.label + + def __len__(self): + return len(self.offsets) \ No newline at end of file diff --git a/audio_toolset/mel_transforms.py b/audio_toolset/mel_transforms.py index 9aa470a..829f195 100644 --- a/audio_toolset/mel_transforms.py +++ b/audio_toolset/mel_transforms.py @@ -8,6 +8,9 @@ class Normalize(object): def __init__(self, min_db_level: Union[int, float]): self.min_db_level = min_db_level + def __repr__(self): + return f'{self.__class__.__name__}({self.__dict__})' + def __call__(self, s: np.ndarray) -> np.ndarray: return np.clip((s - self.min_db_level) / -self.min_db_level, 0, 1) @@ -17,5 +20,8 @@ class DeNormalize(object): def __init__(self, min_db_level: Union[int, float]): self.min_db_level = min_db_level + def __repr__(self): + return f'{self.__class__.__name__}({self.__dict__})' + def __call__(self, s: np.ndarray) -> np.ndarray: return (np.clip(s, 0, 1) * -self.min_db_level) + self.min_db_level diff --git a/modules/util.py b/modules/util.py index 7554941..4ab4300 100644 --- a/modules/util.py +++ b/modules/util.py @@ -1,5 +1,3 @@ -from typing import List - from functools import reduce from abc import ABC @@ -13,6 +11,7 @@ from torch.nn import functional as F, Unfold # Utility - Modules ################### from ..utils.model_io import ModelParameters +from ..utils.tools import locate_and_import_class try: import pytorch_lightning as pl @@ -45,6 +44,15 @@ try: def size(self): return self.shape + @property + def dataset_class(self): + try: + return locate_and_import_class(self.params.class_name, folder_path='datasets') + except AttributeError as e: + raise AttributeError(f'The dataset alias you provided ("{self.params.class_name}") ' + + f'was not found!\n' + + f'{e}') + def save_to_disk(self, model_path): Path(model_path, exist_ok=True).mkdir(parents=True, exist_ok=True) if not (model_path / 'model_class.obj').exists(): @@ -83,6 +91,7 @@ try: except ImportError: module_types = (nn.Module,) + pl = None pass # Maybe post a hint to install pytorch-lightning. diff --git a/utils/config.py b/utils/config.py index 700f856..06f8635 100644 --- a/utils/config.py +++ b/utils/config.py @@ -92,12 +92,21 @@ class Config(ConfigParser, ABC): @property def model_class(self): try: - return locate_and_import_class(self.model.type) + return locate_and_import_class(self.model.type, folder_path='models') except AttributeError as e: raise AttributeError(f'The model alias you provided ("{self.get("model", "type")}") ' + f'was not found!\n' + f'{e}') + @property + def data_class(self): + try: + return locate_and_import_class(self.data.class_name, folder_path='datasets') + except AttributeError as e: + raise AttributeError(f'The dataset alias you provided ("{self.get("data", "class_name")}") ' + + f'was not found!\n' + + f'{e}') + # -------------------------------------------------- # TODO: Do this programmatically; This did not work: # Initialize Default Sections as Property diff --git a/utils/tools.py b/utils/tools.py index f93663e..a85b495 100644 --- a/utils/tools.py +++ b/utils/tools.py @@ -41,10 +41,10 @@ def check_path(file_path): assert str(file_path).endswith('.pik') -def locate_and_import_class(class_name, models_location: Union[str, PurePath] = 'models', forceload=False): +def locate_and_import_class(class_name, folder_path: Union[str, PurePath] = ''): """Locate an object by name or dotted path, importing as necessary.""" - models_location = Path(models_location) - module_paths = [x for x in models_location.rglob('*.py') if x.is_file() and '__init__' not in x.name] + folder_path = Path(folder_path) + module_paths = [x for x in folder_path.rglob('*.py') if x.is_file() and '__init__' not in x.name] for module_path in module_paths: mod = importlib.import_module('.'.join([x.replace('.py', '') for x in module_path.parts])) try: