BandwiseBinaryClassifier is work in progress; TODO: Shape Piping.

Si11ium 2020-05-04 18:45:12 +02:00
parent 6d8fbd7184
commit f285200917
6 changed files with 123 additions and 94 deletions

View File

@@ -1,10 +1,18 @@
import librosa
from librosa import display
import torch
from scipy.signal import butter, lfilter
from ml_lib.modules.utils import AutoPad
import numpy as np
def scale_minmax(x, min=0.0, max=1.0):
x_std = (x - x.min()) / (x.max() - x.min())
x_scaled = x_std * (max - min) + min
return x_scaled
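
A quick check of the rescaling arithmetic (values illustrative):

scale_minmax(np.array([2.0, 4.0, 6.0]), 0, 255)  # x_std = [0.0, 0.5, 1.0] -> array([0., 127.5, 255.])
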
def butter_lowpass(cutoff, sr, order=5):
nyq = 0.5 * sr
normal_cutoff = cutoff / nyq
@@ -58,37 +66,54 @@ class NormalizeMelband(object):
return x
class AutoPadToShape(object):
def __init__(self, shape):
self.shape = shape
def __call__(self, x):
if not torch.is_tensor(x):
x = torch.as_tensor(x)
embedding = torch.zeros(self.shape)
embedding[tuple(slice(0, s) for s in x.shape)] = x  # one slice per axis of x
return embedding
def __repr__(self):
return f'AutoPadTransform({self.shape})'
class Melspectogram(object):
def __init__(self, **kwargs):
self.__dict__.update(kwargs)
class AudioToMel(object):
def __init__(self, amplitude_to_db=False, power_to_db=False, **kwargs):
assert not all([amplitude_to_db, power_to_db]), "Choose amplitude_to_db or power_to_db, not both!"
self.mel_kwargs = kwargs
self.amplitude_to_db = amplitude_to_db
self.power_to_db = power_to_db
def __call__(self, y):
mel = librosa.feature.melspectrogram(y, **self.__dict__)
mel = librosa.amplitude_to_db(mel, ref=np.max)
mel = librosa.feature.melspectrogram(y, **self.mel_kwargs)
if self.amplitude_to_db:
mel = librosa.amplitude_to_db(mel, ref=np.max)
if self.power_to_db:
mel = librosa.power_to_db(mel, ref=np.max)
return mel
def __repr__(self):
return f'AudioToMel({self.__dict__})'
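
A minimal usage sketch of the reworked transform; the file name and mel parameters are placeholders, not part of this commit:

y, sr = librosa.load('example.wav', sr=16000)  # placeholder clip
to_mel = AudioToMel(power_to_db=True, sr=sr, n_mels=64)  # extra kwargs are forwarded to librosa.feature.melspectrogram
mel = to_mel(y)  # (n_mels, frames), in dB since power_to_db=True
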
class PowerToDB(object):
def __init__(self, running_max=False):
self.running_max = 0 if running_max else None
def __call__(self, x):
if self.running_max is not None:
self.running_max = max(np.max(x), self.running_max)
return librosa.power_to_db(x, ref=self.running_max)
return librosa.power_to_db(x, ref=np.max)
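
With running_max=True the reference only grows, so dB values stay comparable across successive calls instead of being re-normalized against each frame's own maximum. A sketch with illustrative values:

ptdb = PowerToDB(running_max=True)
a = ptdb(np.full((4, 4), 10.0))  # running_max becomes 10.0
b = ptdb(np.full((4, 4), 2.0))   # still referenced against 10.0, not this frame's max
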
class LowPass(object):
def __init__(self, sr=16000):
self.sr = sr
def __call__(self, x):
return butter_lowpass_filter(x, 1000, self.sr)
class MelToImage(object):
def __init__(self):
pass
def __call__(self, x):
# Source to Solution: https://stackoverflow.com/a/57204349
mels = np.log(x + 1e-9) # add small number to avoid log(0)
# min-max scale to fit inside 8-bit range
img = scale_minmax(mels, 0, 255).astype(np.uint8)
img = np.flip(img, axis=0) # put low frequencies at the bottom in image
img = 255 - img # invert. make black==more energy
return img
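
Chained after AudioToMel (with both dB flags left off, since MelToImage takes the log itself), the waveform-to-image path could look as follows; Compose is an assumption borrowed from torchvision-style pipelines, and y is the clip from the sketch above:

from torchvision.transforms import Compose  # assumption: any callable-composition helper works
pipeline = Compose([AudioToMel(sr=16000, n_mels=64), MelToImage()])
img = pipeline(y)  # uint8, low frequencies at the bottom, dark pixels = more energy
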

View File

@@ -1,22 +1,15 @@
from typing import Union
import warnings
import torch
from torch import nn
from ml_lib.modules.utils import AutoPad, Interpolate
from ml_lib.modules.utils import AutoPad, Interpolate, ShapeMixin
#
# Sub - Modules
###################
class ConvModule(nn.Module):
@property
def shape(self):
x = torch.randn(self.in_shape).unsqueeze(0)
output = self(x)
return output.shape[1:]
class ConvModule(ShapeMixin, nn.Module):
def __init__(self, in_shape, conv_filters, conv_kernel, activation: nn.Module = nn.ELU, pooling_size=None,
bias=True, norm=False, dropout: Union[int, float] = 0,
@@ -51,13 +44,7 @@ class ConvModule(nn.Module):
return tensor
class DeConvModule(nn.Module):
@property
def shape(self):
x = torch.randn(self.in_shape).unsqueeze(0)
output = self(x)
return output.shape[1:]
class DeConvModule(ShapeMixin, nn.Module):
def __init__(self, in_shape, conv_filters, conv_kernel, conv_stride=1, conv_padding=0,
dropout: Union[int, float] = 0, autopad=0,
@@ -91,13 +78,7 @@ class DeConvModule(nn.Module):
return tensor
class ResidualModule(nn.Module):
@property
def shape(self):
x = torch.randn(self.in_shape).unsqueeze(0)
output = self(x)
return output.shape[1:]
class ResidualModule(ShapeMixin, nn.Module):
def __init__(self, in_shape, module_class, n, activation=None, **module_parameters):
assert n >= 1
@@ -118,13 +99,7 @@ class ResidualModule(nn.Module):
return tensor
class RecurrentModule(nn.Module):
@property
def shape(self):
x = torch.randn(self.in_shape).unsqueeze(0)
output = self(x)
return output.shape[1:]
class RecurrentModule(ShapeMixin, nn.Module):
def __init__(self, in_shape, hidden_size, num_layers=1, cell_type=nn.GRU, bias=True, dropout=0):
super(RecurrentModule, self).__init__()

View File

@@ -1,23 +0,0 @@
from typing import List
import torch
from torch import nn
from ml_lib.modules.utils import FlipTensor
from ml_lib.objects.map import MapStorage, Map
from ml_lib.objects.trajectory import Trajectory
class BinaryHomotopicLoss(nn.Module):
def __init__(self, map_storage: MapStorage):
super(BinaryHomotopicLoss, self).__init__()
self.map_storage = map_storage
self.flipper = FlipTensor()
def forward(self, x: torch.Tensor, y: torch.Tensor, mapnames: str):
maps: List[Map] = [self.map_storage[mapname] for mapname in mapnames]
for basemap in maps:
basemap = basemap.as_2d_array

View File

@@ -4,6 +4,8 @@
import torch
from torch import nn
from ml_lib.modules.utils import ShapeMixin
class Generator(nn.Module):
@property
@@ -112,12 +114,7 @@ class UnitGenerator(Generator):
return tensor
class BaseEncoder(nn.Module):
@property
def shape(self):
x = torch.randn(self.in_shape).unsqueeze(0)
output = self(x)
return output.shape[1:]
class BaseEncoder(ShapeMixin, nn.Module):
# noinspection PyUnresolvedReferences
def __init__(self, in_shape, lat_dim=256, use_bias=True, use_norm=False, dropout: Union[int, float] = 0,

View File

@@ -1,5 +1,3 @@
from copy import deepcopy
from abc import ABC
from pathlib import Path
@@ -24,6 +22,15 @@ class F_x(object):
return x
class ShapeMixin:
@property
def shape(self):
x = torch.randn(self.in_shape).unsqueeze(0)
output = self(x)
return output.shape[1:]
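
The mixin centralizes the shape property this commit deletes from ConvModule, DeConvModule, ResidualModule, RecurrentModule, and BaseEncoder: any module that sets in_shape gets its per-sample output shape from a dummy forward pass. A minimal sketch (the Probe module is illustrative):

import torch
from torch import nn
from ml_lib.modules.utils import ShapeMixin

class Probe(ShapeMixin, nn.Module):
    def __init__(self, in_shape):
        super().__init__()
        self.in_shape = in_shape  # the only attribute ShapeMixin needs
        self.conv = nn.Conv2d(in_shape[0], 8, kernel_size=3)

    def forward(self, x):
        return self.conv(x)

probe = Probe((1, 64, 64))
print(probe.shape)  # torch.Size([8, 62, 62]) -- usable as the next module's in_shape
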
# Utility - Modules
###################
class Flatten(nn.Module):
@@ -100,7 +107,7 @@ class LightningBaseModule(pl.LightningModule, ABC):
@classmethod
def name(cls):
raise NotImplementedError('Give your model a name!')
return cls.__name__
@property
def shape(self):
@@ -218,3 +225,62 @@ class FlipTensor(nn.Module):
idx = torch.as_tensor(idx).long()
inverted_tensor = x.index_select(self.dim, idx)
return inverted_tensor
class AutoPadToShape(object):
def __init__(self, shape):
self.shape = shape
def __call__(self, x):
if not torch.is_tensor(x):
x = torch.as_tensor(x)
if x.shape == self.shape:
return x
embedding = torch.zeros(self.shape)
embedding[tuple(slice(0, s) for s in x.shape)] = x  # one slice per axis of x
return embedding
def __repr__(self):
return f'AutoPadTransform({self.shape})'
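
Usage sketch: the transform zero-pads an undersized tensor up to the target shape, anchored at index 0 along every axis:

pad = AutoPadToShape((1, 64, 128))
x = torch.rand(1, 64, 100)  # 28 columns short of the target
assert pad(x).shape == (1, 64, 128)
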
class HorizontalSplitter(nn.Module):
def __init__(self, in_shape, n):
super(HorizontalSplitter, self).__init__()
assert len(in_shape) == 3
self.n = n
self.in_shape = in_shape
self.channel, self.height, self.width = self.in_shape
self.new_height = (self.height // self.n) + (1 if self.height % self.n != 0 else 0)  # ceil(height / n)
self.shape = (self.channel, self.new_height, self.width)
self.autopad = AutoPadToShape(self.shape)
def forward(self, x):
    n_blocks = list()
    for block_idx in range(self.n):
        start = block_idx * self.new_height
        end = start + self.new_height
        block = self.autopad(x[..., start:end, :])  # slice along the height axis; the last block is padded up to new_height
        n_blocks.append(block)
    return tuple(n_blocks)
class HorizontalMerger(nn.Module):
@property
def shape(self):
merged_shape = self.in_shape[0], self.in_shape[1] * self.n, self.in_shape[2]
return merged_shape
def __init__(self, in_shape, n):
super(HorizontalMerger, self).__init__()
assert len(in_shape) == 3
self.n = n
self.in_shape = in_shape
def forward(self, x):
return torch.cat(x, dim=-2)
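
A roundtrip sketch of how the pair appears meant to compose for band-wise processing; since AutoPadToShape pads per-sample shapes, this feeds an unbatched (channel, height, width) tensor:

splitter = HorizontalSplitter(in_shape=(1, 64, 128), n=4)
merger = HorizontalMerger(in_shape=splitter.shape, n=4)

x = torch.rand(1, 64, 128)
bands = splitter(x)     # 4 blocks, each (1, 16, 128)
merged = merger(bands)  # back to (1, 64, 128) via torch.cat along the height axis
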

View File

@@ -1,11 +0,0 @@
import numpy as np
class AsArray(object):
def __init__(self, width, height):
self.width = width
self.height = height
def __call__(self, x):
array = np.zeros((self.width, self.height))
return array