working commit

2020-03-19 16:59:49 +01:00
parent f4606a7f6c
commit cc9e9b50a4
5 changed files with 112 additions and 20 deletions
--- a/cfg.py
+++ b/cfg.py
@@ -2,11 +2,14 @@ from pathlib import Path
 import torch

 BATCH_SIZE = 128
-NUM_EPOCHS = 10
+NUM_EPOCHS = 50
 NUM_WORKERS = 4
-NUM_SEGMENTS = 5
-NUM_SEGMENT_HOPS = 2
+NUM_SEGMENTS = 80  # passed as segment_len to train_dataloader / evaluate_model in main.py
+NUM_SEGMENT_HOPS = 20  # passed as hop_len to train_dataloader in main.py
 SEEDS = [42, 1337]
 ALL_DATASET_PATHS = list((Path(__file__).parent.absolute() / 'data' / 'mimii').glob('*/'))

-DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
+DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
+
+SUB_SPEC_HEIGT = 20  # first argument to Subspectrogram in main.py; "HEIGT" (sic) is the name main.py imports
+SUB_SPEC_HOP = SUB_SPEC_HEIGT  # second argument to Subspectrogram in main.py; same value as SUB_SPEC_HEIGT
--- a/main.py
+++ b/main.py
@@ -3,34 +3,38 @@ if __name__ == '__main__':
    from tqdm import tqdm
    from cfg import *
    from mimii import MIMII
-    from models.ae import AE
+    from models.ae import AE, SubSpecCAE
    import torch.nn as nn
    import torch.optim as optim
    import random
+    from models.layers import Subspectrogram

    torch.manual_seed(42)
    torch.cuda.manual_seed(42)
    np.random.seed(42)
    random.seed(42)

-    dataset_path = ALL_DATASET_PATHS[0]
+    dataset_path = ALL_DATASET_PATHS[5]
    print(f'Training on {dataset_path.name}')
    mimii = MIMII(dataset_path=dataset_path, machine_id=0)
    mimii.to(DEVICE)
-    #mimii.preprocess(n_fft=1024, hop_length=256, n_mels=80, center=False, power=2.0)
+    #mimii.preprocess(n_fft=1024, hop_length=256, n_mels=80, center=False, power=2.0)  # 80 x 80
+    tfms = Subspectrogram(SUB_SPEC_HEIGT, SUB_SPEC_HOP)

    dl = mimii.train_dataloader(
        segment_len=NUM_SEGMENTS,
        hop_len=NUM_SEGMENT_HOPS,
        batch_size=BATCH_SIZE,
        num_workers=NUM_WORKERS,
-        shuffle=True
+        shuffle=True,
+        transform=tfms
    )

-
-    model = AE(400).to(DEVICE)
+    model = SubSpecCAE().to(DEVICE)
    model.init_weights()

+    # print(model(torch.randn(128, 1, 20, 80).to(DEVICE)).shape)
+
    optimizer = optim.Adam(model.parameters(), lr=0.001)


@@ -39,7 +43,7 @@ if __name__ == '__main__':
        losses = []
        for batch in tqdm(dl):
            data, labels = batch
-            data = data.to(DEVICE)
+            data = data.to(DEVICE)  # torch.Size([128, 4, 20, 80]) batch x subs_specs x height x width

            loss = model.train_loss(data)

@@ -50,7 +54,7 @@ if __name__ == '__main__':
            losses.append(loss.item())
        print(f'Loss: {np.mean(losses)}')

-    auc = mimii.evaluate_model(model, NUM_SEGMENTS, NUM_SEGMENTS)
+    auc = mimii.evaluate_model(model, NUM_SEGMENTS, NUM_SEGMENTS, transform=tfms)
    print(f'AUC: {auc}')


--- a/mimii.py
+++ b/mimii.py
@@ -60,7 +60,7 @@ class MIMII(object):
                np.save(folder/(file.stem + f'_{m}_{n}.npy'), mel_spec_norm)
        return self

-    def train_dataloader(self, segment_len=20, hop_len=5, **kwargs):
+    def train_dataloader(self, segment_len=20, hop_len=5, transform=None, **kwargs):
        # return both!!!
        # todo exclude a part and save for eval
        ds = []
@@ -68,20 +68,22 @@ class MIMII(object):
            ds.append(
                MimiiTorchDataset(path=p, label=l,
                                  segment_len=segment_len,
-                                  hop=hop_len)
+                                  hop=hop_len,
+                                  transform=transform)
            )
        return DataLoader(ConcatDataset(ds), **kwargs)

    def test_dataloader(self, *args, **kwargs):
+        """Always raise ``NotImplementedError``; all arguments are ignored."""
        raise NotImplementedError('test_dataloader is not supported')

-    def evaluate_model(self, f, segment_len=20, hop_len=5):
+    def evaluate_model(self, f, segment_len=20, hop_len=5, transform=None):
+        f.eval()
        datasets = []
        for p, l in zip(self.test_paths, self.test_labels):
            datasets.append(
                MimiiTorchDataset(path=p, label=l,
                                  segment_len=segment_len,
-                                  hop=hop_len)
+                                  hop=hop_len, transform=transform)
            )
        y_true, y_score = [], []
        with torch.no_grad():
@@ -97,12 +99,13 @@ class MIMII(object):
                    file_preds += preds.cpu().data.tolist()
                y_true.append(labels.max().item())
                y_score .append(np.mean(file_preds))
+        f.train()
        return roc_auc_score(y_true, y_score)



 class MimiiTorchDataset(Dataset):
-    def __init__(self, path, segment_len, hop, label):
+    def __init__(self, path, segment_len, hop, label, transform=None):
        self.path = path
        self.segment_len = segment_len
        self.m, self.n = str(path).split('_')[-2:]  # get spectrogram dimensions
@@ -110,11 +113,14 @@ class MimiiTorchDataset(Dataset):
        self.m, self.n = (int(i) for i in (self.m, self.n))
        self.offsets = list(range(0, self.n - segment_len, hop))
        self.label = label
+        self.transform = transform

    def __getitem__(self, item):
+        """Return the ``(snippet, label)`` pair for segment index ``item``.
+
+        The array stored at ``self.path`` is re-loaded with ``np.load`` on every
+        call and sliced along its second axis, starting at ``self.offsets[item]``
+        and spanning up to ``self.segment_len`` columns.
+        """
        start = self.offsets[item]
        mel_spec = np.load(self.path)
        snippet = mel_spec[:, start: start + self.segment_len]
+        # Optional per-snippet transform; skipped when self.transform is None (or otherwise falsy).
+        if self.transform:
+            snippet = self.transform(snippet)
        return snippet, self.label

    def __len__(self):
--- a/models/ae.py
+++ b/models/ae.py
@@ -2,6 +2,23 @@ import torch
 import torch.nn as nn
 import torch.functional as F

+
+class Reshape(nn.Module):
+    """Reshape every dimension after the first to a fixed target shape.
+
+    Args:
+        *args: Sizes of the non-leading dimensions of the output, e.g.
+            ``Reshape(64, 1, 1)`` views an ``(N, 64)`` input as ``(N, 64, 1, 1)``.
+    """
+
+    def __init__(self, *args):
+        super(Reshape, self).__init__()
+        # Deliberately not named ``to``: an instance attribute with that name
+        # shadows ``nn.Module.to`` and makes ``Reshape(...).to(device)`` fail
+        # with "'tuple' object is not callable".
+        self.target_shape = args
+
+    def forward(self, x):
+        """Return ``x`` viewed as ``(x.shape[0], *target_shape)``."""
+        return x.view(x.shape[0], *self.target_shape)
+
+class Flatten(nn.Module):
+    """Collapse every dimension after the first into a single one."""
+
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, x):
+        """Return ``x`` viewed as a 2-D tensor of shape ``(x.shape[0], -1)``."""
+        leading = x.size(0)
+        flat = x.view(leading, -1)
+        return flat
+
+
 class AE(nn.Module):
    def __init__(self, in_dim=400):
        super(AE, self).__init__()
@@ -17,7 +34,7 @@ class AE(nn.Module):
            nn.Linear(64, 64),
            nn.ReLU(),
            nn.Linear(64, in_dim),
-            nn.ReLU(),
+            nn.ReLU()
        )

    def forward(self, data):
@@ -45,4 +62,65 @@ class AE(nn.Module):
                if isinstance(m.bias, torch.Tensor):
                    m.bias.data.fill_(0.01)

-        self.apply(_weight_init)
+        self.apply(_weight_init)
+
+
+
+class SubSpecCAE(nn.Module):
+    """Convolutional autoencoder that reconstructs a single sub-spectrogram.
+
+    The encoder maps a ``1 x F x T`` input to a 16-dimensional code and the
+    decoder upsamples that code back to ``1 x F x T``.
+
+    Args:
+        F: Height of the sub-spectrogram fed to the network.
+        T: Width of the sub-spectrogram fed to the network.
+        norm: ``'batch'`` selects ``nn.BatchNorm2d``; any other value selects
+            ``nn.InstanceNorm2d``.
+        activation: ``'relu'`` selects ``nn.ReLU``; any other value selects
+            ``nn.LeakyReLU``.
+        dropout_prob: Probability used by every ``nn.Dropout`` layer.
+
+    Raises:
+        ValueError: If ``F`` is smaller than 10, because the first pooling
+            kernel height ``F // 10`` would be zero.
+    """
+
+    def __init__(self, F=20, T=80, norm='batch', activation='relu', dropout_prob=0.25):
+        super(SubSpecCAE, self).__init__()
+        if F < 10:
+            raise ValueError('F must be at least 10 so that the F // 10 pooling kernel is non-zero')
+        self.T = T
+        self.F = F
+        self.activation = activation
+        Norm = nn.BatchNorm2d if norm == 'batch' else nn.InstanceNorm2d
+        Activation = nn.ReLU if activation == 'relu' else nn.LeakyReLU
+        # Spatial size after the first max-pool; the decoder mirrors it so the
+        # network stays consistent for F/T other than the 20 x 80 default.
+        pool_f = F // 10
+        pooled_size = (F // pool_f, T // 5)  # (10, 16) for the defaults
+        self.encoder = nn.Sequential(
+            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=7, stride=1, padding=3),  # 32 x 20 x 80
+            Norm(32),
+            Activation(),
+            nn.MaxPool2d((pool_f, 5)),
+            nn.Dropout(dropout_prob),
+            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=7, stride=1, padding=3),  # 64 x 10 x 16
+            Norm(64),
+            Activation(),
+            # kernel_size=4, stride=T: with the default sizes a single window
+            # fits, giving 64 x 1 x 1, which Linear(64, 16) below relies on.
+            # NOTE(review): that window covers only the top-left 4 x 4 patch of
+            # the feature map -- confirm a global pool was not intended.
+            nn.MaxPool2d(4, T),
+            nn.Dropout(dropout_prob),
+            Flatten(),
+            nn.Linear(64, 16)
+        )
+        self.decoder = nn.Sequential(
+            nn.Linear(16, 64),
+            Reshape(64, 1, 1),
+            nn.Upsample(size=pooled_size, mode='bilinear', align_corners=False),
+            nn.ConvTranspose2d(in_channels=64, out_channels=32, kernel_size=7, stride=1, padding=3),
+            Norm(32),
+            Activation(),
+            # Restore the input resolution so the reconstruction matches the target.
+            nn.Upsample(size=(F, T), mode='bilinear', align_corners=False),
+            nn.Dropout(dropout_prob),
+            nn.ConvTranspose2d(in_channels=32, out_channels=1, kernel_size=7, stride=1, padding=3)
+        )
+
+    def forward(self, x):
+        """Reconstruct one sub-spectrogram of ``x``.
+
+        Args:
+            x: Batch laid out as batch x sub-spectrograms x height x width.
+
+        Returns:
+            Tuple ``(decoded, target)`` where ``target`` is the selected
+            sub-spectrogram with a channel dimension inserted at position 1.
+        """
+        x = x[:, 3, :].unsqueeze(1)  # select a single sub-spectrogram (hard-coded index 3)
+        encoded = self.encoder(x)
+        decoded = self.decoder(encoded)
+        return decoded, x
+
+    def train_loss(self, data):
+        """Return the mean-squared reconstruction error for ``data``."""
+        criterion = nn.MSELoss()
+        y_hat, y = self.forward(data)
+        loss = criterion(y_hat, y)
+        return loss
+
+    def test_loss(self, data):
+        """Return one score per sample: squared error summed over all non-batch dims."""
+        y_hat, y = self.forward(data)
+        preds = torch.sum((y_hat - y) ** 2, dim=tuple(range(1, y_hat.dim())))
+        return preds
+
+    def init_weights(self):
+        """Kaiming-uniform initialise Conv2d/Linear weights and set their biases to 0.02."""
+        def weight_init(m):
+            # NOTE(review): nn.ConvTranspose2d is not a subclass of nn.Conv2d, so
+            # the decoder's transposed convolutions keep their default
+            # initialisation -- confirm that is intended.
+            if isinstance(m, nn.Conv2d) or isinstance(m, torch.nn.Linear):
+                torch.nn.init.kaiming_uniform_(m.weight)
+                if m.bias is not None:
+                    m.bias.data.fill_(0.02)
+        self.apply(weight_init)
--- a/models/layers.py
+++ b/models/layers.py
@@ -1,3 +1,4 @@
+import numpy as np
 import torch
 import torch.nn as nn

@@ -9,7 +10,7 @@ class Subspectrogram(object):

    def __call__(self, sample):
        if len(sample.shape) < 3:
-            sample = sample.unsqueeze(0)
+            sample = sample.reshape(1, *sample.shape)
        # sample shape: 1 x num_mels x num_frames
        sub_specs = []
        for i in range(0, sample.shape[1], self.hop_size):