working commit

cfg.py (11 lines changed)
@@ -2,11 +2,14 @@ from pathlib import Path
 import torch
 
 BATCH_SIZE = 128
-NUM_EPOCHS = 10
+NUM_EPOCHS = 50
 NUM_WORKERS = 4
-NUM_SEGMENTS = 5
+NUM_SEGMENTS = 80
-NUM_SEGMENT_HOPS = 2
+NUM_SEGMENT_HOPS = 20
 SEEDS = [42, 1337]
 ALL_DATASET_PATHS = list((Path(__file__).parent.absolute() / 'data' / 'mimii').glob('*/'))
 
 DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
 
+SUB_SPEC_HEIGHT = 20
+SUB_SPEC_HOP = SUB_SPEC_HEIGHT
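
For reference: with these values the Subspectrogram transform (see models/layers.py below) slices each 80-mel spectrogram into four non-overlapping 20-band strips, matching the 128 x 4 x 20 x 80 batch shape noted in main.py. A quick sketch of that arithmetic (illustration only, not part of the commit):

    n_mels = 80
    sub_spec_height = 20            # SUB_SPEC_HEIGHT
    sub_spec_hop = sub_spec_height  # SUB_SPEC_HOP
    starts = list(range(0, n_mels, sub_spec_hop))  # mirrors the loop in Subspectrogram.__call__
    print(starts)  # [0, 20, 40, 60] -> 4 sub-spectrograms per segment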

main.py (20 lines changed)
@@ -3,34 +3,38 @@ if __name__ == '__main__':
     from tqdm import tqdm
     from cfg import *
     from mimii import MIMII
-    from models.ae import AE
+    from models.ae import AE, SubSpecCAE
     import torch.nn as nn
     import torch.optim as optim
     import random
+    from models.layers import Subspectrogram
 
     torch.manual_seed(42)
     torch.cuda.manual_seed(42)
     np.random.seed(42)
     random.seed(42)
 
-    dataset_path = ALL_DATASET_PATHS[0]
+    dataset_path = ALL_DATASET_PATHS[5]
     print(f'Training on {dataset_path.name}')
     mimii = MIMII(dataset_path=dataset_path, machine_id=0)
     mimii.to(DEVICE)
-    #mimii.preprocess(n_fft=1024, hop_length=256, n_mels=80, center=False, power=2.0)
+    #mimii.preprocess(n_fft=1024, hop_length=256, n_mels=80, center=False, power=2.0)  # 80 x 80
+    tfms = Subspectrogram(SUB_SPEC_HEIGHT, SUB_SPEC_HOP)
 
     dl = mimii.train_dataloader(
         segment_len=NUM_SEGMENTS,
         hop_len=NUM_SEGMENT_HOPS,
         batch_size=BATCH_SIZE,
         num_workers=NUM_WORKERS,
-        shuffle=True
+        shuffle=True,
+        transform=tfms
     )
 
+    model = SubSpecCAE().to(DEVICE)
-    model = AE(400).to(DEVICE)
     model.init_weights()
 
+    # print(model(torch.randn(128, 1, 20, 80).to(DEVICE)).shape)
 
     optimizer = optim.Adam(model.parameters(), lr=0.001)
 
 
@@ -39,7 +43,7 @@ if __name__ == '__main__':
         losses = []
         for batch in tqdm(dl):
             data, labels = batch
-            data = data.to(DEVICE)
+            data = data.to(DEVICE)  # torch.Size([128, 4, 20, 80]) batch x sub_specs x height x width
 
             loss = model.train_loss(data)
 
@@ -50,7 +54,7 @@ if __name__ == '__main__':
             losses.append(loss.item())
         print(f'Loss: {np.mean(losses)}')
 
-        auc = mimii.evaluate_model(model, NUM_SEGMENTS, NUM_SEGMENTS)
+        auc = mimii.evaluate_model(model, NUM_SEGMENTS, NUM_SEGMENTS, transform=tfms)
         print(f'AUC: {auc}')
 
 
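
The update step between the two hunks above is elided by the diff; presumably the usual PyTorch pattern sits between loss = model.train_loss(data) and losses.append(loss.item()). A minimal sketch of that assumed step:

    optimizer.zero_grad()  # clear gradients from the previous batch
    loss.backward()        # backpropagate the reconstruction loss
    optimizer.step()       # update the autoencoder weights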

mimii.py (16 lines changed)
@@ -60,7 +60,7 @@ class MIMII(object):
                 np.save(folder/(file.stem + f'_{m}_{n}.npy'), mel_spec_norm)
         return self
 
-    def train_dataloader(self, segment_len=20, hop_len=5, **kwargs):
+    def train_dataloader(self, segment_len=20, hop_len=5, transform=None, **kwargs):
         # return both!!!
         # todo exclude a part and save for eval
         ds = []
@@ -68,20 +68,22 @@ class MIMII(object):
             ds.append(
                 MimiiTorchDataset(path=p, label=l,
                                   segment_len=segment_len,
-                                  hop=hop_len)
+                                  hop=hop_len,
+                                  transform=transform)
             )
         return DataLoader(ConcatDataset(ds), **kwargs)
 
     def test_dataloader(self, *args, **kwargs):
         raise NotImplementedError('test_dataloader is not supported')
 
-    def evaluate_model(self, f, segment_len=20, hop_len=5):
+    def evaluate_model(self, f, segment_len=20, hop_len=5, transform=None):
+        f.eval()
         datasets = []
         for p, l in zip(self.test_paths, self.test_labels):
             datasets.append(
                 MimiiTorchDataset(path=p, label=l,
                                   segment_len=segment_len,
-                                  hop=hop_len)
+                                  hop=hop_len, transform=transform)
             )
         y_true, y_score = [], []
         with torch.no_grad():
@@ -97,12 +99,13 @@ class MIMII(object):
                 file_preds += preds.cpu().data.tolist()
             y_true.append(labels.max().item())
             y_score.append(np.mean(file_preds))
+        f.train()
         return roc_auc_score(y_true, y_score)
 
 
 
 class MimiiTorchDataset(Dataset):
-    def __init__(self, path, segment_len, hop, label):
+    def __init__(self, path, segment_len, hop, label, transform=None):
         self.path = path
         self.segment_len = segment_len
         self.m, self.n = str(path).split('_')[-2:]  # get spectrogram dimensions
@@ -110,11 +113,14 @@ class MimiiTorchDataset(Dataset):
         self.m, self.n = (int(i) for i in (self.m, self.n))
         self.offsets = list(range(0, self.n - segment_len, hop))
         self.label = label
+        self.transform = transform
 
     def __getitem__(self, item):
         start = self.offsets[item]
         mel_spec = np.load(self.path)
         snippet = mel_spec[:, start: start + self.segment_len]
+        if self.transform:
+            snippet = self.transform(snippet)
         return snippet, self.label
 
     def __len__(self):
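
With these changes, evaluate_model switches the model to eval mode, scores each test file as the mean reconstruction error over its segments, restores train mode, and returns the ROC-AUC. A toy illustration of the file-level scoring (hypothetical labels and error values):

    import numpy as np
    from sklearn.metrics import roc_auc_score

    y_true = [1, 1, 0, 0]                  # per-file labels, from labels.max()
    y_score = [2.31, 1.87, 0.40, 0.55]     # per-file mean reconstruction errors
    print(roc_auc_score(y_true, y_score))  # 1.0 here -- anomalous files score higher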

models/ae.py (82 lines changed)
@@ -2,6 +2,23 @@ import torch
 import torch.nn as nn
 import torch.functional as F
 
 
+class Reshape(nn.Module):
+    def __init__(self, *args):
+        super(Reshape, self).__init__()
+        self.to = args
+
+    def forward(self, x):
+        return x.view(x.shape[0], *self.to)
+
+
+class Flatten(nn.Module):
+    def __init__(self):
+        super(Flatten, self).__init__()
+
+    def forward(self, x):
+        return x.view(x.shape[0], -1)
+
+
 class AE(nn.Module):
     def __init__(self, in_dim=400):
         super(AE, self).__init__()
@@ -17,7 +34,7 @@ class AE(nn.Module):
             nn.Linear(64, 64),
             nn.ReLU(),
             nn.Linear(64, in_dim),
-            nn.ReLU(),
+            nn.ReLU()
         )
 
     def forward(self, data):
@@ -45,4 +62,65 @@ class AE(nn.Module):
             if isinstance(m.bias, torch.Tensor):
                 m.bias.data.fill_(0.01)
 
         self.apply(_weight_init)
+
+
+class SubSpecCAE(nn.Module):
+    def __init__(self, F=20, T=80, norm='batch', activation='relu', dropout_prob=0.25):
+        super(SubSpecCAE, self).__init__()
+        self.T = T
+        self.F = F
+        self.activation = activation
+        Norm = nn.BatchNorm2d if norm == 'batch' else nn.InstanceNorm2d
+        Activation = nn.ReLU if activation == 'relu' else nn.LeakyReLU
+        self.encoder = nn.Sequential(
+            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=7, stride=1, padding=3),  # 32 x 20 x 80
+            Norm(32),
+            Activation(),
+            nn.MaxPool2d((F // 10, 5)),
+            nn.Dropout(dropout_prob),
+            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=7, stride=1, padding=3),  # 64 x 10 x 16
+            Norm(64),
+            Activation(),
+            nn.MaxPool2d(4, T),
+            nn.Dropout(dropout_prob),
+            Flatten(),
+            nn.Linear(64, 16)
+        )
+        self.decoder = nn.Sequential(
+            nn.Linear(16, 64),
+            Reshape(64, 1, 1),
+            nn.Upsample(size=(10, 16), mode='bilinear', align_corners=False),
+            nn.ConvTranspose2d(in_channels=64, out_channels=32, kernel_size=7, stride=1, padding=3),
+            Norm(32),
+            Activation(),
+            nn.Upsample(size=(20, 80), mode='bilinear', align_corners=False),
+            nn.Dropout(dropout_prob),
+            nn.ConvTranspose2d(in_channels=32, out_channels=1, kernel_size=7, stride=1, padding=3)
+        )
+
+    def forward(self, x):
+        x = x[:, 3, :].unsqueeze(1)  # select a single sub spec
+        encoded = self.encoder(x)
+        decoded = self.decoder(encoded)
+        return decoded, x
+
+    def train_loss(self, data):
+        criterion = nn.MSELoss()
+        y_hat, y = self.forward(data)
+        loss = criterion(y_hat, y)
+        return loss
+
+    def test_loss(self, data):
+        y_hat, y = self.forward(data)
+        preds = torch.sum((y_hat - y) ** 2, dim=tuple(range(1, y_hat.dim())))
+        return preds
+
+    def init_weights(self):
+        def weight_init(m):
+            if isinstance(m, nn.Conv2d) or isinstance(m, torch.nn.Linear):
+                torch.nn.init.kaiming_uniform_(m.weight)
+                if m.bias is not None:
+                    m.bias.data.fill_(0.02)
+        self.apply(weight_init)
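
A shape trace of the new SubSpecCAE with its defaults (F=20, T=80), assuming a batch shaped like the one logged in main.py. Note that Reshape stores its target shape in self.to, which shadows nn.Module.to on that submodule; moving the parent model to a device still works because Module.to recurses via _apply rather than the children's to methods.

    import torch
    from models.ae import SubSpecCAE

    model = SubSpecCAE()
    x = torch.randn(128, 4, 20, 80)  # batch x sub_specs x height x width
    decoded, target = model(x)       # forward selects sub-spectrogram index 3
    print(target.shape)              # torch.Size([128, 1, 20, 80])
    print(decoded.shape)             # torch.Size([128, 1, 20, 80])
    print(model.test_loss(x).shape)  # torch.Size([128]) -- per-sample anomaly scores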

models/layers.py
@@ -1,3 +1,4 @@
+import numpy as np
 import torch
 import torch.nn as nn
 
@@ -9,7 +10,7 @@ class Subspectrogram(object):
 
     def __call__(self, sample):
         if len(sample.shape) < 3:
-            sample = sample.unsqueeze(0)
+            sample = sample.reshape(1, *sample.shape)
         # sample shape: 1 x num_mels x num_frames
         sub_specs = []
         for i in range(0, sample.shape[1], self.hop_size):
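
The unsqueeze-to-reshape change matters because MimiiTorchDataset.__getitem__ hands the transform a numpy array (loaded via np.load), and numpy arrays have no unsqueeze method. A minimal check of the replacement call, assuming an 80 x 20 mel snippet:

    import numpy as np

    snippet = np.random.randn(80, 20)             # num_mels x num_frames
    snippet = snippet.reshape(1, *snippet.shape)  # numpy equivalent of torch's unsqueeze(0)
    print(snippet.shape)                          # (1, 80, 20)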