Commit 482f45df87 ("big update"), parent 0f325676e5
cfg.py (4 changed lines)
@@ -2,11 +2,11 @@ from pathlib import Path
import torch

BATCH_SIZE = 96
-NUM_EPOCHS = 100
+NUM_EPOCHS = 75
NUM_WORKERS = 4
NUM_SEGMENTS = 80
NUM_SEGMENT_HOPS = 15
-SEEDS = [42, 1337, 666, 2012, 1e42]
+SEEDS = [42, 1337, 666]
ALL_DATASET_PATHS = list((Path(__file__).parent.absolute() / 'data' / 'mimii').glob('*/'))

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
clean.py (new file, 18 lines)
@@ -0,0 +1,18 @@
# scores on band level per file
# merge scores using GMM/IsoForest
# combine using mean
# compute roc auc

# 1. inputs: scorer function 80x80 -> 1x7
# 2. GMM: 1x7 -> 1x1
# 3. mean over all snippets
# single score

class Scorer:
    def __init__(self):
        pass


class Evaluator:
    def __init__(self):
        pass
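Note on clean.py: the comments sketch the evaluation plan, but Scorer and Evaluator are still empty stubs. A minimal sketch of what that pipeline could look like (per-band scores merged by a GMM, mean over snippets, ROC AUC at the end); the array shapes, the fit/score split, and all names below are assumptions for illustration, not code from this commit:

# Hypothetical sketch of the pipeline described in the comments above.
# Assumes band_scores is (n_snippets, 7): one reconstruction score per sub-band.
from sklearn.mixture import GaussianMixture
from sklearn.metrics import roc_auc_score


class Scorer:
    """Merge per-snippet band scores (1x7) into a single score (1x1) via a GMM."""
    def __init__(self, n_components=2):
        self.gmm = GaussianMixture(n_components=n_components)

    def fit(self, band_scores):
        self.gmm.fit(band_scores)  # fit on normal training data only
        return self

    def __call__(self, band_scores):
        # low likelihood under the normal-data GMM -> high anomaly score
        return -self.gmm.score_samples(band_scores)  # (n_snippets,)


class Evaluator:
    """Average snippet scores per file, then compute ROC AUC over files."""
    def __init__(self, scorer):
        self.scorer = scorer

    def evaluate(self, per_file_band_scores, per_file_labels):
        y_score = [self.scorer(s).mean() for s in per_file_band_scores]
        return roc_auc_score(per_file_labels, y_score)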
main.py (24 changed lines)
@@ -9,7 +9,8 @@ if __name__ == '__main__':
    import torch.optim as optim
    from models.layers import Subspectrogram

-    def train(dataset_path, machine_id, band, norm, seed):
+
+    def train(dataset_path, machine_id, band, norm='batch', loss_fn='mse', seed=42):
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        np.random.seed(seed)
@@ -32,12 +33,12 @@ if __name__ == '__main__':
            transform=tfms
        )

-        model = SubSpecCAE(norm=norm, band=band).to(DEVICE)
+        model = SubSpecCAE(norm=norm, loss_fn=loss_fn, band=band).to(DEVICE)
        model.init_weights()

        # print(model(torch.randn(128, 1, 20, 80).to(DEVICE)).shape)

-        optimizer = optim.Adam(model.parameters(), lr=0.001)
+        optimizer = optim.Adam(model.parameters(), lr=0.0005)


        for epoch in range(NUM_EPOCHS):
@@ -60,17 +61,18 @@ if __name__ == '__main__':
        print(f'AUC: {auc}, Machine: {machine_id}, Band: {band}, Norm: {norm}, Seed: {seed}')
        return auc


+    loss_fn = 'mse'
    results = []
-    for norm in ('instance', 'batch'):
+    for norm in ['batch']:
        for seed in SEEDS:
            for dataset_path in ALL_DATASET_PATHS:
-                for machine_id in [0, 2, 4, 6]:
-                    for band in range(7):
-                        auc = train(dataset_path, machine_id, band, norm, seed)
-                        results.append([dataset_path.name, machine_id, seed, band, norm, auc])
-                with open(f'results_{norm}.pkl', 'wb') as f:
-                    pickle.dump(results, f)
+                if '-6_dB' in dataset_path.name:
+                    for machine_id in [4]:
+                        for band in range(7):
+                            auc = train(dataset_path, machine_id, band, norm, loss_fn, seed)
+                            results.append([dataset_path.name, machine_id, seed, band, norm, auc])
+                    with open(f'results2_hard_{norm}_{loss_fn}.pkl', 'wb') as f:
+                        pickle.dump(results, f)
main2.py (new file, 79 lines)
@@ -0,0 +1,79 @@
if __name__ == '__main__':
    import numpy as np
    import random
    from tqdm import tqdm
    from cfg import *
    from mimii import MIMII
    from models.ae import GlobalCAE
    import pickle
    import torch.optim as optim
    from models.layers import Subspectrogram


    def train(dataset_path, machine_id, norm='batch', loss_fn='mse', seed=42):
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        np.random.seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        random.seed(seed)

        print(f'Training on {dataset_path.name}')
        mimii = MIMII(dataset_path=dataset_path, machine_id=machine_id)
        mimii.to(DEVICE)
        #mimii.preprocess(n_fft=1024, hop_length=256, n_mels=80, center=False, power=2.0)  # 80 x 80

        dl = mimii.train_dataloader(
            segment_len=NUM_SEGMENTS,
            hop_len=NUM_SEGMENT_HOPS,
            batch_size=BATCH_SIZE,
            num_workers=NUM_WORKERS,
            shuffle=True
        )

        model = GlobalCAE(norm=norm).to(DEVICE)
        model.init_weights()

        # print(model(torch.randn(128, 1, 20, 80).to(DEVICE)).shape)

        optimizer = optim.Adam(model.parameters(), lr=0.0005)


        for epoch in range(NUM_EPOCHS):
            print(f'EPOCH #{epoch+1}')
            losses = []
            for batch in tqdm(dl):
                data, labels = batch
                data = data.to(DEVICE)  # torch.Size([128, 4, 20, 80]) batch x subs_specs x height x width

                loss = model.train_loss(data)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                losses.append(loss.item())
            print(f'Loss: {np.mean(losses)}')

        auc = mimii.evaluate_model(model, NUM_SEGMENTS, NUM_SEGMENTS)
        print(f'AUC: {auc}, Machine: {machine_id}, Norm: {norm}, Seed: {seed}')
        return auc

    loss_fn = 'mse'
    results = []
    for norm in ['batch']:
        for seed in SEEDS:
            for dataset_path in ALL_DATASET_PATHS:
                if '-6_dB' in dataset_path.name:
                    for machine_id in [4]:
                        auc = train(dataset_path, machine_id, norm, loss_fn, seed)
                        results.append([dataset_path.name, machine_id, seed, norm, auc])
                    with open(f'TODO_hard_{norm}_{loss_fn}.pkl', 'wb') as f:
                        pickle.dump(results, f)
main3.py (new file, 81 lines)
@@ -0,0 +1,81 @@
if __name__ == '__main__':
    import numpy as np
    import random
    from tqdm import tqdm
    from cfg import *
    from mimii import MIMII
    from models.ae import FullSubSpecCAE, SubSpecCAE
    import pickle
    import torch.optim as optim
    from models.layers import Subspectrogram


    def train(dataset_path, machine_id, norm='batch', seed=42):
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        np.random.seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        random.seed(seed)

        print(f'Training on {dataset_path.name}')
        mimii = MIMII(dataset_path=dataset_path, machine_id=machine_id)
        mimii.to(DEVICE)
        #mimii.preprocess(n_fft=1024, hop_length=256, n_mels=80, center=False, power=2.0)  # 80 x 80
        tfms = Subspectrogram(SUB_SPEC_HEIGT, SUB_SPEC_HOP)

        dl = mimii.train_dataloader(
            segment_len=NUM_SEGMENTS,
            hop_len=NUM_SEGMENT_HOPS,
            batch_size=BATCH_SIZE,
            num_workers=NUM_WORKERS,
            shuffle=True,
            transform=tfms
        )

        model = FullSubSpecCAE().to(DEVICE)
        model.init_weights()

        # print(model(torch.randn(128, 1, 20, 80).to(DEVICE)).shape)

        optimizer = optim.Adam(model.parameters(), lr=0.0005)


        for epoch in range(NUM_EPOCHS):
            print(f'EPOCH #{epoch+1}')
            losses = []
            for batch in tqdm(dl):
                data, labels = batch
                data = data.to(DEVICE)  # torch.Size([128, 4, 20, 80]) batch x subs_specs x height x width

                loss = model.train_loss(data)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                losses.append(loss.item())
            print(f'Loss: {np.mean(losses)}')

        auc = mimii.evaluate_model(model, NUM_SEGMENTS, NUM_SEGMENTS, transform=tfms)
        print(f'AUC: {auc}, Machine: {machine_id}, Norm: {norm}, Seed: {seed}')
        return auc

    loss_fn = 'mse'
    results = []
    for norm in ['batch']:
        for seed in SEEDS:
            for dataset_path in ALL_DATASET_PATHS:
                if '-6_dB' in dataset_path.name:
                    for machine_id in [4]:
                        auc = train(dataset_path, machine_id, norm, seed)
                        results.append([dataset_path.name, machine_id, seed, -1, norm, auc])
                    with open(f'full_hard_{norm}_{loss_fn}.pkl', 'wb') as f:
                        pickle.dump(results, f)
mimii.py (11 changed lines)
@@ -71,11 +71,7 @@ class MIMII(object):
        )
        return DataLoader(ConcatDataset(ds), **kwargs)

-    def test_dataloader(self, *args, **kwargs):
-        raise NotImplementedError('test_dataloader is not supported')
-
-    def evaluate_model(self, f, segment_len=20, hop_len=5, transform=None):
-        f.eval()
+    def test_datasets(self, segment_len=20, hop_len=5, transform=None):
        datasets = []
        for p, l in zip(self.test_paths, self.test_labels):
            datasets.append(
@@ -83,6 +79,11 @@ class MIMII(object):
                segment_len=segment_len,
                hop=hop_len, transform=transform)
            )
+        return datasets
+
+    def evaluate_model(self, f, segment_len=20, hop_len=5, transform=None):
+        f.eval()
+        datasets = self.test_datasets(segment_len, hop_len, transform)
        y_true, y_score = [], []
        with torch.no_grad():
            for dataset in tqdm(datasets):
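Note on the mimii.py refactor: it pulls dataset construction out of evaluate_model so that callers can score the test sets themselves (new_main.py below does exactly that). A sketch of the two resulting entry points; the signatures come from the diff, the surrounding variables (mimii, model, tfms, DataLoader) are assumed context:

# 1) one-call evaluation, unchanged from the caller's point of view
auc = mimii.evaluate_model(model, NUM_SEGMENTS, NUM_SEGMENTS, transform=tfms)

# 2) manual scoring over the per-file test datasets, as new_main.py does
for dataset in mimii.test_datasets(NUM_SEGMENTS, NUM_SEGMENTS, transform=tfms):
    loader = DataLoader(dataset, batch_size=len(dataset), shuffle=False)
    ...  # gather predictions and score externally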
models/ae.py (163 changed lines)
@@ -1,3 +1,4 @@
+import numpy as np
import torch
import torch.nn as nn
import torch.functional as F
@@ -67,42 +68,176 @@ class AE(nn.Module):


class SubSpecCAE(nn.Module):
-    def __init__(self, F=20, T=80, norm='batch', activation='relu', dropout_prob=0.25, band=0):
+    def __init__(self, F=20, T=80, norm='batch',
+                 activation='relu', dropout_prob=0.25):
        super(SubSpecCAE, self).__init__()
        self.T = T
        self.F = F
        self.activation = activation
-        self.band = band
+        self.loss_fn = nn.MSELoss()
        Norm = nn.BatchNorm2d if norm == 'batch' else nn.InstanceNorm2d
        Activation = nn.ReLU if activation == 'relu' else nn.LeakyReLU
+        a, b = 20, 40
        self.encoder = nn.Sequential(
-            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=7, stride=1, padding=3),  # 32 x 20 x 80
-            Norm(32),
+            nn.Conv2d(in_channels=1, out_channels=a, kernel_size=7, stride=1, padding=3),  # 32 x 20 x 80
+            Norm(a),
            Activation(),
            nn.MaxPool2d((F//10, 5)),
            nn.Dropout(dropout_prob),
-            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=7, stride=1, padding=3),  # 64 x 10 x 16
-            Norm(64),
+            nn.Conv2d(in_channels=a, out_channels=b, kernel_size=7, stride=1, padding=3),  # 64 x 10 x 16
+            Norm(b),
            Activation(),
            nn.MaxPool2d(4, T),
            nn.Dropout(dropout_prob),
            Flatten(),
-            nn.Linear(64, 16)
+            nn.Linear(b, 16)
        )

        self.decoder = nn.Sequential(
-            nn.Linear(16, 64),
-            Reshape(64, 1, 1),
+            nn.Linear(16, b),
+            Reshape(b, 1, 1),
            nn.Upsample(size=(10, 16), mode='bilinear', align_corners=False),
-            nn.ConvTranspose2d(in_channels=64, out_channels=32, kernel_size=7, stride=1, padding=3),
-            Norm(32),
+            nn.ConvTranspose2d(in_channels=b, out_channels=a, kernel_size=7, stride=1, padding=3),
+            Norm(a),
            Activation(),
            nn.Upsample(size=(20, 80), mode='bilinear', align_corners=False),
            nn.Dropout(dropout_prob),
-            nn.ConvTranspose2d(in_channels=32, out_channels=1, kernel_size=7, stride=1, padding=3)
+            nn.ConvTranspose2d(in_channels=a, out_channels=1, kernel_size=7, stride=1, padding=3),
+            nn.Sigmoid()
        )

+    def forward(self, sub_x):
+        #x = x[:, self.band, :,].unsqueeze(1)  # select a single supspec
+        encoded = self.encoder(sub_x)
+        decoded = self.decoder(encoded)
+        return decoded, sub_x

    def init_weights(self):
        def weight_init(m):
            if isinstance(m, nn.Conv2d) or isinstance(m, torch.nn.Linear):
                torch.nn.init.kaiming_uniform_(m.weight)
                if m.bias is not None:
                    m.bias.data.fill_(0.2)
        self.apply(weight_init)


+class FullSubSpecCAE(nn.Module):
+    def __init__(self, F=20, T=80,
+                 norm='batch', activation='relu',
+                 dropout_prob=0.25, weight_sharing=False,
+                 sub_bands=[0, 1, 2, 3, 4, 5, 6]):
+        super(FullSubSpecCAE, self).__init__()
+        self.bands = sub_bands
+        self.weight_sharing = weight_sharing
+        self.aes = nn.ModuleList([
+            SubSpecCAE(F, T, norm, activation, dropout_prob) for band in range(
+                1 if weight_sharing else len(sub_bands)
+            )
+        ])
+        for ae in self.aes: ae.init_weights()
+        self.loss_fn = nn.MSELoss()
+
+    def select_sub_ae(self, band):
+        if self.weight_sharing:
+            return self.aes[0]
+        else:
+            return self.aes[band]
+
+    def select_sub_band(self, x, band):
+        return x[:, band, :, ].unsqueeze(1)
+
+    def forward(self, x):
+        y_hat, y = [], []
+        for band in self.bands:
+            sub_ae = self.select_sub_ae(band)
+            sub_x = self.select_sub_band(x, band)
+            decoded, target = sub_ae(sub_x)
+            y.append(target)
+            y_hat.append(decoded)
+        decoded = torch.cat(y_hat, dim=1)
+        y = torch.cat(y, dim=1)
+        return decoded, y
+
+    def train_loss(self, data):
+        y_hat, y = self.forward(data)  # torch.Size([96, 7, 20, 80])
+        loss = self.complete_mse(y_hat, y)
+        return loss
+
+    def test_loss(self, data):
+        y_hat, y = self.forward(data)
+        preds = torch.sum((y_hat - y) ** 2, dim=tuple(range(1, y_hat.dim())))
+        return preds
+
+    def sub_band_mse(self, y_hat, y):
+        losses = []
+        for band in self.bands:
+            sub_y = self.select_sub_band(y, band)
+            sub_y_hat = self.select_sub_band(y_hat, band)
+            sub_loss = torch.mean((sub_y_hat - sub_y) ** 2, dim=tuple(range(1, sub_y.dim())))  # torch.Size([96])
+            losses.append(sub_loss)
+        losses = torch.stack(losses, dim=1)  # torch.Size([96, 7])
+        return losses
+
+    def complete_mse(self, y_hat, y):
+        return self.sub_band_mse(y_hat, y).mean(dim=0).sum()
+
+    def gather_predictions(self, dataloader):
+        device = next(self.parameters()).device
+        predictions = []
+        labels = []
+        self.eval()
+        with torch.no_grad():
+            for batch in dataloader:
+                data, l = batch
+                data = data.to(device)
+                y_hat, y = self.forward(data)
+                mse = self.sub_band_mse(y_hat, y)  # 96 x 7
+                predictions.append(mse)
+                labels += l.tolist()
+        predictions = torch.cat(predictions).cpu().numpy()
+        self.train()
+        return predictions, labels
+
+
+class GlobalCAE(nn.Module):
+    def __init__(self, F=20, T=80, norm='batch', activation='relu', dropout_prob=0.25):
+        super(GlobalCAE, self).__init__()
+        self.activation = activation
+        Norm = nn.BatchNorm2d if norm == 'batch' else nn.InstanceNorm2d
+        Activation = nn.ReLU if activation == 'relu' else nn.LeakyReLU
+        self.encoder = nn.Sequential(
+            nn.Conv2d(in_channels=1, out_channels=20, kernel_size=8, stride=2),  # 32 x 20 x 80
+            Norm(20),
+            Activation(),
+            nn.Dropout(dropout_prob),
+            nn.Conv2d(in_channels=20, out_channels=40, kernel_size=5, stride=2),  # 64 x 10 x 16
+            Norm(40),
+            Activation(),
+            nn.Dropout(dropout_prob),
+            nn.Conv2d(in_channels=40, out_channels=60, kernel_size=3, stride=2),  # 64 x 10 x 16
+            Norm(60),
+            Flatten(),
+            nn.Linear(60*8*8, 64)
+        )
+        self.decoder = nn.Sequential(
+            nn.Linear(64, 60*8*8),
+            Reshape(60, 8, 8),
+            nn.ConvTranspose2d(in_channels=60, out_channels=40, kernel_size=3, stride=2),
+            Norm(40),
+            Activation(),
+            nn.Dropout(dropout_prob),
+            nn.ConvTranspose2d(in_channels=40, out_channels=20, kernel_size=5, stride=2),
+            Norm(20),
+            Activation(),
+            nn.Dropout(dropout_prob),
+            nn.ConvTranspose2d(in_channels=20, out_channels=1, kernel_size=8, stride=2),
+            nn.Sigmoid()
+        )

    def forward(self, x):
-        x = x[:,self.band,:,].unsqueeze(1)  # select a single supspec
+        x = x.unsqueeze(1)
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded, x
@@ -123,5 +258,5 @@ class SubSpecCAE(nn.Module):
        if isinstance(m, nn.Conv2d) or isinstance(m, torch.nn.Linear):
            torch.nn.init.kaiming_uniform_(m.weight)
            if m.bias is not None:
-                m.bias.data.fill_(0.02)
+                m.bias.data.fill_(0.01)
        self.apply(weight_init)
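As a shape sanity check on the new loss plumbing (a standalone sketch, not code from the commit): sub_band_mse reduces each (20, 80) sub-band to one error per sample, giving a (batch, 7) matrix, and complete_mse then averages over the batch and sums over bands:

import torch

batch, bands, F, T = 96, 7, 20, 80
y_hat, y = torch.rand(batch, bands, F, T), torch.rand(batch, bands, F, T)

losses = []
for band in range(bands):
    diff = y_hat[:, band] - y[:, band]
    losses.append((diff ** 2).mean(dim=(1, 2)))  # (96,) per-band MSE
losses = torch.stack(losses, dim=1)              # (96, 7), as in sub_band_mse
loss = losses.mean(dim=0).sum()                  # scalar, as in complete_mse
print(losses.shape, loss.shape)                  # torch.Size([96, 7]) torch.Size([])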
models/utils.py (new file, 2 lines)
@@ -0,0 +1,2 @@
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
new_main.py (new file, 101 lines)
@@ -0,0 +1,101 @@
if __name__ == '__main__':
    import numpy as np
    import random
    from tqdm import tqdm
    from cfg import *
    from mimii import MIMII
    from models.ae import FullSubSpecCAE, SubSpecCAE
    import pickle
    import torch.optim as optim
    from models.layers import Subspectrogram
    from sklearn.preprocessing import StandardScaler, MinMaxScaler
    from sklearn.metrics import roc_auc_score
    from torch.utils.data import DataLoader
    from sklearn.ensemble import IsolationForest
    from sklearn.mixture import GaussianMixture


    def train(dataset_path, machine_id, norm='batch', seed=42):
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        np.random.seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        random.seed(seed)

        print(f'Training on {dataset_path.name}')
        mimii = MIMII(dataset_path=dataset_path, machine_id=machine_id)
        mimii.to(DEVICE)
        #mimii.preprocess(n_fft=1024, hop_length=256, n_mels=80, center=False, power=2.0)  # 80 x 80
        tfms = Subspectrogram(SUB_SPEC_HEIGT, SUB_SPEC_HOP)

        dl = mimii.train_dataloader(
            segment_len=NUM_SEGMENTS,
            hop_len=NUM_SEGMENT_HOPS,
            batch_size=BATCH_SIZE,
            num_workers=NUM_WORKERS,
            shuffle=True,
            transform=tfms
        )

        model = FullSubSpecCAE(weight_sharing=False).to(DEVICE)

        # print(model(torch.randn(128, 1, 20, 80).to(DEVICE)).shape)

        optimizer = optim.Adam(model.parameters(), lr=0.0005)


        for epoch in range(NUM_EPOCHS):
            print(f'EPOCH #{epoch+1}')
            losses = []
            for batch in tqdm(dl):
                data, labels = batch
                data = data.to(DEVICE)  # torch.Size([128, 4, 20, 80]) batch x subs_specs x height x width

                loss = model.train_loss(data)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                losses.append(loss.item())
            print(f'Loss: {np.mean(losses)}')

        preds, _ = model.gather_predictions(dl)
        scaler = MinMaxScaler()
        scaler.fit(preds)
        forest = IsolationForest()
        forest.fit(preds)

        datasets = mimii.test_datasets(NUM_SEGMENTS, NUM_SEGMENTS, transform=tfms)
        y_true, y_score = [], []
        for dataset in tqdm(datasets):
            loader = DataLoader(dataset, batch_size=len(dataset), shuffle=False, num_workers=2)
            preds, labels = model.gather_predictions(loader)
            preds = scaler.transform(preds)
            y_true.append(labels[0])  # always the same
            score = forest.decision_function(preds)  # 7x7 -> 1 one score for the whole dataset
            y_score.append(score.mean())
        auc = roc_auc_score(y_true, y_score)
        print(f'AUC: {auc}, Dataset: {dataset_path.name}, Machine: {machine_id}, Norm: {norm}, Seed: {seed}')
        return auc


    loss_fn = 'mse'
    results = []
    for norm in ['batch']:
        for seed in SEEDS:
            for dataset_path in ALL_DATASET_PATHS:
                if '-6_dB' in dataset_path.name:
                    for machine_id in [4]:
                        auc = train(dataset_path, machine_id, norm, seed)
                        results.append([dataset_path.name, machine_id, seed, -1, norm, auc])
                    with open(f'full234_hard_{norm}_{loss_fn}.pkl', 'wb') as f:
                        pickle.dump(results, f)
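One caveat on the IsolationForest scoring in new_main.py: sklearn's decision_function returns larger values for more normal samples. If abnormal test files carry label 1 (the label convention is not visible in this diff, so this is an assumption), the mean decision score would need to be negated before roc_auc_score, mirroring what transfer_learning/main.py below does with its KernelDensity scores:

# hedged sketch: orient scores so that higher = more anomalous
score = -forest.decision_function(preds)
y_score.append(score.mean())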
plots/__init__.py (new file, empty)
plots/normal_vs_abnormal.py (new file, 60 lines)
@@ -0,0 +1,60 @@
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import librosa
import librosa.display
# set LaTeX font
# ==============
nice_fonts = {
    # Use LaTeX to write all text
    "text.usetex": True,
    "font.family": "serif",
    # Use 10pt font in plots, to match 10pt font in document
    "axes.labelsize": 16,   # default 10
    "font.size": 16,        # default 10
    # Make the legend/label fonts a little smaller
    "legend.fontsize": 14,  # default 8
    "xtick.labelsize": 14,  # default 8
    "ytick.labelsize": 14,  # default 8
}
mpl.rcParams.update(nice_fonts)
mpl.rcParams['axes.unicode_minus'] = False

# TODO LaTex
fan_normal = '../data/mimii/-6_dB_fan/id_04/normal/00000042.wav'
fan_anomaly = '../data/mimii/-6_dB_fan/id_04/abnormal/00000048.wav'

pump_normal = '../data/mimii/-6_dB_pump/id_04/normal/00000042.wav'
pump_anomaly = '../data/mimii/-6_dB_pump/id_04/abnormal/00000042.wav'

slider_normal = '../data/mimii/-6_dB_slider/id_04/normal/00000042.wav'
slider_anomaly = '../data/mimii/-6_dB_slider/id_04/abnormal/00000042.wav'

valve_normal = '../data/mimii/-6_dB_valve/id_04/normal/00000042.wav'
valve_anomaly = '../data/mimii/-6_dB_valve/id_04/abnormal/00000042.wav'

fig = plt.figure(figsize=(17, 5))
for i, (p, title) in enumerate(zip([fan_normal, pump_normal, slider_normal, valve_normal, fan_anomaly, pump_anomaly, slider_anomaly, valve_anomaly],
                                   ['Fan', 'Pump', 'Slider', 'Valve', 'Fan', 'Pump', 'Slider', 'Valve'])):
    plt.subplot(2, 4, i+1)
    audio, sr = librosa.load(p, sr=None, mono=True)
    # n_fft=1024, hop_length=256, n_mels=80, center=False, power=2.0
    S = librosa.feature.melspectrogram(y=audio, sr=sr, n_fft=1024, hop_length=256, n_mels=80, center=False, power=2.0)
    S_dB = librosa.power_to_db(S, ref=np.max)
    librosa.display.specshow(S_dB, x_axis='s' if i == 4 else 'off', hop_length=256,
                             y_axis='mel' if i == 4 else 'off', sr=16000, cmap='viridis')
    if i < 4:
        plt.title(title)
    else:
        plt.title(title + ' malfunction')

cbar_ax = fig.add_axes([0.94, 0.18, 0.015, 0.7])
cmap = mpl.cm.viridis
norm = mpl.colors.Normalize(vmin=0, vmax=-80)
cb1 = mpl.colorbar.ColorbarBase(cbar_ax, cmap=cmap,
                                norm=norm,
                                orientation='vertical',
                                format='%+2.0f dB')
plt.tight_layout()
fig.subplots_adjust(right=0.93)
plt.savefig('normal_vs_abnormal.png')
plots/playground.py (new file, 64 lines)
@@ -0,0 +1,64 @@
import librosa
from matplotlib import pyplot as plt
import numpy as np
from models.utils import count_parameters
from models.ae import SubSpecCAE, AE, GlobalCAE
from cfg import ALL_DATASET_PATHS
import librosa.display
import librosa.feature
import seaborn as sns

purple = (126/255, 87/255, 194/255)
params_subspecae = count_parameters(SubSpecCAE())
params_ae = count_parameters(AE())
params_globalcae = count_parameters(GlobalCAE())
print(f'#Parameters SubSpecCAE: {params_subspecae}')
print(f'#Parameters AE: {params_ae}')
print(f'#SubSpecAe/#AE: {(params_subspecae/params_ae):.2f}')
print(f'#GlobalCAE: {params_globalcae}')
print(f'#GlobalCAE/#SubSpecCAE: {params_globalcae/params_subspecae}')


fig = plt.figure(figsize=(10, 5))
slider_normal = '../data/mimii/-6_dB_slider/id_04/normal/00000042.wav'
audio, sr = librosa.load(slider_normal)
waveplot = librosa.display.waveplot(audio, sr=sr)
plt.tight_layout()
plt.savefig('wavplot.png')

ids = range(0)
specs = []
i = 0
audios = list((ALL_DATASET_PATHS[1] / 'id_00' / 'normal').glob('*.wav'))
print(str(audios[80]))
audio, sr = librosa.load(str(audios[80]), sr=None)
mel_spec = librosa.feature.melspectrogram(audio, sr=sr, n_fft=1024, hop_length=256, n_mels=80, center=False, power=2.0)
mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
librosa.display.specshow(mel_spec_db, hop_length=256, x_axis='s', y_axis='mel', cmap='viridis', sr=sr)
plt.savefig('notadjusted.png')
plt.clf()

centroids = []
for p in audios:
    audio, sr = librosa.load(str(p), sr=None)
    spectral_centroids = librosa.feature.spectral_centroid(audio, sr=sr)[0]
    centroids += spectral_centroids.tolist()

sns.distplot(centroids, hist=True, kde=True,
             color=purple,
             hist_kws={'edgecolor': 'black'},
             kde_kws={'linewidth': 2})
plt.xlabel('Occurrence counts')
plt.ylabel('Density')
plt.title('Spectral centroid distribution')
plt.tight_layout()
plt.savefig('hist.png')

def get_bands(centroids, n_mels=80):
    std = np.std(centroids)
    mu = np.mean(centroids)
    return int((mu-3*std)/n_mels), int((mu+3*std)/n_mels)

print(get_bands(centroids))
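A quick worked example of get_bands with made-up numbers (the real statistics come from the centroid loop above): for mu = 2000 and std = 400 it returns (int(800/80), int(3200/80)) = (10, 40), i.e. the mu ± 3*std range scaled down by n_mels:

# made-up numbers, for intuition only
mu, std, n_mels = 2000.0, 400.0, 80
print(int((mu - 3 * std) / n_mels), int((mu + 3 * std) / n_mels))  # 10 40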
transfer_learning/__init__.py (new file, empty)
transfer_learning/extractors.py (new file, 149 lines)
@@ -0,0 +1,149 @@
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset
import torch.nn as nn
from PIL import Image
from tqdm import tqdm
import pandas as pd


class FeatureExtractor:
    supported_extractors = ['resnet18', 'resnet34', 'resnet50',
                            'alexnet_fc6', 'alexnet_fc7', 'vgg16',
                            'densenet121', 'inception_v3', 'squeezenet']

    def __init__(self, version='resnet18', device='cpu'):
        assert version.lower() in self.supported_extractors
        self.device = device
        self.version = version
        self.F = self.__choose_feature_extractor(version)
        for param in self.F.parameters():
            param.requires_grad = False
        self.F.eval()
        self.input_size = (299, 299) if version.lower() == 'inception' else (224, 224)
        self.transforms = transforms.Compose([
            transforms.Resize(self.input_size),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

    def to(self, device):
        self.device = device
        self.F = self.F.to(self.device)
        return self

    def __choose_feature_extractor(self, version):
        if 'resnet' in version.lower():
            v = int(version[-2:])
            if v == 18:
                resnet = torchvision.models.resnet18(pretrained=True)
            elif v == 34:
                resnet = torchvision.models.resnet34(pretrained=True)
            elif v == 50:
                resnet = torchvision.models.resnet50(pretrained=True)
            return nn.Sequential(*list(resnet.children())[:-1])
        elif 'alexnet' in version.lower():
            v = int(version[-1])
            alexnet = torchvision.models.alexnet(pretrained=True)
            if v == 7:
                f = nn.Sequential(*list(alexnet.classifier.children())[:-2])
            elif v == 6:
                f = nn.Sequential(*list(alexnet.classifier.children())[:-5])
            alexnet.classifier = f
            return alexnet
        elif version.lower() == 'vgg16':
            vgg = torchvision.models.vgg16_bn(pretrained=True)
            classifier = list(vgg.classifier.children())[:4]
            vgg.classifier = nn.Sequential(*classifier)
            return vgg
        elif version.lower() == 'densenet121':
            densenet = torchvision.models.densenet121(pretrained=True)
            avg_pool = nn.AvgPool2d(kernel_size=7)
            densenet = nn.Sequential(*list(densenet.children())[:-1])
            densenet.add_module('avg_pool', avg_pool)
            return densenet
        elif version.lower() == 'inception_v3':
            inception = torchvision.models.inception_v3(pretrained=True)
            f = nn.Sequential(*list(inception.children())[:-1])
            f._modules.pop('13')
            f.add_module('global average', nn.AvgPool2d(26))
            return f
        elif version.lower() == 'squeezenet':
            squeezenet = torchvision.models.squeezenet1_1(pretrained=True)
            f = torch.nn.Sequential(
                squeezenet.features,
                torch.nn.AdaptiveAvgPool2d(output_size=(2, 2))
            )
            return f
        else:
            raise NotImplementedError('The feature extractor you requested is not yet supported')

    @property
    def feature_size(self):
        x = torch.randn(size=(1, 3, *self.input_size)).to(self.device)
        return self.F(x).squeeze().shape[0]

    def __call__(self, batch):
        batch = self.transforms(batch)
        if len(batch.shape) <= 3:
            batch = batch.unsqueeze(0)
        return self.F(batch).view(batch.shape[0], -1).squeeze()

    def from_image_folder(self, folder_path, extension='jpg'):
        sorted_files = sorted(list(folder_path.glob(f'*.{extension}')))
        split_names = [x.stem.split('_') for x in sorted_files]
        names = [x[0] for x in split_names]
        seq_ids = [x[1] for x in split_names]
        X = []
        for i, p_img in enumerate(tqdm(sorted_files)):
            x = Image.open(p_img)
            features = self(x)
            X.append([names[i], seq_ids[i]] + features.tolist())
        return pd.DataFrame(X, columns=['name', 'seq_id', *(f'feature_{i}' for i in range(self.feature_size))])


class AudioTransferLearningImageDataset(Dataset):
    def __init__(self, root_or_files, extension='jpg', input_size=224):
        self.root_or_files = root_or_files
        if type(root_or_files) == list:
            self.files = root_or_files
        else:
            self.files = list(self.root.glob(f'*.{extension}'))
        self.transforms = transforms.Compose([
            transforms.Resize(input_size),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

    def process_name(self, name):
        split_name = name.stem.split('_')
        return split_name[0], split_name[1]  # name, seq_id

    def __getitem__(self, item):
        p_img = self.files[item]
        x = Image.open(p_img)
        x = self.transforms(x)
        name, seq_id = self.process_name(p_img)
        return x, name, seq_id

    def __len__(self):
        return len(self.files)


if __name__ == '__main__':
    from pathlib import Path
    version = 'resnet18'
    extractor = FeatureExtractor(version=version)
    models = ['slider', 'pump', 'fan']
    model_ids = [0, 2, 4, 6]
    for model in models:
        for model_id in model_ids:
            df = extractor.from_image_folder(Path(f'/home/robert/coding/audio_anomaly_detection/data/mimii/-6_dB_{model}/id_0{model_id}/normal/melspec_images/'))
            df.to_csv(Path(f'/home/robert/coding/audio_anomaly_detection/data/mimii/-6_dB_{model}/id_0{model_id}/normal/{version}_features.csv'), index=False)
            del df
            df = extractor.from_image_folder(Path(f'/home/robert/coding/audio_anomaly_detection/data/mimii/-6_dB_{model}/id_0{model_id}/abnormal/melspec_images/'))
            df.to_csv(Path(f'/home/robert/coding/audio_anomaly_detection/data/mimii/-6_dB_{model}/id_0{model_id}/abnormal/{version}_features.csv'), index=False)
transfer_learning/main.py (new file, 134 lines)
@@ -0,0 +1,134 @@
from cfg import ALL_DATASET_PATHS
from pathlib import Path
import pandas as pd
import numpy as np
from sklearn.mixture import GaussianMixture, BayesianGaussianMixture
from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM
from sklearn.neighbors import KernelDensity
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from transfer_learning.extractors import AudioTransferLearningImageDataset, FeatureExtractor
from torch.utils.data import DataLoader
import torch.nn as nn
from torch.optim import Adam
from tqdm import tqdm
from transfer_learning.my_model import MyModel
import random

np.random.seed(1337)
random.seed(1337)

# Switch
OURS = False
# Parameters
leave_n_out = 150
model = 'fan'
model_id = 0

# get all wav files
wavs_normal = Path(f'/home/robert/coding/audio_anomaly_detection/data/mimii/-6_dB_{model}/id_0{model_id}/normal').glob('*.wav')
wavs_abnormal = Path(f'/home/robert/coding/audio_anomaly_detection/data/mimii/-6_dB_{model}/id_0{model_id}/abnormal').glob('*.wav')

# as list + shuffle
normal_files = list(wavs_normal)
abnormal_files = list(wavs_abnormal)
random.shuffle(normal_files)
random.shuffle(abnormal_files)

normal_train_files = normal_files[leave_n_out:]
normal_test_files = normal_files[:leave_n_out]

abnormal_test_files = abnormal_files[:leave_n_out]

print(len(normal_train_files), len(normal_test_files), len(normal_files))
print(len(abnormal_test_files))

if not OURS:
    normal_df = pd.read_csv(Path(f'/home/robert/coding/audio_anomaly_detection/data/mimii/-6_dB_{model}/id_0{model_id}/normal/resnet18_features.csv'))
    abnormal_df = pd.read_csv(Path(f'/home/robert/coding/audio_anomaly_detection/data/mimii/-6_dB_{model}/id_0{model_id}/abnormal/resnet18_features.csv'))

    print(normal_df.shape, abnormal_df.shape)

    normal_trainset = normal_df[normal_df.name.isin([int(x.stem.split('_')[0]) for x in normal_train_files])]
    normal_testset = normal_df[normal_df.name.isin([int(x.stem.split('_')[0]) for x in normal_test_files])]
    abnormal_testset = abnormal_df[abnormal_df.name.isin([int(x.stem.split('_')[0]) for x in abnormal_test_files])]

    print(f'normal train: {normal_trainset.shape}')
    print(f'normal test: {normal_testset.shape}')
    print(f'abnormal test: {abnormal_testset.shape}')

    only_features = lambda x: x[:, 2:]

    print(f'#Normal files:{len(normal_files)}\t#Abnormal files: {len(abnormal_files)}')
    print(f'#Normal test snippets normal: {len(normal_testset)}, #Abnormal test snippets: {len(abnormal_testset)}')

    #model = BayesianGaussianMixture(n_components=64, max_iter=150, covariance_type='diag')
    #model = GaussianMixture(n_components=64, covariance_type='diag')
    #model = OneClassSVM(nu=1e-2)
    #model = IsolationForest(n_estimators=128, contamination='auto')
    model = KernelDensity(kernel='gaussian', bandwidth=0.1)
    scaler = Pipeline(
        [('Scaler', StandardScaler())]
    )
    X = only_features(normal_trainset.values)

    scaler.fit(X)
    model.fit(scaler.transform(X))

    scores_normal_test = model.score_samples(
        scaler.transform(only_features(normal_testset.values))
    )

    scores_abnormal_test = model.score_samples(
        scaler.transform(only_features(abnormal_testset.values))
    )

    normal_with_scores = normal_testset[['name', 'seq_id']].copy()
    normal_with_scores['score'] = -scores_normal_test
    normal_with_scores['label'] = 0
    normal_grouped = normal_with_scores.groupby(by=['name']).mean()

    abnormal_with_scores = abnormal_testset[['name', 'seq_id']].copy()
    abnormal_with_scores['score'] = -scores_abnormal_test
    abnormal_with_scores['label'] = 0
    abnormal_grouped = abnormal_with_scores.groupby(by=['name']).mean()

    scores_normal_grouped = normal_grouped.score.values.tolist()
    scores_abnormal_grouped = abnormal_grouped.score.values.tolist()

    labels_normal = [0] * len(scores_normal_grouped)
    labels_abnormal = [1] * len(scores_abnormal_grouped)
    labels = labels_normal + labels_abnormal

    scores = scores_normal_grouped + scores_abnormal_grouped

    auc = roc_auc_score(y_score=scores, y_true=labels)
    print(auc)

else:
    dataset = AudioTransferLearningImageDataset(root_or_files=normal_train_files)
    dataloader = DataLoader(dataset, batch_size=80, shuffle=True)
    F = FeatureExtractor(version='resnet18').to('cuda')
    feature_size = F.feature_size
    criterion = nn.MSELoss()
    my_model = MyModel(F).to('cuda')

    for e in range(0):
        losses = []
        for batch in tqdm(dataloader):
            imgs, names, seq_ids = batch
            imgs = imgs.to('cuda')
            prediction, target = my_model(imgs)
            loss = criterion(prediction, target)
            my_model.optimizer.zero_grad()
            loss.backward()
            my_model.optimizer.step()
            losses.append(loss.item())
        print(sum(losses)/len(losses))

    # test
    dataset = AudioTransferLearningImageDataset(root_or_files=normal_test_files)
    dataloader = DataLoader(dataset, batch_size=80, shuffle=False)
    my_model.scores_from_dataloader(dataloader)
transfer_learning/my_model.py (new file, 87 lines)
@@ -0,0 +1,87 @@
import torch
import torch.nn as nn
from torch.optim import Adam


class MyModel(nn.Module):
    def __init__(self, feature_extractor):
        super(MyModel, self).__init__()
        self.feature_extractor = feature_extractor
        feature_size = feature_extractor.feature_size
        self.noise = nn.Sequential(
            nn.Linear(feature_size, feature_size // 2),
            nn.ELU(),
            nn.Linear(feature_size // 2, feature_size // 4)
        )
        for p in self.noise.parameters():
            p.requires_grad = False
        self.noise.eval()

        self.student = nn.Sequential(
            nn.Linear(feature_size, feature_size // 2),
            nn.ELU(),
            nn.Linear(feature_size // 2, feature_size // 4),
            nn.ELU(),
            nn.Linear(feature_size // 4, feature_size // 4)
        )
        self.optimizer = Adam(self.student.parameters(), lr=0.0001, weight_decay=1e-7, amsgrad=True)

    def forward(self, imgs):
        features = self.feature_extractor.F(imgs).squeeze()
        target = self.noise(features)
        prediction = self.student(features)
        return target, prediction

    def scores_from_dataloader(self, dataloader):
        scores = []
        with torch.no_grad():
            for batch in dataloader:
                imgs, names, seq_ids = batch
                imgs = imgs.to('cuda')
                target, prediction = self.forward(imgs)
                preds = torch.sum((prediction - target) ** 2, dim=tuple(range(1, target.dim())))
                print(preds.shape)


class HyperFraud(nn.Module):
    def __init__(self, hidden_dim=256):
        super(HyperFraud, self).__init__()
        self.hidden_dim = hidden_dim
        self.mean = torch.randn(size=(1, 512))
        self.std = torch.randn(size=(1, 512))

        self.W_forget = nn.Sequential(
            nn.Linear(512, hidden_dim)
        )
        self.U_forget = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim)
        )
        self.W_hidden = nn.Sequential(
            nn.Linear(512, 256)
        )
        self.U_hidden = nn.Sequential(
            nn.Linear(hidden_dim, 256)
        )
        self.b_forget = nn.Parameter(torch.randn(1, hidden_dim))
        self.b_hidden = nn.Parameter(torch.randn(1, hidden_dim))

    def forward(self, data, max_seq_len=10):
        # data: batch x seqs x dim
        # random seq sampling
        h_prev = [torch.zeros(size=(data.shape[0], self.hidden_dim))]
        for i in range(0, max_seq_len):
            x_t = data[:, i]
            h_t_prev = h_prev[-1]
            W_x_t = self.W_forget(x_t)
            U_h_prev = self.U_forget(h_t_prev)
            forget_t = torch.sigmoid(W_x_t + U_h_prev + self.b_forget)
            h_t = forget_t * h_t_prev + (1.0 - forget_t) * torch.tanh(self.W_hidden(x_t) + self.U_hidden(forget_t * h_t_prev) + self.b_hidden)
            h_prev.append(h_t)
        return torch.stack(h_prev[1:], dim=1)


#hf = HyperFraud()
#rand_input = torch.randn(size=(42, 10, 512))

#print(hf(rand_input).shape)
transfer_learning/preprocessor.py (new file, 82 lines)
@@ -0,0 +1,82 @@
import numpy as np
from tqdm import tqdm
import librosa
import librosa.display
from matplotlib import pyplot as plt
from pathlib import Path


class Preprocessor:
    def __init__(self, sr=16000, n_mels=64, n_fft=1024, hop_length=256, chunk_size=64, chunk_hop=32, cmap='viridis'):
        self.sr = sr
        self.n_fft = n_fft
        self.n_mels = n_mels
        self.hop_length = hop_length
        self.chunk_size = chunk_size
        self.chunk_hop = chunk_hop
        self.cmap = cmap

    def process_audio(self, path, out_folder=None):
        mel_spec = self.to_mel_spec(path)
        for count, i in enumerate(range(0, mel_spec.shape[1], self.chunk_hop)):
            try:
                chunk = mel_spec[:, i:i+self.chunk_size]
                out_path = out_folder / f'{path.stem}_{count}.jpg'
                self.mel_spec_to_img(chunk, out_path)  # todo must adjust outpath name
            except IndexError:
                pass

    def to_mel_spec(self, path):
        audio, sr = librosa.load(str(path), sr=self.sr, mono=True)
        spectrogram = librosa.stft(audio,
                                   n_fft=self.n_fft,
                                   hop_length=self.n_fft // 2,
                                   center=False)
        spectrogram = librosa.feature.melspectrogram(S=np.abs(spectrogram) ** 2,
                                                     sr=sr,
                                                     n_mels=self.n_mels,
                                                     hop_length=self.hop_length)
        # prepare plot
        spectrogram = librosa.power_to_db(spectrogram, ref=np.max, top_db=None)
        return spectrogram

    def mel_spec_to_img(self, spectrogram, out_path, size=227):
        # prepare plotting
        fig = plt.figure(frameon=False, tight_layout=False)
        fig.set_size_inches(1, 1)
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)

        spectrogram_axes = librosa.display.specshow(spectrogram,
                                                    hop_length=self.n_fft // 2,
                                                    fmax=self.sr/2,
                                                    sr=self.sr,
                                                    cmap=self.cmap,
                                                    y_axis='mel',
                                                    x_axis='time')

        fig.add_axes(spectrogram_axes, id='spectrogram')
        fig.savefig(out_path, format='jpg', dpi=size)
        plt.clf()
        plt.close()

    def process_folder(self, folder_in, folder_out):
        wavs = folder_in.glob('*.wav')
        folder_out.mkdir(parents=True, exist_ok=True)
        for wav in tqdm(list(wavs)):
            self.process_audio(wav, folder_out)


if __name__ == '__main__':
    models = ['slider', 'pump', 'fan']
    model_ids = [0, 2, 4, 6]
    preprocessor = Preprocessor()
    for model in models:
        for model_id in model_ids:
            preprocessor.process_folder(Path(f'/home/robert/coding/audio_anomaly_detection/data/mimii/-6_dB_{model}/id_0{model_id}/normal'),
                                        Path(f'/home/robert/coding/audio_anomaly_detection/data/mimii/-6_dB_{model}/id_0{model_id}/normal/melspec_images/')
                                        )
            preprocessor.process_folder(Path(f'/home/robert/coding/audio_anomaly_detection/data/mimii/-6_dB_{model}/id_0{model_id}/abnormal'),
                                        Path(f'/home/robert/coding/audio_anomaly_detection/data/mimii/-6_dB_{model}/id_0{model_id}/abnormal/melspec_images/')
                                        )