big update

Robert Müller 2020-04-06 14:46:26 +02:00
parent 0f325676e5
commit 482f45df87
17 changed files with 1027 additions and 32 deletions

cfg.py

@@ -2,11 +2,11 @@ from pathlib import Path
import torch
BATCH_SIZE = 96
NUM_EPOCHS = 100
NUM_EPOCHS = 75
NUM_WORKERS = 4
NUM_SEGMENTS = 80
NUM_SEGMENT_HOPS = 15
SEEDS = [42, 1337, 666, 2012, 1e42]
SEEDS = [42, 1337, 666]
ALL_DATASET_PATHS = list((Path(__file__).parent.absolute() / 'data' / 'mimii').glob('*/'))
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

clean.py (new file)

@@ -0,0 +1,18 @@
# scores on band level per file
# merge scores using GMM/IsoForest
# combine using mean
# compute roc auc
# 1. inputs: scorer function 80x80 -> 1x7
# 2. GMM: 1x7 -> 1x1
# 3. mean over all snippets
# single score
class Scorer:
def __init__(self):
pass
class Evaluator:
def __init__(self):
pass
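A minimal sketch of the pipeline described by the comments above, assuming scikit-learn; every name below except Scorer and Evaluator is an assumption rather than code from this commit:

from sklearn.metrics import roc_auc_score
from sklearn.mixture import GaussianMixture

class Scorer:
    # merge per-band scores (n_snippets x 7) into one score per snippet via a GMM
    def __init__(self, n_components=2):
        self.gmm = GaussianMixture(n_components=n_components, covariance_type='diag')

    def fit(self, band_scores):
        # band_scores: n_snippets x 7, computed on normal training data only
        self.gmm.fit(band_scores)
        return self

    def __call__(self, band_scores):
        # 1 x 7 -> 1 x 1: negative log-likelihood as anomaly score
        return -self.gmm.score_samples(band_scores)

class Evaluator:
    # mean over all snippets of a file -> single score, then ROC AUC over files
    def __init__(self, scorer):
        self.scorer = scorer

    def evaluate(self, per_file_band_scores, labels):
        y_score = [self.scorer(s).mean() for s in per_file_band_scores]
        return roc_auc_score(labels, y_score)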

main.py

@@ -9,7 +9,8 @@ if __name__ == '__main__':
import torch.optim as optim
from models.layers import Subspectrogram
def train(dataset_path, machine_id, band, norm, seed):
def train(dataset_path, machine_id, band, norm='batch', loss_fn='mse', seed=42):
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
np.random.seed(seed)
@@ -32,12 +33,12 @@ if __name__ == '__main__':
transform=tfms
)
model = SubSpecCAE(norm=norm, band=band).to(DEVICE)
model = SubSpecCAE(norm=norm, loss_fn=loss_fn, band=band).to(DEVICE)
model.init_weights()
# print(model(torch.randn(128, 1, 20, 80).to(DEVICE)).shape)
optimizer = optim.Adam(model.parameters(), lr=0.001)
optimizer = optim.Adam(model.parameters(), lr=0.0005)
for epoch in range(NUM_EPOCHS):
@@ -60,16 +61,17 @@ if __name__ == '__main__':
print(f'AUC: {auc}, Machine: {machine_id}, Band: {band}, Norm: {norm}, Seed: {seed}')
return auc
loss_fn = 'mse'
results = []
for norm in ('instance', 'batch'):
for norm in ['batch']:
for seed in SEEDS:
for dataset_path in ALL_DATASET_PATHS:
for machine_id in [0, 2, 4, 6]:
if '-6_dB' in dataset_path.name:
for machine_id in [4]:
for band in range(7):
auc = train(dataset_path, machine_id, band, norm, seed)
auc = train(dataset_path, machine_id, band, norm, loss_fn, seed)
results.append([dataset_path.name, machine_id, seed, band, norm, auc])
with open(f'results_{norm}.pkl', 'wb') as f:
with open(f'results2_hard_{norm}_{loss_fn}.pkl', 'wb') as f:
pickle.dump(results, f)

main2.py (new file)

@@ -0,0 +1,79 @@
if __name__ == '__main__':
import numpy as np
import random
from tqdm import tqdm
from cfg import *
from mimii import MIMII
from models.ae import GlobalCAE
import pickle
import torch.optim as optim
from models.layers import Subspectrogram
def train(dataset_path, machine_id, norm='batch', loss_fn='mse', seed=42):
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
np.random.seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
random.seed(seed)
print(f'Training on {dataset_path.name}')
mimii = MIMII(dataset_path=dataset_path, machine_id=machine_id)
mimii.to(DEVICE)
#mimii.preprocess(n_fft=1024, hop_length=256, n_mels=80, center=False, power=2.0) # 80 x 80
dl = mimii.train_dataloader(
segment_len=NUM_SEGMENTS,
hop_len=NUM_SEGMENT_HOPS,
batch_size=BATCH_SIZE,
num_workers=NUM_WORKERS,
shuffle=True
)
model = GlobalCAE(norm=norm).to(DEVICE)
model.init_weights()
# print(model(torch.randn(128, 1, 20, 80).to(DEVICE)).shape)
optimizer = optim.Adam(model.parameters(), lr=0.0005)
for epoch in range(NUM_EPOCHS):
print(f'EPOCH #{epoch+1}')
losses = []
for batch in tqdm(dl):
data, labels = batch
data = data.to(DEVICE) # torch.Size([96, 80, 80]) batch x n_mels x frames; no Subspectrogram here, GlobalCAE.forward adds the channel dim
loss = model.train_loss(data)
optimizer.zero_grad()
loss.backward()
optimizer.step()
losses.append(loss.item())
print(f'Loss: {np.mean(losses)}')
auc = mimii.evaluate_model(model, NUM_SEGMENTS, NUM_SEGMENTS)
print(f'AUC: {auc}, Machine: {machine_id}, Norm: {norm}, Seed: {seed}')
return auc
loss_fn = 'mse'
results = []
for norm in ['batch']:
for seed in SEEDS:
for dataset_path in ALL_DATASET_PATHS:
if '-6_dB' in dataset_path.name:
for machine_id in [4]:
auc = train(dataset_path, machine_id, norm, loss_fn, seed)
results.append([dataset_path.name, machine_id, seed, norm, auc])
with open(f'TODO_hard_{norm}_{loss_fn}.pkl', 'wb') as f:
pickle.dump(results, f)

main3.py (new file)

@@ -0,0 +1,81 @@
if __name__ == '__main__':
import numpy as np
import random
from tqdm import tqdm
from cfg import *
from mimii import MIMII
from models.ae import FullSubSpecCAE, SubSpecCAE
import pickle
import torch.optim as optim
from models.layers import Subspectrogram
def train(dataset_path, machine_id, norm='batch', seed=42):
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
np.random.seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
random.seed(seed)
print(f'Training on {dataset_path.name}')
mimii = MIMII(dataset_path=dataset_path, machine_id=machine_id)
mimii.to(DEVICE)
#mimii.preprocess(n_fft=1024, hop_length=256, n_mels=80, center=False, power=2.0) # 80 x 80
tfms = Subspectrogram(SUB_SPEC_HEIGT, SUB_SPEC_HOP)
dl = mimii.train_dataloader(
segment_len=NUM_SEGMENTS,
hop_len=NUM_SEGMENT_HOPS,
batch_size=BATCH_SIZE,
num_workers=NUM_WORKERS,
shuffle=True,
transform=tfms
)
model = FullSubSpecCAE().to(DEVICE)
model.init_weights()
# print(model(torch.randn(128, 1, 20, 80).to(DEVICE)).shape)
optimizer = optim.Adam(model.parameters(), lr=0.0005)
for epoch in range(NUM_EPOCHS):
print(f'EPOCH #{epoch+1}')
losses = []
for batch in tqdm(dl):
data, labels = batch
data = data.to(DEVICE) # torch.Size([96, 7, 20, 80]) batch x sub_specs x height x width
loss = model.train_loss(data)
optimizer.zero_grad()
loss.backward()
optimizer.step()
losses.append(loss.item())
print(f'Loss: {np.mean(losses)}')
auc = mimii.evaluate_model(model, NUM_SEGMENTS, NUM_SEGMENTS, transform=tfms)
print(f'AUC: {auc}, Machine: {machine_id}, Norm: {norm}, Seed: {seed}')
return auc
loss_fn = 'mse'
results = []
for norm in ['batch']:
for seed in SEEDS:
for dataset_path in ALL_DATASET_PATHS:
if '-6_dB' in dataset_path.name:
for machine_id in [4]:
auc = train(dataset_path, machine_id, norm, seed)
results.append([dataset_path.name, machine_id, seed, -1, norm, auc])
with open(f'full_hard_{norm}_{loss_fn}.pkl', 'wb') as f:
pickle.dump(results, f)
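main.py, main3.py and new_main.py all rely on the Subspectrogram transform from models.layers, whose implementation is not part of this diff. A minimal sketch consistent with the shape comments ([96, 7, 20, 80]), assuming SUB_SPEC_HEIGT=20 and SUB_SPEC_HOP=10:

import numpy as np

class Subspectrogram:
    # split a (n_mels x frames) spectrogram into overlapping frequency bands
    def __init__(self, height, hop):
        self.height = height
        self.hop = hop

    def __call__(self, spec):
        # e.g. 80 x 80 -> 7 x 20 x 80 with height=20, hop=10
        bands = [spec[i:i + self.height]
                 for i in range(0, spec.shape[0] - self.height + 1, self.hop)]
        return np.stack(bands)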

mimii.py

@@ -71,11 +71,7 @@ class MIMII(object):
)
return DataLoader(ConcatDataset(ds), **kwargs)
def test_dataloader(self, *args, **kwargs):
raise NotImplementedError('test_dataloader is not supported')
def evaluate_model(self, f, segment_len=20, hop_len=5, transform=None):
f.eval()
def test_datasets(self, segment_len=20, hop_len=5, transform=None):
datasets = []
for p, l in zip(self.test_paths, self.test_labels):
datasets.append(
@@ -83,6 +79,11 @@ class MIMII(object):
segment_len=segment_len,
hop=hop_len, transform=transform)
)
return datasets
def evaluate_model(self, f, segment_len=20, hop_len=5, transform=None):
f.eval()
datasets = self.test_datasets(segment_len, hop_len, transform)
y_true, y_score = [], []
with torch.no_grad():
for dataset in tqdm(datasets):

models/ae.py

@@ -1,3 +1,4 @@
import numpy as np
import torch
import torch.nn as nn
import torch.functional as F
@@ -67,42 +68,176 @@ class AE(nn.Module):
class SubSpecCAE(nn.Module):
def __init__(self, F=20, T=80, norm='batch', activation='relu', dropout_prob=0.25, band=0):
def __init__(self, F=20, T=80, norm='batch',
activation='relu', dropout_prob=0.25):
super(SubSpecCAE, self).__init__()
self.T = T
self.F = F
self.activation = activation
self.band = band
self.loss_fn = nn.MSELoss()
Norm = nn.BatchNorm2d if norm == 'batch' else nn.InstanceNorm2d
Activation = nn.ReLU if activation == 'relu' else nn.LeakyReLU
a, b = 20, 40
self.encoder = nn.Sequential(
nn.Conv2d(in_channels=1, out_channels=32, kernel_size=7, stride=1, padding=3), # 32 x 20 x 80
Norm(32),
nn.Conv2d(in_channels=1, out_channels=a, kernel_size=7, stride=1, padding=3), # 32 x 20 x 80
Norm(a),
Activation(),
nn.MaxPool2d((F//10, 5)),
nn.Dropout(dropout_prob),
nn.Conv2d(in_channels=32, out_channels=64, kernel_size=7, stride=1, padding=3), # 64 x 10 x 16
Norm(64),
nn.Conv2d(in_channels=a, out_channels=b, kernel_size=7, stride=1, padding=3), # 64 x 10 x 16
Norm(b),
Activation(),
nn.MaxPool2d(4, T),
nn.Dropout(dropout_prob),
Flatten(),
nn.Linear(64, 16)
nn.Linear(b, 16)
)
self.decoder = nn.Sequential(
nn.Linear(16, 64),
Reshape(64, 1, 1),
nn.Linear(16, b),
Reshape(b, 1, 1),
nn.Upsample(size=(10, 16), mode='bilinear', align_corners=False),
nn.ConvTranspose2d(in_channels=64, out_channels=32, kernel_size=7, stride=1, padding=3),
Norm(32),
nn.ConvTranspose2d(in_channels=b, out_channels=a, kernel_size=7, stride=1, padding=3),
Norm(a),
Activation(),
nn.Upsample(size=(20, 80), mode='bilinear', align_corners=False),
nn.Dropout(dropout_prob),
nn.ConvTranspose2d(in_channels=32, out_channels=1, kernel_size=7, stride=1, padding=3)
nn.ConvTranspose2d(in_channels=a, out_channels=1, kernel_size=7, stride=1, padding=3),
nn.Sigmoid()
)
def forward(self, sub_x):
#x = x[:, self.band, :,].unsqueeze(1) # select a single subspec
encoded = self.encoder(sub_x)
decoded = self.decoder(encoded)
return decoded, sub_x
def init_weights(self):
def weight_init(m):
if isinstance(m, nn.Conv2d) or isinstance(m, torch.nn.Linear):
torch.nn.init.kaiming_uniform_(m.weight)
if m.bias is not None:
m.bias.data.fill_(0.2)
self.apply(weight_init)
class FullSubSpecCAE(nn.Module):
def __init__(self, F=20, T=80,
norm='batch', activation='relu',
dropout_prob=0.25, weight_sharing=False,
sub_bands=[0, 1, 2, 3, 4, 5, 6]):
super(FullSubSpecCAE, self).__init__()
self.bands = sub_bands
self.weight_sharing = weight_sharing
self.aes = nn.ModuleList([
SubSpecCAE(F, T, norm, activation, dropout_prob) for _ in range(
1 if weight_sharing else len(sub_bands)
)
])
for ae in self.aes: ae.init_weights()
self.loss_fn = nn.MSELoss()
def select_sub_ae(self, band):
if self.weight_sharing:
return self.aes[0]
else:
return self.aes[band]
def select_sub_band(self, x, band):
return x[:, band, :, ].unsqueeze(1)
def forward(self, x):
y_hat, y = [], []
for band in self.bands:
sub_ae = self.select_sub_ae(band)
sub_x = self.select_sub_band(x, band)
decoded, target = sub_ae(sub_x)
y.append(target)
y_hat.append(decoded)
decoded = torch.cat(y_hat, dim=1)
y = torch.cat(y, dim=1)
return decoded, y
def train_loss(self, data):
y_hat, y = self.forward(data) # torch.Size([96, 7, 20, 80])
loss = self.complete_mse(y_hat, y)
return loss
def test_loss(self, data):
y_hat, y = self.forward(data)
preds = torch.sum((y_hat - y) ** 2, dim=tuple(range(1, y_hat.dim())))
return preds
def sub_band_mse(self, y_hat, y):
losses = []
for band in self.bands:
sub_y = self.select_sub_band(y, band)
sub_y_hat = self.select_sub_band(y_hat, band)
sub_loss = torch.mean((sub_y_hat - sub_y) ** 2, dim=tuple(range(1, sub_y.dim()))) # torch.Size([96])
losses.append(sub_loss)
losses = torch.stack(losses, dim=1) # torch.Size([96, 7])
return losses
def complete_mse(self, y_hat, y):
return self.sub_band_mse(y_hat, y).mean(dim=0).sum()
def gather_predictions(self, dataloader):
device = next(self.parameters()).device
predictions = []
labels = []
self.eval()
with torch.no_grad():
for batch in dataloader:
data, l = batch
data = data.to(device)
y_hat, y = self.forward(data)
mse = self.sub_band_mse(y_hat, y) # 96 x 7
predictions.append(mse)
labels += l.tolist()
predictions = torch.cat(predictions).cpu().numpy()
self.train()
return predictions, labels
class GlobalCAE(nn.Module):
def __init__(self, F=20, T=80, norm='batch', activation='relu', dropout_prob=0.25):
super(GlobalCAE, self).__init__()
self.activation = activation
Norm = nn.BatchNorm2d if norm == 'batch' else nn.InstanceNorm2d
Activation = nn.ReLU if activation == 'relu' else nn.LeakyReLU
self.encoder = nn.Sequential(
nn.Conv2d(in_channels=1, out_channels=20, kernel_size=8, stride=2), # 20 x 37 x 37
Norm(20),
Activation(),
nn.Dropout(dropout_prob),
nn.Conv2d(in_channels=20, out_channels=40, kernel_size=5, stride=2), # 40 x 17 x 17
Norm(40),
Activation(),
nn.Dropout(dropout_prob),
nn.Conv2d(in_channels=40, out_channels=60, kernel_size=3, stride=2), # 60 x 8 x 8
Norm(60),
Flatten(),
nn.Linear(60*8*8, 64)
)
self.decoder = nn.Sequential(
nn.Linear(64, 60*8*8),
Reshape(60, 8, 8),
nn.ConvTranspose2d(in_channels=60, out_channels=40, kernel_size=3, stride=2),
Norm(40),
Activation(),
nn.Dropout(dropout_prob),
nn.ConvTranspose2d(in_channels=40, out_channels=20, kernel_size=5, stride=2),
Norm(20),
Activation(),
nn.Dropout(dropout_prob),
nn.ConvTranspose2d(in_channels=20, out_channels=1, kernel_size=8, stride=2),
nn.Sigmoid()
)
def forward(self, x):
#x = x[:,self.band,:,].unsqueeze(1) # select a single subspec
x = x.unsqueeze(1)
encoded = self.encoder(x)
decoded = self.decoder(encoded)
return decoded, x
@@ -123,5 +258,5 @@ class SubSpecCAE(nn.Module):
if isinstance(m, nn.Conv2d) or isinstance(m, torch.nn.Linear):
torch.nn.init.kaiming_uniform_(m.weight)
if m.bias is not None:
m.bias.data.fill_(0.02)
m.bias.data.fill_(0.01)
self.apply(weight_init)

models/utils.py (new file)

@@ -0,0 +1,2 @@
def count_parameters(model):
return sum(p.numel() for p in model.parameters() if p.requires_grad)

new_main.py (new file)

@@ -0,0 +1,101 @@
if __name__ == '__main__':
import numpy as np
import random
from tqdm import tqdm
from cfg import *
from mimii import MIMII
from models.ae import FullSubSpecCAE, SubSpecCAE
import pickle
import torch.optim as optim
from models.layers import Subspectrogram
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import roc_auc_score
from torch.utils.data import DataLoader
from sklearn.ensemble import IsolationForest
from sklearn.mixture import GaussianMixture
def train(dataset_path, machine_id, norm='batch', seed=42):
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
np.random.seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
random.seed(seed)
print(f'Training on {dataset_path.name}')
mimii = MIMII(dataset_path=dataset_path, machine_id=machine_id)
mimii.to(DEVICE)
#mimii.preprocess(n_fft=1024, hop_length=256, n_mels=80, center=False, power=2.0) # 80 x 80
tfms = Subspectrogram(SUB_SPEC_HEIGT, SUB_SPEC_HOP)
dl = mimii.train_dataloader(
segment_len=NUM_SEGMENTS,
hop_len=NUM_SEGMENT_HOPS,
batch_size=BATCH_SIZE,
num_workers=NUM_WORKERS,
shuffle=True,
transform=tfms
)
model = FullSubSpecCAE(weight_sharing=False).to(DEVICE)
# print(model(torch.randn(128, 1, 20, 80).to(DEVICE)).shape)
optimizer = optim.Adam(model.parameters(), lr=0.0005)
for epoch in range(NUM_EPOCHS):
print(f'EPOCH #{epoch+1}')
losses = []
for batch in tqdm(dl):
data, labels = batch
data = data.to(DEVICE) # torch.Size([96, 7, 20, 80]) batch x sub_specs x height x width
loss = model.train_loss(data)
optimizer.zero_grad()
loss.backward()
optimizer.step()
losses.append(loss.item())
print(f'Loss: {np.mean(losses)}')
preds, _ = model.gather_predictions(dl)
scaler = MinMaxScaler()
scaler.fit(preds)
forest = IsolationForest()
forest.fit(scaler.transform(preds)) # fit on scaled scores, matching the transform applied at test time
datasets = mimii.test_datasets(NUM_SEGMENTS, NUM_SEGMENTS, transform=tfms)
y_true, y_score = [], []
for dataset in tqdm(datasets):
loader = DataLoader(dataset, batch_size=len(dataset), shuffle=False, num_workers=2)
preds, labels = model.gather_predictions(loader)
preds = scaler.transform(preds)
y_true.append(labels[0]) # always the same
score = forest.decision_function(preds) # one score per snippet; averaged below into a single score per file
y_score.append(score.mean())
auc = roc_auc_score(y_true, y_score)
print(f'AUC: {auc}, Dataset: {dataset_path.name}, Machine: {machine_id}, Norm: {norm}, Seed: {seed}')
return auc
loss_fn = 'mse'
results = []
for norm in ['batch']:
for seed in SEEDS:
for dataset_path in ALL_DATASET_PATHS:
if '-6_dB' in dataset_path.name:
for machine_id in [4]:
auc = train(dataset_path, machine_id, norm, seed)
results.append([dataset_path.name, machine_id, seed, -1, norm, auc])
with open(f'full234_hard_{norm}_{loss_fn}.pkl', 'wb') as f:
pickle.dump(results, f)

plots/__init__.py (new, empty file)

(new file)

@@ -0,0 +1,60 @@
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import librosa
import librosa.display
# set LaTeX font
# ==============
nice_fonts = {
# Use LaTeX to write all text
"text.usetex": True,
"font.family": "serif",
# Use 16pt font in plots (matplotlib default is 10pt)
"axes.labelsize": 16, # default 10
"font.size": 16, # default 10
# Make the legend/label fonts a little smaller
"legend.fontsize": 14, # default 8
"xtick.labelsize": 14, # default 8
"ytick.labelsize": 14, # default 8
}
mpl.rcParams.update(nice_fonts)
mpl.rcParams['axes.unicode_minus'] = False
# TODO LaTex
fan_normal = '../data/mimii/-6_dB_fan/id_04/normal/00000042.wav'
fan_anomaly = '../data/mimii/-6_dB_fan/id_04/abnormal/00000048.wav'
pump_normal = '../data/mimii/-6_dB_pump/id_04/normal/00000042.wav'
pump_anomaly = '../data/mimii/-6_dB_pump/id_04/abnormal/00000042.wav'
slider_normal = '../data/mimii/-6_dB_slider/id_04/normal/00000042.wav'
slider_anomaly = '../data/mimii/-6_dB_slider/id_04/abnormal/00000042.wav'
valve_normal = '../data/mimii/-6_dB_valve/id_04/normal/00000042.wav'
valve_anomaly = '../data/mimii/-6_dB_valve/id_04/abnormal/00000042.wav'
fig = plt.figure(figsize=(17, 5))
for i, (p, title) in enumerate(zip([fan_normal, pump_normal, slider_normal, valve_normal, fan_anomaly, pump_anomaly, slider_anomaly, valve_anomaly],
['Fan', 'Pump', 'Slider', 'Valve', 'Fan', 'Pump', 'Slider', 'Valve'])):
plt.subplot(2, 4, i+1)
audio, sr = librosa.load(p, sr=None, mono=True)
# n_fft=1024, hop_length=256, n_mels=80, center=False, power=2.0
S = librosa.feature.melspectrogram(y=audio, sr=sr, n_fft=1024, hop_length=256, n_mels=80, center=False, power=2.0)
S_dB = librosa.power_to_db(S, ref=np.max)
librosa.display.specshow(S_dB, x_axis='s' if i == 4 else None, hop_length=256,
y_axis='mel' if i == 4 else None, sr=16000, cmap='viridis')
if i < 4:
plt.title(title)
else:
plt.title(title + ' malfunction')
cbar_ax = fig.add_axes([0.94, 0.18, 0.015, 0.7])
cmap = mpl.cm.viridis
norm = mpl.colors.Normalize(vmin=-80, vmax=0)
cb1 = mpl.colorbar.ColorbarBase(cbar_ax, cmap=cmap,
norm=norm,
orientation='vertical',
format='%+2.0f dB')
plt.tight_layout()
fig.subplots_adjust(right=0.93)
plt.savefig('normal_vs_abnormal.png')

plots/playground.py (new file)

@@ -0,0 +1,64 @@
import librosa
from matplotlib import pyplot as plt
import numpy as np
from models.utils import count_parameters
from models.ae import SubSpecCAE, AE, GlobalCAE
from cfg import ALL_DATASET_PATHS
import librosa.display
import librosa.feature
import seaborn as sns
purple = (126/255, 87/255, 194/255)
params_subspecae = count_parameters(SubSpecCAE())
params_ae = count_parameters(AE())
params_globalcae = count_parameters(GlobalCAE())
print(f'#Parameters SubSpecCAE: {params_subspecae}')
print(f'#Parameters AE: {params_ae}')
print(f'#SubSpecAe/#AE: {(params_subspecae/params_ae):.2f}')
print(f'#GlobalCAE: {params_globalcae}')
print(f'#GlobalCAE/#SubSpecCAE: {params_globalcae/params_subspecae}')
fig = plt.figure(figsize=(10, 5))
slider_normal = '../data/mimii/-6_dB_slider/id_04/normal/00000042.wav'
audio, sr = librosa.load(slider_normal)
waveplot = librosa.display.waveplot(audio, sr=sr)
plt.tight_layout()
plt.savefig('wavplot.png')
audios = list((ALL_DATASET_PATHS[1] / 'id_00' / 'normal').glob('*.wav'))
print(str(audios[80]))
audio, sr = librosa.load(str(audios[80]), sr=None)
mel_spec = librosa.feature.melspectrogram(audio, sr=sr, n_fft=1024, hop_length=256, n_mels=80, center=False, power=2.0)
mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
librosa.display.specshow(mel_spec_db, hop_length=256, x_axis='s', y_axis='mel', cmap='viridis', sr=sr)
plt.savefig('notadjusted.png')
plt.clf()
centroids = []
for p in audios:
audio, sr = librosa.load(str(p), sr=None)
spectral_centroids = librosa.feature.spectral_centroid(audio, sr=sr)[0]
centroids += spectral_centroids.tolist()
sns.distplot(centroids, hist=True, kde=True,
color=purple,
hist_kws={'edgecolor':'black'},
kde_kws={'linewidth': 2})
plt.xlabel('Spectral centroid (Hz)')
plt.ylabel('Density')
plt.title('Spectral centroid distribution')
plt.tight_layout()
plt.savefig('hist.png')
def get_bands(centroids, n_mels=80):
std = np.std(centroids)
mu = np.mean(centroids)
return int((mu-3*std)/n_mels), int((mu+3*std)/n_mels)
print(get_bands(centroids))

transfer_learning/__init__.py (new, empty file)

transfer_learning/extractors.py (new file)

@@ -0,0 +1,149 @@
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset
import torch.nn as nn
from PIL import Image
from tqdm import tqdm
import pandas as pd
class FeatureExtractor:
supported_extractors = ['resnet18', 'resnet34', 'resnet50',
'alexnet_fc6', 'alexnet_fc7', 'vgg16',
'densenet121', 'inception_v3', 'squeezenet']
def __init__(self, version='resnet18', device='cpu'):
assert version.lower() in self.supported_extractors
self.device = device
self.version = version
self.F = self.__choose_feature_extractor(version)
for param in self.F.parameters():
param.requires_grad = False
self.F.eval()
self.input_size = (299, 299) if version.lower() == 'inception_v3' else (224, 224)
self.transforms = transforms.Compose([
transforms.Resize(self.input_size),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225])
])
def to(self, device):
self.device = device
self.F = self.F.to(self.device)
return self
def __choose_feature_extractor(self, version):
if 'resnet' in version.lower():
v = int(version[-2:])
if v == 18:
resnet = torchvision.models.resnet18(pretrained=True)
elif v == 34:
resnet = torchvision.models.resnet34(pretrained=True)
elif v == 50:
resnet = torchvision.models.resnet50(pretrained=True)
return nn.Sequential(*list(resnet.children())[:-1])
elif 'alexnet' in version.lower():
v = int(version[-1])
alexnet = torchvision.models.alexnet(pretrained=True)
if v == 7:
f = nn.Sequential(*list(alexnet.classifier.children())[:-2])
elif v == 6:
f = nn.Sequential(*list(alexnet.classifier.children())[:-5])
alexnet.classifier = f
return alexnet
elif version.lower() == 'vgg16':
vgg = torchvision.models.vgg16_bn(pretrained=True)
classifier = list(
vgg.classifier.children())[:4]
vgg.classifier = nn.Sequential(*classifier)
return vgg
elif version.lower() == 'densenet121':
densenet = torchvision.models.densenet121(pretrained=True)
avg_pool = nn.AvgPool2d(kernel_size=7)
densenet = nn.Sequential(*list(densenet.children())[:-1])
densenet.add_module('avg_pool', avg_pool)
return densenet
elif version.lower() == 'inception_v3':
inception = torchvision.models.inception_v3(pretrained=True)
f = nn.Sequential(*list(inception.children())[:-1])
f._modules.pop('13')
f.add_module('global average', nn.AvgPool2d(26))
return f
elif version.lower() == 'squeezenet':
squeezenet = torchvision.models.squeezenet1_1(pretrained=True)
f = torch.nn.Sequential(
squeezenet.features,
torch.nn.AdaptiveAvgPool2d(output_size=(2, 2))
)
return f
else:
raise NotImplementedError('The feature extractor you requested is not yet supported')
@property
def feature_size(self):
x = torch.randn(size=(1, 3, *self.input_size)).to(self.device)
return self.F(x).squeeze().shape[0]
def __call__(self, batch):
batch = self.transforms(batch)
if len(batch.shape) <= 3:
batch = batch.unsqueeze(0)
return self.F(batch).view(batch.shape[0], -1).squeeze()
def from_image_folder(self, folder_path, extension='jpg'):
sorted_files = sorted(list(folder_path.glob(f'*.{extension}')))
split_names = [x.stem.split('_') for x in sorted_files]
names = [x[0] for x in split_names]
seq_ids = [x[1] for x in split_names]
X = []
for i, p_img in enumerate(tqdm(sorted_files)):
x = Image.open(p_img)
features = self(x)
X.append([names[i], seq_ids[i]] + features.tolist())
return pd.DataFrame(X, columns=['name', 'seq_id', *(f'feature_{i}' for i in range(self.feature_size))])
class AudioTransferLearningImageDataset(Dataset):
def __init__(self, root_or_files, extension='jpg', input_size=224):
self.root_or_files = root_or_files
if isinstance(root_or_files, list):
self.files = root_or_files
else:
self.files = list(self.root_or_files.glob(f'*.{extension}'))
self.transforms = transforms.Compose([
transforms.Resize(input_size),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225])
])
def process_name(self, name):
split_name = name.stem.split('_')
return split_name[0], split_name[1] #name, seq_id
def __getitem__(self, item):
p_img = self.files[item]
x = Image.open(p_img)
x = self.transforms(x)
name, seq_id = self.process_name(p_img)
return x, name, seq_id
def __len__(self):
return len(self.files)
if __name__ == '__main__':
from pathlib import Path
version='resnet18'
extractor = FeatureExtractor(version=version)
models = ['slider', 'pump', 'fan']
model_ids = [0, 2, 4, 6]
for model in models:
for model_id in model_ids:
df = extractor.from_image_folder(Path( f'/home/robert/coding/audio_anomaly_detection/data/mimii/-6_dB_{model}/id_0{model_id}/normal/melspec_images/'))
df.to_csv(Path(f'/home/robert/coding/audio_anomaly_detection/data/mimii/-6_dB_{model}/id_0{model_id}/normal/{version}_features.csv'), index=False)
del df
df = extractor.from_image_folder(Path( f'/home/robert/coding/audio_anomaly_detection/data/mimii/-6_dB_{model}/id_0{model_id}/abnormal/melspec_images/'))
df.to_csv(Path(f'/home/robert/coding/audio_anomaly_detection/data/mimii/-6_dB_{model}/id_0{model_id}/abnormal/{version}_features.csv'), index=False)

transfer_learning/main.py (new file)

@@ -0,0 +1,134 @@
from cfg import ALL_DATASET_PATHS
from pathlib import Path
import pandas as pd
import numpy as np
from sklearn.mixture import GaussianMixture, BayesianGaussianMixture
from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM
from sklearn.neighbors import KernelDensity
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from transfer_learning.extractors import AudioTransferLearningImageDataset, FeatureExtractor
from torch.utils.data import DataLoader
import torch.nn as nn
from torch.optim import Adam
from tqdm import tqdm
from transfer_learning.my_model import MyModel
import random
np.random.seed(1337)
random.seed(1337)
# Switch
OURS = False
# Parameters
leave_n_out = 150
model = 'fan'
model_id = 0
# get all wav files
wavs_normal = Path(f'/home/robert/coding/audio_anomaly_detection/data/mimii/-6_dB_{model}/id_0{model_id}/normal').glob('*.wav')
wavs_abnormal = Path(f'/home/robert/coding/audio_anomaly_detection/data/mimii/-6_dB_{model}/id_0{model_id}/abnormal').glob('*.wav')
# as list + shuffle
normal_files = list(wavs_normal)
abnormal_files = list(wavs_abnormal)
random.shuffle(normal_files)
random.shuffle(abnormal_files)
normal_train_files = normal_files[leave_n_out:]
normal_test_files = normal_files[:leave_n_out]
abnormal_test_files = abnormal_files[:leave_n_out]
print(len(normal_train_files), len(normal_test_files), len(normal_files))
print(len(abnormal_test_files))
if not OURS:
normal_df = pd.read_csv(Path(f'/home/robert/coding/audio_anomaly_detection/data/mimii/-6_dB_{model}/id_0{model_id}/normal/resnet18_features.csv'))
abnormal_df = pd.read_csv(Path(f'/home/robert/coding/audio_anomaly_detection/data/mimii/-6_dB_{model}/id_0{model_id}/abnormal/resnet18_features.csv'))
print(normal_df.shape, abnormal_df.shape)
normal_trainset = normal_df[normal_df.name.isin([int(x.stem.split('_')[0]) for x in normal_train_files])]
normal_testset = normal_df[normal_df.name.isin([int(x.stem.split('_')[0]) for x in normal_test_files])]
abnormal_testset = abnormal_df[abnormal_df.name.isin([int(x.stem.split('_')[0]) for x in abnormal_test_files])]
print(f'normal train: {normal_trainset.shape}')
print(f'normal test: {normal_testset.shape}')
print(f'abnormal test: {abnormal_testset.shape}')
only_features = lambda x: x[:, 2:]
print(f'#Normal files:{len(normal_files)}\t#Abnormal files: {len(abnormal_files)}')
print(f'#Normal test snippets normal: {len(normal_testset)}, #Abnormal test snippets: {len(abnormal_testset)}')
#model = BayesianGaussianMixture(n_components=64, max_iter=150, covariance_type='diag')
#model = GaussianMixture(n_components=64, covariance_type='diag')
#model = OneClassSVM(nu=1e-2)
#model = IsolationForest(n_estimators=128, contamination='auto')
model = KernelDensity(kernel='gaussian', bandwidth=0.1)
scaler = Pipeline(
[('Scaler', StandardScaler())]
)
X = only_features(normal_trainset.values)
scaler.fit(X)
model.fit(scaler.transform(X))
scores_normal_test = model.score_samples(
scaler.transform(only_features(normal_testset.values))
)
scores_abnormal_test = model.score_samples(
scaler.transform(only_features(abnormal_testset.values))
)
normal_with_scores = normal_testset[['name', 'seq_id']].copy()
normal_with_scores['score'] = -scores_normal_test
normal_with_scores['label'] = 0
normal_grouped = normal_with_scores.groupby(by=['name']).mean()
abnormal_with_scores = abnormal_testset[['name', 'seq_id']].copy()
abnormal_with_scores['score'] = -scores_abnormal_test
abnormal_with_scores['label'] = 1
abnormal_grouped = abnormal_with_scores.groupby(by=['name']).mean()
scores_normal_grouped = normal_grouped.score.values.tolist()
scores_abnormal_grouped = abnormal_grouped.score.values.tolist()
labels_normal = [0] * len(scores_normal_grouped)
labels_abnormal = [1] * len(scores_abnormal_grouped)
labels = labels_normal + labels_abnormal
scores = scores_normal_grouped + scores_abnormal_grouped
auc = roc_auc_score(y_score=scores, y_true=labels)
print(auc)
else:
dataset = AudioTransferLearningImageDataset(root_or_files=normal_train_files)
dataloader = DataLoader(dataset, batch_size=80, shuffle=True)
F = FeatureExtractor(version='resnet18').to('cuda')
feature_size = F.feature_size
criterion = nn.MSELoss()
my_model = MyModel(F).to('cuda')
for e in range(0):
losses = []
for batch in tqdm(dataloader):
imgs, names, seq_ids = batch
imgs = imgs.to('cuda')
target, prediction = my_model(imgs) # forward returns (target, prediction)
loss = criterion(prediction, target)
my_model.optimizer.zero_grad()
loss.backward()
my_model.optimizer.step()
losses.append(loss.item())
print(sum(losses)/len(losses))
# test
dataset = AudioTransferLearningImageDataset(root_or_files=normal_test_files)
dataloader = DataLoader(dataset, batch_size=80, shuffle=False)
my_model.scores_from_dataloader(dataloader)

transfer_learning/my_model.py (new file)

@@ -0,0 +1,87 @@
import torch
import torch.nn as nn
from torch.optim import Adam
class MyModel(nn.Module):
def __init__(self, feature_extractor):
super(MyModel, self).__init__()
self.feature_extractor = feature_extractor
feature_size = feature_extractor.feature_size
self.noise = nn.Sequential(
nn.Linear(feature_size, feature_size // 2),
nn.ELU(),
nn.Linear(feature_size // 2, feature_size // 4)
)
for p in self.noise.parameters():
p.requires_grad = False
self.noise.eval()
self.student = nn.Sequential(
nn.Linear(feature_size, feature_size // 2),
nn.ELU(),
nn.Linear(feature_size // 2, feature_size // 4),
nn.ELU(),
nn.Linear(feature_size // 4, feature_size // 4)
)
self.optimizer = Adam(self.student.parameters(), lr=0.0001, weight_decay=1e-7, amsgrad=True)
def forward(self, imgs):
features = self.feature_extractor.F(imgs).squeeze()
target = self.noise(features)
prediction = self.student(features)
return target, prediction
def scores_from_dataloader(self, dataloader):
scores = []
with torch.no_grad():
for batch in dataloader:
imgs, names, seq_ids = batch
imgs = imgs.to('cuda')
target, prediction = self.forward(imgs)
preds = torch.sum((prediction - target) ** 2, dim=tuple(range(1, target.dim())))
scores.extend(preds.cpu().tolist())
return scores
class HyperFraud(nn.Module):
def __init__(self, hidden_dim=256):
super(HyperFraud, self).__init__()
self.hidden_dim = hidden_dim
self.mean = torch.randn(size=(1, 512))
self.std = torch.randn(size=(1, 512))
self.W_forget = nn.Sequential(
nn.Linear(512, hidden_dim)
)
self.U_forget = nn.Sequential(
nn.Linear(hidden_dim, hidden_dim)
)
self.W_hidden = nn.Sequential(
nn.Linear(512, 256)
)
self.U_hidden = nn.Sequential(
nn.Linear(hidden_dim, 256)
)
self.b_forget = nn.Parameter(torch.randn(1, hidden_dim))
self.b_hidden = nn.Parameter(torch.randn(1, hidden_dim))
def forward(self, data, max_seq_len=10):
# data. batch x seqs x dim
# random seq sampling
h_prev = [torch.zeros(size=(data.shape[0], self.hidden_dim))]
for i in range(0, max_seq_len):
x_t = data[:, i]
h_t_prev = h_prev[-1]
W_x_t = self.W_forget(x_t)
U_h_prev = self.U_forget(h_t_prev)
forget_t = torch.sigmoid(W_x_t + U_h_prev + self.b_forget)
h_t = forget_t * h_t_prev + (1.0 - forget_t) * torch.tanh(self.W_hidden(x_t) + self.U_hidden(forget_t * h_t_prev) + self.b_hidden)
h_prev.append(h_t)
return torch.stack(h_prev[1:], dim=1)
#hf = HyperFraud()
#rand_input = torch.randn(size=(42, 10, 512))
#print(hf(rand_input).shape)

(new file)

@@ -0,0 +1,82 @@
import numpy as np
from tqdm import tqdm
import librosa
import librosa.display
from matplotlib import pyplot as plt
from pathlib import Path
class Preprocessor:
def __init__(self, sr=16000, n_mels=64, n_fft=1024, hop_length=256, chunk_size=64, chunk_hop=32, cmap='viridis'):
self.sr = sr
self.n_fft = n_fft
self.n_mels = n_mels
self.hop_length = hop_length
self.chunk_size = chunk_size
self.chunk_hop = chunk_hop
self.cmap = cmap
def process_audio(self, path, out_folder=None):
mel_spec = self.to_mel_spec(path)
for count, i in enumerate(range(0, mel_spec.shape[1], self.chunk_hop)):
chunk = mel_spec[:, i:i+self.chunk_size]
if chunk.shape[1] < self.chunk_size: # slicing never raises IndexError; skip short tail chunks explicitly
continue
out_path = out_folder / f'{path.stem}_{count}.jpg'
self.mel_spec_to_img(chunk, out_path) # todo must adjust outpath name
def to_mel_spec(self, path):
audio, sr = librosa.load(str(path), sr=self.sr, mono=True)
spectrogram = librosa.stft(audio,
n_fft=self.n_fft,
hop_length=self.n_fft // 2,
center=False)
spectrogram = librosa.feature.melspectrogram(S=np.abs(spectrogram) ** 2,
sr=sr,
n_mels=self.n_mels,
hop_length=self.hop_length)
# prepare plot
spectrogram = librosa.power_to_db(spectrogram, ref=np.max, top_db=None)
return spectrogram
def mel_spec_to_img(self, spectrogram, out_path, size=227):
# prepare plotting
fig = plt.figure(frameon=False, tight_layout=False)
fig.set_size_inches(1, 1)
ax = plt.Axes(fig, [0., 0., 1., 1.])
ax.set_axis_off()
fig.add_axes(ax)
spectrogram_axes = librosa.display.specshow(spectrogram,
hop_length=self.n_fft // 2,
fmax=self.sr/2,
sr=self.sr,
cmap=self.cmap,
y_axis='mel',
x_axis='time')
fig.add_axes(spectrogram_axes, id='spectrogram')
fig.savefig(out_path, format='jpg', dpi=size)
plt.clf()
plt.close()
def process_folder(self, folder_in, folder_out):
wavs = folder_in.glob('*.wav')
folder_out.mkdir(parents=True, exist_ok=True)
for wav in tqdm(list(wavs)):
self.process_audio(wav, folder_out)
if __name__ == '__main__':
models = ['slider', 'pump', 'fan']
model_ids = [0, 2, 4, 6]
preprocessor = Preprocessor()
for model in models:
for model_id in model_ids:
preprocessor.process_folder(Path(f'/home/robert/coding/audio_anomaly_detection/data/mimii/-6_dB_{model}/id_0{model_id}/normal'),
Path(f'/home/robert/coding/audio_anomaly_detection/data/mimii/-6_dB_{model}/id_0{model_id}/normal/melspec_images/')
)
preprocessor.process_folder(Path(f'/home/robert/coding/audio_anomaly_detection/data/mimii/-6_dB_{model}/id_0{model_id}/abnormal'),
Path(f'/home/robert/coding/audio_anomaly_detection/data/mimii/-6_dB_{model}/id_0{model_id}/abnormal/melspec_images/')
)