big update

This commit is contained in:
Robert Müller
2020-04-06 14:46:26 +02:00
parent 0f325676e5
commit 482f45df87
17 changed files with 1027 additions and 32 deletions

View File

View File

@@ -0,0 +1,149 @@
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset
import torch.nn as nn
from PIL import Image
from tqdm import tqdm
import pandas as pd
class FeatureExtractor:
    """Frozen, pretrained torchvision backbone that turns images into feature vectors.

    The selected network is truncated before its final classification layer,
    all of its parameters are frozen and it is switched to eval mode. Calling
    the instance applies ``self.transforms`` (resize, tensor conversion,
    per-channel normalisation) and returns the flattened activations.

    Attributes:
        supported_extractors: names accepted for ``version`` (case-insensitive).
        F: the truncated network.
        input_size: (height, width) every image is resized to.
        device: device ``F`` lives on and inputs are moved to.
    """

    supported_extractors = ['resnet18', 'resnet34', 'resnet50',
                            'alexnet_fc6', 'alexnet_fc7', 'vgg16',
                            'densenet121', 'inception_v3', 'squeezenet']

    def __init__(self, version='resnet18', device='cpu'):
        """Build the extractor.

        Args:
            version: one of ``supported_extractors`` (case-insensitive).
            device: device the network is placed on.

        Raises:
            ValueError: if ``version`` is not a supported extractor.
        """
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if not isinstance(version, str) or version.lower() not in self.supported_extractors:
            raise ValueError(
                f'Unsupported feature extractor {version!r}; '
                f'choose one of {self.supported_extractors}'
            )
        self.device = device
        self.version = version
        self.F = self.__choose_feature_extractor(version)
        for param in self.F.parameters():
            param.requires_grad = False
        self.F.eval()
        # Honour the requested device right away; previously only ``to()`` moved the network.
        self.F = self.F.to(self.device)
        # The old test ``== 'inception'`` could never match 'inception_v3'.
        self.input_size = (299, 299) if 'inception' in version.lower() else (224, 224)
        self.transforms = transforms.Compose([
            transforms.Resize(self.input_size),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

    def to(self, device):
        """Move the network to ``device``, remember it for inputs, and return ``self``."""
        self.device = device
        self.F = self.F.to(self.device)
        return self

    def __choose_feature_extractor(self, version):
        """Return the truncated pretrained network for ``version``.

        Raises:
            NotImplementedError: if ``version`` names no known extractor.
        """
        name = version.lower()

        if name in ('resnet18', 'resnet34', 'resnet50'):
            resnet = getattr(torchvision.models, name)(pretrained=True)
            # Drop the last child (the classification layer).
            return nn.Sequential(*list(resnet.children())[:-1])

        if name in ('alexnet_fc6', 'alexnet_fc7'):
            alexnet = torchvision.models.alexnet(pretrained=True)
            # Number of trailing classifier layers to cut off for each variant.
            n_dropped = {'alexnet_fc6': 5, 'alexnet_fc7': 2}[name]
            alexnet.classifier = nn.Sequential(
                *list(alexnet.classifier.children())[:-n_dropped])
            return alexnet

        if name == 'vgg16':
            # Note: this loads the batch-norm variant of VGG16.
            vgg = torchvision.models.vgg16_bn(pretrained=True)
            vgg.classifier = nn.Sequential(*list(vgg.classifier.children())[:4])
            return vgg

        if name == 'densenet121':
            densenet = torchvision.models.densenet121(pretrained=True)
            densenet = nn.Sequential(*list(densenet.children())[:-1])
            densenet.add_module('avg_pool', nn.AvgPool2d(kernel_size=7))
            return densenet

        if name == 'inception_v3':
            inception = torchvision.models.inception_v3(pretrained=True)
            f = nn.Sequential()
            # Drop the auxiliary head and the final classifier by name rather
            # than by position, so the result does not depend on how many
            # children the installed torchvision version registers.
            for child_name, child in inception.named_children():
                if child_name not in ('AuxLogits', 'fc'):
                    f.add_module(child_name, child)
            # Pool over whatever spatial size remains instead of assuming 26x26.
            f.add_module('global average', nn.AdaptiveAvgPool2d(output_size=(1, 1)))
            return f

        if name == 'squeezenet':
            squeezenet = torchvision.models.squeezenet1_1(pretrained=True)
            return torch.nn.Sequential(
                squeezenet.features,
                torch.nn.AdaptiveAvgPool2d(output_size=(2, 2))
            )

        raise NotImplementedError('The feature extractor you requested is not yet supported')

    @property
    def feature_size(self):
        """Length of the flattened feature vector produced for a single image.

        Determined by pushing one random input of ``input_size`` through ``F``.
        The output is flattened exactly as in ``__call__`` so both always agree.
        """
        with torch.no_grad():
            x = torch.randn(size=(1, 3, *self.input_size)).to(self.device)
            return self.F(x).reshape(1, -1).shape[1]

    def __call__(self, batch):
        """Transform ``batch`` and return its flattened features.

        Args:
            batch: input accepted by ``self.transforms``.

        Returns:
            Tensor with one flattened feature vector per image; a single image
            yields a 1-D tensor because singleton dimensions are squeezed.
        """
        batch = self.transforms(batch)
        if batch.dim() <= 3:
            batch = batch.unsqueeze(0)
        # Inputs must live on the same device as the network.
        batch = batch.to(self.device)
        return self.F(batch).reshape(batch.shape[0], -1).squeeze()

    def from_image_folder(self, folder_path, extension='jpg'):
        """Extract features for every ``*.<extension>`` image in ``folder_path``.

        File stems are expected to look like ``<name>_<seq_id>``.

        Args:
            folder_path: directory containing the images (``str`` or ``Path``).
            extension: file extension without the leading dot.

        Returns:
            ``pandas.DataFrame`` with columns ``name``, ``seq_id`` and
            ``feature_0 .. feature_{k-1}``, one row per image in sorted file order.
        """
        from pathlib import Path  # local import: the module only imports Path in its script section

        sorted_files = sorted(Path(folder_path).glob(f'*.{extension}'))
        rows = []
        for p_img in tqdm(sorted_files):
            name, seq_id = p_img.stem.split('_')[:2]
            # Context manager closes the file handle; convert() guards
            # against grayscale/RGBA inputs that would break Normalize.
            with Image.open(p_img) as img:
                features = self(img.convert('RGB'))
            rows.append([name, seq_id] + features.reshape(-1).tolist())
        # Take the width from the data itself; only an empty folder needs the probing pass.
        n_features = len(rows[0]) - 2 if rows else self.feature_size
        columns = ['name', 'seq_id', *(f'feature_{i}' for i in range(n_features))]
        return pd.DataFrame(rows, columns=columns)
class AudioTransferLearningImageDataset(Dataset):
    """Dataset of spectrogram images named ``<name>_<seq_id>.<extension>``.

    Each item is ``(image_tensor, name, seq_id)`` where the image has been
    resized, converted to a tensor and normalised per channel.
    """

    def __init__(self, root_or_files, extension='jpg', input_size=224):
        """Create the dataset.

        Args:
            root_or_files: either a list/tuple of image paths, or a directory
                whose ``*.<extension>`` files are used.
            extension: file extension (without dot) used when globbing a directory.
            input_size: size handed to ``transforms.Resize``.
        """
        self.root_or_files = root_or_files
        self.files = self._collect_files(root_or_files, extension)
        self.transforms = transforms.Compose([
            transforms.Resize(input_size),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

    @staticmethod
    def _collect_files(root_or_files, extension):
        """Return the image paths described by ``root_or_files`` as a list of ``Path``."""
        from pathlib import Path  # local import: Path is not imported at module level

        if isinstance(root_or_files, (list, tuple)):
            return [Path(f) for f in root_or_files]
        # The directory branch used to read the undefined ``self.root``.
        # Sorting makes the item order independent of the file system.
        return sorted(Path(root_or_files).glob(f'*.{extension}'))

    def process_name(self, name):
        """Split a path's stem at ``_`` and return ``(name, seq_id)``."""
        split_name = name.stem.split('_')
        return split_name[0], split_name[1]  # name, seq_id

    def __getitem__(self, item):
        """Load, transform and return image ``item`` together with its name and seq_id."""
        p_img = self.files[item]
        # Close the file handle once the tensor is built; force three channels for Normalize.
        with Image.open(p_img) as img:
            x = self.transforms(img.convert('RGB'))
        name, seq_id = self.process_name(p_img)
        return x, name, seq_id

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.files)
if __name__ == '__main__':
    # Batch job: for every machine type / machine id, extract features from the
    # normal and abnormal spectrogram images and store them as one CSV each.
    from pathlib import Path

    version = 'resnet18'
    extractor = FeatureExtractor(version=version)
    models = ['slider', 'pump', 'fan']
    model_ids = [0, 2, 4, 6]
    for model in models:
        for model_id in model_ids:
            # (image folder, target csv) pairs, normal first, then abnormal.
            jobs = (
                (Path(f'/home/robert/coding/audio_anomaly_detection/data/mimii/-6_dB_{model}/id_0{model_id}/normal/melspec_images/'),
                 Path(f'/home/robert/coding/audio_anomaly_detection/data/mimii/-6_dB_{model}/id_0{model_id}/normal/{version}_features.csv')),
                (Path(f'/home/robert/coding/audio_anomaly_detection/data/mimii/-6_dB_{model}/id_0{model_id}/abnormal/melspec_images/'),
                 Path(f'/home/robert/coding/audio_anomaly_detection/data/mimii/-6_dB_{model}/id_0{model_id}/abnormal/{version}_features.csv')),
            )
            for image_folder, csv_path in jobs:
                # No intermediate name is kept, so each frame is released right after writing.
                extractor.from_image_folder(image_folder).to_csv(csv_path, index=False)

134
transfer_learning/main.py Normal file
View File

@@ -0,0 +1,134 @@
from cfg import ALL_DATASET_PATHS
from pathlib import Path
import pandas as pd
import numpy as np
from sklearn.mixture import GaussianMixture, BayesianGaussianMixture
from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM
from sklearn.neighbors import KernelDensity
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from transfer_learning.extractors import AudioTransferLearningImageDataset, FeatureExtractor
from torch.utils.data import DataLoader
import torch.nn as nn
from torch.optim import Adam
from tqdm import tqdm
from transfer_learning.my_model import MyModel
import random
# Seed both RNGs used below so that runs are repeatable.
np.random.seed(1337)
random.seed(1337)

# Switch: False -> density model on pre-extracted CSV features,
#         True  -> MyModel on spectrogram images.
OURS = False

# Parameters
leave_n_out = 150  # recordings held out per class for testing
model = 'fan'
model_id = 0

data_root = Path(f'/home/robert/coding/audio_anomaly_detection/data/mimii/-6_dB_{model}/id_0{model_id}')
normal_dir = data_root / 'normal'
abnormal_dir = data_root / 'abnormal'

# Get all wav files. glob() yields files in file-system order, so sort first;
# otherwise the seeded shuffle would still give a different split per machine.
normal_files = sorted(normal_dir.glob('*.wav'))
abnormal_files = sorted(abnormal_dir.glob('*.wav'))
random.shuffle(normal_files)
random.shuffle(abnormal_files)

normal_train_files = normal_files[leave_n_out:]
normal_test_files = normal_files[:leave_n_out]
abnormal_test_files = abnormal_files[:leave_n_out]
print(len(normal_train_files), len(normal_test_files), len(normal_files))
print(len(abnormal_test_files))

if not OURS:
    normal_df = pd.read_csv(normal_dir / 'resnet18_features.csv')
    abnormal_df = pd.read_csv(abnormal_dir / 'resnet18_features.csv')
    print(normal_df.shape, abnormal_df.shape)

    def recording_ids(files):
        """Integer recording ids (the part of the stem before the first '_')."""
        return {int(f.stem.split('_')[0]) for f in files}

    def only_features(values):
        """Drop the two leading id columns (name, seq_id)."""
        return values[:, 2:]

    normal_trainset = normal_df[normal_df.name.isin(recording_ids(normal_train_files))]
    normal_testset = normal_df[normal_df.name.isin(recording_ids(normal_test_files))]
    abnormal_testset = abnormal_df[abnormal_df.name.isin(recording_ids(abnormal_test_files))]
    print(f'normal train: {normal_trainset.shape}')
    print(f'normal test: {normal_testset.shape}')
    print(f'abnormal test: {abnormal_testset.shape}')
    print(f'#Normal files:{len(normal_files)}\t#Abnormal files: {len(abnormal_files)}')
    print(f'#Normal test snippets normal: {len(normal_testset)}, #Abnormal test snippets: {len(abnormal_testset)}')

    # Alternative detectors that were tried:
    #   BayesianGaussianMixture(n_components=64, max_iter=150, covariance_type='diag')
    #   GaussianMixture(n_components=64, covariance_type='diag')
    #   OneClassSVM(nu=1e-2)
    #   IsolationForest(n_estimators=128, contamination='auto')
    # Named ``detector`` so it no longer overwrites the machine-type string ``model``.
    detector = KernelDensity(kernel='gaussian', bandwidth=0.1)
    scaler = Pipeline(
        [('Scaler', StandardScaler())]
    )

    # Fit scaler and detector on normal training snippets only.
    X = only_features(normal_trainset.values)
    scaler.fit(X)
    detector.fit(scaler.transform(X))

    scores_normal_test = detector.score_samples(
        scaler.transform(only_features(normal_testset.values))
    )
    scores_abnormal_test = detector.score_samples(
        scaler.transform(only_features(abnormal_testset.values))
    )

    # Negate so that a larger score means "more anomalous", then average the
    # snippet scores of each recording.
    normal_with_scores = normal_testset[['name', 'seq_id']].copy()
    normal_with_scores['score'] = -scores_normal_test
    normal_with_scores['label'] = 0
    normal_grouped = normal_with_scores.groupby(by=['name']).mean()

    abnormal_with_scores = abnormal_testset[['name', 'seq_id']].copy()
    abnormal_with_scores['score'] = -scores_abnormal_test
    abnormal_with_scores['label'] = 1  # was 0: abnormal rows carried the normal label
    abnormal_grouped = abnormal_with_scores.groupby(by=['name']).mean()

    scores_normal_grouped = normal_grouped.score.values.tolist()
    scores_abnormal_grouped = abnormal_grouped.score.values.tolist()
    labels_normal = [0] * len(scores_normal_grouped)
    labels_abnormal = [1] * len(scores_abnormal_grouped)
    labels = labels_normal + labels_abnormal
    scores = scores_normal_grouped + scores_abnormal_grouped

    auc = roc_auc_score(y_score=scores, y_true=labels)
    print(auc)
else:
    def melspec_images(wav_files):
        """Spectrogram chunks belonging to the given recordings.

        The image dataset opens its files with PIL, so it needs the rendered
        chunks ``melspec_images/<wav stem>_<n>.jpg`` rather than the wav files.
        """
        images = []
        for wav in wav_files:
            images.extend(sorted((wav.parent / 'melspec_images').glob(f'{wav.stem}_*.jpg')))
        return images

    dataset = AudioTransferLearningImageDataset(root_or_files=melspec_images(normal_train_files))
    dataloader = DataLoader(dataset, batch_size=80, shuffle=True)
    F = FeatureExtractor(version='resnet18').to('cuda')
    criterion = nn.MSELoss()
    my_model = MyModel(F).to('cuda')

    n_epochs = 0  # training is currently switched off
    for e in range(n_epochs):
        losses = []
        for batch in tqdm(dataloader):
            imgs, names, seq_ids = batch
            imgs = imgs.to('cuda')
            # MyModel.forward returns (target, prediction), in that order.
            target, prediction = my_model(imgs)
            loss = criterion(prediction, target)
            my_model.optimizer.zero_grad()
            loss.backward()
            my_model.optimizer.step()
            losses.append(loss.item())
        print(sum(losses)/len(losses))

    # test
    dataset = AudioTransferLearningImageDataset(root_or_files=melspec_images(normal_test_files))
    dataloader = DataLoader(dataset, batch_size=80, shuffle=False)
    my_model.scores_from_dataloader(dataloader)

View File

@@ -0,0 +1,87 @@
import torch
import torch.nn as nn
from torch.optim import Adam
class MyModel(nn.Module):
    """Trainable ``student`` network regressing the output of a frozen random ``noise`` network.

    Both networks consume the features produced by ``feature_extractor.F``.
    Only ``student`` is optimised; the squared difference between the two
    outputs is used as a per-sample score.
    """

    def __init__(self, feature_extractor):
        """Build both heads and the optimiser.

        Args:
            feature_extractor: object exposing ``feature_size`` (int) and ``F``
                (callable mapping an image batch to features). It is stored as
                a plain attribute, so it must be moved to the right device by
                the caller.
        """
        super(MyModel, self).__init__()
        self.feature_extractor = feature_extractor
        feature_size = feature_extractor.feature_size
        # Randomly initialised target network; never trained.
        self.noise = nn.Sequential(
            nn.Linear(feature_size, feature_size // 2),
            nn.ELU(),
            nn.Linear(feature_size // 2, feature_size // 4)
        )
        for p in self.noise.parameters():
            p.requires_grad = False
        self.noise.eval()
        self.student = nn.Sequential(
            nn.Linear(feature_size, feature_size // 2),
            nn.ELU(),
            nn.Linear(feature_size // 2, feature_size // 4),
            nn.ELU(),
            nn.Linear(feature_size // 4, feature_size // 4)
        )
        self.optimizer = Adam(self.student.parameters(), lr=0.0001, weight_decay=1e-7, amsgrad=True)

    def forward(self, imgs):
        """Return ``(target, prediction)`` for an image batch, each of shape (batch, features // 4)."""
        features = self.feature_extractor.F(imgs)
        # Flatten everything but the batch axis. A bare squeeze() also removed
        # the batch axis when the batch held a single image.
        features = features.reshape(features.shape[0], -1)
        target = self.noise(features)
        prediction = self.student(features)
        return target, prediction

    def scores_from_dataloader(self, dataloader):
        """Compute the squared prediction error of every sample in ``dataloader``.

        Args:
            dataloader: iterable of ``(imgs, names, seq_ids)`` batches.

        Returns:
            1-D tensor with one score per sample, in iteration order
            (empty if the loader yields nothing).
        """
        # Follow the model's own device instead of assuming CUDA.
        device = next(self.student.parameters()).device
        scores = []
        with torch.no_grad():
            for imgs, names, seq_ids in dataloader:
                imgs = imgs.to(device)
                target, prediction = self.forward(imgs)
                preds = torch.sum((prediction - target) ** 2, dim=1)
                print(preds.shape)
                scores.append(preds)
        if not scores:
            return torch.empty(0, device=device)
        return torch.cat(scores)
class HyperFraud(nn.Module):
    """Recurrent cell with a single forget gate, unrolled over a sequence.

    Per time step::

        f_t = sigmoid(W_f x_t + U_f h_{t-1} + b_f)
        h_t = f_t * h_{t-1} + (1 - f_t) * tanh(W_h x_t + U_h (f_t * h_{t-1}) + b_h)
    """

    def __init__(self, hidden_dim=256, input_dim=512):
        """Create the cell.

        Args:
            hidden_dim: size of the hidden state.
            input_dim: size of each input vector (previously fixed at 512).
        """
        super(HyperFraud, self).__init__()
        self.hidden_dim = hidden_dim
        self.input_dim = input_dim
        # Plain tensors (neither parameters nor buffers); not used in forward().
        self.mean = torch.randn(size=(1, input_dim))
        self.std = torch.randn(size=(1, input_dim))
        self.W_forget = nn.Sequential(
            nn.Linear(input_dim, hidden_dim)
        )
        self.U_forget = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim)
        )
        # These two used a hard-coded 256, which broke any hidden_dim != 256.
        self.W_hidden = nn.Sequential(
            nn.Linear(input_dim, hidden_dim)
        )
        self.U_hidden = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim)
        )
        self.b_forget = nn.Parameter(torch.randn(1, hidden_dim))
        self.b_hidden = nn.Parameter(torch.randn(1, hidden_dim))

    def forward(self, data, max_seq_len=10):
        """Run the cell over at most ``max_seq_len`` steps of ``data``.

        Args:
            data: tensor indexed as (batch, step, feature).
            max_seq_len: upper bound on the number of steps processed; shorter
                sequences are processed in full instead of raising IndexError.

        Returns:
            Tensor of shape (batch, steps, hidden_dim) holding every hidden state.
        """
        n_steps = min(max_seq_len, data.shape[1])
        # new_zeros keeps the initial state on data's device and dtype.
        h_t = data.new_zeros((data.shape[0], self.hidden_dim))
        states = []
        for i in range(n_steps):
            x_t = data[:, i]
            forget_t = torch.sigmoid(self.W_forget(x_t) + self.U_forget(h_t) + self.b_forget)
            candidate = torch.tanh(self.W_hidden(x_t) + self.U_hidden(forget_t * h_t) + self.b_hidden)
            h_t = forget_t * h_t + (1.0 - forget_t) * candidate
            states.append(h_t)
        if not states:
            # torch.stack cannot handle an empty list.
            return data.new_zeros((data.shape[0], 0, self.hidden_dim))
        return torch.stack(states, dim=1)
#hf = HyperFraud()
#rand_input = torch.randn(size=(42, 10, 512))
#print(hf(rand_input).shape)

View File

@@ -0,0 +1,82 @@
import numpy as np
from tqdm import tqdm
import librosa
import librosa.display
from matplotlib import pyplot as plt
from pathlib import Path
class Preprocessor:
    """Turn audio files into log-mel spectrogram image chunks.

    A recording is converted to a dB-scaled mel spectrogram, cut into windows
    of ``chunk_size`` frames taken every ``chunk_hop`` frames, and every window
    is rendered to a JPEG.
    """

    def __init__(self, sr=16000, n_mels=64, n_fft=1024, hop_length=256, chunk_size=64, chunk_hop=32, cmap='viridis'):
        """Store the analysis parameters.

        Args:
            sr: sample rate audio is loaded at.
            n_mels: number of mel bands.
            n_fft: STFT window size.
            hop_length: forwarded to ``librosa.feature.melspectrogram``.
            chunk_size: width of one image chunk in spectrogram frames.
            chunk_hop: step between consecutive chunk starts in frames.
            cmap: matplotlib colormap used for rendering.
        """
        self.sr = sr
        self.n_fft = n_fft
        self.n_mels = n_mels
        self.hop_length = hop_length
        self.chunk_size = chunk_size
        self.chunk_hop = chunk_hop
        self.cmap = cmap

    def _chunk_starts(self, n_frames, drop_incomplete=False):
        """Start frames of the chunks cut from a spectrogram with ``n_frames`` frames."""
        stop = n_frames - self.chunk_size + 1 if drop_incomplete else n_frames
        return range(0, max(stop, 0), self.chunk_hop)

    def process_audio(self, path, out_folder=None, drop_incomplete=False):
        """Render all chunks of one audio file as ``<stem>_<count>.jpg``.

        Args:
            path: audio file to process.
            out_folder: target directory; defaults to the audio file's own
                directory (``None`` used to fail with a TypeError).
            drop_incomplete: if True, skip trailing chunks shorter than
                ``chunk_size``. The default keeps them, as before.
        """
        path = Path(path)
        out_folder = path.parent if out_folder is None else Path(out_folder)
        mel_spec = self.to_mel_spec(path)
        # Slicing past the end only shortens the chunk and never raises, so
        # the former ``except IndexError: pass`` was dead code and is gone.
        for count, start in enumerate(self._chunk_starts(mel_spec.shape[1], drop_incomplete)):
            chunk = mel_spec[:, start:start + self.chunk_size]
            # TODO (original author): must adjust outpath name
            out_path = out_folder / f'{path.stem}_{count}.jpg'
            self.mel_spec_to_img(chunk, out_path)

    def to_mel_spec(self, path):
        """Load ``path`` as mono audio at ``self.sr`` and return its mel spectrogram in dB."""
        audio, sr = librosa.load(str(path), sr=self.sr, mono=True)
        # NOTE(review): the STFT hop is n_fft // 2, not self.hop_length;
        # self.hop_length is only passed to melspectrogram together with a
        # precomputed S. Confirm which hop was intended.
        spectrogram = librosa.stft(audio,
                                   n_fft=self.n_fft,
                                   hop_length=self.n_fft // 2,
                                   center=False)
        spectrogram = librosa.feature.melspectrogram(S=np.abs(spectrogram) ** 2,
                                                     sr=sr,
                                                     n_mels=self.n_mels,
                                                     hop_length=self.hop_length)
        # Convert power to dB relative to the maximum, without clipping the range.
        spectrogram = librosa.power_to_db(spectrogram, ref=np.max, top_db=None)
        return spectrogram

    def mel_spec_to_img(self, spectrogram, out_path, size=227):
        """Render ``spectrogram`` to ``out_path`` as a borderless JPEG.

        The figure is 1x1 inch and saved with ``dpi=size``.
        """
        fig = plt.figure(frameon=False, tight_layout=False)
        try:
            fig.set_size_inches(1, 1)
            # One axes spanning the whole figure, with ticks and frame hidden.
            ax = plt.Axes(fig, [0., 0., 1., 1.])
            ax.set_axis_off()
            fig.add_axes(ax)
            # Draw onto ``ax`` explicitly rather than re-adding specshow's
            # return value as an axes; that return value is not relied upon.
            librosa.display.specshow(spectrogram,
                                     hop_length=self.n_fft // 2,
                                     fmax=self.sr/2,
                                     sr=self.sr,
                                     cmap=self.cmap,
                                     y_axis='mel',
                                     x_axis='time',
                                     ax=ax)
            fig.savefig(out_path, format='jpg', dpi=size)
        finally:
            # Always release the figure, even if rendering or saving fails.
            plt.close(fig)

    def process_folder(self, folder_in, folder_out, drop_incomplete=False):
        """Process every ``*.wav`` in ``folder_in`` and write the images to ``folder_out``.

        ``folder_out`` is created if needed; files are handled in sorted order.
        """
        folder_in, folder_out = Path(folder_in), Path(folder_out)
        folder_out.mkdir(parents=True, exist_ok=True)
        for wav in tqdm(sorted(folder_in.glob('*.wav'))):
            self.process_audio(wav, folder_out, drop_incomplete=drop_incomplete)
if __name__ == '__main__':
    # Batch job: render spectrogram chunks for the normal and abnormal
    # recordings of every machine type / machine id.
    models = ['slider', 'pump', 'fan']
    model_ids = [0, 2, 4, 6]
    preprocessor = Preprocessor()
    for model in models:
        for model_id in model_ids:
            # (wav folder, image folder) pairs, normal first, then abnormal.
            folder_pairs = (
                (Path(f'/home/robert/coding/audio_anomaly_detection/data/mimii/-6_dB_{model}/id_0{model_id}/normal'),
                 Path(f'/home/robert/coding/audio_anomaly_detection/data/mimii/-6_dB_{model}/id_0{model_id}/normal/melspec_images/')),
                (Path(f'/home/robert/coding/audio_anomaly_detection/data/mimii/-6_dB_{model}/id_0{model_id}/abnormal'),
                 Path(f'/home/robert/coding/audio_anomaly_detection/data/mimii/-6_dB_{model}/id_0{model_id}/abnormal/melspec_images/')),
            )
            for wav_folder, image_folder in folder_pairs:
                preprocessor.process_folder(wav_folder, image_folder)