"""Anomaly-detection evaluation on the MIMII dataset (-6 dB).

Two modes, switched by OURS:
  * OURS = False: classical density estimation (KDE) on precomputed
    ResNet-18 features, scored per audio snippet, averaged per file,
    evaluated with ROC-AUC.
  * OURS = True: transfer-learning model (MyModel) trained/scored on
    spectrogram images.
"""
from cfg import ALL_DATASET_PATHS
from pathlib import Path
import pandas as pd
import numpy as np
from sklearn.mixture import GaussianMixture, BayesianGaussianMixture
from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM
from sklearn.neighbors import KernelDensity
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from transfer_learning.extractors import AudioTransferLearningImageDataset, FeatureExtractor
from torch.utils.data import DataLoader
import torch.nn as nn
from torch.optim import Adam
from tqdm import tqdm
from transfer_learning.my_model import MyModel
import random

np.random.seed(1337)
random.seed(1337)

# Switch: False -> KDE baseline on ResNet-18 features, True -> our model.
OURS = False

# Parameters
leave_n_out = 150        # number of files per class held out for testing
machine_type = 'fan'     # MIMII machine type (renamed from `model`, which was later shadowed by the estimator)
model_id = 0             # MIMII machine id (id_00, id_02, ...)

# Dataset root for this machine type / id (hoisted: was repeated four times).
data_root = Path(f'/home/robert/coding/audio_anomaly_detection/data/mimii/-6_dB_{machine_type}/id_0{model_id}')

# Get all wav files, as shuffled lists (seeded above, so the split is reproducible).
normal_files = list((data_root / 'normal').glob('*.wav'))
abnormal_files = list((data_root / 'abnormal').glob('*.wav'))
random.shuffle(normal_files)
random.shuffle(abnormal_files)

# Train on normal data only; hold out `leave_n_out` normal + abnormal files for testing.
normal_train_files = normal_files[leave_n_out:]
normal_test_files = normal_files[:leave_n_out]
abnormal_test_files = abnormal_files[:leave_n_out]

print(len(normal_train_files), len(normal_test_files), len(normal_files))
print(len(abnormal_test_files))


def _file_ids(files):
    """Map wav paths to the integer file ids stored in the CSV `name` column.

    The id is the leading underscore-separated token of the stem,
    e.g. '00000042_foo.wav' -> 42.
    """
    return [int(x.stem.split('_')[0]) for x in files]


def _only_features(values):
    """Drop the leading `name` and `seq_id` columns from a `.values` array."""
    return values[:, 2:]


if not OURS:
    # Precomputed per-snippet ResNet-18 feature vectors.
    normal_df = pd.read_csv(data_root / 'normal' / 'resnet18_features.csv')
    abnormal_df = pd.read_csv(data_root / 'abnormal' / 'resnet18_features.csv')
    print(normal_df.shape, abnormal_df.shape)

    # Select the CSV rows belonging to each file split.
    normal_trainset = normal_df[normal_df.name.isin(_file_ids(normal_train_files))]
    normal_testset = normal_df[normal_df.name.isin(_file_ids(normal_test_files))]
    abnormal_testset = abnormal_df[abnormal_df.name.isin(_file_ids(abnormal_test_files))]

    print(f'normal train: {normal_trainset.shape}')
    print(f'normal test: {normal_testset.shape}')
    print(f'abnormal test: {abnormal_testset.shape}')
    print(f'#Normal files:{len(normal_files)}\t#Abnormal files: {len(abnormal_files)}')
    print(f'#Normal test snippets normal: {len(normal_testset)}, #Abnormal test snippets: {len(abnormal_testset)}')

    # Alternative density/outlier estimators tried during development:
    # model = BayesianGaussianMixture(n_components=64, max_iter=150, covariance_type='diag')
    # model = GaussianMixture(n_components=64, covariance_type='diag')
    # model = OneClassSVM(nu=1e-2)
    # model = IsolationForest(n_estimators=128, contamination='auto')
    model = KernelDensity(kernel='gaussian', bandwidth=0.1)
    scaler = Pipeline([('Scaler', StandardScaler())])

    # Fit scaler + density model on normal training snippets only.
    X = _only_features(normal_trainset.values)
    scaler.fit(X)
    model.fit(scaler.transform(X))

    # Log-likelihood per snippet; higher = more "normal".
    scores_normal_test = model.score_samples(
        scaler.transform(_only_features(normal_testset.values))
    )
    scores_abnormal_test = model.score_samples(
        scaler.transform(_only_features(abnormal_testset.values))
    )

    # Anomaly score = negative log-likelihood, averaged per file (`name`).
    normal_with_scores = normal_testset[['name', 'seq_id']].copy()
    normal_with_scores['score'] = -scores_normal_test
    normal_with_scores['label'] = 0
    normal_grouped = normal_with_scores.groupby(by=['name']).mean()

    abnormal_with_scores = abnormal_testset[['name', 'seq_id']].copy()
    abnormal_with_scores['score'] = -scores_abnormal_test
    # BUG FIX: was 0 (copy-paste from the normal branch). The column is not
    # used for the AUC below, but a wrong label is misleading.
    abnormal_with_scores['label'] = 1
    abnormal_grouped = abnormal_with_scores.groupby(by=['name']).mean()

    # File-level ROC-AUC: normal = 0, abnormal = 1.
    scores_normal_grouped = normal_grouped.score.values.tolist()
    scores_abnormal_grouped = abnormal_grouped.score.values.tolist()
    labels = [0] * len(scores_normal_grouped) + [1] * len(scores_abnormal_grouped)
    scores = scores_normal_grouped + scores_abnormal_grouped
    auc = roc_auc_score(y_score=scores, y_true=labels)
    print(auc)
else:
    # Transfer-learning path: train MyModel on normal spectrogram images.
    dataset = AudioTransferLearningImageDataset(root_or_files=normal_train_files)
    dataloader = DataLoader(dataset, batch_size=80, shuffle=True)
    F = FeatureExtractor(version='resnet18').to('cuda')
    feature_size = F.feature_size
    criterion = nn.MSELoss()
    my_model = MyModel(F).to('cuda')

    # NOTE(review): range(0) means this training loop never executes —
    # presumably a debug leftover; confirm the intended epoch count.
    for e in range(0):
        losses = []
        for batch in tqdm(dataloader):
            imgs, names, seq_ids = batch
            imgs = imgs.to('cuda')
            prediction, target = my_model(imgs)
            loss = criterion(prediction, target)
            my_model.optimizer.zero_grad()
            loss.backward()
            my_model.optimizer.step()
            losses.append(loss.item())
        print(sum(losses) / len(losses))

    # Test: score the held-out normal files.
    dataset = AudioTransferLearningImageDataset(root_or_files=normal_test_files)
    dataloader = DataLoader(dataset, batch_size=80, shuffle=False)
    my_model.scores_from_dataloader(dataloader)