if __name__ == '__main__':
    import pickle
    import random

    import numpy as np
    import torch  # explicit import: torch.* seeding below must not rely on cfg re-exporting it
    import torch.optim as optim
    from torch.utils.data import DataLoader
    from tqdm import tqdm
    from sklearn.ensemble import IsolationForest
    from sklearn.metrics import roc_auc_score
    from sklearn.mixture import GaussianMixture
    from sklearn.preprocessing import StandardScaler, MinMaxScaler

    from cfg import *
    from mimii import MIMII
    from models.ae import FullSubSpecCAE, SubSpecCAE
    from models.layers import Subspectrogram

    def train(dataset_path, machine_id, norm='batch', seed=42):
        """Train a FullSubSpecCAE on one MIMII machine and return the test ROC-AUC.

        An autoencoder is trained on sub-spectrograms of normal machine sounds;
        an IsolationForest fitted on the (min-max scaled) training predictions
        then scores each test dataset, and the mean score per dataset is
        evaluated against the dataset labels with ROC-AUC.

        Parameters
        ----------
        dataset_path : Path
            Directory of one MIMII noise condition (its ``.name`` is logged).
        machine_id : int
            Machine id within the dataset to train and evaluate on.
        norm : str
            Normalisation tag; only used in the result log line here.
        seed : int
            Seed applied to torch / numpy / random / IsolationForest.

        Returns
        -------
        float
            ROC-AUC over the per-dataset mean anomaly scores.
        """
        # Seed every RNG in play so a (norm, seed) run is repeatable.
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        np.random.seed(seed)
        random.seed(seed)

        print(f'Training on {dataset_path.name}')
        mimii = MIMII(dataset_path=dataset_path, machine_id=machine_id)
        mimii.to(DEVICE)
        # mimii.preprocess(n_fft=1024, hop_length=256, n_mels=80, center=False, power=2.0)  # 80 x 80

        tfms = Subspectrogram(SUB_SPEC_HEIGT, SUB_SPEC_HOP)
        dl = mimii.train_dataloader(
            segment_len=NUM_SEGMENTS,
            hop_len=NUM_SEGMENT_HOPS,
            batch_size=BATCH_SIZE,
            num_workers=NUM_WORKERS,
            shuffle=True,
            transform=tfms
        )

        model = FullSubSpecCAE(weight_sharing=False).to(DEVICE)
        # print(model(torch.randn(128, 1, 20, 80).to(DEVICE)).shape)
        optimizer = optim.Adam(model.parameters(), lr=0.0005)

        for epoch in range(NUM_EPOCHS):
            print(f'EPOCH #{epoch+1}')
            losses = []
            for batch in tqdm(dl):
                data, _ = batch  # labels are unused for reconstruction training
                # torch.Size([128, 4, 20, 80]) batch x subs_specs x height x width
                data = data.to(DEVICE)
                loss = model.train_loss(data)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                losses.append(loss.item())
            print(f'Loss: {np.mean(losses)}')

        # Fit scaler and anomaly detector on the training-set predictions.
        preds, _ = model.gather_predictions(dl)
        scaler = MinMaxScaler()
        preds = scaler.fit_transform(preds)
        # random_state keeps the forest deterministic, matching the seeding above.
        forest = IsolationForest(random_state=seed)
        # BUG FIX: fit on the *scaled* features — test features below are scaled
        # before scoring, so train and test must live in the same space.
        forest.fit(preds)

        # NOTE(review): hop == segment_len here, i.e. non-overlapping test
        # segments (unlike training, which uses NUM_SEGMENT_HOPS) — presumably
        # intentional; confirm.
        datasets = mimii.test_datasets(NUM_SEGMENTS, NUM_SEGMENTS, transform=tfms)
        y_true, y_score = [], []
        for dataset in tqdm(datasets):
            loader = DataLoader(dataset, batch_size=len(dataset), shuffle=False,
                                num_workers=2)
            preds, labels = model.gather_predictions(loader)
            preds = scaler.transform(preds)
            y_true.append(labels[0])  # always the same within one dataset
            score = forest.decision_function(preds)  # 7x7 -> 1 one score for the whole dataset
            y_score.append(score.mean())

        auc = roc_auc_score(y_true, y_score)
        print(f'AUC: {auc}, Dataset: {dataset_path.name}, Machine: {machine_id}, Norm: {norm}, Seed: {seed}')
        return auc

    loss_fn = 'mse'
    results = []
    for norm in ['batch']:
        for seed in SEEDS:
            for dataset_path in ALL_DATASET_PATHS:
                # Restrict to the hardest (-6 dB) noise condition, machine id 4.
                if '-6_dB' in dataset_path.name:
                    for machine_id in [4]:
                        auc = train(dataset_path, machine_id, norm, seed)
                        results.append([dataset_path.name, machine_id, seed, -1, norm, auc])
                        # Re-dump after every run so a crash loses no results.
                        with open(f'full234_hard_{norm}_{loss_fn}.pkl', 'wb') as f:
                            pickle.dump(results, f)