import pickle
import random

import numpy as np
import torch
import torch.optim as optim
from sklearn.ensemble import IsolationForest
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader
from tqdm import tqdm

from cfg import *
from mimii import MIMII
from models.ae import FullSubSpecCAE
from models.layers import Subspectrogram
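
# Pipeline: train a sub-spectrogram convolutional autoencoder (FullSubSpecCAE)
# on MIMII recordings, fit an IsolationForest on the model's per-segment
# outputs, then score each test file and report a per-machine ROC-AUC.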


def train(dataset_path, machine_id, norm='batch', seed=42):
    # Seed every RNG and force deterministic cuDNN kernels for reproducible runs.
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    random.seed(seed)

    print(f'Training on {dataset_path.name}')
    mimii = MIMII(dataset_path=dataset_path, machine_id=machine_id)
    mimii.to(DEVICE)
    # mimii.preprocess(n_fft=1024, hop_length=256, n_mels=80, center=False, power=2.0)  # 80 x 80 mel spectrograms
    tfms = Subspectrogram(SUB_SPEC_HEIGT, SUB_SPEC_HOP)  # sub-band height/hop come from cfg
    dl = mimii.train_dataloader(
        segment_len=NUM_SEGMENTS,
        hop_len=NUM_SEGMENT_HOPS,
        batch_size=BATCH_SIZE,
        num_workers=NUM_WORKERS,
        shuffle=True,
        transform=tfms,
    )
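    # Each training example arrives as a stack of sub-spectrograms, e.g. 4
    # sub-bands of 20 mel bins x 80 frames for an 80-mel input (see the batch
    # shape noted in the training loop below).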
    model = FullSubSpecCAE(weight_sharing=False).to(DEVICE)
    # print(model(torch.randn(128, 1, 20, 80).to(DEVICE)).shape)
    optimizer = optim.Adam(model.parameters(), lr=0.0005)
    for epoch in range(NUM_EPOCHS):
        print(f'EPOCH #{epoch + 1}')
        losses = []
        for batch in tqdm(dl):
            data, labels = batch
            data = data.to(DEVICE)  # torch.Size([128, 4, 20, 80]): batch x sub_specs x height x width
            loss = model.train_loss(data)  # presumably MSE reconstruction loss (cf. loss_fn = 'mse' in __main__)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            losses.append(loss.item())
        print(f'Loss: {np.mean(losses)}')
    # Fit the anomaly detector on the model's outputs for the training data.
    preds, _ = model.gather_predictions(dl)
    scaler = MinMaxScaler()
    scaler.fit(preds)
    forest = IsolationForest()
    forest.fit(scaler.transform(preds))  # fit in the same scaled space used at test time
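    # gather_predictions presumably returns one feature vector per segment
    # (e.g. per-sub-spectrogram reconstruction errors) plus the segment labels.
    # Note that IsolationForest.decision_function is *higher* for inliers, so
    # the mean score below rises for normal files and falls for anomalous ones.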
    datasets = mimii.test_datasets(NUM_SEGMENTS, NUM_SEGMENTS, transform=tfms)
    y_true, y_score = [], []
    for dataset in tqdm(datasets):
        loader = DataLoader(dataset, batch_size=len(dataset), shuffle=False, num_workers=2)
        preds, labels = model.gather_predictions(loader)
        preds = scaler.transform(preds)
        y_true.append(labels[0])  # every segment of a file shares the same label
        score = forest.decision_function(preds)  # one score per segment
        y_score.append(score.mean())  # average to a single score per file
    auc = roc_auc_score(y_true, y_score)
    print(f'AUC: {auc}, Dataset: {dataset_path.name}, Machine: {machine_id}, Norm: {norm}, Seed: {seed}')
    return auc


if __name__ == '__main__':
    loss_fn = 'mse'
    results = []
    for norm in ['batch']:
        for seed in SEEDS:
            for dataset_path in ALL_DATASET_PATHS:
                if '-6_dB' in dataset_path.name:  # hardest noise condition only
                    for machine_id in [4]:
                        auc = train(dataset_path, machine_id, norm, seed)
                        results.append([dataset_path.name, machine_id, seed, -1, norm, auc])
                        # Dump after every run so partial results survive a crash.
                        with open(f'full234_hard_{norm}_{loss_fn}.pkl', 'wb') as f:
                            pickle.dump(results, f)
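
# To inspect the saved results later:
#   import pickle
#   with open('full234_hard_batch_mse.pkl', 'rb') as f:
#       rows = pickle.load(f)  # each row: [dataset, machine_id, seed, -1, norm, auc]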