torchaudio testing
This commit is contained in:
@@ -1,11 +1,17 @@
|
||||
from collections import defaultdict
|
||||
|
||||
# Imports from python Internals
|
||||
from abc import ABC
|
||||
from argparse import Namespace
|
||||
from itertools import cycle
|
||||
from collections import defaultdict, namedtuple
|
||||
|
||||
import sklearn
|
||||
import torch
|
||||
# Numerical Imports, Metrics and Plotting
|
||||
import numpy as np
|
||||
from sklearn.ensemble import IsolationForest
|
||||
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, roc_auc_score, roc_curve, auc, f1_score, \
|
||||
recall_score, average_precision_score
|
||||
from matplotlib import pyplot as plt
|
||||
|
||||
# Import Deep Learning Framework
|
||||
import torch
|
||||
from torch import nn
|
||||
from torch.optim import Adam
|
||||
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
|
||||
@@ -13,15 +19,25 @@ from torch.utils.data import DataLoader
|
||||
from torchcontrib.optim import SWA
|
||||
from torchvision.transforms import Compose, RandomApply
|
||||
|
||||
from ml_lib.audio_toolset.audio_augmentation import Speed
|
||||
# Import Functions and Modules from MLLIB
|
||||
from ml_lib.audio_toolset.mel_augmentation import NoiseInjection, LoudnessManipulator, ShiftTime, MaskAug
|
||||
from ml_lib.audio_toolset.audio_io import AudioToMel, MelToImage, NormalizeLocal
|
||||
from ml_lib.audio_toolset.audio_io import NormalizeLocal
|
||||
from ml_lib.modules.util import LightningBaseModule
|
||||
from ml_lib.utils.tools import to_one_hot
|
||||
from ml_lib.utils.transforms import ToTensor
|
||||
|
||||
# Import Project Variables
|
||||
import variables as V
|
||||
|
||||
|
||||
class BaseLossMixin:
    """Mixin that exposes the shared loss criteria as class attributes.

    Each criterion is instantiated once, when the class body is executed, and
    is reached through ``self.<name>`` by classes that inherit this mixin
    (the step methods in this module call ``self.ce_loss``).
    """

    absolute_loss = nn.L1Loss()      # mean absolute error
    nll_loss = nn.NLLLoss()          # negative log-likelihood (expects log-probabilities)
    bce_loss = nn.BCELoss()          # binary cross-entropy (expects probabilities)
    ce_loss = nn.CrossEntropyLoss()  # cross-entropy (expects raw logits)
|
||||
|
||||
|
||||
class BaseOptimizerMixin:
|
||||
|
||||
def configure_optimizers(self):
|
||||
@@ -60,16 +76,12 @@ class BaseOptimizerMixin:
|
||||
|
||||
class BaseTrainMixin:
|
||||
|
||||
absolute_loss = nn.L1Loss()
|
||||
nll_loss = nn.NLLLoss()
|
||||
bce_loss = nn.BCELoss()
|
||||
|
||||
def training_step(self, batch_xy, batch_nb, *args, **kwargs):
|
||||
assert isinstance(self, LightningBaseModule)
|
||||
batch_x, batch_y = batch_xy
|
||||
y = self(batch_x).main_out
|
||||
bce_loss = self.bce_loss(y.squeeze(), batch_y)
|
||||
return dict(loss=bce_loss)
|
||||
loss = self.ce_loss(y.squeeze(), batch_y.long())
|
||||
return dict(loss=loss)
|
||||
|
||||
def training_epoch_end(self, outputs):
|
||||
assert isinstance(self, LightningBaseModule)
|
||||
@@ -84,55 +96,39 @@ class BaseTrainMixin:
|
||||
|
||||
class BaseValMixin:
|
||||
|
||||
absolute_loss = nn.L1Loss()
|
||||
nll_loss = nn.NLLLoss()
|
||||
bce_loss = nn.BCELoss()
|
||||
|
||||
def validation_step(self, batch_xy, batch_idx, dataloader_idx, *args, **kwargs):
|
||||
def validation_step(self, batch_xy, batch_idx, *args, **kwargs):
|
||||
assert isinstance(self, LightningBaseModule)
|
||||
batch_x, batch_y = batch_xy
|
||||
y = self(batch_x).main_out
|
||||
val_bce_loss = self.bce_loss(y.squeeze(), batch_y)
|
||||
return dict(val_bce_loss=val_bce_loss,
|
||||
val_loss = self.ce_loss(y.squeeze(), batch_y.long())
|
||||
return dict(val_loss=val_loss,
|
||||
batch_idx=batch_idx, y=y, batch_y=batch_y)
|
||||
|
||||
def validation_epoch_end(self, outputs, *_, **__):
|
||||
assert isinstance(self, LightningBaseModule)
|
||||
summary_dict = dict()
|
||||
for output_idx, output in enumerate(outputs):
|
||||
keys = list(output[0].keys())
|
||||
ident = '' if output_idx == 0 else '_train'
|
||||
summary_dict.update({f'mean{ident}_{key}': torch.mean(torch.stack([output[key]
|
||||
for output in output]))
|
||||
for key in keys if 'loss' in key}
|
||||
)
|
||||
|
||||
# UnweightedAverageRecall
|
||||
y_true = torch.cat([output['batch_y'] for output in output]) .cpu().numpy()
|
||||
y_pred = torch.cat([output['y'] for output in output]).squeeze().cpu().numpy()
|
||||
keys = list(outputs[0].keys())
|
||||
summary_dict.update({f'mean_{key}': torch.mean(torch.stack([output[key]
|
||||
for output in outputs]))
|
||||
for key in keys if 'loss' in key}
|
||||
)
|
||||
|
||||
y_pred = (y_pred >= 0.5).astype(np.float32)
|
||||
additional_scores = self.additional_scores(outputs)
|
||||
summary_dict.update(**additional_scores)
|
||||
|
||||
uar_score = sklearn.metrics.recall_score(y_true, y_pred, labels=[0, 1], average='macro',
|
||||
sample_weight=None, zero_division='warn')
|
||||
uar_score = torch.as_tensor(uar_score)
|
||||
summary_dict.update({f'uar{ident}_score': uar_score})
|
||||
for key in summary_dict.keys():
|
||||
self.log(key, summary_dict[key])
|
||||
for key in summary_dict.keys():
|
||||
self.log(key, summary_dict[key])
|
||||
|
||||
|
||||
class BaseTestMixin:
|
||||
|
||||
absolute_loss = nn.L1Loss()
|
||||
nll_loss = nn.NLLLoss()
|
||||
bce_loss = nn.BCELoss()
|
||||
|
||||
def test_step(self, batch_xy, batch_idx, dataloader_idx, *args, **kwargs):
|
||||
def test_step(self, batch_xy, batch_idx, *_, **__):
|
||||
assert isinstance(self, LightningBaseModule)
|
||||
batch_x, batch_y = batch_xy
|
||||
y = self(batch_x).main_out
|
||||
test_bce_loss = self.bce_loss(y.squeeze(), batch_y)
|
||||
return dict(test_bce_loss=test_bce_loss,
|
||||
test_loss = self.ce_loss(y.squeeze(), batch_y.long())
|
||||
return dict(test_loss=test_loss,
|
||||
batch_idx=batch_idx, y=y, batch_y=batch_y)
|
||||
|
||||
def test_epoch_end(self, outputs, *_, **__):
|
||||
@@ -145,16 +141,9 @@ class BaseTestMixin:
|
||||
for key in keys if 'loss' in key}
|
||||
)
|
||||
|
||||
# UnweightedAverageRecall
|
||||
y_true = torch.cat([output['batch_y'] for output in outputs]) .cpu().numpy()
|
||||
y_pred = torch.cat([output['y'] for output in outputs]).squeeze().cpu().numpy()
|
||||
additional_scores = self.additional_scores(outputs)
|
||||
summary_dict.update(**additional_scores)
|
||||
|
||||
y_pred = (y_pred >= 0.5).astype(np.float32)
|
||||
|
||||
uar_score = sklearn.metrics.recall_score(y_true, y_pred, labels=[0, 1], average='macro',
|
||||
sample_weight=None, zero_division='warn')
|
||||
uar_score = torch.as_tensor(uar_score)
|
||||
summary_dict.update({f'uar_score': uar_score})
|
||||
for key in summary_dict.keys():
|
||||
self.log(key, summary_dict[key])
|
||||
|
||||
@@ -167,53 +156,56 @@ class DatasetMixin:
|
||||
# Dataset
|
||||
# =============================================================================
|
||||
# Mel Transforms
|
||||
mel_transforms = Compose([
|
||||
# Audio to Mel Transformations
|
||||
AudioToMel(sr=self.params.sr,
|
||||
n_mels=self.params.n_mels,
|
||||
n_fft=self.params.n_fft,
|
||||
hop_length=self.params.hop_length),
|
||||
MelToImage()])
|
||||
|
||||
mel_transforms_train = Compose([
|
||||
# Audio to Mel Transformations
|
||||
Speed(max_amount=self.params.speed_amount,
|
||||
speed_min=self.params.speed_min,
|
||||
speed_max=self.params.speed_max
|
||||
),
|
||||
mel_transforms])
|
||||
mel_kwargs = dict(sample_rate=self.params.sr,
|
||||
n_mels=self.params.n_mels,
|
||||
n_fft=self.params.n_fft,
|
||||
hop_length=self.params.hop_length)
|
||||
|
||||
# Utility
|
||||
util_transforms = Compose([NormalizeLocal(), ToTensor()])
|
||||
normalize = NormalizeLocal()
|
||||
|
||||
# Data Augmentations
|
||||
aug_transforms = Compose([
|
||||
mel_augmentations = Compose([
|
||||
RandomApply([
|
||||
NoiseInjection(self.params.noise_ratio),
|
||||
LoudnessManipulator(self.params.loudness_ratio),
|
||||
ShiftTime(self.params.shift_ratio),
|
||||
MaskAug(self.params.mask_ratio),
|
||||
NoiseInjection(0.2),
|
||||
LoudnessManipulator(0.5),
|
||||
ShiftTime(0.4),
|
||||
MaskAug(0.2),
|
||||
], p=0.6),
|
||||
util_transforms])
|
||||
normalize])
|
||||
|
||||
# Datasets
|
||||
dataset = Namespace(
|
||||
**dict(
|
||||
# TRAIN DATASET
|
||||
train_dataset=self.dataset_class(self.params.root, setting=V.DATA_OPTIONS.train,
|
||||
use_preprocessed=self.params.use_preprocessed,
|
||||
stretch_dataset=self.params.stretch,
|
||||
mel_transforms=mel_transforms_train, transforms=aug_transforms),
|
||||
# VALIDATION DATASET
|
||||
val_train_dataset=self.dataset_class(self.params.root, setting=V.DATA_OPTIONS.train,
|
||||
mel_transforms=mel_transforms, transforms=util_transforms),
|
||||
val_dataset=self.dataset_class(self.params.root, setting=V.DATA_OPTIONS.devel,
|
||||
mel_transforms=mel_transforms, transforms=util_transforms),
|
||||
# TEST DATASET
|
||||
test_dataset=self.dataset_class(self.params.root, setting=V.DATA_OPTIONS.test,
|
||||
mel_transforms=mel_transforms, transforms=util_transforms),
|
||||
)
|
||||
)
|
||||
Dataset = namedtuple('Datasets', 'train_dataset val_dataset test_dataset')
|
||||
dataset = Dataset(self.dataset_class(data_root=self.params.root, # TRAIN DATASET
|
||||
setting=V.DATA_OPTION_train,
|
||||
fold=list(range(1,8)),
|
||||
reset=self.params.reset,
|
||||
mel_kwargs=mel_kwargs,
|
||||
mel_augmentations=mel_augmentations),
|
||||
val_dataset=self.dataset_class(data_root=self.params.root, # VALIDATION DATASET
|
||||
setting=V.DATA_OPTION_devel,
|
||||
fold=9,
|
||||
reset=self.params.reset,
|
||||
mel_kwargs=mel_kwargs,
|
||||
mel_augmentations=normalize),
|
||||
test_dataset=self.dataset_class(data_root=self.params.root, # TEST DATASET
|
||||
setting=V.DATA_OPTION_test,
|
||||
fold=10,
|
||||
reset=self.params.reset,
|
||||
mel_kwargs=mel_kwargs,
|
||||
mel_augmentations=normalize),
|
||||
)
|
||||
|
||||
if dataset.train_dataset.task_type == V.TASK_OPTION_binary:
|
||||
# noinspection PyAttributeOutsideInit
|
||||
self.additional_scores = BinaryScores(self)
|
||||
|
||||
elif dataset.train_dataset.task_type == V.TASK_OPTION_multiclass:
|
||||
# noinspection PyAttributeOutsideInit
|
||||
self.additional_scores = MultiClassScores(self)
|
||||
else:
|
||||
raise ValueError
|
||||
|
||||
return dataset
|
||||
|
||||
|
||||
@@ -240,10 +232,185 @@ class BaseDataloadersMixin(ABC):
|
||||
# Validation Dataloader
|
||||
def val_dataloader(self):
|
||||
assert isinstance(self, LightningBaseModule)
|
||||
val_dataloader = DataLoader(dataset=self.dataset.val_dataset, shuffle=False, pin_memory=True,
|
||||
batch_size=self.params.batch_size, num_workers=self.params.worker)
|
||||
return DataLoader(dataset=self.dataset.val_dataset, shuffle=False, pin_memory=True,
|
||||
batch_size=self.params.batch_size, num_workers=self.params.worker)
|
||||
|
||||
train_dataloader = DataLoader(self.dataset.val_train_dataset, num_workers=self.params.worker,
|
||||
pin_memory=True,
|
||||
batch_size=self.params.batch_size, shuffle=False)
|
||||
return [val_dataloader, train_dataloader]
|
||||
|
||||
class BaseScores(ABC):
    """Base class for callables that derive extra scores from step outputs.

    An instance wraps a model and is called with the list of per-step output
    dicts; concrete subclasses return a dict mapping score names to values.
    """

    def __init__(self, lightning_model):
        """Store the model whose outputs are going to be scored.

        Args:
            lightning_model: object kept as ``self.model`` for use by
                subclasses.
        """
        self.model = lightning_model

    def __call__(self, outputs):
        """Compute the additional scores for ``outputs``.

        Subclasses must override this and return a summary dict.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError
|
||||
|
||||
|
||||
class MultiClassScores(BaseScores):
    """Additional scores and plots for a multi-class classifier.

    Calling an instance computes micro/macro F1 and a one-vs-rest ROC-AUC
    score, and logs a ROC plot and a confusion matrix (each as default image
    and as PDF) through ``self.model.logger``.
    """

    def __init__(self, *args):
        super(MultiClassScores, self).__init__(*args)

    def __call__(self, outputs):
        """Score the collected step outputs.

        Args:
            outputs: sequence of dicts, each holding the tensors ``'batch_y'``
                (targets) and ``'y'`` (model outputs) of one step.

        Returns:
            dict with ``micro_f1_score``, ``macro_f1_score`` and either
            ``macro_roc_auc_ovr`` or, if that cannot be computed,
            ``micro_roc_auc_ovr``.
        """
        n_classes = self.model.n_classes

        y_true = torch.cat([output['batch_y'] for output in outputs]).cpu().numpy()
        y_true_one_hot = to_one_hot(y_true, n_classes)

        y_pred = torch.cat([output['y'] for output in outputs]).squeeze().cpu().float().numpy()
        y_pred_max = np.argmax(y_pred, axis=1)

        # Invert the dataset's name -> index mapping, then fix ONE index-ordered
        # list of names that every plot uses, so labels always match the data.
        class_names = {val: key for key, val in self.model.dataset.test_dataset.classes.items()}
        ordered_names = [class_names[i] for i in range(n_classes)]

        summary_dict = dict()
        try:
            summary_dict.update(self._f1_scores(y_true, y_pred_max))
            self._log_roc_curves(y_true_one_hot, y_pred, ordered_names)
            summary_dict.update(self._roc_auc_scores(y_true_one_hot, y_pred))
            self._log_confusion_matrix(y_true, y_pred_max, class_names, ordered_names)
        finally:
            # Release every figure even when a metric or the logger raises.
            plt.close('all')
        return summary_dict

    @staticmethod
    def _f1_scores(y_true, y_pred_max):
        """Return micro- and macro-averaged F1 as a dict."""
        scores = dict()
        for average in ('micro', 'macro'):
            scores[f'{average}_f1_score'] = f1_score(y_true, y_pred_max, labels=None, pos_label=1,
                                                     average=average, sample_weight=None,
                                                     zero_division=True)
        return scores

    def _log_roc_curves(self, y_true_one_hot, y_pred, ordered_names):
        """Plot per-class, micro and macro ROC curves and log the figure."""
        n_classes = self.model.n_classes

        # ROC curve and area for each class.
        fpr, tpr, roc_auc = dict(), dict(), dict()
        for i in range(n_classes):
            fpr[i], tpr[i], _ = roc_curve(y_true_one_hot[:, i], y_pred[:, i])
            roc_auc[i] = auc(fpr[i], tpr[i])

        # Micro-average: pool every (sample, class) decision into one curve.
        fpr["micro"], tpr["micro"], _ = roc_curve(y_true_one_hot.ravel(), y_pred.ravel())
        roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

        # Macro-average: interpolate all class curves at the union of their
        # false positive rates, then average them.
        all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))
        mean_tpr = np.zeros_like(all_fpr)
        for i in range(n_classes):
            mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])
        mean_tpr /= n_classes

        fpr["macro"] = all_fpr
        tpr["macro"] = mean_tpr
        roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

        # Plot all ROC curves.
        figure = plt.figure()
        plt.plot(fpr["micro"], tpr["micro"],
                 label=f'micro ROC ({round(roc_auc["micro"], 2)})',
                 color='deeppink', linestyle=':', linewidth=4)
        plt.plot(fpr["macro"], tpr["macro"],
                 label=f'macro ROC({round(roc_auc["macro"], 2)})',
                 color='navy', linestyle=':', linewidth=4)

        colors = cycle(['firebrick', 'orangered', 'gold', 'olive', 'limegreen', 'aqua',
                        'dodgerblue', 'slategrey', 'royalblue', 'indigo', 'fuchsia'])
        for i, color in zip(range(n_classes), colors):
            plt.plot(fpr[i], tpr[i], color=color, lw=2,
                     label=f'{ordered_names[i]} ({round(roc_auc[i], 2)})')

        plt.plot([0, 1], [0, 1], 'k--', lw=2)  # chance diagonal
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.legend(loc="lower right")

        self.model.logger.log_image('ROC', image=figure, step=self.model.current_epoch)
        self.model.logger.log_image('ROC', image=figure, step=self.model.current_epoch, ext='pdf')
        figure.clf()

    @staticmethod
    def _roc_auc_scores(y_true_one_hot, y_pred):
        """Return the macro one-vs-rest ROC-AUC, or the micro one as fallback."""
        try:
            macro_roc_auc_ovr = roc_auc_score(y_true_one_hot, y_pred, multi_class="ovr",
                                              average="macro")
        except ValueError:
            # Macro averaging was rejected by sklearn; report the pooled
            # (micro) score instead.
            micro_roc_auc_ovr = roc_auc_score(y_true_one_hot, y_pred, multi_class="ovr",
                                              average="micro")
            return dict(micro_roc_auc_ovr=micro_roc_auc_ovr)
        return dict(macro_roc_auc_ovr=macro_roc_auc_ovr)

    def _log_confusion_matrix(self, y_true, y_pred_max, class_names, ordered_names):
        """Build the normalised confusion matrix and log its plot."""
        # `labels` fixes the row/column order of the matrix; it must be the
        # same list that is handed to the display, otherwise tick labels can
        # end up on the wrong rows/columns.
        cm = confusion_matrix([class_names[x] for x in y_true], [class_names[x] for x in y_pred_max],
                              labels=ordered_names,
                              normalize='all')
        disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=ordered_names)
        disp.plot(include_values=True)

        self.model.logger.log_image('Confusion_Matrix', image=disp.figure_, step=self.model.current_epoch)
        self.model.logger.log_image('Confusion_Matrix', image=disp.figure_, step=self.model.current_epoch,
                                    ext='pdf')
|
||||
|
||||
|
||||
class BinaryScores(BaseScores):
    """Additional scores for a binary (normal vs. anomalous) task.

    The per-sample reconstruction errors are turned into anomaly decisions
    with an Isolation Forest; recall is computed on those hard decisions,
    the ranking metrics (average precision, AUC, pAUC) on the forest's
    continuous anomaly score.
    """

    def __init__(self, *args):
        super(BinaryScores, self).__init__(*args)

    def __call__(self, outputs):
        """Score the collected step outputs.

        Args:
            outputs: sequence of dicts, each holding the tensors ``'batch_y'``
                (targets) and ``'element_wise_recon_error'`` of one step.

        Returns:
            dict with ``uar_score``, ``precision_score``, ``auc_score`` and
            ``pauc_score``; the two AUC entries are ``-1`` when sklearn
            cannot compute them.
        """
        summary_dict = dict()

        y_true = torch.cat([output['batch_y'] for output in outputs]).cpu().numpy()
        # NOTE(review): the step methods visible in this module emit 'y', not
        # 'element_wise_recon_error' - confirm the model's steps provide this key.
        y_pred = torch.cat([output['element_wise_recon_error'] for output in outputs]).squeeze().cpu().numpy()
        samples = y_pred.reshape(-1, 1)

        # A manual threshold would look like this:
        # y_pred = (y_pred >= 0.5).astype(np.float32)

        # Threshold with an Isolation Forest: -1 marks an outlier, mapped to
        # the positive label 1.
        clf = IsolationForest(random_state=self.model.seed)
        y_label = (np.asarray(clf.fit_predict(samples)) == -1).astype(np.float32)
        # score_samples is "the lower, the more abnormal"; negate it so larger
        # values mean "more likely positive", as the ranking metrics expect.
        y_score = -clf.score_samples(samples)

        #########################
        # Unweighted average recall - needs hard decisions.
        summary_dict['uar_score'] = recall_score(y_true, y_label, labels=[0, 1], average='macro',
                                                 sample_weight=None, zero_division='warn')

        #########################
        # Average precision - ranking metric, fed with continuous scores.
        summary_dict['precision_score'] = average_precision_score(y_true, y_score)

        #########################
        # AUC (on hard labels it would merely repeat the UAR).
        try:
            summary_dict['auc_score'] = roc_auc_score(y_true=y_true, y_score=y_score)
        except ValueError:
            summary_dict['auc_score'] = -1

        #########################
        # pAUC - partial AUC up to a false positive rate of 0.15.
        try:
            summary_dict['pauc_score'] = roc_auc_score(y_true=y_true, y_score=y_score, max_fpr=0.15)
        except ValueError:
            summary_dict['pauc_score'] = -1

        return summary_dict
|
||||
|
Reference in New Issue
Block a user