diff --git a/cfg.py b/cfg.py
new file mode 100644
index 0000000..8bc9e7d
--- /dev/null
+++ b/cfg.py
@@ -0,0 +1,13 @@
+from pathlib import Path
+import torch
+
+BATCH_SIZE = 128
+NUM_EPOCHS = 50
+NUM_WORKERS = 4
+NUM_SEGMENTS = 5
+NUM_SEGMENT_HOPS = 2
+SEEDS = [42, 1337]
+# Sort for a deterministic order; glob() order is filesystem-dependent.
+ALL_DATASET_PATHS = sorted((Path(__file__).parent.absolute() / 'data' / 'mimii').glob('*/'))
+
+DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..cc82f6c
--- /dev/null
+++ b/main.py
@@ -0,0 +1,63 @@
+import numpy as np
+from tqdm import tqdm
+from cfg import *
+from mimii import MIMII
+from models.ae import AE, LCAE
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import random
+
+SEED = SEEDS[0]
+torch.manual_seed(SEED)
+torch.cuda.manual_seed(SEED)
+np.random.seed(SEED)
+random.seed(SEED)
+
+dataset_path = ALL_DATASET_PATHS[5]
+print(f'Training on {dataset_path.name}')
+mimii = MIMII(dataset_path=dataset_path, machine_id=0)
+mimii.preprocess(n_fft=1024, hop_length=512, n_mels=64, center=False, power=2.0)
+
+dl = mimii.train_dataloader(
+    segment_len=NUM_SEGMENTS,
+    hop_len=NUM_SEGMENT_HOPS,
+    batch_size=BATCH_SIZE,
+    num_workers=NUM_WORKERS,
+    shuffle=True
+)
+
+# Input dimension: n_mels * NUM_SEGMENTS = 64 * 5 = 320.
+model = LCAE(320).to(DEVICE)
+model.init_weights()
+criterion = nn.MSELoss()
+optimizer = optim.Adam(model.parameters(), lr=0.001)
+
+# Weights for the entropy and diversity regularizers (disabled here).
+beta_1 = 0.0
+beta_2 = 0.0
+
+for epoch in range(NUM_EPOCHS):
+    print(f'EPOCH #{epoch+1}')
+    losses = []
+    entropies = []
+    l1s = []
+    for batch in tqdm(dl):
+        data, labels = batch
+        data = data.to(DEVICE)
+        data = data.view(data.shape[0], -1)  # flatten each segment to a vector
+
+        preds, entropy, diversity = model(data)
+        loss = criterion(preds, data) + beta_1 * entropy.mean() + beta_2 * diversity
+
+        optimizer.zero_grad()
+        loss.backward()
+        optimizer.step()
+
+        losses.append(loss.item())
+        entropies.append(entropy.mean().item())
+        l1s.append(diversity.item())
+    print(f'Loss: {np.mean(losses)}; Entropy: {np.mean(entropies)}; l1: {np.mean(l1s)}')
+
+# Evaluate with the same segment length and hop used for training.
+auc = mimii.evaluate_model(model, NUM_SEGMENTS, NUM_SEGMENT_HOPS)
+print(f'AUC: {auc}')
diff --git a/mimii.py b/mimii.py
new file mode 100644
index 0000000..3e4d702
--- /dev/null
+++ b/mimii.py
@@ -0,0 +1,134 @@
+import random
+import numpy as np
+import torch
+from torch.utils.data import Dataset, DataLoader, ConcatDataset
+import librosa
+from sklearn.metrics import roc_auc_score
+from tqdm import tqdm
+
+__all__ = ['MIMII']
+
+
+class MIMII(object):
+    def __init__(self, dataset_path, machine_id, seed=42):
+        torch.random.manual_seed(seed)
+        np.random.seed(seed)
+        random.seed(seed)  # random.shuffle below draws from this RNG
+        self.machine = dataset_path.name
+        self.machine_id = machine_id
+        self.root = dataset_path / f'id_0{machine_id}'
+        self.min_level_db = -80
+        self.sr = 16000
+
+        train = list((self.root / 'normal' / 'processed').glob('*.npy'))
+        test = list((self.root / 'abnormal' / 'processed').glob('*.npy'))
+        random.shuffle(train)
+
+        # Hold out as many normal files as there are abnormal ones for testing.
+        normal_test = train[:len(test)]
+
+        self.test_labels = [0] * len(normal_test) + [1] * len(test)
+        self.train_labels = [0] * (len(train) - len(normal_test))
+
+        self.train_paths = train[len(test):]
+        self.test_paths = normal_test + test
+
+    def _normalize(self, S):
+        return np.clip((S - self.min_level_db) / -self.min_level_db, 0, 1)
+
+    def _denormalize(self, S):
+        return (np.clip(S, 0, 1) * -self.min_level_db) + self.min_level_db
+
+    def preprocess(self, **kwargs):
+        for mode in ['normal', 'abnormal']:
+            folder = (self.root / mode / 'processed')
+            folder.mkdir(parents=False, exist_ok=True)
+            wavs = (self.root / mode).glob('*.wav')
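+            # Pipeline sketch: load each wav at 16 kHz, compute a mel power
+            # spectrogram, convert it to dB, normalize to [0, 1], and save one
+            # .npy per file with the spectrogram shape encoded in its name.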
+            print(f'Processing {folder}')
+            for file in tqdm(list(wavs)):
+                audio, sr = librosa.load(str(file), sr=self.sr)
+                mel_spec = librosa.feature.melspectrogram(y=audio, sr=sr, **kwargs)
+                # The spectrograms are computed with power=2.0 (see main.py),
+                # so convert with power_to_db rather than amplitude_to_db.
+                mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
+                mel_spec_norm = self._normalize(mel_spec_db)
+                m, n = mel_spec_norm.shape
+                np.save(folder / (file.stem + f'_{m}_{n}.npy'), mel_spec_norm)
+
+    def train_dataloader(self, segment_len=20, hop_len=5, **kwargs):
+        # TODO: hold out part of the training data for validation.
+        ds = []
+        for p, l in zip(self.train_paths, self.train_labels):
+            ds.append(
+                MimiiTorchDataset(path=p, label=l,
+                                  segment_len=segment_len,
+                                  hop=hop_len)
+            )
+        return DataLoader(ConcatDataset(ds), **kwargs)
+
+    def test_dataloader(self, *args, **kwargs):
+        raise NotImplementedError('test_dataloader is not supported')
+
+    def evaluate_model(self, f, segment_len=20, hop_len=5):
+        datasets = []
+        for p, l in zip(self.test_paths, self.test_labels):
+            datasets.append(
+                MimiiTorchDataset(path=p, label=l,
+                                  segment_len=segment_len,
+                                  hop=hop_len)
+            )
+        device = next(f.parameters()).device  # evaluate wherever the model lives
+        f.eval()
+        y_true, y_score = [], []
+        with torch.no_grad():
+            for dataset in tqdm(datasets):
+                loader = DataLoader(dataset, batch_size=300, shuffle=False, num_workers=2)
+                file_preds = []
+                for batch in loader:
+                    data, labels = batch
+                    data = data.to(device)
+                    data = data.view(data.shape[0], -1)
+
+                    y_hat, entropy, diversity = f(data)
+                    # Per-segment reconstruction error, summed over feature dims.
+                    preds = torch.sum((y_hat - data) ** 2, dim=tuple(range(1, y_hat.dim())))
+
+                    file_preds += preds.cpu().tolist()
+                # One label and one score (the mean segment error) per file.
+                y_true.append(labels.max().item())
+                y_score.append(np.mean(file_preds))
+        return roc_auc_score(y_true, y_score)
+
+
+class MimiiTorchDataset(Dataset):
+    def __init__(self, path, segment_len, hop, label):
+        self.path = path
+        self.segment_len = segment_len
+        # Spectrogram dimensions are encoded in the file name as ..._{m}_{n}.npy.
+        m, n = path.stem.split('_')[-2:]
+        self.m, self.n = int(m), int(n)
+        # +1 so the final full segment is not dropped.
+        self.offsets = list(range(0, self.n - segment_len + 1, hop))
+        self.label = label
+
+    def __getitem__(self, item):
+        start = self.offsets[item]
+        mel_spec = np.load(self.path)
+        snippet = mel_spec[:, start:start + self.segment_len]
+        return snippet, self.label
+
+    def __len__(self):
+        return len(self.offsets)
+
+
+class MimiiTorchTestDataset(MimiiTorchDataset):
+    def __init__(self, *args, **kwargs):
+        super(MimiiTorchTestDataset, self).__init__(*args, **kwargs)
+
+    def __getitem__(self, item):
+        # Also return the source path so predictions can be grouped per file.
+        x = super().__getitem__(item)
+        return x, self.path
diff --git a/models/__init__.py b/models/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/models/ae.py b/models/ae.py
new file mode 100644
index 0000000..8b0f461
--- /dev/null
+++ b/models/ae.py
@@ -0,0 +1,95 @@
+import torch
+import torch.nn as nn
+
+
+class AE(nn.Module):
+    def __init__(self, in_dim=320):
+        super(AE, self).__init__()
+        self.net = nn.Sequential(
+            nn.Linear(in_dim, 64),
+            nn.ReLU(),
+            nn.Linear(64, 64),
+            nn.ReLU(),
+            nn.Linear(64, 8),
+            nn.ReLU(),
+            nn.Linear(8, 64),
+            nn.ReLU(),
+            nn.Linear(64, 64),
+            nn.ReLU(),
+            nn.Linear(64, in_dim),
+            nn.ReLU(),
+        )
+
+    def forward(self, data):
+        return self.net(data)
+
+    def init_weights(self):
+        def _weight_init(m):
+            if hasattr(m, 'weight'):
+                if isinstance(m.weight, torch.Tensor):
+                    torch.nn.init.xavier_uniform_(m.weight,
+                                                  gain=nn.init.calculate_gain('relu'))
+            if hasattr(m, 'bias'):
+                if isinstance(m.bias, torch.Tensor):
+                    m.bias.data.fill_(0.01)
+
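+        # Xavier/Glorot init with ReLU gain, applied recursively to every
+        # submodule that has weight and bias tensors.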
+        self.apply(_weight_init)
+
+
+class LCAE(nn.Module):
+    def __init__(self, in_dim=320):
+        super(LCAE, self).__init__()
+        num_mem = 10  # number of memory slots
+        mem_size = 8  # dimensionality of each slot
+        self.num_mem = num_mem
+        self.encode = nn.Sequential(
+            nn.Linear(in_dim, 64),
+            nn.ReLU(),
+            nn.Linear(64, 64),
+            nn.ReLU(),
+            nn.Linear(64, num_mem),
+            nn.Softmax(-1)
+        )
+
+        self.decode = nn.Sequential(
+            nn.Linear(mem_size, 64),
+            nn.ReLU(),
+            nn.Linear(64, 64),
+            nn.ReLU(),
+            nn.Linear(64, in_dim),
+            nn.ReLU(),
+        )
+
+        self.M = nn.Parameter(
+            torch.randn(num_mem, mem_size)
+        )
+
+    def forward(self, data):
+        # alphas: (batch, num_mem, 1) attention weights over the memory slots.
+        alphas = self.encode(data).unsqueeze(-1)
+        # The epsilon keeps log() finite if an attention weight underflows to 0.
+        entropy_alphas = (alphas * -(alphas + 1e-13).log()).sum(1)
+        # M: (batch, num_mem, mem_size) after broadcasting over the batch.
+        M = self.M.expand(data.shape[0], *self.M.shape)
+        elu = nn.ELU()
+        weighted = alphas * (1 + elu(M))  # 1 + ELU keeps memory entries positive
+        summed = weighted.sum(1)
+        decoded = self.decode(summed)
+        # Largest mean attention weight in the batch; values near 1 mean the
+        # encoder has collapsed onto a single memory slot.
+        diversity = (alphas.sum(dim=0) / data.shape[0]).max()
+        return decoded, entropy_alphas, diversity
+
+    def init_weights(self):
+        def _weight_init(m):
+            if hasattr(m, 'weight'):
+                if isinstance(m.weight, torch.Tensor):
+                    torch.nn.init.xavier_uniform_(m.weight,
+                                                  gain=nn.init.calculate_gain('relu'))
+            if hasattr(m, 'bias'):
+                if isinstance(m.bias, torch.Tensor):
+                    m.bias.data.fill_(0.01)
+
+        self.apply(_weight_init)
diff --git a/models/layers.py b/models/layers.py
new file mode 100644
index 0000000..ab012f8
--- /dev/null
+++ b/models/layers.py
@@ -0,0 +1,28 @@
+import numpy as np
+
+
+class Subspectrogram(object):
+    def __init__(self, height, hop_size):
+        self.height = height
+        self.hop_size = hop_size
+
+    def __call__(self, sample):
+        # sample shape: num_mels x num_frames, or 1 x num_mels x num_frames
+        if len(sample.shape) < 3:
+            sample = sample[np.newaxis, ...]
+        sub_specs = []
+        # Slide a window of `height` mel bins with a stride of `hop_size` bins.
+        for i in range(0, sample.shape[1] - self.height + 1, self.hop_size):
+            sub_spec = sample[:, i:i + self.height, :]
+            sub_specs.append(sub_spec)
+        return np.concatenate(sub_specs)
+
+
+if __name__ == '__main__':
+    sub_spec_tnfm = Subspectrogram(20, 10)
+    X = np.random.rand(1, 60, 40)
+    Y = sub_spec_tnfm(X)
+    print(f'\t Sub-Spectrogram transformation from shape {X.shape} to {Y.shape}')
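+    # Sanity check (assumes the window/stride fix above): a (1, 60, 40) input
+    # yields floor((60 - 20) / 10) + 1 = 5 sub-spectrograms of 20 mel bins each.
+    assert Y.shape == (5, 20, 40)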