diff --git a/_paramters.py b/_paramters.py index 36e2cc5..b4ee88f 100644 --- a/_paramters.py +++ b/_paramters.py @@ -21,18 +21,16 @@ main_arg_parser.add_argument("--main_seed", type=int, default=69, help="") main_arg_parser.add_argument("--data_worker", type=int, default=11, help="") main_arg_parser.add_argument("--data_root", type=str, default='data', help="") main_arg_parser.add_argument("--data_class_name", type=str, default='BinaryMasksDataset', help="") -main_arg_parser.add_argument("--data_normalized", type=strtobool, default=True, help="") main_arg_parser.add_argument("--data_use_preprocessed", type=strtobool, default=False, help="") main_arg_parser.add_argument("--data_n_mels", type=int, default=64, help="") main_arg_parser.add_argument("--data_sr", type=int, default=16000, help="") main_arg_parser.add_argument("--data_hop_length", type=int, default=256, help="") main_arg_parser.add_argument("--data_n_fft", type=int, default=512, help="") -main_arg_parser.add_argument("--data_mixup", type=strtobool, default=False, help="") main_arg_parser.add_argument("--data_stretch", type=strtobool, default=True, help="") # Transformation Parameters main_arg_parser.add_argument("--data_loudness_ratio", type=float, default=0, help="") # 0.4 -main_arg_parser.add_argument("--data_shift_ratio", type=float, default=0.3, help="") # 0.4 +main_arg_parser.add_argument("--data_shift_ratio", type=float, default=0.3, help="") # 0.3 main_arg_parser.add_argument("--data_noise_ratio", type=float, default=0, help="") # 0.4 main_arg_parser.add_argument("--data_mask_ratio", type=float, default=0, help="") # 0.2 main_arg_parser.add_argument("--data_speed_ratio", type=float, default=0, help="") # 0.3 @@ -54,7 +52,7 @@ main_arg_parser.add_argument("--train_outpath", type=str, default="output", help main_arg_parser.add_argument("--train_version", type=strtobool, required=False, help="") # FIXME: Stochastic weight Avaraging is not good, maybe its my implementation? 
main_arg_parser.add_argument("--train_sto_weight_avg", type=strtobool, default=False, help="") -main_arg_parser.add_argument("--train_weight_decay", type=float, default=1e-8, help="") +main_arg_parser.add_argument("--train_weight_decay", type=float, default=1e-7, help="") main_arg_parser.add_argument("--train_opt_reset_interval", type=int, default=0, help="") main_arg_parser.add_argument("--train_epochs", type=int, default=51, help="") main_arg_parser.add_argument("--train_batch_size", type=int, default=300, help="") diff --git a/datasets/binar_masks.py b/datasets/binar_masks.py index 9d147d9..ea4e3f6 100644 --- a/datasets/binar_masks.py +++ b/datasets/binar_masks.py @@ -1,7 +1,6 @@ import pickle from collections import defaultdict from pathlib import Path -import random import librosa as librosa from torch.utils.data import Dataset @@ -19,7 +18,7 @@ class BinaryMasksDataset(Dataset): def sample_shape(self): return self[0][0].shape - def __init__(self, data_root, setting, mel_transforms, transforms=None, mixup=False, stretch_dataset=False, + def __init__(self, data_root, setting, mel_transforms, transforms=None, stretch_dataset=False, use_preprocessed=True): self.use_preprocessed = use_preprocessed self.stretch = stretch_dataset @@ -29,7 +28,6 @@ class BinaryMasksDataset(Dataset): self.data_root = Path(data_root) self.setting = setting - self.mixup = mixup self._wav_folder = self.data_root / 'wav' self._mel_folder = self.data_root / 'mel' self.container_ext = '.pik' @@ -40,19 +38,20 @@ class BinaryMasksDataset(Dataset): self._transforms = transforms or F_x(in_shape=None) def _build_labels(self): + labeldict = dict() with open(Path(self.data_root) / 'lab' / 'labels.csv', mode='r') as f: # Exclude the header _ = next(f) - labeldict = dict() for row in f: if self.setting not in row: continue filename, label = row.strip().split(',') labeldict[filename] = self._to_label[label.lower()] if not self.setting == 'test' else filename if self.stretch and self.setting == 
V.DATA_OPTIONS.train: - additional_dict = ({f'X_{key}': val for key, val in labeldict.items()}) - additional_dict.update({f'X_X_{key}': val for key, val in labeldict.items()}) - additional_dict.update({f'X_X_X_{key}': val for key, val in labeldict.items()}) + additional_dict = ({f'X{key}': val for key, val in labeldict.items()}) + additional_dict.update({f'XX{key}': val for key, val in labeldict.items()}) + additional_dict.update({f'XXX{key}': val for key, val in labeldict.items()}) + additional_dict.update({f'XXXX{key}': val for key, val in labeldict.items()}) labeldict.update(additional_dict) # Delete File if one exists. @@ -66,12 +65,12 @@ class BinaryMasksDataset(Dataset): return labeldict def __len__(self): - return len(self._labels) * 2 if self.mixup else len(self._labels) + return len(self._labels) def _compute_or_retrieve(self, filename): if not (self._mel_folder / (filename + self.container_ext)).exists(): - raw_sample, sr = librosa.core.load(self._wav_folder / (filename.replace('X_', '') + '.wav')) + raw_sample, sr = librosa.core.load(self._wav_folder / (filename.lstrip('X') + '.wav')) mel_sample = self._mel_transform(raw_sample) self._mel_folder.mkdir(exist_ok=True, parents=True) with (self._mel_folder / (filename + self.container_ext)).open(mode='wb') as f: @@ -82,28 +81,16 @@ class BinaryMasksDataset(Dataset): return mel_sample def __getitem__(self, item): - is_mixed = item >= len(self._labels) - if is_mixed: - item = item - len(self._labels) key: str = list(self._labels.keys())[item] filename = key.replace('.wav', '') mel_sample = self._compute_or_retrieve(filename) label = self._labels[key] - if is_mixed: - label_sec = -1 - while label_sec != self._labels[key]: - key_sec = random.choice(list(self._labels.keys())) - label_sec = self._labels[key_sec] - # noinspection PyUnboundLocalVariable - filename_sec = key_sec[:-4] - mel_sample_sec = self._compute_or_retrieve(filename_sec) - mix_in_border = int(random.random() * mel_sample.shape[-1]) * 
random.choice([1, -1]) - mel_sample[:, :mix_in_border] = mel_sample_sec[:, :mix_in_border] transformed_samples = self._transforms(mel_sample) - if not self.setting == 'test': + + if self.setting != V.DATA_OPTIONS.test: + # In test, filenames instead of labels are returned. This is a little hacky though. label = torch.as_tensor(label, dtype=torch.float) return transformed_samples, label diff --git a/main.py b/main.py index 1ca2d99..9f9b34e 100644 --- a/main.py +++ b/main.py @@ -110,6 +110,7 @@ def run_lightning_loop(config_obj): inference_out = f'{parameters}_test_out.csv' from main_inference import prepare_dataloader + import variables as V test_dataloader = prepare_dataloader(config_obj) with (outpath / model_type / parameters / version / inference_out).open(mode='w') as outfile: @@ -118,12 +119,12 @@ from tqdm import tqdm for batch in tqdm(test_dataloader, total=len(test_dataloader)): batch_x, file_name = batch - batch_x = batch_x.unsqueeze(0).to(device='cuda' if model.on_gpu else 'cpu') + batch_x = batch_x.to(device='cuda' if model.on_gpu else 'cpu') y = model(batch_x).main_out - prediction = (y.squeeze() >= 0.5).int().item() - import variables as V - prediction = 'clear' if prediction == V.CLEAR else 'mask' - outfile.write(f'{file_name},{prediction}\n') + predictions = (y >= 0.5).int() + for prediction, single_file_name in zip(predictions, file_name): + prediction_text = 'clear' if prediction == V.CLEAR else 'mask' + outfile.write(f'{single_file_name},{prediction_text}\n') return model diff --git a/main_inference.py b/main_inference.py index 960a718..54473cc 100644 --- a/main_inference.py +++ b/main_inference.py @@ -43,7 +43,8 @@ def prepare_dataloader(config_obj): mel_transforms=mel_transforms, transforms=transforms ) # noinspection PyTypeChecker - return DataLoader(dataset, batch_size=None, num_workers=0, shuffle=False) + return DataLoader(dataset, batch_size=config_obj.train.batch_size, + num_workers=config_obj.data.worker, shuffle=False) def 
restore_logger_and_model(log_dir): diff --git a/multi_run.py b/multi_run.py index 207e681..e8c9c67 100644 --- a/multi_run.py +++ b/multi_run.py @@ -20,9 +20,9 @@ if __name__ == '__main__': config = MConfig().read_namespace(args) arg_dict = dict() - for seed in range(40, 45): + for seed in range(0, 10): arg_dict.update(main_seed=seed) - for model in ['CC', 'BCMC', 'BCC', 'RCC']: + for model in ['CC']: # , 'BCMC', 'BCC', 'RCC']: arg_dict.update(model_type=model) raw_conf = dict(data_speed_factor=0.0, data_speed_ratio=0.0, data_mask_ratio=0.0, data_noise_ratio=0.0, data_shift_ratio=0.0, data_loudness_ratio=0.0, diff --git a/util/module_mixins.py b/util/module_mixins.py index ae2da15..13fff52 100644 --- a/util/module_mixins.py +++ b/util/module_mixins.py @@ -122,7 +122,8 @@ class BinaryMaskDatasetMixin: mel_transforms = Compose([ # Audio to Mel Transformations AudioToMel(sr=self.params.sr, n_mels=self.params.n_mels, n_fft=self.params.n_fft, - hop_length=self.params.hop_length), MelToImage()]) + hop_length=self.params.hop_length), + MelToImage()]) # Data Augmentations aug_transforms = Compose([ RandomApply([ @@ -132,7 +133,8 @@ class BinaryMaskDatasetMixin: MaskAug(self.params.mask_ratio), ], p=0.6), # Utility - NormalizeLocal(), ToTensor() + NormalizeLocal(), + ToTensor() ]) val_transforms = Compose([NormalizeLocal(), ToTensor()]) @@ -143,7 +145,7 @@ class BinaryMaskDatasetMixin: # TRAIN DATASET train_dataset=BinaryMasksDataset(self.params.root, setting=V.DATA_OPTIONS.train, use_preprocessed=self.params.use_preprocessed, - mixup=self.params.mixup, stretch_dataset=self.params.stretch, + stretch_dataset=self.params.stretch, mel_transforms=mel_transforms_train, transforms=aug_transforms), # VALIDATION DATASET val_train_dataset=BinaryMasksDataset(self.params.root, setting=V.DATA_OPTIONS.train,