diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c342fd8
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+# my own stuff
+
+/data
+/.idea
+/ml_lib
\ No newline at end of file
diff --git a/datasets/__init__.py b/datasets/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/datasets/binar_masks.py b/datasets/binar_masks.py
new file mode 100644
index 0000000..a475d9c
--- /dev/null
+++ b/datasets/binar_masks.py
@@ -0,0 +1,48 @@
+from collections import defaultdict
+from pathlib import Path
+
+import librosa
+from torch.utils.data import Dataset
+
+import variables as V
+
+
+class BinaryMasks(Dataset):
+    # defaultdict expects a callable factory; unknown labels map to -1
+    _to_label = defaultdict(lambda: -1)
+    _to_label['clear'] = V.CLEAR
+    _to_label['mask'] = V.MASK
+
+    def __init__(self, data_root, setting):
+        assert isinstance(setting, str)
+        assert setting in ['test', 'devel', 'train']
+        super(BinaryMasks, self).__init__()
+
+        self.data_root = Path(data_root)
+        self.setting = setting
+        self._labels = self._build_labels()
+        self._wav_folder = self.data_root / 'wav'
+        self._files = list(sorted(self._labels.keys()))
+
+    def _build_labels(self):
+        with open(self.data_root / 'lab' / 'labels.csv', mode='r') as f:
+            # Exclude the header
+            _ = next(f)
+            labeldict = dict()
+            for row in f:
+                if self.setting not in row:
+                    continue
+                filename, label = row.split(',')
+                # Strip the trailing newline, otherwise no label ever matches
+                labeldict[filename] = self._to_label[label.strip().lower()]
+        return labeldict
+
+    def __len__(self):
+        return len(self._labels)
+
+    def __getitem__(self, item):
+        # key already is the filename; do not index self._files a second time
+        key = self._files[item]
+        sample = librosa.core.load(str(self._wav_folder / key))
+        label = self._labels[key]
+        return sample, label
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..ac4232c
--- /dev/null
+++ b/main.py
@@ -0,0 +1,141 @@
+# Imports
+# =============================================================================
+import os
+from distutils.util import strtobool
+from pathlib import Path
+from argparse import ArgumentParser, Namespace
+
+import warnings
+
+import torch
+from pytorch_lightning import Trainer
+from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
+from torch.utils.data import DataLoader
+
+from ml_lib.modules.utils import LightningBaseModule
+from ml_lib.utils.config import Config
+from ml_lib.utils.logging import Logger
+
+warnings.filterwarnings('ignore', category=FutureWarning)
+warnings.filterwarnings('ignore', category=UserWarning)
+
+_ROOT = Path(__file__).parent
+
+# Parameter Configuration
+# =============================================================================
+# Argument Parser
+main_arg_parser = ArgumentParser(description="parser for binary mask detection")
+
+# Main Parameters
+main_arg_parser.add_argument("--main_debug", type=strtobool, default=False, help="")
+main_arg_parser.add_argument("--main_eval", type=strtobool, default=True, help="")
+main_arg_parser.add_argument("--main_seed", type=int, default=69, help="")
+
+# Data Parameters
+main_arg_parser.add_argument("--data_worker", type=int, default=10, help="")
+main_arg_parser.add_argument("--data_dataset_length", type=int, default=10000, help="")
+main_arg_parser.add_argument("--data_root", type=str, default='data', help="")
+main_arg_parser.add_argument("--data_normalized", type=strtobool, default=True, help="")
+main_arg_parser.add_argument("--data_use_preprocessed", type=strtobool, default=True, help="")
+
+# Transformation Parameters
+main_arg_parser.add_argument("--transformations_to_tensor", type=strtobool, default=False, help="") + +# Training Parameters +main_arg_parser.add_argument("--train_outpath", type=str, default="output", help="") +main_arg_parser.add_argument("--train_version", type=strtobool, required=False, help="") +main_arg_parser.add_argument("--train_epochs", type=int, default=500, help="") +main_arg_parser.add_argument("--train_batch_size", type=int, default=200, help="") +main_arg_parser.add_argument("--train_lr", type=float, default=1e-3, help="") +main_arg_parser.add_argument("--train_num_sanity_val_steps", type=int, default=0, help="") + +# Model Parameters +main_arg_parser.add_argument("--model_type", type=str, default="BinaryClassifier", help="") +main_arg_parser.add_argument("--model_activation", type=str, default="leaky_relu", help="") +main_arg_parser.add_argument("--model_filters", type=str, default="[16, 32, 64]", help="") +main_arg_parser.add_argument("--model_classes", type=int, default=2, help="") +main_arg_parser.add_argument("--model_lat_dim", type=int, default=16, help="") +main_arg_parser.add_argument("--model_use_bias", type=strtobool, default=True, help="") +main_arg_parser.add_argument("--model_use_norm", type=strtobool, default=False, help="") +main_arg_parser.add_argument("--model_dropout", type=float, default=0.00, help="") + +# Project Parameters +main_arg_parser.add_argument("--project_name", type=str, default=_ROOT.parent.name, help="") +main_arg_parser.add_argument("--project_owner", type=str, default='si11ium', help="") +main_arg_parser.add_argument("--project_neptune_key", type=str, default=os.getenv('NEPTUNE_KEY'), help="") + +# Parse it +args: Namespace = main_arg_parser.parse_args() + + +def run_lightning_loop(config_obj): + + # Logging + # ================================================================================ + # Logger + with Logger(config_obj) as logger: + # Callbacks + # ============================================================================= + # Checkpoint Saving + checkpoint_callback = ModelCheckpoint( + filepath=str(logger.log_dir / 'ckpt_weights'), + verbose=True, save_top_k=0, + ) + + # ============================================================================= + # Early Stopping + # TODO: For This to work, set a validation step and End Eval and Score + early_stopping_callback = EarlyStopping( + monitor='val_loss', + min_delta=0.0, + patience=0, + ) + + # Dataset and Dataloaders + # ============================================================================= + # Train Dataset + from datasets.binar_masks import BinaryMasks + dataset = BinaryMasks(config_obj.data.root, setting='train') + # Train Dataloader + dataloader = DataLoader(dataset) + + # Model + # ============================================================================= + # Build and Init its Weights + model: LightningBaseModule = config_obj.build_and_init_model(weight_init_function=torch.nn.init.xavier_normal_) + + # Trainer + # ============================================================================= + trainer = Trainer(max_epochs=config_obj.train.epochs, + show_progress_bar=True, + weights_save_path=logger.log_dir, + gpus=[0] if torch.cuda.is_available() else None, + check_val_every_n_epoch=10, + # num_sanity_val_steps=config_obj.train.num_sanity_val_steps, + # row_log_interval=(model.n_train_batches * 0.1), # TODO: Better Value / Setting + # log_save_interval=(model.n_train_batches * 0.2), # TODO: Better Value / Setting + checkpoint_callback=checkpoint_callback, + 
+                          logger=logger,
+                          fast_dev_run=config_obj.main.debug,
+                          early_stop_callback=None  # TODO: pass early_stopping_callback once validation works
+                          )
+
+        # Train It
+        trainer.fit(model, train_dataloader=dataloader)
+
+        # Save the last state & all parameters
+        trainer.save_checkpoint(str(logger.log_dir / 'weights.ckpt'))
+        model.save_to_disk(logger.log_dir)
+
+        # Evaluate It
+        if config_obj.main.eval:
+            trainer.test()
+
+    return model
+
+
+if __name__ == "__main__":
+
+    config = Config.read_namespace(args)
+    trained_model = run_lightning_loop(config)
diff --git a/models/__init__.py b/models/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/models/binary_classifier.py b/models/binary_classifier.py
new file mode 100644
index 0000000..3840a9e
--- /dev/null
+++ b/models/binary_classifier.py
@@ -0,0 +1,51 @@
+import torch
+from torch import nn
+from torch.optim import Adam
+
+from ml_lib.modules.blocks import ConvModule
+from ml_lib.modules.utils import LightningBaseModule
+
+
+class BinaryClassifier(LightningBaseModule):
+
+    @classmethod
+    def name(cls):
+        return cls.__name__
+
+    def configure_optimizers(self):
+        # The optimizer needs the model parameters, not only the learning rate
+        return Adam(self.parameters(), lr=self.hparams.train.lr)
+
+    def training_step(self, batch_xy, batch_nb, *args, **kwargs):
+        batch_x, batch_y = batch_xy
+        y = self(batch_x)  # forward the samples, not the labels
+        loss = self.criterion(y, batch_y)
+        return dict(loss=loss)
+
+    def validation_step(self, batch_xy, batch_nb, **kwargs):
+        batch_x, batch_y = batch_xy
+        y = self(batch_x)  # forward the samples, not the labels
+        val_loss = self.criterion(y, batch_y)
+        return dict(val_loss=val_loss)
+
+    def validation_epoch_end(self, outputs):
+        over_all_val_loss = torch.mean(torch.stack([output['val_loss'] for output in outputs]))
+        return dict(val_loss=over_all_val_loss)
+
+    def __init__(self, hparams):
+        super(BinaryClassifier, self).__init__(hparams)
+        self.criterion = nn.BCELoss()
+
+        # Additional parameters
+        # TODO: derive in_shape from the dataset; the empty tuple is a placeholder
+        self.in_shape = ()
+
+        # Model Modules
+        # TODO: placeholder stubs; the ConvModule arguments still need to be settled
+        self.conv_1 = ConvModule(self.in_shape, 32, 5, )
+        self.conv_2 = ConvModule(64)
+        self.conv_3 = ConvModule(128)
+
+    def forward(self, batch, **kwargs):
+        # TODO: identity placeholder; pass the batch through the conv stack
+        return batch
diff --git a/variables.py b/variables.py
new file mode 100644
index 0000000..2a3838b
--- /dev/null
+++ b/variables.py
@@ -0,0 +1,3 @@
+# Labels
+CLEAR = 0
+MASK = 1
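
A quick smoke test for the new BinaryMasks dataset (not part of the diff; it assumes
the layout that _build_labels encodes: a data/lab/labels.csv label file and a data/wav
folder, with the split name contained in each filename):

    from datasets.binar_masks import BinaryMasks

    dataset = BinaryMasks('data', setting='train')
    print(len(dataset))                          # number of labelled train files
    (waveform, sample_rate), label = dataset[0]  # librosa.load returns (signal, rate)
    print(waveform.shape, sample_rate, label)    # label is V.CLEAR, V.MASK, or -1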