Initial ComParE commit, Primate Dataset
130  .idea/workspace.xml  generated  Normal file
@@ -0,0 +1,130 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ChangeListManager">
    <list default="true" id="2be1f675-29fe-4a7d-9fe6-9e96cd7c8055" name="Default Changelist" comment="">
      <change afterPath="$PROJECT_DIR$/ml_lib/metrics/attention_rollout.py" afterDir="false" />
      <change afterPath="$PROJECT_DIR$/ml_lib/utils/_basedatamodule.py" afterDir="false" />
      <change beforePath="$PROJECT_DIR$/ml_lib/audio_toolset/audio_to_mel_dataset.py" beforeDir="false" afterPath="$PROJECT_DIR$/ml_lib/audio_toolset/audio_to_mel_dataset.py" afterDir="false" />
      <change beforePath="$PROJECT_DIR$/ml_lib/audio_toolset/mel_dataset.py" beforeDir="false" afterPath="$PROJECT_DIR$/ml_lib/audio_toolset/mel_dataset.py" afterDir="false" />
      <change beforePath="$PROJECT_DIR$/ml_lib/experiments.py" beforeDir="false" />
      <change beforePath="$PROJECT_DIR$/ml_lib/metrics/multi_class_classification.py" beforeDir="false" afterPath="$PROJECT_DIR$/ml_lib/metrics/multi_class_classification.py" afterDir="false" />
      <change beforePath="$PROJECT_DIR$/ml_lib/modules/blocks.py" beforeDir="false" afterPath="$PROJECT_DIR$/ml_lib/modules/blocks.py" afterDir="false" />
      <change beforePath="$PROJECT_DIR$/ml_lib/modules/util.py" beforeDir="false" afterPath="$PROJECT_DIR$/ml_lib/modules/util.py" afterDir="false" />
      <change beforePath="$PROJECT_DIR$/ml_lib/utils/logging.py" beforeDir="false" afterPath="$PROJECT_DIR$/ml_lib/utils/logging.py" afterDir="false" />
      <change beforePath="$PROJECT_DIR$/ml_lib/utils/model_io.py" beforeDir="false" afterPath="$PROJECT_DIR$/ml_lib/utils/model_io.py" afterDir="false" />
      <change beforePath="$PROJECT_DIR$/ml_lib/utils/tools.py" beforeDir="false" afterPath="$PROJECT_DIR$/ml_lib/utils/tools.py" afterDir="false" />
    </list>
    <option name="SHOW_DIALOG" value="false" />
    <option name="HIGHLIGHT_CONFLICTS" value="true" />
    <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
    <option name="LAST_RESOLUTION" value="IGNORE" />
  </component>
  <component name="FileTemplateManagerImpl">
    <option name="RECENT_TEMPLATES">
      <list>
        <option value="Python Script" />
      </list>
    </option>
  </component>
  <component name="Git.Settings">
    <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$/ml_lib" />
  </component>
  <component name="ProjectId" id="1oTEXjx0b8UPBPmOIGceYxEch8r" />
  <component name="ProjectLevelVcsManager" settingsEditedManually="true" />
  <component name="ProjectViewState">
    <option name="hideEmptyMiddlePackages" value="true" />
    <option name="showLibraryContents" value="true" />
  </component>
  <component name="PropertiesComponent">
    <property name="RunOnceActivity.OpenProjectViewOnStart" value="true" />
    <property name="RunOnceActivity.ShowReadmeOnStart" value="true" />
    <property name="WebServerToolWindowFactoryState" value="true" />
    <property name="WebServerToolWindowPanel.toolwindow.highlight.mappings" value="true" />
    <property name="WebServerToolWindowPanel.toolwindow.highlight.symlinks" value="true" />
    <property name="WebServerToolWindowPanel.toolwindow.show.date" value="false" />
    <property name="WebServerToolWindowPanel.toolwindow.show.permissions" value="false" />
    <property name="WebServerToolWindowPanel.toolwindow.show.size" value="false" />
  </component>
  <component name="RunManager" selected="Python.main">
    <configuration name="main" type="PythonConfigurationType" factoryName="Python" nameIsGenerated="true">
      <module name="compare_21" />
      <option name="INTERPRETER_OPTIONS" value="" />
      <option name="PARENT_ENVS" value="true" />
      <envs>
        <env name="PYTHONUNBUFFERED" value="1" />
      </envs>
      <option name="SDK_HOME" value="" />
      <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
      <option name="IS_MODULE_SDK" value="true" />
      <option name="ADD_CONTENT_ROOTS" value="true" />
      <option name="ADD_SOURCE_ROOTS" value="true" />
      <EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
      <option name="SCRIPT_NAME" value="$PROJECT_DIR$/main.py" />
      <option name="PARAMETERS" value="" />
      <option name="SHOW_COMMAND_LINE" value="false" />
      <option name="EMULATE_TERMINAL" value="false" />
      <option name="MODULE_MODE" value="false" />
      <option name="REDIRECT_INPUT" value="false" />
      <option name="INPUT_FILE" value="" />
      <method v="2" />
    </configuration>
    <configuration name="metadata_readout" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
      <module name="compare_21" />
      <option name="INTERPRETER_OPTIONS" value="" />
      <option name="PARENT_ENVS" value="true" />
      <envs>
        <env name="PYTHONUNBUFFERED" value="1" />
      </envs>
      <option name="SDK_HOME" value="" />
      <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/datasets" />
      <option name="IS_MODULE_SDK" value="true" />
      <option name="ADD_CONTENT_ROOTS" value="true" />
      <option name="ADD_SOURCE_ROOTS" value="true" />
      <EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
      <option name="SCRIPT_NAME" value="$PROJECT_DIR$/datasets/metadata_readout.py" />
      <option name="PARAMETERS" value="" />
      <option name="SHOW_COMMAND_LINE" value="false" />
      <option name="EMULATE_TERMINAL" value="false" />
      <option name="MODULE_MODE" value="false" />
      <option name="REDIRECT_INPUT" value="false" />
      <option name="INPUT_FILE" value="" />
      <method v="2" />
    </configuration>
    <recent_temporary>
      <list>
        <item itemvalue="Python.metadata_readout" />
      </list>
    </recent_temporary>
  </component>
  <component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="application-level" UseSingleDictionary="true" transferred="true" />
  <component name="TaskManager">
    <task active="true" id="Default" summary="Default task">
      <changelist id="2be1f675-29fe-4a7d-9fe6-9e96cd7c8055" name="Default Changelist" comment="" />
      <created>1613302221903</created>
      <option name="number" value="Default" />
      <option name="presentableId" value="Default" />
      <updated>1613302221903</updated>
      <workItem from="1613302223434" duration="2570000" />
      <workItem from="1613305247599" duration="11387000" />
    </task>
    <servers />
  </component>
  <component name="TypeScriptGeneratedFilesManager">
    <option name="version" value="3" />
  </component>
  <component name="XDebuggerManager">
    <breakpoint-manager>
      <breakpoints>
        <line-breakpoint enabled="true" suspend="THREAD" type="python-line">
          <url>file://$PROJECT_DIR$/main.py</url>
          <line>11</line>
          <option name="timeStamp" value="2" />
        </line-breakpoint>
      </breakpoints>
    </breakpoint-manager>
  </component>
  <component name="com.intellij.coverage.CoverageDataManagerImpl">
    <SUITE FILE_PATH="coverage/compare_21$metadata_readout.coverage" NAME="metadata_readout Coverage Results" MODIFIED="1613306122664" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/datasets" />
    <SUITE FILE_PATH="coverage/compare_21$main.coverage" NAME="main Coverage Results" MODIFIED="1613376576627" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
  </component>
</project>
34  datasets/metadata_readout.py  Normal file
@@ -0,0 +1,34 @@
import numpy as np
import torch
import pytorch_lightning as pl
import librosa
import pandas as pd
import variables as v
from tqdm import tqdm

sr = 16000
wavs = list((v.PRIMATES_Root / 'wav').glob('*.wav'))


if __name__ == '__main__':

    durations = []

    for wav in tqdm(wavs):
        duration = librosa.get_duration(filename=str(wav), sr=sr)
        durations.append(duration)

    mean_duration, std_duration, min_duration, max_duration = np.mean(durations), np.std(durations), np.min(durations), np.max(durations)

    print(f'Mean duration: {mean_duration:.3f}s\tstd: {std_duration:.3f}s\tmin: {min_duration:.3f}s\t max: {max_duration:.3f}s')

    primates_train_csv, primates_devel_csv, primates_test_csv = \
        [pd.read_csv(p) for p in [v.PRIMATES_Root / 'lab' / 'train.csv',
                                  v.PRIMATES_Root / 'lab' / 'devel.csv',
                                  v.PRIMATES_Root / 'lab' / 'test.csv']]

    csv = pd.concat((primates_train_csv,
                     primates_devel_csv,
                     primates_test_csv))

    print(csv.groupby('label').count())
    print([np.quantile(durations, q=q) for q in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]])
121  datasets/primates_librosa_datamodule.py  Normal file
@@ -0,0 +1,121 @@
from multiprocessing.pool import ApplyResult
from pathlib import Path
from typing import List

from torch.utils.data import DataLoader, ConcatDataset
from torchvision.transforms import Compose, RandomApply
from tqdm import tqdm

from ml_lib.audio_toolset.audio_io import NormalizeLocal
from ml_lib.audio_toolset.audio_to_mel_dataset import LibrosaAudioToMelDataset
from ml_lib.audio_toolset.mel_augmentation import NoiseInjection, LoudnessManipulator, ShiftTime, MaskAug
from ml_lib.utils._basedatamodule import _BaseDataModule, DATA_OPTION_test, DATA_OPTION_train, DATA_OPTION_devel
from ml_lib.utils.transforms import ToTensor
import multiprocessing as mp


data_options = [DATA_OPTION_test, DATA_OPTION_train, DATA_OPTION_devel]
# Maps class name -> integer label id.
class_names = {key: val for val, key in enumerate(['background', 'chimpanze', 'geunon', 'mandrille', 'redcap'])}


class PrimatesLibrosaDatamodule(_BaseDataModule):

    @property
    def mel_folder(self):
        return self.root / 'mel_folder'

    @property
    def wav_folder(self):
        return self.root / 'wav'

    def __init__(self, root, batch_size, num_worker, sr, n_mels, n_fft, hop_length,
                 sample_segment_len=40, sample_hop_len=15):
        super(PrimatesLibrosaDatamodule, self).__init__()
        self.sample_hop_len = sample_hop_len
        self.sample_segment_len = sample_segment_len
        self.num_worker = num_worker
        self.batch_size = batch_size
        self.root = Path(root) / 'primates'

        # Mel transforms - will be pushed with all other parameters by self.__dict__ to the subdataset class
        self.mel_kwargs = dict(sr=sr, n_mels=n_mels, n_fft=n_fft, hop_length=hop_length)

        # Utility
        self.utility_transforms = Compose([NormalizeLocal(), ToTensor()])

        # Data Augmentations
        self.mel_augmentations = Compose([
            # ToDo: HP-search these parameters, make them adjustable from outside
            RandomApply([NoiseInjection(0.2)], p=0.3),
            RandomApply([LoudnessManipulator(0.5)], p=0.3),
            RandomApply([ShiftTime(0.4)], p=0.3),
            RandomApply([MaskAug(0.2)], p=0.3),
            self.utility_transforms])

    def train_dataloader(self):
        return DataLoader(dataset=self.datasets[DATA_OPTION_train], shuffle=True,
                          batch_size=self.batch_size, pin_memory=True,
                          num_workers=self.num_worker)

    # Validation Dataloader
    def val_dataloader(self):
        return DataLoader(dataset=self.datasets[DATA_OPTION_devel], shuffle=False, pin_memory=True,
                          batch_size=self.batch_size, num_workers=self.num_worker)

    # Test Dataloader
    def test_dataloader(self):
        return DataLoader(dataset=self.datasets[DATA_OPTION_test], shuffle=False,
                          batch_size=self.batch_size, pin_memory=True,
                          num_workers=self.num_worker)

    def _build_subdataset(self, row, build=False):
        slice_file_name, class_name = row.strip().split(',')
        class_id = class_names.get(class_name, -1)
        audio_file_path = self.wav_folder / slice_file_name
        # DATA OPTION DIFFERENTIATION !!!!!!!!!!! - Begin
        # Copy the attribute dict so the devel/test override below does not
        # permanently clobber self.mel_augmentations for later train rows.
        kwargs = dict(self.__dict__)
        if any([x in slice_file_name for x in [DATA_OPTION_devel, DATA_OPTION_test]]):
            kwargs.update(mel_augmentations=self.utility_transforms)
        # DATA OPTION DIFFERENTIATION !!!!!!!!!!! - End
        mel_dataset = LibrosaAudioToMelDataset(audio_file_path, class_id, **kwargs)
        if build:
            assert mel_dataset.build_mel()
        return mel_dataset

    def prepare_data(self, *args, **kwargs):
        datasets = dict()
        for data_option in data_options:
            with open(Path(self.root) / 'lab' / f'{data_option}.csv', mode='r') as f:
                # Exclude the header
                _ = next(f)
                all_rows = list(f)
            chunksize = len(all_rows) // max(self.num_worker, 1)
            dataset = list()
            with mp.Pool(processes=self.num_worker) as pool:
                pbar = tqdm(total=len(all_rows))

                def update():
                    pbar.update(chunksize)

                from itertools import repeat
                results = pool.starmap_async(self._build_subdataset, zip(all_rows, repeat(True, len(all_rows))),
                                             chunksize=chunksize)
                for sub_dataset in results.get():
                    dataset.append(sub_dataset)
                    pbar.update()  # FIXME: will i ever get this to work?
            datasets[data_option] = ConcatDataset(dataset)
        self.datasets = datasets
        return datasets

    def setup(self, stage=None):
        datasets = dict()
        for data_option in data_options:
            with open(Path(self.root) / 'lab' / f'{data_option}.csv', mode='r') as f:
                # Exclude the header
                _ = next(f)
                all_rows = list(f)
            dataset = list()
            for row in all_rows:
                dataset.append(self._build_subdataset(row))
            datasets[data_option] = ConcatDataset(dataset)
        self.datasets = datasets
        return datasets
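For orientation (an inference from the defaults above combined with the values passed in main.py below, not something stated in the commit): at sr=16000 and hop_length=256 one mel frame covers 16 ms, so the default sample_segment_len=40 frames corresponds to roughly 0.64 s of audio and sample_hop_len=15 frames to roughly 0.24 s.

    sr, hop_length = 16000, 256                   # values used in main.py
    sample_segment_len, sample_hop_len = 40, 15   # datamodule defaults
    print(sample_segment_len * hop_length / sr)   # 0.64 s per mel segment
    print(sample_hop_len * hop_length / sr)       # 0.24 s hop between segments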
12  main.py  Normal file
@@ -0,0 +1,12 @@
from pathlib import Path
import variables as v
from datasets.primates_librosa_datamodule import PrimatesLibrosaDatamodule

data_root = Path() / 'data'


if __name__ == '__main__':
    dataset = PrimatesLibrosaDatamodule(data_root, batch_size=25, num_worker=6,
                                        sr=v.sr, n_mels=64, n_fft=512, hop_length=256)
    dataset.prepare_data()
    print('done')
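main.py above only exercises prepare_data(). As a rough usage sketch (not part of this commit, and assuming the mel cache under data/primates/mel_folder has already been built), the datamodule would then be consumed along these lines; the final print is only a smoke test, since no LightningModule is included yet.

    from pathlib import Path
    import variables as v
    from datasets.primates_librosa_datamodule import PrimatesLibrosaDatamodule

    if __name__ == '__main__':
        dm = PrimatesLibrosaDatamodule(Path() / 'data', batch_size=25, num_worker=6,
                                       sr=v.sr, n_mels=64, n_fft=512, hop_length=256)
        dm.setup()                                  # builds the test/train/devel ConcatDatasets from lab/*.csv
        batch = next(iter(dm.train_dataloader()))   # one batch of mel segments and labels
        print(type(batch), {k: len(d) for k, d in dm.datasets.items()})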
8  variables.py  Normal file
@@ -0,0 +1,8 @@
from pathlib import Path


sr = 16000


PRIMATES_Root = Path(__file__).parent / 'data' / 'primates'
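The paths above imply a fixed on-disk layout for the unpacked ComParE Primates package: data/primates/wav/*.wav plus data/primates/lab/train.csv, devel.csv and test.csv, relative to the repository root. A small sanity check along these lines (not part of this commit; the layout is inferred from the code above) can catch a misplaced download before prepare_data() starts building mel files.

    import variables as v

    # Check the directory layout that the datamodule and metadata script expect.
    expected = [v.PRIMATES_Root / 'wav',
                v.PRIMATES_Root / 'lab' / 'train.csv',
                v.PRIMATES_Root / 'lab' / 'devel.csv',
                v.PRIMATES_Root / 'lab' / 'test.csv']
    for path in expected:
        print(path, 'ok' if path.exists() else 'MISSING')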