"""Print summary statistics for the PRIMATES audio files and label CSVs.

When run as a script this prints, in order:

1. mean / std / min / max duration (seconds) of the ``.wav`` files found
   under ``v.PRIMATES_Root / 'wav'``;
2. the per-label row counts over the concatenated train, devel and test CSVs
   found under ``v.PRIMATES_Root / 'lab'``;
3. the 0.1 ... 0.9 quantiles of the durations.
"""
import numpy as np
import torch
import pytorch_lightning as pl
import librosa
import pandas as pd
import variables as v
from tqdm import tqdm

# NOTE(review): presumably the sample rate of the recordings in Hz — confirm.
sr = 16000
# All .wav files directly inside <PRIMATES_Root>/wav, collected at import time.
wavs = list((v.PRIMATES_Root / 'wav').glob('*.wav'))


def _clip_duration(wav):
    """Return the duration of the audio file ``wav`` as reported by librosa.

    librosa 0.10 renamed the ``filename`` keyword of ``get_duration`` to
    ``path``; the new name is tried first and the old one is used as a
    fallback so the script works on both sides of the rename.

    NOTE(review): librosa documents that the other arguments are ignored when
    a file path is given; ``sr`` is still passed to mirror the original call.
    """
    try:
        return librosa.get_duration(path=str(wav), sr=sr)
    except TypeError:
        # Older librosa releases do not know the ``path`` keyword.
        return librosa.get_duration(filename=str(wav), sr=sr)


def _load_labels():
    """Return the train, devel and test label CSVs concatenated into one frame."""
    lab_dir = v.PRIMATES_Root / 'lab'
    frames = [pd.read_csv(lab_dir / f'{split}.csv')
              for split in ('train', 'devel', 'test')]
    return pd.concat(frames)


def main():
    """Compute and print the duration statistics and the label counts.

    Raises:
        SystemExit: if no ``.wav`` file was found, because the min/max and
            quantile computations are undefined for an empty list.
    """
    if not wavs:
        wav_dir = v.PRIMATES_Root / 'wav'
        raise SystemExit(f'No .wav files found in {wav_dir}')

    durations = [_clip_duration(wav) for wav in tqdm(wavs)]

    mean_duration = np.mean(durations)
    std_duration = np.std(durations)
    min_duration = np.min(durations)
    max_duration = np.max(durations)
    print(f'Mean duration: {mean_duration:.3f}s\tstd: {std_duration:.3f}s\tmin: {min_duration:.3f}s\t max: {max_duration:.3f}s')

    labels = _load_labels()
    print(labels.groupby('label').count())

    # Kept as a list of scalars so the printed form matches the original.
    print([np.quantile(durations, q=q) for q in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]])


if __name__ == '__main__':
    main()