2021-02-15 09:26:21 +01:00

35 lines
1.2 KiB
Python

import numpy as np
import torch
import pytorch_lightning as pl
import librosa
import pandas as pd
import variables as v
from tqdm import tqdm
# Value handed to librosa.get_duration as its `sr` argument.
sr = 16000

# All entries matching '*.wav' inside the 'wav' folder of the dataset root,
# gathered once when the module is loaded.
wavs = [*(v.PRIMATES_Root / 'wav').glob('*.wav')]
def summarize_durations(durations):
    """Format mean, std, min and max of *durations* as a one-line report.

    Args:
        durations: Non-empty sequence of numbers (reported with an ``s``
            suffix).

    Returns:
        The tab-separated summary line, every value with three decimals.

    Raises:
        ValueError: If *durations* is empty (raised by ``np.min``).
    """
    mean_duration, std_duration = np.mean(durations), np.std(durations)
    min_duration, max_duration = np.min(durations), np.max(durations)
    return (f'Mean duration: {mean_duration:.3f}s\tstd: {std_duration:.3f}s'
            f'\tmin: {min_duration:.3f}s\t max: {max_duration:.3f}s')


def duration_deciles(durations):
    """Return the 0.1, 0.2, ..., 0.9 quantiles of *durations* as a list."""
    return [np.quantile(durations, q=q)
            for q in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]]


if __name__ == '__main__':
    # Fail early with a readable message: with no files the NumPy reductions
    # below would abort with an opaque "zero-size array" ValueError.
    if not wavs:
        raise SystemExit(f"No .wav files found in {v.PRIMATES_Root / 'wav'}")

    # NOTE(review): newer librosa releases rename `filename=` to `path=`;
    # confirm against the installed version before changing the keyword.
    durations = [librosa.get_duration(filename=str(wav), sr=sr)
                 for wav in tqdm(wavs)]
    print(summarize_durations(durations))

    # Concatenate the train/devel/test label tables and print, per label,
    # the count of non-null values in each remaining column.
    split_frames = [pd.read_csv(v.PRIMATES_Root / 'lab' / f'{split}.csv')
                    for split in ('train', 'devel', 'test')]
    csv = pd.concat(split_frames)
    print(csv.groupby('label').count())

    print(duration_deciles(durations))