"""Print clip-duration statistics and per-label row counts for the PRIMATES dataset."""
import numpy as np
import torch
import pytorch_lightning as pl
import librosa
import pandas as pd
from tqdm import tqdm

import variables as v


# Sampling rate handed to librosa.get_duration for every file.
sr = 16000

# Every .wav file directly inside <PRIMATES_Root>/wav (non-recursive glob).
wavs = list((v.PRIMATES_Root / 'wav').glob('*.wav'))


def main():
    """Print duration statistics and label counts for the PRIMATES data.

    Steps, in output order:
      1. Measure the duration of every file in ``wavs`` and print the
         mean, standard deviation, minimum and maximum.
      2. Read ``lab/train.csv``, ``lab/devel.csv`` and ``lab/test.csv``,
         concatenate them, and print the per-``label`` row counts.
      3. Print the 0.1 ... 0.9 quantiles of the durations.

    Raises:
        SystemExit: if no .wav files were found, since the statistics
            (``np.min`` / ``np.max`` in particular) are undefined on an
            empty collection.
    """
    if not wavs:
        wav_dir = v.PRIMATES_Root / 'wav'
        raise SystemExit(f'No .wav files found in {wav_dir}')

    # NOTE(review): newer librosa releases appear to deprecate `filename=`
    # in favour of `path=` -- confirm against the pinned librosa version
    # before changing the keyword.
    durations = np.array(
        [librosa.get_duration(filename=str(wav), sr=sr) for wav in tqdm(wavs)]
    )

    mean_duration = np.mean(durations)
    std_duration = np.std(durations)
    min_duration = np.min(durations)
    max_duration = np.max(durations)
    print(f'Mean duration: {mean_duration:.3f}s\tstd: {std_duration:.3f}s\tmin: {min_duration:.3f}s\t max: {max_duration:.3f}s')

    # Load the three label files and stack them into a single table.
    lab_dir = v.PRIMATES_Root / 'lab'
    primates_train_csv, primates_devel_csv, primates_test_csv = (
        pd.read_csv(lab_dir / name)
        for name in ('train.csv', 'devel.csv', 'test.csv')
    )
    csv = pd.concat((primates_train_csv, primates_devel_csv, primates_test_csv))
    print(csv.groupby('label').count())

    # One vectorised call for all nine quantiles; list() keeps the
    # printed form a plain Python list of NumPy scalars, as before.
    deciles = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    print(list(np.quantile(durations, q=deciles)))


if __name__ == '__main__':
    main()