Mel_Vision_Transformer_ComP.../notebooks/Dataset Analysis.ipynb

298 lines
18 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "crazy-burden",
"metadata": {
"pycharm": {
"name": "#%% Imports\n"
}
},
"outputs": [],
"source": [
"from collections import defaultdict\n",
"from pathlib import Path\n",
"\n",
"import numpy as np\n",
"import torch\n",
"import pytorch_lightning as pl\n",
"import librosa\n",
"import pandas as pd\n",
"import variables as v\n",
"import seaborn as sns\n",
"from tqdm import tqdm\n",
"from matplotlib import pyplot as plt\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "ongoing-darkness",
"metadata": {
"pycharm": {
"name": "#%% Variables\n"
}
},
"outputs": [],
"source": [
"# Parent directory of the working directory and the `output` folder inside it\n",
"_ROOT = Path('..')\n",
"output = _ROOT / 'output'\n",
"\n",
"# Sample rate handed to librosa when clip durations are measured\n",
"sr = 16000\n",
"\n",
"# Dataset roots taken from the project-level `variables` module\n",
"roots = [v.CCS_Root, v.PRIMATES_Root]\n",
"_root = roots[0]\n",
"\n",
"# Format (and file extension) used when figures are saved\n",
"ext = 'pdf'\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "friendly-tribute",
"metadata": {
"pycharm": {
"name": "#%% Util Functions\n"
}
},
"outputs": [],
"source": [
"def print_stats(data_option, mean_duration, std_duration, min_duration, max_duration):\n",
"    \"\"\"Print a heading for ``data_option`` followed by its duration statistics.\n",
"\n",
"    All four values are printed with three decimals and an ``s`` suffix.\n",
"\n",
"    :param data_option: name shown in the heading (a data split or a class label).\n",
"    :param mean_duration: mean duration.\n",
"    :param std_duration: standard deviation of the durations.\n",
"    :param min_duration: shortest duration.\n",
"    :param max_duration: longest duration.\n",
"    \"\"\"\n",
"    print(f'For {data_option}; statistics are:')\n",
"    print(f'Mean duration: {mean_duration:.3f}s\\tstd: {std_duration:.3f}s '\n",
"          f'min: {min_duration:.3f}s\\t max: {max_duration:.3f}s')\n",
"\n",
"\n",
"def show_and_save(data_option, title):\n",
"    \"\"\"Save the current matplotlib figure, display it and close it.\n",
"\n",
"    The file is written to ``figures/<data_option>_<title>.<ext>``; the title is\n",
"    lower-cased and its spaces become underscores, ``ext`` is the notebook-level\n",
"    figure format.\n",
"\n",
"    :param data_option: prefix of the file name, e.g. the dataset split.\n",
"    :param title: plot title used to build the file name.\n",
"    \"\"\"\n",
"    figure = plt.gcf()\n",
"    plt.tight_layout()\n",
"\n",
"    # Create the target folder here so the function does not depend on another cell.\n",
"    figure_dir = Path('figures')\n",
"    figure_dir.mkdir(exist_ok=True)\n",
"    file_name = f'{data_option}_{title.lower().replace(\" \", \"_\")}.{ext}'\n",
"\n",
"    # Save before showing so the file does not depend on the figure surviving plt.show().\n",
"    figure.savefig(figure_dir / file_name, format=ext)\n",
"    plt.show()\n",
"    # Close explicitly: on backends where show() leaves the figure open, the next\n",
"    # plot of a loop would otherwise be drawn into this same figure.\n",
"    plt.close(figure)"
]
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Measure the duration of every clip listed in the label files of each dataset.\n",
"# Layout: data['<root.name>_<split>'] = {'durations': [...], 'labels': [...],\n",
"#                                        'durations_per_class': {label: [...]}}\n",
"data = {}\n",
"for root in roots:\n",
"    for data_option in ['train', 'devel', 'test']:\n",
"        lab_csv = pd.read_csv(root / 'lab' / f'{data_option}.csv')\n",
"        labels = []\n",
"        durations = []\n",
"        durations_per_class = defaultdict(list)\n",
"\n",
"        # Iterating the two columns avoids building a Series per row as iterrows() does;\n",
"        # tqdm reports progress because every audio file has to be opened on disk.\n",
"        rows = zip(lab_csv['filename'], lab_csv['label'])\n",
"        for filename, label in tqdm(rows, total=len(lab_csv), desc=f'{root.name}_{data_option}'):\n",
"            # NOTE(review): with `filename=` librosa appears to read the duration from the\n",
"            # file itself, so `sr` is probably ignored here - confirm for the installed version.\n",
"            duration = librosa.get_duration(filename=str(root / 'wav' / filename), sr=sr)\n",
"            durations.append(duration)\n",
"            labels.append(label)\n",
"            durations_per_class[label].append(duration)\n",
"\n",
"        data[f'{root.name}_{data_option}'] = dict(\n",
"            durations=durations, labels=labels, durations_per_class=dict(durations_per_class)\n",
"        )"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% Prepare Data\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"for data_option, split_data in data.items():\n",
"    # Statistics over every file of the split\n",
"    durations = split_data['durations']\n",
"    print_stats(data_option, np.mean(durations), np.std(durations), np.min(durations), np.max(durations))\n",
"\n",
"    # Statistics for each class of the split\n",
"    for label, cls_dur in split_data['durations_per_class'].items():\n",
"        print_stats(label, np.mean(cls_dur), np.std(cls_dur), np.min(cls_dur), np.max(cls_dur))\n",
"    print('----------------------######################-----------------\\n')"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n",
"is_executing": true
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Start from matplotlib's default style, then select the seaborn colour palette\n",
"plt.style.use('default')\n",
"sns.set_palette('Dark2')\n",
"\n",
"font_size = 16\n",
"tex_fonts = {\n",
"    # Render all text with LaTeX, using a serif family\n",
"    'text.usetex': True,\n",
"    'font.family': 'serif',\n",
"    # Base size for axis labels and general text\n",
"    'axes.labelsize': font_size,\n",
"    'font.size': font_size,\n",
"    # Legend and tick labels are two points smaller than the base size\n",
"    'legend.fontsize': font_size - 2,\n",
"    'xtick.labelsize': font_size - 2,\n",
"    'ytick.labelsize': font_size - 2,\n",
"}\n",
"plt.rcParams.update(tex_fonts)\n",
"\n",
"# Folder that show_and_save() writes the figures into\n",
"Path('figures').mkdir(exist_ok=True)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% Seaborn Settings\n",
"is_executing": true
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Duration histograms: each train/devel split is plotted together with the test split\n",
"# of the same dataset.\n",
"for root in roots:\n",
"    for data_option in ['train', 'devel']:\n",
"        title = 'durations per class'\n",
"        key = f'{root.name}_{data_option}'\n",
"        test_key = f'{root.name}_test'\n",
"\n",
"        # One row per clip; the nested per-class dict is not tabular and is left out.\n",
"        frames = [\n",
"            pd.DataFrame.from_dict(\n",
"                {column: values for column, values in data[split].items() if column != 'durations_per_class'}\n",
"            )\n",
"            for split in (key, test_key)\n",
"        ]\n",
"        # DataFrame.append was removed in pandas 2.0; pd.concat works on old and new versions.\n",
"        # ignore_index=True gives the combined frame a unique index.\n",
"        df = pd.concat(frames, ignore_index=True)\n",
"\n",
"        # '?' is added to the hue order next to the split's own class labels\n",
"        # (presumably the label of the test rows - verify against the label files).\n",
"        histplot = sns.histplot(\n",
"            data=df, x=\"durations\", hue=\"labels\",\n",
"            hue_order=sorted((*data[key]['durations_per_class'].keys(), '?')),\n",
"            log_scale=False, element=\"step\", fill=False, bins='auto',\n",
"            cumulative=False, stat=\"density\", common_norm=False,\n",
"        )\n",
"\n",
"        show_and_save(key, title)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% Histogram plotting for durations\n",
"is_executing": true
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"title = 'counts per class'\n",
"for data_option, split_data in data.items():\n",
"    # Tabular view of the split without the nested per-class dict\n",
"    columns = {name: values for name, values in split_data.items() if name != 'durations_per_class'}\n",
"    df = pd.DataFrame.from_dict(columns)\n",
"    print(df.groupby('labels').count())\n",
"\n",
"    plot = sns.countplot(data=df.sort_values('labels'), x='labels')\n",
"\n",
"    # Height threshold that decides where the count is written relative to the bar top\n",
"    y_mod = 150 if 'CCS' in data_option else 300\n",
"    for p in plot.patches:\n",
"        height = p.get_height()\n",
"        # Tall bars: 12 points below the bar top; short bars: 10 points above it\n",
"        y_offset = -12 if height > y_mod else 10\n",
"        plot.annotate(\n",
"            f'{round(height):.0f}',\n",
"            (p.get_x() + p.get_width() / 2., height),\n",
"            ha='center', va='center',\n",
"            size=12,\n",
"            xytext=(0, y_offset),\n",
"            textcoords='offset points',\n",
"        )\n",
"\n",
"    show_and_save(data_option, title)\n"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% Histogram plotting for class Count\n",
"is_executing": true
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"title = 'KDE-plot'\n",
"for data_option, split_data in data.items():\n",
"    # Tabular view of the split without the nested per-class dict\n",
"    columns = {name: values for name, values in split_data.items() if name != 'durations_per_class'}\n",
"    df = pd.DataFrame.from_dict(columns)\n",
"\n",
"    # One density curve per class label, classes in sorted order\n",
"    class_order = sorted(split_data['durations_per_class'].keys())\n",
"    kdeplot = sns.kdeplot(data=df, x='durations', hue='labels', hue_order=class_order)\n",
"\n",
"    show_and_save(data_option, title)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% KDE plotting for durations\n",
"is_executing": true
}
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}