{
"cells": [
{
"cell_type": "code",
"execution_count": 19,
"id": "27deb847",
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "markdown",
"id": "2536ab2b",
"metadata": {},
"source": [
"# Import Deps"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "9bbb376c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: librosa in /Users/amanmibra/anaconda3/envs/void/lib/python3.9/site-packages (0.10.0)\n",
"Requirement already satisfied: soxr>=0.3.2 in /Users/amanmibra/anaconda3/envs/void/lib/python3.9/site-packages (from librosa) (0.3.5)\n",
"Requirement already satisfied: lazy-loader>=0.1 in /Users/amanmibra/anaconda3/envs/void/lib/python3.9/site-packages (from librosa) (0.1)\n",
"Requirement already satisfied: scikit-learn>=0.20.0 in /Users/amanmibra/anaconda3/envs/void/lib/python3.9/site-packages (from librosa) (1.2.2)\n",
"Requirement already satisfied: joblib>=0.14 in /Users/amanmibra/anaconda3/envs/void/lib/python3.9/site-packages (from librosa) (1.1.1)\n",
"Requirement already satisfied: audioread>=2.1.9 in /Users/amanmibra/anaconda3/envs/void/lib/python3.9/site-packages (from librosa) (3.0.0)\n",
"Requirement already satisfied: decorator>=4.3.0 in /Users/amanmibra/anaconda3/envs/void/lib/python3.9/site-packages (from librosa) (5.1.1)\n",
"Requirement already satisfied: soundfile>=0.12.1 in /Users/amanmibra/anaconda3/envs/void/lib/python3.9/site-packages (from librosa) (0.12.1)\n",
"Requirement already satisfied: numba>=0.51.0 in /Users/amanmibra/anaconda3/envs/void/lib/python3.9/site-packages (from librosa) (0.56.4)\n",
"Requirement already satisfied: pooch>=1.0 in /Users/amanmibra/anaconda3/envs/void/lib/python3.9/site-packages (from librosa) (1.4.0)\n",
"Requirement already satisfied: msgpack>=1.0 in /Users/amanmibra/anaconda3/envs/void/lib/python3.9/site-packages (from librosa) (1.0.3)\n",
"Requirement already satisfied: numpy>=1.20.3 in /Users/amanmibra/anaconda3/envs/void/lib/python3.9/site-packages (from librosa) (1.23.5)\n",
"Requirement already satisfied: scipy>=1.2.0 in /Users/amanmibra/anaconda3/envs/void/lib/python3.9/site-packages (from librosa) (1.10.0)\n",
"Requirement already satisfied: typing-extensions>=4.1.1 in /Users/amanmibra/anaconda3/envs/void/lib/python3.9/site-packages (from librosa) (4.5.0)\n",
"Requirement already satisfied: setuptools in /Users/amanmibra/anaconda3/envs/void/lib/python3.9/site-packages (from numba>=0.51.0->librosa) (66.0.0)\n",
"Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /Users/amanmibra/anaconda3/envs/void/lib/python3.9/site-packages (from numba>=0.51.0->librosa) (0.39.1)\n",
"Requirement already satisfied: appdirs in /Users/amanmibra/anaconda3/envs/void/lib/python3.9/site-packages (from pooch>=1.0->librosa) (1.4.4)\n",
"Requirement already satisfied: packaging in /Users/amanmibra/anaconda3/envs/void/lib/python3.9/site-packages (from pooch>=1.0->librosa) (23.0)\n",
"Requirement already satisfied: requests in /Users/amanmibra/anaconda3/envs/void/lib/python3.9/site-packages (from pooch>=1.0->librosa) (2.29.0)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in /Users/amanmibra/anaconda3/envs/void/lib/python3.9/site-packages (from scikit-learn>=0.20.0->librosa) (2.2.0)\n",
"Requirement already satisfied: cffi>=1.0 in /Users/amanmibra/anaconda3/envs/void/lib/python3.9/site-packages (from soundfile>=0.12.1->librosa) (1.15.1)\n",
"Requirement already satisfied: pycparser in /Users/amanmibra/anaconda3/envs/void/lib/python3.9/site-packages (from cffi>=1.0->soundfile>=0.12.1->librosa) (2.21)\n",
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/amanmibra/anaconda3/envs/void/lib/python3.9/site-packages (from requests->pooch>=1.0->librosa) (1.26.15)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /Users/amanmibra/anaconda3/envs/void/lib/python3.9/site-packages (from requests->pooch>=1.0->librosa) (2.0.4)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /Users/amanmibra/anaconda3/envs/void/lib/python3.9/site-packages (from requests->pooch>=1.0->librosa) (2023.5.7)\n",
"Requirement already satisfied: idna<4,>=2.5 in /Users/amanmibra/anaconda3/envs/void/lib/python3.9/site-packages (from requests->pooch>=1.0->librosa) (3.4)\n"
]
}
],
"source": [
"# %pip installs into the environment of the running kernel, whereas !pip\n",
"# uses whichever pip executable happens to be first on the shell PATH.\n",
"%pip install librosa"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "2019a9c0",
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"sys.path.append('..')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "24b482dc",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import matplotlib.pyplot as plt\n",
"from IPython.display import Audio\n",
"import numpy as np\n",
"import librosa\n",
"\n",
"# torch\n",
"import torch\n",
"import torchaudio\n",
"from torch.utils.data import DataLoader\n",
"\n",
"# model training\n",
"from cnn import CNNetwork\n",
"from dataset import VoiceDataset"
]
},
{
"cell_type": "markdown",
"id": "2bf224a0",
"metadata": {},
"source": [
"# Test Dataset"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "77a0394b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Using cpu device.\n"
]
}
],
"source": [
"# Run on the GPU when torch can see one, otherwise fall back to the CPU.\n",
"device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
"print(f\"Using {device} device.\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "a52cde0b",
"metadata": {},
"outputs": [],
"source": [
"# Sample rate (Hz) shared by the mel transform below.\n",
"SAMPLE_RATE = 48000\n",
"\n",
"# Test clips live under ../data/test, relative to the working directory.\n",
"DATA_PATH = os.path.join('..', 'data')\n",
"TEST_PATH = os.path.join(DATA_PATH, 'test')\n",
"\n",
"# Mel-spectrogram transform: 2048-point FFT, hop of 512 samples, 128 mel bands.\n",
"MEL_SPEC = torchaudio.transforms.MelSpectrogram(\n",
"    sample_rate=SAMPLE_RATE,\n",
"    n_fft=2048,\n",
"    hop_length=512,\n",
"    n_mels=128,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "5c1d4624",
"metadata": {},
"outputs": [],
"source": [
"test_dataset = VoiceDataset(TEST_PATH, MEL_SPEC, device, SAMPLE_RATE)"
]
},
{
"cell_type": "markdown",
"id": "52aaf0b6",
"metadata": {},
"source": [
"## Choose Example"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "de395f46",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"19"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(test_dataset)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "58bb8a16",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"i = 0\n",
"test_dataset[i][1]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "5a7e8e7b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'aman': 0, 'imran': 1, 'labib': 2}"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test_dataset.label_mapping"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "6f759da6",
"metadata": {},
"outputs": [],
"source": [
"wav, actual_output = test_dataset[i]"
]
},
{
"cell_type": "markdown",
"id": "d79b6e47",
"metadata": {},
"source": [
"## Show Example Spec"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "1cb1b126",
"metadata": {},
"outputs": [],
"source": [
"def plot_waveform(waveform, sample_rate, title=\"Waveform\"):\n",
"    \"\"\"Plot every channel of an audio tensor against time in seconds.\n",
"\n",
"    Args:\n",
"        waveform: torch tensor laid out as (channels, frames).\n",
"        sample_rate: samples per second; scales the x axis to seconds.\n",
"        title: figure-level title.\n",
"    \"\"\"\n",
"    # detach()/cpu() keep this working for tensors that live on the GPU or\n",
"    # track gradients; a bare .numpy() raises for both.\n",
"    samples = waveform.detach().cpu().numpy()\n",
"    num_channels, num_frames = samples.shape\n",
"    time = np.arange(0, num_frames) / sample_rate\n",
"\n",
"    # squeeze=False always returns a 2-D grid of axes, so mono and\n",
"    # multi-channel audio go through the same code path.\n",
"    fig, axes = plt.subplots(num_channels, 1, squeeze=False)\n",
"    for ch, ax in enumerate(axes[:, 0]):\n",
"        ax.plot(time, samples[ch])\n",
"        ax.grid(True)\n",
"        # Channel labels are only useful when there is more than one.\n",
"        if num_channels > 1:\n",
"            ax.set_ylabel(f\"Channel: {ch+1}\")\n",
"    fig.suptitle(title)\n",
"    plt.show(block=False)\n",
"\n",
"def plot_spectrogram(specgram, title=None, ylabel=\"freq_bin\"):\n",
"    \"\"\"Draw a power spectrogram on a decibel scale with a colorbar.\n",
"\n",
"    Args:\n",
"        specgram: power spectrogram as a torch tensor or array-like;\n",
"            presumably shaped (freq_bin, frame) - TODO confirm with callers.\n",
"        title: axes title; defaults to \"Spectrogram (db)\" when falsy.\n",
"        ylabel: label for the y axis.\n",
"    \"\"\"\n",
"    # NumPy cannot implicitly convert tensors that sit on the GPU or track\n",
"    # gradients, so move them to host memory before handing them to librosa.\n",
"    if isinstance(specgram, torch.Tensor):\n",
"        specgram = specgram.detach().cpu().numpy()\n",
"\n",
"    fig, ax = plt.subplots(1, 1)\n",
"    ax.set_title(title or \"Spectrogram (db)\")\n",
"    ax.set_ylabel(ylabel)\n",
"    ax.set_xlabel(\"frame\")\n",
"    # origin=\"lower\" draws row 0 at the bottom of the image.\n",
"    im = ax.imshow(librosa.power_to_db(specgram), origin=\"lower\", aspect=\"auto\")\n",
"    fig.colorbar(im, ax=ax)\n",
"    plt.show(block=False)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "74ae8c37",
"metadata": {},
"outputs": [],
"source": [
"file, label = test_dataset.audio_files_labels[i]\n",
"path = os.path.join(TEST_PATH, label, file)\n",
"audio_wavform, sr = torchaudio.load(path)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "2c468e16",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Audio(audio_wavform, rate=sr)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "c6ae3a30",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"