{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "55895dc1",
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "d1e95c40",
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append('..')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "id": "0ae6ce32",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import torch\n",
    "from torchsummary import summary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "id": "4200acc4",
   "metadata": {},
   "outputs": [],
   "source": [
    "from dataset import *\n",
    "from cnn import CNNetwork"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "id": "b98d408a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Using device cpu\n"
     ]
    }
   ],
   "source": [
    "if torch.cuda.is_available():\n",
    "        device = \"cuda\"\n",
    "else:\n",
    "        device = \"cpu\"\n",
    "print(f\"Using device {device}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "id": "f26723ab",
   "metadata": {},
   "outputs": [],
   "source": [
    "mel_spectrogram = torchaudio.transforms.MelSpectrogram(\n",
    "        sample_rate=16000,\n",
    "        n_fft=1024,\n",
    "        hop_length=512,\n",
    "        n_mels=64\n",
    "    )\n",
    "dataset = VoiceDataset('../data/train', mel_spectrogram, 16000, device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "id": "7664a918",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5718"
      ]
     },
     "execution_count": 93,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(dataset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "id": "0adfe082",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(tensor([[[0.2647, 0.0247, 0.0324,  ..., 0.0230, 0.1026, 0.5454],\n",
       "          [0.0812, 0.0178, 0.0890,  ..., 0.2376, 0.5061, 0.5292],\n",
       "          [0.0052, 0.0212, 0.1341,  ..., 0.9336, 0.2778, 0.1372],\n",
       "          ...,\n",
       "          [0.5154, 0.3950, 0.4497,  ..., 0.4916, 0.4505, 0.7709],\n",
       "          [0.1919, 0.4804, 0.5144,  ..., 0.5931, 0.4466, 0.4706],\n",
       "          [0.1208, 0.4357, 0.4016,  ..., 0.5168, 0.7007, 0.3696]]]),\n",
       " 'aman')"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataset[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "id": "6f095274",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "torch.Size([1, 64, 157])"
      ]
     },
     "execution_count": 83,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataset[0][0].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "id": "362d6f74",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------------------------------------\n",
      "        Layer (type)               Output Shape         Param #\n",
      "================================================================\n",
      "            Conv2d-1           [-1, 16, 66, 46]             160\n",
      "              ReLU-2           [-1, 16, 66, 46]               0\n",
      "         MaxPool2d-3           [-1, 16, 33, 23]               0\n",
      "            Conv2d-4           [-1, 32, 35, 25]           4,640\n",
      "              ReLU-5           [-1, 32, 35, 25]               0\n",
      "         MaxPool2d-6           [-1, 32, 17, 12]               0\n",
      "            Conv2d-7           [-1, 64, 19, 14]          18,496\n",
      "              ReLU-8           [-1, 64, 19, 14]               0\n",
      "         MaxPool2d-9             [-1, 64, 9, 7]               0\n",
      "           Conv2d-10           [-1, 128, 11, 9]          73,856\n",
      "             ReLU-11           [-1, 128, 11, 9]               0\n",
      "        MaxPool2d-12            [-1, 128, 5, 4]               0\n",
      "          Flatten-13                 [-1, 2560]               0\n",
      "           Linear-14                   [-1, 10]          25,610\n",
      "          Softmax-15                   [-1, 10]               0\n",
      "================================================================\n",
      "Total params: 122,762\n",
      "Trainable params: 122,762\n",
      "Non-trainable params: 0\n",
      "----------------------------------------------------------------\n",
      "Input size (MB): 0.01\n",
      "Forward/backward pass size (MB): 1.83\n",
      "Params size (MB): 0.47\n",
      "Estimated Total Size (MB): 2.31\n",
      "----------------------------------------------------------------\n"
     ]
    }
   ],
   "source": [
    "cnn = CNNetwork()\n",
    "summary(cnn, (1, 64, 44))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "id": "d2da6515",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(0)"
      ]
     },
     "execution_count": 91,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "torch.tensor(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "id": "8a10cc8c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'aman': 0, 'imran': 1, 'labib': 2}"
      ]
     },
     "execution_count": 95,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataset.label_mapping"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "id": "e65a95c3",
   "metadata": {},
   "outputs": [
    {
     "ename": "TypeError",
     "evalue": "join() argument must be str, bytes, or os.PathLike object, not 'int'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[98], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdataset\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\n",
      "File \u001b[0;32m~/ml-sandbox/VoID/notebooks/../dataset.py:41\u001b[0m, in \u001b[0;36mVoiceDataset.__getitem__\u001b[0;34m(self, index)\u001b[0m\n\u001b[1;32m     38\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__getitem__\u001b[39m(\u001b[38;5;28mself\u001b[39m, index):\n\u001b[1;32m     39\u001b[0m     \u001b[38;5;66;03m# get file\u001b[39;00m\n\u001b[1;32m     40\u001b[0m     file, label \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maudio_files_labels[index]\n\u001b[0;32m---> 41\u001b[0m     filepath \u001b[38;5;241m=\u001b[39m \u001b[43mos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_data_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlabel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfile\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     43\u001b[0m     \u001b[38;5;66;03m# load wav\u001b[39;00m\n\u001b[1;32m     44\u001b[0m     wav, sr \u001b[38;5;241m=\u001b[39m torchaudio\u001b[38;5;241m.\u001b[39mload(filepath, normalize\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n",
      "File \u001b[0;32m~/anaconda3/envs/void/lib/python3.9/posixpath.py:90\u001b[0m, in \u001b[0;36mjoin\u001b[0;34m(a, *p)\u001b[0m\n\u001b[1;32m     88\u001b[0m             path \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m sep \u001b[38;5;241m+\u001b[39m b\n\u001b[1;32m     89\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mTypeError\u001b[39;00m, \u001b[38;5;167;01mAttributeError\u001b[39;00m, \u001b[38;5;167;01mBytesWarning\u001b[39;00m):\n\u001b[0;32m---> 90\u001b[0m     \u001b[43mgenericpath\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_check_arg_types\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mjoin\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mp\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     91\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m\n\u001b[1;32m     92\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m path\n",
      "File \u001b[0;32m~/anaconda3/envs/void/lib/python3.9/genericpath.py:152\u001b[0m, in \u001b[0;36m_check_arg_types\u001b[0;34m(funcname, *args)\u001b[0m\n\u001b[1;32m    150\u001b[0m         hasbytes \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m    151\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 152\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfuncname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m() argument must be str, bytes, or \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m    153\u001b[0m                         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mos.PathLike object, not \u001b[39m\u001b[38;5;132;01m{\u001b[39;00ms\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m    154\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m hasstr \u001b[38;5;129;01mand\u001b[39;00m hasbytes:\n\u001b[1;32m    155\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCan\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt mix strings and bytes in path components\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n",
      "\u001b[0;31mTypeError\u001b[0m: join() argument must be str, bytes, or os.PathLike object, not 'int'"
     ]
    }
   ],
   "source": [
    "dataset[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "id": "a1357e5b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'2023-05-12 22:28:06.556207'"
      ]
     },
     "execution_count": 104,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from datetime import datetime\n",
    "now = datetime.now()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "190c8d4b",
   "metadata": {},
   "outputs": [],
   "source": [
    "now.strftime(\"%Y%m%d-%H%M%S\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}