{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "55895dc1",
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "d1e95c40",
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "\n",
    "# Make the parent directory importable (dataset.py is loaded from '../dataset.py').\n",
    "# Guarded so that re-running this cell does not stack duplicate sys.path entries.\n",
    "if '..' not in sys.path:\n",
    "    sys.path.append('..')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "id": "0ae6ce32",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import torch\n",
    "import torchaudio  # imported explicitly; do not rely on `dataset` re-exporting it\n",
    "from torchsummary import summary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "id": "4200acc4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import only the names this notebook uses instead of `from dataset import *`,\n",
    "# so it is clear where every name comes from.\n",
    "from dataset import VoiceDataset\n",
    "from cnn import CNNetwork"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "id": "b98d408a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Using device cpu\n"
     ]
    }
   ],
   "source": [
    "# Use the GPU when torch can see one, otherwise fall back to the CPU.\n",
    "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
    "print(f\"Using device {device}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "id": "f26723ab",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Single source of truth for the value passed to both the mel transform and the dataset.\n",
    "# NOTE(review): presumably VoiceDataset's third argument is its target sample rate - confirm in dataset.py.\n",
    "SAMPLE_RATE = 16000\n",
    "\n",
    "mel_spectrogram = torchaudio.transforms.MelSpectrogram(\n",
    "    sample_rate=SAMPLE_RATE,\n",
    "    n_fft=1024,\n",
    "    hop_length=512,\n",
    "    n_mels=64,\n",
    ")\n",
    "dataset = VoiceDataset('../data/train', mel_spectrogram, SAMPLE_RATE, device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "id": "7664a918",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5718"
      ]
     },
     "execution_count": 93,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(dataset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "id": "0adfe082",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(tensor([[[0.2647, 0.0247, 0.0324, ..., 0.0230, 0.1026, 0.5454],\n",
       " [0.0812, 0.0178, 0.0890, ..., 0.2376, 0.5061, 0.5292],\n",
       " [0.0052, 0.0212, 0.1341, ..., 0.9336, 0.2778, 0.1372],\n",
       " ...,\n",
       " [0.5154, 0.3950, 0.4497, ..., 0.4916, 0.4505, 0.7709],\n",
       " [0.1919, 0.4804, 0.5144, ..., 0.5931, 0.4466, 0.4706],\n",
       " [0.1208, 0.4357, 0.4016, ..., 0.5168, 0.7007, 0.3696]]]),\n",
       " 'aman')"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataset[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "id": "6f095274",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "torch.Size([1, 64, 157])"
      ]
     },
     "execution_count": 83,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataset[0][0].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "id": "362d6f74",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------------------------------------\n",
      " Layer (type) Output Shape Param #\n",
      "================================================================\n",
      " Conv2d-1 [-1, 16, 66, 46] 160\n",
      " ReLU-2 [-1, 16, 66, 46] 0\n",
      " MaxPool2d-3 [-1, 16, 33, 23] 0\n",
      " Conv2d-4 [-1, 32, 35, 25] 4,640\n",
      " ReLU-5 [-1, 32, 35, 25] 0\n",
      " MaxPool2d-6 [-1, 32, 17, 12] 0\n",
      " Conv2d-7 [-1, 64, 19, 14] 18,496\n",
      " ReLU-8 [-1, 64, 19, 14] 0\n",
      " MaxPool2d-9 [-1, 64, 9, 7] 0\n",
      " Conv2d-10 [-1, 128, 11, 9] 73,856\n",
      " ReLU-11 [-1, 128, 11, 9] 0\n",
      " MaxPool2d-12 [-1, 128, 5, 4] 0\n",
      " Flatten-13 [-1, 2560] 0\n",
      " Linear-14 [-1, 10] 25,610\n",
      " Softmax-15 [-1, 10] 0\n",
      "================================================================\n",
      "Total params: 122,762\n",
      "Trainable params: 122,762\n",
      "Non-trainable params: 0\n",
      "----------------------------------------------------------------\n",
      "Input size (MB): 0.01\n",
      "Forward/backward pass size (MB): 1.83\n",
      "Params size (MB): 0.47\n",
      "Estimated Total Size (MB): 2.31\n",
      "----------------------------------------------------------------\n"
     ]
    }
   ],
   "source": [
    "# Put the model on the selected device and tell torchsummary which device to use:\n",
    "# summary() defaults to device=\"cuda\", which feeds CUDA tensors to CPU weights\n",
    "# (and fails) on a machine where CUDA is available.\n",
    "# NOTE(review): dataset items are shaped (1, 64, 157) but this summary uses (1, 64, 44) -\n",
    "# confirm which input width the model is meant to receive.\n",
    "cnn = CNNetwork().to(device)\n",
    "summary(cnn, (1, 64, 44), device=device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "id": "d2da6515",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(0)"
      ]
     },
     "execution_count": 91,
     "metadata":
{}, "output_type": "execute_result" } ], "source": [ "torch.tensor(0)" ] }, { "cell_type": "code", "execution_count": 95, "id": "8a10cc8c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'aman': 0, 'imran': 1, 'labib': 2}" ] }, "execution_count": 95, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dataset.label_mapping" ] }, { "cell_type": "code", "execution_count": 98, "id": "e65a95c3", "metadata": {}, "outputs": [ { "ename": "TypeError", "evalue": "join() argument must be str, bytes, or os.PathLike object, not 'int'", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[98], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdataset\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\n", "File \u001b[0;32m~/ml-sandbox/VoID/notebooks/../dataset.py:41\u001b[0m, in \u001b[0;36mVoiceDataset.__getitem__\u001b[0;34m(self, index)\u001b[0m\n\u001b[1;32m 38\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__getitem__\u001b[39m(\u001b[38;5;28mself\u001b[39m, index):\n\u001b[1;32m 39\u001b[0m \u001b[38;5;66;03m# get file\u001b[39;00m\n\u001b[1;32m 40\u001b[0m file, label \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maudio_files_labels[index]\n\u001b[0;32m---> 41\u001b[0m filepath \u001b[38;5;241m=\u001b[39m \u001b[43mos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_data_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlabel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfile\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 43\u001b[0m \u001b[38;5;66;03m# load 
wav\u001b[39;00m\n\u001b[1;32m 44\u001b[0m wav, sr \u001b[38;5;241m=\u001b[39m torchaudio\u001b[38;5;241m.\u001b[39mload(filepath, normalize\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n", "File \u001b[0;32m~/anaconda3/envs/void/lib/python3.9/posixpath.py:90\u001b[0m, in \u001b[0;36mjoin\u001b[0;34m(a, *p)\u001b[0m\n\u001b[1;32m 88\u001b[0m path \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m sep \u001b[38;5;241m+\u001b[39m b\n\u001b[1;32m 89\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mTypeError\u001b[39;00m, \u001b[38;5;167;01mAttributeError\u001b[39;00m, \u001b[38;5;167;01mBytesWarning\u001b[39;00m):\n\u001b[0;32m---> 90\u001b[0m \u001b[43mgenericpath\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_check_arg_types\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mjoin\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mp\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 91\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m\n\u001b[1;32m 92\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m path\n", "File \u001b[0;32m~/anaconda3/envs/void/lib/python3.9/genericpath.py:152\u001b[0m, in \u001b[0;36m_check_arg_types\u001b[0;34m(funcname, *args)\u001b[0m\n\u001b[1;32m 150\u001b[0m hasbytes \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 151\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 152\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfuncname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m() argument must be str, bytes, or \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 153\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mos.PathLike object, not 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00ms\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 154\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m hasstr \u001b[38;5;129;01mand\u001b[39;00m hasbytes:\n\u001b[1;32m 155\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCan\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt mix strings and bytes in path components\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n", "\u001b[0;31mTypeError\u001b[0m: join() argument must be str, bytes, or os.PathLike object, not 'int'" ] } ], "source": [ "dataset[0]" ] }, { "cell_type": "code", "execution_count": 104, "id": "a1357e5b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'2023-05-12 22:28:06.556207'" ] }, "execution_count": 104, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from datetime import datetime\n", "now = datetime.now()" ] }, { "cell_type": "code", "execution_count": null, "id": "190c8d4b", "metadata": {}, "outputs": [], "source": [ "now.strftime(\"%Y%m%d-%H%M%S\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.16" } }, "nbformat": 4, "nbformat_minor": 5 }