{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "from urllib.parse import parse_qs, urlparse\n",
    "\n",
    "import pandas as pd\n",
    "import whisper\n",
    "from pytube import YouTube\n",
    "from tqdm import tqdm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|█████████████████████████████████████| 1.42G/1.42G [03:21<00:00, 7.57MiB/s]\n"
     ]
    }
   ],
   "source": [
    "# Load the 'medium' Whisper model (the recorded run below shows a 1.42G transfer).\n",
    "model = whisper.load_model('medium')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [],
   "source": [
    "# YouTube watch URLs; `vids` below holds the matching video IDs in the same order.\n",
    "videos = [\n",
    "    'https://www.youtube.com/watch?v=8mQZzlQXK1Q',\n",
    "    'https://www.youtube.com/watch?v=dJ1eDL15_Lw',\n",
    "    'https://www.youtube.com/watch?v=ikYCr-0GAfw',\n",
    "    'https://www.youtube.com/watch?v=2QO8tgSA6oQ',\n",
    "    'https://www.youtube.com/watch?v=-OM8RYUl_rg',\n",
    "    'https://www.youtube.com/watch?v=TGetyy-LKcY',\n",
    "    'https://www.youtube.com/watch?v=sDrakgSYvzc',\n",
    "    'https://www.youtube.com/watch?v=LmB3ZQ2F1MY',\n",
    "    'https://www.youtube.com/watch?v=CdyJ0iB_k00',\n",
    "    'https://www.youtube.com/watch?v=gGqbEH69ZaI',\n",
    "    'https://www.youtube.com/watch?v=2byXYrlDkZs',\n",
    "    'https://www.youtube.com/watch?v=QRQMPCs7m0E',\n",
    "    'https://www.youtube.com/watch?v=D1QKYLcvoU8',\n",
    "    'https://www.youtube.com/watch?v=5eyE20HpaCo',\n",
    "    'https://www.youtube.com/watch?v=XeeFp63L05k',\n",
    "    'https://www.youtube.com/watch?v=wKPPf9YNv5c',\n",
    "    'https://www.youtube.com/watch?v=xY2ftYAnUso',\n",
    "    'https://www.youtube.com/watch?v=gOt--6HPrIo',\n",
    "    'https://www.youtube.com/watch?v=L4zFpKpdub8',\n",
    "    'https://www.youtube.com/watch?v=4YhpWZCdiZc',\n",
    "    'https://www.youtube.com/watch?v=l2SNesXZoGM',\n",
    "    'https://www.youtube.com/watch?v=8-2WQF3SWwo',\n",
    "    'https://www.youtube.com/watch?v=bM0BeeA8RdY',\n",
    "    'https://www.youtube.com/watch?v=Zl7MbbgE4aU',\n",
    "    'https://www.youtube.com/watch?v=jQgkVKGqBCE',\n",
    "    'https://www.youtube.com/watch?v=8aDFvvjC6XM',\n",
    "    'https://www.youtube.com/watch?v=W3hMmZQAdhw',\n",
    "]\n",
    "\n",
    "\n",
    "def extract_video_id(url: str) -> str:\n",
    "    \"\"\"Return the value of the ``v`` query parameter of a YouTube watch URL.\n",
    "\n",
    "    The query string is parsed instead of splitting the URL on '=', so the\n",
    "    result stays correct when the URL carries additional parameters\n",
    "    (for example ``&t=30s``) or when ``v`` is not the first parameter.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if the URL has no ``v`` query parameter.\n",
    "    \"\"\"\n",
    "    ids = parse_qs(urlparse(url).query).get('v')\n",
    "    if not ids:\n",
    "        raise ValueError(f'no v= query parameter in URL: {url!r}')\n",
    "    return ids[0]\n",
    "\n",
    "\n",
    "# Select the v= part of each URL.\n",
    "vids = [extract_video_id(url) for url in videos]"
   ]
  },
  { "cell_type": "code", "execution_count": 82, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ " 0%| | 0/2 [00:00