Spaces:

archit11
/

yt-chunks

Build error

App Files Community

archit11 committed on Jun 3, 2024

Commit

990677b

verified ·

1 Parent(s): 1fca56e

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -10

app.py CHANGED Viewed

@@ -7,16 +7,17 @@ from semantic_router.encoders import HuggingFaceEncoder
 from faster_whisper import WhisperModel
 import spaces
 # Function to download YouTube audio
 def download_youtube_audio(url, output_path, preferred_quality="192"):
     ydl_opts = {
-        'format': 'bestaudio/best',  # Select best audio quality
         'postprocessors': [{
             'key': 'FFmpegExtractAudio',
             'preferredcodec': 'mp3',
             'preferredquality': preferred_quality,
         }],
-        'outtmpl': output_path,  # Specify the output path and file name
     }
     try:
@@ -32,7 +33,7 @@ def download_youtube_audio(url, output_path, preferred_quality="192"):
     except yt_dlp.utils.DownloadError as e:
         print(f"Error downloading audio: {e}")
-        return None  # Indicate failure
 # Function to transcribe audio using WhisperModel
 def transcribe(path, model_name):
@@ -42,7 +43,6 @@ def transcribe(path, model_name):
     return segments
 # Function to process segments and convert them into a DataFrame
-@spaces.GPU
 def process_segments(segments):
     result = {}
     print("Processing...")
@@ -60,9 +60,8 @@ def process_segments(segments):
     return df
 # Gradio interface functions
-@spaces.GPU
-def generate_transcript(youtube_url, model_name="large-v3"):
-    path = "downloaded_audio.mp3.mp3"
     download_youtube_audio(youtube_url, path)
     segments = transcribe(path, model_name)
     df = process_segments(segments)
@@ -100,15 +99,14 @@ def download_video(youtube_url):
     # Define download options
     ydl_opts = {
         'format': 'mp4',
-        'outtmpl': 'downloaded_video.%(ext)s',
         'quiet': True
     }
     # Extract video ID to check if already downloaded
     with yt_dlp.YoutubeDL({'quiet': True}) as ydl:
         info_dict = ydl.extract_info(youtube_url, download=False)
-        video_ext = info_dict.get('ext')
-        video_path = f'downloaded_video.mp4'
     # Check if video already downloaded
     if not os.path.exists(video_path):

 from faster_whisper import WhisperModel
 import spaces
 # Function to download YouTube audio
 def download_youtube_audio(url, output_path, preferred_quality="192"):
     ydl_opts = {
+        'format': 'bestaudio/best',
         'postprocessors': [{
             'key': 'FFmpegExtractAudio',
             'preferredcodec': 'mp3',
             'preferredquality': preferred_quality,
         }],
+        'outtmpl': output_path
     }
     try:
     except yt_dlp.utils.DownloadError as e:
         print(f"Error downloading audio: {e}")
+        return None
 # Function to transcribe audio using WhisperModel
 def transcribe(path, model_name):
     return segments
 # Function to process segments and convert them into a DataFrame
 def process_segments(segments):
     result = {}
     print("Processing...")
     return df
 # Gradio interface functions
+def generate_transcript(youtube_url, model_name="distil-large-v3"):
+    path = "downloaded_audio.mp3"
     download_youtube_audio(youtube_url, path)
     segments = transcribe(path, model_name)
     df = process_segments(segments)
     # Define download options
     ydl_opts = {
         'format': 'mp4',
+        'outtmpl': 'downloaded_video.mp4',
         'quiet': True
     }
     # Extract video ID to check if already downloaded
     with yt_dlp.YoutubeDL({'quiet': True}) as ydl:
         info_dict = ydl.extract_info(youtube_url, download=False)
+        video_path = 'downloaded_video.mp4'
     # Check if video already downloaded
     if not os.path.exists(video_path):