archit11 committed on
Commit
990677b
·
verified ·
1 Parent(s): 1fca56e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -10
app.py CHANGED
@@ -7,16 +7,17 @@ from semantic_router.encoders import HuggingFaceEncoder
7
  from faster_whisper import WhisperModel
8
  import spaces
9
 
 
10
  # Function to download YouTube audio
11
  def download_youtube_audio(url, output_path, preferred_quality="192"):
12
  ydl_opts = {
13
- 'format': 'bestaudio/best', # Select best audio quality
14
  'postprocessors': [{
15
  'key': 'FFmpegExtractAudio',
16
  'preferredcodec': 'mp3',
17
  'preferredquality': preferred_quality,
18
  }],
19
- 'outtmpl': output_path, # Specify the output path and file name
20
  }
21
 
22
  try:
@@ -32,7 +33,7 @@ def download_youtube_audio(url, output_path, preferred_quality="192"):
32
 
33
  except yt_dlp.utils.DownloadError as e:
34
  print(f"Error downloading audio: {e}")
35
- return None # Indicate failure
36
 
37
  # Function to transcribe audio using WhisperModel
38
  def transcribe(path, model_name):
@@ -42,7 +43,6 @@ def transcribe(path, model_name):
42
  return segments
43
 
44
  # Function to process segments and convert them into a DataFrame
45
- @spaces.GPU
46
  def process_segments(segments):
47
  result = {}
48
  print("Processing...")
@@ -60,9 +60,8 @@ def process_segments(segments):
60
  return df
61
 
62
  # Gradio interface functions
63
- @spaces.GPU
64
- def generate_transcript(youtube_url, model_name="large-v3"):
65
- path = "downloaded_audio.mp3.mp3"
66
  download_youtube_audio(youtube_url, path)
67
  segments = transcribe(path, model_name)
68
  df = process_segments(segments)
@@ -100,15 +99,14 @@ def download_video(youtube_url):
100
  # Define download options
101
  ydl_opts = {
102
  'format': 'mp4',
103
- 'outtmpl': 'downloaded_video.%(ext)s',
104
  'quiet': True
105
  }
106
 
107
  # Extract video ID to check if already downloaded
108
  with yt_dlp.YoutubeDL({'quiet': True}) as ydl:
109
  info_dict = ydl.extract_info(youtube_url, download=False)
110
- video_ext = info_dict.get('ext')
111
- video_path = f'downloaded_video.mp4'
112
 
113
  # Check if video already downloaded
114
  if not os.path.exists(video_path):
 
7
  from faster_whisper import WhisperModel
8
  import spaces
9
 
10
+
11
  # Function to download YouTube audio
12
  def download_youtube_audio(url, output_path, preferred_quality="192"):
13
  ydl_opts = {
14
+ 'format': 'bestaudio/best',
15
  'postprocessors': [{
16
  'key': 'FFmpegExtractAudio',
17
  'preferredcodec': 'mp3',
18
  'preferredquality': preferred_quality,
19
  }],
20
+ 'outtmpl': output_path
21
  }
22
 
23
  try:
 
33
 
34
  except yt_dlp.utils.DownloadError as e:
35
  print(f"Error downloading audio: {e}")
36
+ return None
37
 
38
  # Function to transcribe audio using WhisperModel
39
  def transcribe(path, model_name):
 
43
  return segments
44
 
45
  # Function to process segments and convert them into a DataFrame
 
46
  def process_segments(segments):
47
  result = {}
48
  print("Processing...")
 
60
  return df
61
 
62
  # Gradio interface functions
63
+ def generate_transcript(youtube_url, model_name="distil-large-v3"):
64
+ path = "downloaded_audio.mp3"
 
65
  download_youtube_audio(youtube_url, path)
66
  segments = transcribe(path, model_name)
67
  df = process_segments(segments)
 
99
  # Define download options
100
  ydl_opts = {
101
  'format': 'mp4',
102
+ 'outtmpl': 'downloaded_video.mp4',
103
  'quiet': True
104
  }
105
 
106
  # Extract video ID to check if already downloaded
107
  with yt_dlp.YoutubeDL({'quiet': True}) as ydl:
108
  info_dict = ydl.extract_info(youtube_url, download=False)
109
+ video_path = 'downloaded_video.mp4'
 
110
 
111
  # Check if video already downloaded
112
  if not os.path.exists(video_path):