anzorq committed on
Commit
1ce7124
1 Parent(s): 15ae509

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -7
app.py CHANGED
@@ -27,10 +27,11 @@ reverse_pattern = re.compile('|'.join(re.escape(key) for key in reverse_replacem
27
  def replace_symbols_back(text):
28
  return reverse_pattern.sub(lambda match: reverse_replacements[match.group(0)], text)
29
 
30
- def preprocess_audio(audio_tensor, original_sample_rate):
31
  audio_tensor = audio_tensor.to(dtype=torch.float32)
32
  audio_tensor = torch.mean(audio_tensor, dim=0, keepdim=True) # Convert to mono
33
- audio_tensor = audio_tensor / torch.max(torch.abs(audio_tensor)) # Normalize
 
34
  audio_tensor = torchaudio.functional.resample(audio_tensor, orig_freq=original_sample_rate, new_freq=16000) # Resample
35
  return audio_tensor
36
 
@@ -48,7 +49,7 @@ def transcribe_speech(audio, progress=gr.Progress()):
48
  transcription = pipe(audio_np, chunk_length_s=10)['text']
49
  return replace_symbols_back(transcription)
50
 
51
- def transcribe_from_youtube(url, apply_improvements, progress=gr.Progress()):
52
  progress(0, "Downloading YouTube audio...")
53
 
54
  yt = YouTube(url)
@@ -59,9 +60,9 @@ def transcribe_from_youtube(url, apply_improvements, progress=gr.Progress()):
59
 
60
  try:
61
  audio, original_sample_rate = torchaudio.load(audio_data)
62
- audio = preprocess_audio(audio, original_sample_rate)
63
 
64
- if apply_improvements:
65
  progress(0.4, "Applying Wiener filter...")
66
  audio = apply_wiener_filter(audio)
67
 
@@ -101,7 +102,10 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
101
  with gr.Tab("YouTube URL"):
102
  gr.Markdown("## Transcribe speech from YouTube video")
103
  youtube_url = gr.Textbox(label="Enter YouTube video URL")
104
- apply_improvements = gr.Checkbox(label="Apply Audio Improvements", value=True)
 
 
 
105
 
106
  with gr.Row():
107
  img = gr.Image(label="Thumbnail", height=240, width=240, scale=1)
@@ -110,7 +114,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
110
  transcribe_button = gr.Button("Transcribe")
111
  transcription_output = gr.Textbox(label="Transcription", placeholder="Transcription Output", lines=10)
112
 
113
- transcribe_button.click(fn=transcribe_from_youtube, inputs=[youtube_url, apply_improvements], outputs=transcription_output)
114
  youtube_url.change(populate_metadata, inputs=[youtube_url], outputs=[img, title])
115
 
116
  demo.launch()
 
27
  def replace_symbols_back(text):
28
  return reverse_pattern.sub(lambda match: reverse_replacements[match.group(0)], text)
29
 
30
+ def preprocess_audio(audio_tensor, original_sample_rate, apply_normalization):
31
  audio_tensor = audio_tensor.to(dtype=torch.float32)
32
  audio_tensor = torch.mean(audio_tensor, dim=0, keepdim=True) # Convert to mono
33
+ if apply_normalization:
34
+ audio_tensor = audio_tensor / torch.max(torch.abs(audio_tensor)) # Normalize
35
  audio_tensor = torchaudio.functional.resample(audio_tensor, orig_freq=original_sample_rate, new_freq=16000) # Resample
36
  return audio_tensor
37
 
 
49
  transcription = pipe(audio_np, chunk_length_s=10)['text']
50
  return replace_symbols_back(transcription)
51
 
52
+ def transcribe_from_youtube(url, apply_wiener, apply_normalization, progress=gr.Progress()):
53
  progress(0, "Downloading YouTube audio...")
54
 
55
  yt = YouTube(url)
 
60
 
61
  try:
62
  audio, original_sample_rate = torchaudio.load(audio_data)
63
+ audio = preprocess_audio(audio, original_sample_rate, apply_normalization)
64
 
65
+ if apply_wiener:
66
  progress(0.4, "Applying Wiener filter...")
67
  audio = apply_wiener_filter(audio)
68
 
 
102
  with gr.Tab("YouTube URL"):
103
  gr.Markdown("## Transcribe speech from YouTube video")
104
  youtube_url = gr.Textbox(label="Enter YouTube video URL")
105
+
106
+ with gr.Accordion("Audio Improvements", open=False):
107
+ apply_wiener = gr.Checkbox(label="Apply Wiener Filter", info="Reduce noise", value=False)
108
+ apply_normalization = gr.Checkbox(label="Apply Normalization", info="Normalize audio volume", value=True)
109
 
110
  with gr.Row():
111
  img = gr.Image(label="Thumbnail", height=240, width=240, scale=1)
 
114
  transcribe_button = gr.Button("Transcribe")
115
  transcription_output = gr.Textbox(label="Transcription", placeholder="Transcription Output", lines=10)
116
 
117
+ transcribe_button.click(fn=transcribe_from_youtube, inputs=[youtube_url, apply_wiener, apply_normalization], outputs=transcription_output)
118
  youtube_url.change(populate_metadata, inputs=[youtube_url], outputs=[img, title])
119
 
120
  demo.launch()