Spaces:

anzorq
/

w2v-bert-2.0-kbd

Paused

App Files Files Community

anzorq committed on May 21, 2024

Commit

1ce7124

verified ·

1 Parent(s): 15ae509

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -7

app.py CHANGED Viewed

@@ -27,10 +27,11 @@ reverse_pattern = re.compile('|'.join(re.escape(key) for key in reverse_replacem
 def replace_symbols_back(text):
     return reverse_pattern.sub(lambda match: reverse_replacements[match.group(0)], text)
-def preprocess_audio(audio_tensor, original_sample_rate):
     audio_tensor = audio_tensor.to(dtype=torch.float32)
     audio_tensor = torch.mean(audio_tensor, dim=0, keepdim=True)  # Convert to mono
-    audio_tensor = audio_tensor / torch.max(torch.abs(audio_tensor))  # Normalize
     audio_tensor = torchaudio.functional.resample(audio_tensor, orig_freq=original_sample_rate, new_freq=16000)  # Resample
     return audio_tensor
@@ -48,7 +49,7 @@ def transcribe_speech(audio, progress=gr.Progress()):
     transcription = pipe(audio_np, chunk_length_s=10)['text']
     return replace_symbols_back(transcription)
-def transcribe_from_youtube(url, apply_improvements, progress=gr.Progress()):
     progress(0, "Downloading YouTube audio...")
     yt = YouTube(url)
@@ -59,9 +60,9 @@ def transcribe_from_youtube(url, apply_improvements, progress=gr.Progress()):
     try:
         audio, original_sample_rate = torchaudio.load(audio_data)
-        audio = preprocess_audio(audio, original_sample_rate)
-        if apply_improvements:
             progress(0.4, "Applying Wiener filter...")
             audio = apply_wiener_filter(audio)
@@ -101,7 +102,10 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     with gr.Tab("YouTube URL"):
         gr.Markdown("## Transcribe speech from YouTube video")
         youtube_url = gr.Textbox(label="Enter YouTube video URL")
-        apply_improvements = gr.Checkbox(label="Apply Audio Improvements", value=True)
         with gr.Row():
             img = gr.Image(label="Thumbnail", height=240, width=240, scale=1)
@@ -110,7 +114,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         transcribe_button = gr.Button("Transcribe")
         transcription_output = gr.Textbox(label="Transcription", placeholder="Transcription Output", lines=10)
-        transcribe_button.click(fn=transcribe_from_youtube, inputs=[youtube_url, apply_improvements], outputs=transcription_output)
         youtube_url.change(populate_metadata, inputs=[youtube_url], outputs=[img, title])
 demo.launch()

 def replace_symbols_back(text):
     return reverse_pattern.sub(lambda match: reverse_replacements[match.group(0)], text)
+def preprocess_audio(audio_tensor, original_sample_rate, apply_normalization):
     audio_tensor = audio_tensor.to(dtype=torch.float32)
     audio_tensor = torch.mean(audio_tensor, dim=0, keepdim=True)  # Convert to mono
+    if apply_normalization:
+        audio_tensor = audio_tensor / torch.max(torch.abs(audio_tensor))  # Normalize
     audio_tensor = torchaudio.functional.resample(audio_tensor, orig_freq=original_sample_rate, new_freq=16000)  # Resample
     return audio_tensor
     transcription = pipe(audio_np, chunk_length_s=10)['text']
     return replace_symbols_back(transcription)
+def transcribe_from_youtube(url, apply_wiener, apply_normalization, progress=gr.Progress()):
     progress(0, "Downloading YouTube audio...")
     yt = YouTube(url)
     try:
         audio, original_sample_rate = torchaudio.load(audio_data)
+        audio = preprocess_audio(audio, original_sample_rate, apply_normalization)
+        if apply_wiener:
             progress(0.4, "Applying Wiener filter...")
             audio = apply_wiener_filter(audio)
     with gr.Tab("YouTube URL"):
         gr.Markdown("## Transcribe speech from YouTube video")
         youtube_url = gr.Textbox(label="Enter YouTube video URL")
+        with gr.Accordion("Audio Improvements", open=False):
+            apply_wiener = gr.Checkbox(label="Apply Wiener Filter", info="Reduce noise", value=False)
+            apply_normalization = gr.Checkbox(label="Apply Normalization", info="Normalize audio volume", value=True)
         with gr.Row():
             img = gr.Image(label="Thumbnail", height=240, width=240, scale=1)
         transcribe_button = gr.Button("Transcribe")
         transcription_output = gr.Textbox(label="Transcription", placeholder="Transcription Output", lines=10)
+        transcribe_button.click(fn=transcribe_from_youtube, inputs=[youtube_url, apply_wiener, apply_normalization], outputs=transcription_output)
         youtube_url.change(populate_metadata, inputs=[youtube_url], outputs=[img, title])
 demo.launch()