anzorq's picture
Update app.py
29c16a4 verified
raw
history blame
2.49 kB
import spaces
import os
import gradio as gr
import torch
import torchaudio
from transformers import AutoModelForCTC, Wav2Vec2BertProcessor
from pytube import YouTube
from transformers import pipeline
# Build the transformers pipeline once at import time; the transcription
# callback defined below reuses this single instance for every request.
MODEL_ID = "anzorq/w2v-bert-2.0-kbd"
pipe = pipeline(model=MODEL_ID, device=0)
@spaces.GPU
def transcribe_speech(audio):
    """Transcribe an audio input with the module-level ``pipe``.

    Args:
        audio: Audio input forwarded unchanged to ``pipe``; the callers in
            this file pass a file path. ``None`` means nothing was received.

    Returns:
        The ``'text'`` entry of the pipeline result, or the string
        ``"No audio received."`` when ``audio`` is ``None``.
    """
    # The microphone widget can hand over None when nothing was recorded.
    if audio is not None:
        result = pipe(audio, chunk_length_s=10)
        return result['text']
    return "No audio received."
def transcribe_from_youtube(url):
    """Download the audio track of a YouTube video and transcribe it.

    Args:
        url: URL of the YouTube video, as typed into the URL textbox.

    Returns:
        The transcription returned by ``transcribe_speech``, or the string
        ``"No URL provided."`` when ``url`` is empty or only whitespace.

    Raises:
        gr.Error: If the video exposes no audio-only stream.
    """
    # Mirror transcribe_speech: answer with a message instead of letting
    # pytube fail on an empty URL.
    if not url or not url.strip():
        return "No URL provided."

    import tempfile

    stream = YouTube(url.strip()).streams.filter(only_audio=True).first()
    if stream is None:
        raise gr.Error("No audio stream found for this video.")

    # A private temporary directory keeps concurrent requests from
    # overwriting each other's download, and the context manager removes the
    # file even when the download or the transcription raises.
    with tempfile.TemporaryDirectory() as tmp_dir:
        audio_path = stream.download(output_path=tmp_dir, filename="tmp.mp4")
        return transcribe_speech(audio_path)
def populate_metadata(url):
    """Look up the thumbnail URL and title of a YouTube video.

    This is wired to the URL textbox's change event, so it has to tolerate
    empty and half-typed input without raising.

    Args:
        url: Current content of the URL textbox.

    Returns:
        A ``(thumbnail_url, title)`` tuple, or ``(None, None)`` when ``url``
        is empty or pytube cannot resolve it.
    """
    if not url or not url.strip():
        return None, None

    from pytube.exceptions import PytubeError

    try:
        yt = YouTube(url.strip())
        return yt.thumbnail_url, yt.title
    except PytubeError:
        # Best effort: an unparsable or unavailable video simply clears the
        # preview instead of surfacing an error while the user is typing.
        return None, None
# Assemble the two-tab Gradio interface, then start serving it.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Centered page header.
    gr.HTML(
        """
<div style="text-align: center; max-width: 500px; margin: 0 auto;">
<div>
<h1>Youtube Speech Transcription</h1>
</div>
<p style="margin-bottom: 10px; font-size: 94%">
Speech to text transcription of Youtube videos using Wav2Vec2-BERT
</p>
</div>
"""
    )

    # Tab 1: record from the microphone and transcribe on button press.
    with gr.Tab("Microphone Input"):
        gr.Markdown("## Transcribe speech from microphone")
        mic_audio = gr.Audio(
            sources="microphone",
            type="filepath",
            label="Speak into your microphone",
        )
        mic_button = gr.Button("Transcribe")
        mic_output = gr.Textbox(label="Transcription")
        mic_button.click(
            fn=transcribe_speech,
            inputs=mic_audio,
            outputs=mic_output,
        )

    # Tab 2: transcribe the audio of a YouTube video given its URL.
    with gr.Tab("YouTube URL"):
        gr.Markdown("## Transcribe speech from YouTube video")
        youtube_url = gr.Textbox(label="Enter YouTube video URL")
        title = gr.Label(label="Video Title")
        img = gr.Image(label="Thumbnail", height=120, width=120)
        transcribe_button = gr.Button("Transcribe")
        transcription_output = gr.Textbox(
            label="Transcription",
            placeholder="Transcription Output",
            lines=10,
        )
        transcribe_button.click(
            fn=transcribe_from_youtube,
            inputs=youtube_url,
            outputs=transcription_output,
        )
        # Refresh the thumbnail and title whenever the URL text changes.
        youtube_url.change(
            fn=populate_metadata,
            inputs=[youtube_url],
            outputs=[img, title],
        )

demo.launch()