# Spaces: Runtime error
import os
import tempfile

import gradio as gr
from pytube import YouTube
from transformers import pipeline
# Model pipeline shared by every transcription handler; built once at import.
# To use another checkpoint, change the id to "your-username/the-name-you-picked".
pipe = pipeline(
    model="GIanlucaRub/whisper-tiny-it-6",
)
def transcribe_yt(link):
    """Download the audio track of a YouTube video and transcribe it.

    Args:
        link: URL of the YouTube video.

    Returns:
        The ``"text"`` field of the result produced by the module-level
        ``pipe`` for the downloaded audio.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    yt = YouTube(link)
    stream = yt.streams.filter(only_audio=True).first()
    if stream is None:
        raise ValueError(f"No audio-only stream available for {link!r}")
    # Download into a private temporary directory rather than a fixed path in
    # the working directory: concurrent requests no longer overwrite each
    # other's file, and the directory is removed even when the download or
    # the transcription raises.
    with tempfile.TemporaryDirectory() as workdir:
        audio = stream.download(output_path=workdir, filename="audio.mp3")
        return pipe(audio)["text"]
def transcribe_audio(audio):
    """Transcribe an audio file with the module-level ``pipe``.

    Args:
        audio: Path to the audio file to transcribe. May be ``None`` or empty
            when the button is clicked before anything was recorded or
            uploaded.

    Returns:
        The ``"text"`` field of the pipeline result, or an empty string when
        no audio was provided.
    """
    # Nothing recorded/uploaded yet: return an empty transcription instead of
    # letting the pipeline fail on a missing input.
    if not audio:
        return ""
    return pipe(audio)["text"]
def populate_metadata(link):
    """Look up the thumbnail URL and title of a YouTube video.

    Args:
        link: URL of the YouTube video; may be empty while the user is still
            editing the link field.

    Returns:
        A ``(thumbnail_url, title)`` tuple, or ``(None, "")`` when the link
        is missing or blank.
    """
    # This runs on every change of the link textbox, including when it is
    # cleared; skip the lookup instead of raising on a blank URL.
    if not link or not link.strip():
        return None, ""
    yt = YouTube(link)
    return yt.thumbnail_url, yt.title
# Heading and tagline shown in the HTML banner at the top of the page.
title = "Youtube Whisperer"
description = "Speech to text transcription of Youtube videos using OpenAI's Whisper"

# NOTE(review): the component nesting below was reconstructed from the
# statement order; confirm it matches the intended layout.
block = gr.Blocks()
with block:
    # Banner: reuses the constants above so the text is defined in one place.
    gr.HTML(
        f"""
        <div style="text-align: center; max-width: 500px; margin: 0 auto;">
          <div>
            <h1>{title}</h1>
          </div>
          <p style="margin-bottom: 10px; font-size: 94%">
            {description}
          </p>
        </div>
        """
    )
    with gr.Group():
        with gr.Box():
            # Shared output box: all three transcription buttons write here.
            text = gr.Textbox(
                label="Transcription",
                placeholder="Transcription Output",
                lines=5,
            )

            # Source 1: microphone recording.
            microphone = gr.Audio(source="microphone", type="filepath")
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                btn_microphone = gr.Button("Transcribe microphone audio")

            # Source 2: uploaded audio file.
            audio_uploaded = gr.Audio(source="upload", type="filepath")
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                btn_audio_uploaded = gr.Button("Transcribe audio uploaded")

            # Source 3: YouTube video, with a title/thumbnail preview.
            link = gr.Textbox(label="YouTube Link")
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                btn_youtube = gr.Button("Transcribe Youtube video")
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                # Named title_label so it does not shadow the `title` string;
                # the `placeholder` argument was dropped because gr.Label
                # does not define it.
                title_label = gr.Label(label="Video Title")
                img = gr.Image(label="Thumbnail")

            # Events
            btn_youtube.click(transcribe_yt, inputs=[link], outputs=[text])
            btn_microphone.click(transcribe_audio, inputs=[microphone], outputs=[text])
            btn_audio_uploaded.click(transcribe_audio, inputs=[audio_uploaded], outputs=[text])
            # Refresh the preview whenever the link text changes.
            link.change(populate_metadata, inputs=[link], outputs=[img, title_label])

block.launch(debug=True)