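# NOTE: header text carried over from the web file viewer; kept as a comment so
# the module parses as Python:
#   chinhon's picture | Update app.py | 02f063b verified | raw | history blame | 1.82 kB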
import gradio as gr
from pytube import YouTube
import whisper
# define function for transcription
def whisper_transcript(model_size, url, audio_file):
    """Transcribe a YouTube video's audio track or a local audio file with Whisper.

    Args:
        model_size: Name of the Whisper checkpoint handed to
            ``whisper.load_model`` (for example ``"base"`` or ``"base.en"``).
        url: YouTube link. When non-blank it takes precedence over
            ``audio_file``; the first audio-only stream is downloaded to
            ``audio.mp4`` and that file is transcribed.
        audio_file: Path to an audio file; used only when ``url`` is blank.

    Returns:
        The ``"text"`` entry of the result returned by ``transcribe``.

    Raises:
        gr.Error: If neither a link nor an audio file is supplied, or the
            video offers no audio-only stream.
    """
    # A textbox left empty (or holding only whitespace) must not be treated
    # as a link, otherwise the blank string is sent to YouTube().
    url = (url or "").strip()

    if url:
        stream = YouTube(url).streams.filter(only_audio=True).first()
        if stream is None:
            raise gr.Error("No audio-only stream is available for this YouTube link.")
        # The fixed filename means each download replaces the previous one.
        source = stream.download(filename="audio.mp4")
    elif audio_file:
        source = audio_file
    else:
        # Fail early with a readable message instead of passing None on to
        # the transcription call.
        raise gr.Error("Please paste a YouTube link or upload an audio clip.")

    # Checkpoints whose name ends in ".en" are pinned to English; for every
    # other checkpoint None is passed as the language.
    language = "english" if model_size.endswith(".en") else None

    loaded_model = whisper.load_model(model_size)
    transcript = loaded_model.transcribe(source, language=language)
    return transcript["text"]
# define Gradio app interface
# Input and output widgets are built up front (in the same order they appear
# in the form) so the Interface call below reads as a short wiring step.
model_picker = gr.Dropdown(
    choices=[
        # ".en" checkpoints first, then the ones without the suffix.
        "tiny.en",
        "base.en",
        "small.en",
        "medium.en",
        "tiny",
        "base",
        "small",
        "medium",
        "large",
    ],
    value="base",
    label="Select Model",
)
youtube_link_box = gr.Textbox(label="Paste YouTube link here")
audio_clip_input = gr.Audio(
    type="filepath",
    sources=["upload", "microphone"],
    label="Upload Audio File",
)
transcript_box = gr.Textbox(label="Whisper Transcript")

gradio_ui = gr.Interface(
    fn=whisper_transcript,
    # Order must match the positional parameters of whisper_transcript:
    # (model_size, url, audio_file).
    inputs=[model_picker, youtube_link_box, audio_clip_input],
    outputs=transcript_box,
    title="Transcribe multi-lingual audio clips with Whisper",
    description=(
        "**How to use**: Select a model, paste in a Youtube link or upload an audio clip, "
        "then click submit. If your clip is **100% in English, select models ending in ‘.en’**. "
        "If the clip is in other languages, or a mix of languages, select models without ‘.en’"
    ),
    article=(
        "**Note**: The larger the model size selected or the longer the audio clip, "
        "the more time it would take to process the transcript."
    ),
)

# Enable the request queue, then start serving the app.
gradio_ui.queue().launch()