"""Gradio demo app: Swedish speech recognition with a fine-tuned Whisper model.

Exposes two tabs: one transcribes microphone recordings, the other
transcribes the audio track of a YouTube video given its URL.
"""

import os
import tempfile

import gradio as gr
from pytube import YouTube
from transformers import pipeline

# Speech-recognition pipeline, loaded once at import time and shared by
# both tabs.
pipe = pipeline(model="irena/whisper-small-sv-SE")


def yt(link):
    """Download the audio of a YouTube video and return its transcription.

    Args:
        link: URL of the YouTube video.

    Returns:
        The transcribed text produced by ``pipe``.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    video = YouTube(link)
    stream = video.streams.filter(only_audio=True).first()
    if stream is None:
        raise ValueError(f"No audio-only stream available for {link!r}")

    # Download into a per-request temporary directory so concurrent requests
    # cannot overwrite each other's file and nothing is left behind on disk.
    with tempfile.TemporaryDirectory() as workdir:
        audio_path = stream.download(output_path=workdir, filename="audio.mp3")
        return pipe(audio_path)["text"]


def transcribe(audio):
    """Transcribe a recorded audio file.

    Args:
        audio: Path to the audio file (the microphone component is
            configured with ``type="filepath"``).

    Returns:
        The transcribed text produced by ``pipe``.

    Raises:
        ValueError: If no audio was provided.
    """
    if audio is None:
        raise ValueError("No audio was provided; record something first.")
    return pipe(audio)["text"]


demo = gr.Blocks()

iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs="text",
    title="Whisper Small Swedish-Microphone",
    description="Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model. An audio for recognize.",
)

# Bound to its own name so the ``yt`` function is not rebound to the
# interface object.
yt_iface = gr.Interface(
    fn=yt,
    inputs=[gr.Textbox(lines=1, label="Youtube URL")],
    # ``yt`` returns a single string, so exactly one output component is
    # declared.
    outputs="text",
    title="Whisper Small Swedish-Youtube",
    description="Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model. A Youtube URL for recognize.",
)

with demo:
    gr.TabbedInterface(
        [iface, yt_iface],
        ["Transcribe Audio", "Transcribe YouTube"],
    )

demo.launch(enable_queue=True)