"""Gradio demo that transcribes speech with a fine-tuned Whisper model.

Two tabs are exposed: one transcribes audio recorded from the microphone,
the other downloads the audio track of a YouTube video and transcribes it.
"""

import os
import tempfile

import gradio as gr
from pytube import YouTube
from transformers import pipeline

# NOTE(review): the UI strings below say "Swedish" while the model id ends in
# "-hi" -- confirm that this is the intended checkpoint.
pipe = pipeline(model="Manbearpig01/whisper-small-hi")


# Example video: https://www.youtube.com/watch?v=IagbSHyZ5iA
def yt(link):
    """Download the audio track of a YouTube video and transcribe it.

    Args:
        link: URL of the YouTube video.

    Returns:
        The value stored under the ``"text"`` key of the pipeline output.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    video = YouTube(link)
    stream = video.streams.filter(only_audio=True).first()
    if stream is None:
        raise ValueError(f"No audio-only stream found for {link!r}")

    # Download into a private temporary directory so that overlapping requests
    # never overwrite each other's file and nothing is left behind on disk.
    with tempfile.TemporaryDirectory() as workdir:
        audio_path = stream.download(output_path=workdir, filename="audio.mp4")
        return pipe(audio_path)["text"]


def transcribe(audio):
    """Transcribe an audio input with the speech-recognition pipeline.

    Args:
        audio: Audio input handed over by the Gradio audio component, which
            is configured below with ``type="filepath"``.

    Returns:
        The value stored under the ``"text"`` key of the pipeline output.
    """
    return pipe(audio)["text"]


iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs="text",
    title="Whisper Small Swedish-Microphone",
    description="Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model. An audio for recognize.",
)

# The name ``yt`` is rebound from the function to its Interface here, exactly
# as in the original script; ``fn=yt`` is evaluated before the assignment, so
# the Interface still wraps the function defined above.
yt = gr.Interface(
    fn=yt,
    inputs=[gr.Textbox(lines=1, label="Youtube URL")],
    outputs=["text"],
    title="Whisper Small Swedish-Youtube",
    description="Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model. A Youtube URL for recognize. Suggest link1: https://www.youtube.com/watch?v=IagbSHyZ5iA link2:https://www.youtube.com/watch?v=gjvOMoDf4-4",
)

with gr.Blocks() as demo:
    gr.TabbedInterface([iface, yt], ["Transcribe Audio", "Transcribe YouTube"])

demo.launch(enable_queue=True)