"""Gradio demo for Swedish speech recognition with a fine-tuned Whisper model.

Builds three interfaces that all feed the same Hugging Face pipeline and
groups them into one tabbed app: microphone recording, uploaded audio file,
and the audio track of a YouTube video.
"""

import tempfile

import gradio as gr
from pytube import YouTube
from transformers import pipeline

# Created once at import time so every request reuses the same pipeline object.
pipe = pipeline(model="irena/whisper-small-sv-SE")


def _transcribe(path):
    """Run the pipeline on the audio file at *path* and return its text.

    Args:
        path: Filesystem path of the audio file to transcribe.

    Returns:
        The value stored under the ``"text"`` key of the pipeline result.

    Raises:
        gr.Error: If *path* is empty or ``None`` (nothing was recorded or
            uploaded), so the UI shows a readable message instead of an
            opaque failure from inside the pipeline.
    """
    if not path:
        raise gr.Error("No audio was provided.")
    return pipe(path)["text"]


def transcribe_video(url):
    """Transcribe the audio track of the YouTube video at *url*.

    The first audio-only stream is downloaded into a temporary directory
    that is removed once transcription finishes, so repeated requests do
    not leave files behind in the working directory.

    Args:
        url: URL of the YouTube video.

    Returns:
        The transcribed text.

    Raises:
        gr.Error: If *url* is blank or the video exposes no audio-only
            stream.
    """
    if not url or not url.strip():
        raise gr.Error("Please enter a YouTube URL.")

    # ``first()`` yields None for an empty query instead of raising
    # IndexError, and avoids the deprecated ``StreamQuery.all()``.
    stream = YouTube(url.strip()).streams.filter(only_audio=True).first()
    if stream is None:
        raise gr.Error("No audio stream is available for this video.")

    with tempfile.TemporaryDirectory() as workdir:
        audio_path = stream.download(output_path=workdir)
        # Transcribe inside the ``with`` block: the file is deleted on exit.
        return _transcribe(audio_path)


def transcribe_audio(audio):
    """Transcribe a microphone recording given as a file path."""
    return _transcribe(audio)


def transcribe_file(audio):
    """Transcribe an uploaded audio file given as a file path."""
    return _transcribe(audio)


audio = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs="text",
    title="Whisper Small Swedish",
    description="Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model.",
)

file = gr.Interface(
    fn=transcribe_file,
    # Same component class as the microphone tab; ``gr.inputs`` is deprecated.
    inputs=gr.Audio(source="upload", type="filepath"),
    outputs="text",
    title="Whisper Small Swedish",
    description="Transcribe swedish audios",
)

video = gr.Interface(
    fn=transcribe_video,
    inputs=gr.Textbox(label="Enter a YouTube URL:"),
    outputs="text",
    title="Whisper Small Swedish",
    description="Transcribe swedish videos from YouTube",
)

demo = gr.TabbedInterface(
    [audio, file, video],
    [
        "transcribe from microphone",
        "transcribe from local audios",
        "transcribe from youtube url",
    ],
)

if __name__ == "__main__":
    demo.launch()