"""Gradio demo for Swedish speech recognition with a fine-tuned Whisper model.

Two tabs are exposed: one transcribes audio recorded from the microphone,
the other transcribes the audio track of a video given by URL (fetched
with pytube).
"""
import tempfile

import gradio as gr
from pytube import YouTube
from transformers import pipeline

# Built once at import time so every request reuses the same pipeline object.
pipe = pipeline(model="CsanadT/whisper-small-se")


def live_performance(audio):
    """Transcribe a recorded audio file.

    Args:
        audio: Path to the recorded audio file (the Audio input is configured
            with ``type="filepath"``). May be ``None``/empty when the user
            submits without recording anything.

    Returns:
        The transcribed text, or an empty string when no audio was supplied.
    """
    if not audio:
        # Nothing was recorded; avoid handing None to the pipeline.
        return ""
    return pipe(audio)["text"]


def url_performance(link):
    """Download the audio track of a video URL and transcribe it.

    Args:
        link: URL of the video, as typed into the textbox.

    Returns:
        The transcribed text.

    Raises:
        gr.Error: If no URL was given or the video exposes no audio-only
            stream.
    """
    url = str(link or "").strip()
    if not url:
        raise gr.Error("Please provide a video URL.")

    stream = YouTube(url).streams.filter(only_audio=True).first()
    if stream is None:
        raise gr.Error("No audio-only stream is available for this video.")

    # The pipeline needs a file path, not a pytube Stream object, so the
    # stream is downloaded first. The temporary directory ensures the
    # downloaded file is removed once transcription is done.
    with tempfile.TemporaryDirectory() as workdir:
        audio_path = stream.download(output_path=workdir)
        return pipe(audio_path)["text"]


with gr.Blocks() as demo:
    with gr.Tab('Live audio'):
        gr.Interface(
            fn=live_performance,
            inputs=gr.Audio(source="microphone", type="filepath"),
            outputs="text",
            title="Whisper Small Swedish",
            description="Real-time demo for swedish speech recognition using a fine-tuned Whisper small model.",
        )
    with gr.Tab('Transcription from URL'):
        gr.Interface(
            fn=url_performance,
            inputs=gr.Textbox(label='Paste the URL here'),
            outputs="text",
            title="Whisper Small Swedish",
            description="Real-time demo for swedish speech recognition using a fine-tuned Whisper small model.",
        )

demo.launch()