import tempfile

import gradio as gr
from pytube import YouTube
from transformers import pipeline

# Model pipeline, loaded once at import time; both handlers below call it and
# read the 'text' entry of its result.
pipe = pipeline(model="CsanadT/checkpoints")

def live_performance(audio):
    """Transcribe a microphone recording with the module-level pipeline.

    Args:
        audio: Path to the recorded audio file (the Audio input is configured
            with ``type="filepath"``), or ``None`` when no recording exists.

    Returns:
        The ``'text'`` entry of the pipeline result, or an empty string when
        ``audio`` is ``None``.
    """
    # Submitting the form without a recording yields no file; forwarding None
    # to the pipeline would raise instead of giving the user an empty result.
    if audio is None:
        return ""
    return pipe(audio)['text']

def url_performance(link):
    """Transcribe the audio track of a YouTube video.

    Args:
        link: URL of the video; converted with ``str`` and stripped of
            surrounding whitespace before use.

    Returns:
        The ``'text'`` entry of the pipeline result, or an empty string when
        ``link`` is ``None`` or blank.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    if link is None:
        return ""
    url = str(link).strip()
    if not url:
        return ""

    stream = YouTube(url).streams.filter(only_audio=True).first()
    if stream is None:
        raise ValueError(f"No audio-only stream found for {url!r}")

    # The query above yields a pytube Stream object, not audio data, so the
    # track must be downloaded and the resulting file path handed to the
    # pipeline. The temporary directory removes the download afterwards.
    with tempfile.TemporaryDirectory() as workdir:
        audio_path = stream.download(output_path=workdir)
        return pipe(audio_path)['text']

    

# Two-tab UI: each tab wraps one transcription function in its own
# gr.Interface created inside the shared Blocks container.
with gr.Blocks() as demo:
    with gr.Tab('Live audio'):
        # Microphone input is delivered to the handler as a file path.
        iface = gr.Interface(
            fn=live_performance,
            inputs=gr.Audio(source="microphone", type="filepath"),
            outputs="text",
            title="Whisper Small Swedish",
            description="Real-time demo for swedish speech recognition using a fine-tuned Whisper small model."
        )

    with gr.Tab('Transcription from URL'):
        # The textbox value is passed to the handler as the video link.
        iface = gr.Interface(
            fn=url_performance,
            inputs=gr.Textbox(label='Paste the URL here'),
            outputs="text",
            title="Whisper Small Swedish",
            description="Real-time demo for swedish speech recognition using a fine-tuned Whisper small model."
        )

# Start the app as soon as the script is run.
demo.launch()