"""Gradio demo: transcribe microphone or uploaded audio with Whisper Large V3."""

import gradio as gr
import torch
from transformers import pipeline

# Prefer GPU with half precision; fall back to CPU/float32 so the demo
# still runs on machines without CUDA (float16 is poorly supported on CPU).
_device = "cuda:0" if torch.cuda.is_available() else "cpu"
_dtype = torch.float16 if _device == "cuda:0" else torch.float32

pipe = pipeline(
    "automatic-speech-recognition",
    "openai/whisper-large-v3",
    torch_dtype=_dtype,
    device=_device,
)


def transcribe(inputs):
    """Transcribe an audio file to text.

    Args:
        inputs: Filesystem path to the recorded/uploaded audio clip
            (Gradio passes a filepath because ``type="filepath"``),
            or ``None`` when no audio was submitted.

    Returns:
        The transcribed text.

    Raises:
        gr.Error: If no audio file was submitted.
    """
    if inputs is None:
        raise gr.Error("No audio file submitted! Please record an audio before submitting your request.")

    # return_timestamps=True enables long-form (>30 s) transcription;
    # only the merged "text" field is surfaced to the user.
    text = pipe(inputs, generate_kwargs={"task": "transcribe"}, return_timestamps=True)["text"]
    return text


demo = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources=["microphone", "upload"], type="filepath"),
    ],
    outputs="text",
    title="Whisper Large V3: Transcribe Audio",
    description=(
        "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
        " checkpoint [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3) and 🤗 Transformers to transcribe audio files"
        " of arbitrary length."
    ),
    allow_flagging="never",
)

demo.launch()