"""Gradio demo: transcribe microphone or uploaded audio with Whisper Large V3."""

import gradio as gr
import torch
from transformers import pipeline

# Prefer GPU with half precision; fall back to CPU/float32 so the demo
# still runs on machines without CUDA (float16 is poorly supported on CPU).
_device = "cuda:0" if torch.cuda.is_available() else "cpu"
_dtype = torch.float16 if _device == "cuda:0" else torch.float32

pipe = pipeline(
    "automatic-speech-recognition",
    "openai/whisper-large-v3",
    torch_dtype=_dtype,
    device=_device,
)


def transcribe(inputs):
    """Transcribe an audio file to text.

    Args:
        inputs: Filesystem path to the recorded/uploaded audio clip
            (Gradio passes a filepath because ``type="filepath"``),
            or ``None`` when no audio was submitted.

    Returns:
        The transcribed text.

    Raises:
        gr.Error: If no audio file was submitted.
    """
    if inputs is None:
        raise gr.Error("No audio file submitted! Please record an audio before submitting your request.")

    # return_timestamps=True enables long-form (>30 s) transcription;
    # only the merged "text" field is surfaced to the user.
    text = pipe(inputs, generate_kwargs={"task": "transcribe"}, return_timestamps=True)["text"]
    return text


demo = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources=["microphone", "upload"], type="filepath"),
    ],
    outputs="text",
    title="Whisper Large V3: Transcribe Audio",
    description=(
        "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
        " checkpoint [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3) and 🤗 Transformers to transcribe audio files"
        " of arbitrary length."
    ),
    allow_flagging="never",
)

demo.launch()