import gradio as gr
import whisper

# Holds at most one loaded Whisper model, keyed by its size name. Only the most
# recently requested size is kept so that switching sizes does not accumulate
# several large checkpoints in memory.
_loaded_models = {}


def _get_model(model_size):
    """Return the Whisper model for *model_size*, loading it only when needed."""
    model = _loaded_models.get(model_size)
    if model is None:
        # Drop the previously cached model (if any) before loading the new one
        # so both are not resident at the same time.
        _loaded_models.clear()
        model = whisper.load_model(model_size)
        _loaded_models[model_size] = model
    return model


def speech_to_text(tmp_filename, model_size):
    """Transcribe an audio file with Whisper and return the recognized text.

    Args:
        tmp_filename: Path to the audio file to transcribe. May be ``None`` or
            empty when no recording was submitted.
        model_size: Name of the Whisper model to use, passed to
            ``whisper.load_model``.

    Returns:
        The ``"text"`` field of the transcription result, or an empty string
        when there is no audio to transcribe.
    """
    # Nothing was recorded: skip the (expensive) model load and transcription
    # instead of failing inside Whisper on a missing path.
    if not tmp_filename:
        return ""

    model = _get_model(model_size)
    result = model.transcribe(tmp_filename)

    return result["text"]


# Input components, in the order their values are passed to speech_to_text:
# first the recorded audio (delivered as a file path), then the model size.
audio_input = gr.Audio(
    label="Record your voice on your mic",
    source="microphone",
    type="filepath",
)
model_selector = gr.Dropdown(
    label="Select model size",
    value="base",
    choices=["tiny", "base", "small", "medium", "large"],
)

# Build the interface around speech_to_text; the returned string is shown as
# plain text.
demo = gr.Interface(
    fn=speech_to_text,
    inputs=[audio_input, model_selector],
    outputs="text",
    title="Whisper by OpenAI",
    description=(
        "Whisper is an automatic speech recognition (ASR) system trained on "
        "680,000 hours of multilingual and multitask supervised data "
        "collected from the web."
    ),
)

# Start the app as soon as this module is executed.
demo.launch()