import functools

import gradio as gr
import whisper

def speech_to_text(tmp_filename, model_size):
    model = whisper.load_model(model_size)
    result = model.transcribe(tmp_filename)

    return result["text"]


gr.Interface(
    title="Give it a go, record something on your mic and let Whisper determine what you said.",
    fn=speech_to_text,
    inputs=[
        gr.Markdown(
            """
            # OpenAI | Whisper
            Whisper is an automatic speech recognition (ASR) system trained on 680,000 hours of multilingual and multitask supervised data collected from the web. We show that the use of such a large and diverse dataset leads to improved robustness to accents, background noise and technical language. Moreover, it enables transcription in multiple languages, as well as translation from those languages into English. We are open-sourcing models and inference code to serve as a foundation for building useful applications and for further research on robust speech processing.
            """),
        gr.Audio(title="Record your voice on your mic",source="microphone", type="filepath"),
        gr.Dropdown(label="Select model size",value="base",choices=["tiny", "base", "small", "medium", "large"])],

    outputs="text").launch()