import gradio as gr
import whisper
from wordcloud import WordCloud
import tempfile
import os

# Whisper checkpoint to use; loaded once at import time so every request
# handled by this process reuses the same model instance.
_WHISPER_MODEL_NAME = "base"
model = whisper.load_model(_WHISPER_MODEL_NAME)

def transcribe(audio):
    """Transcribe an audio file and render a word cloud of the transcript.

    Args:
        audio: Path to the audio file to transcribe, or ``None`` when no
            audio was provided.

    Returns:
        A ``(text, image_path)`` tuple. ``text`` is the transcript and
        ``image_path`` is the path of a PNG word cloud written to a
        temporary file. ``image_path`` is ``None`` when there is nothing to
        draw (no audio, or a transcript without usable words); ``text`` is
        ``""`` when no audio was provided.
    """
    # Nothing was submitted: return empty outputs instead of letting the
    # model fail on a missing input.
    if audio is None:
        return "", None

    # Transcribe the audio file
    result = model.transcribe(audio)
    text = result['text']

    # WordCloud.generate raises ValueError when the text contains no words
    # it can plot (e.g. silent audio); keep the transcript in that case and
    # simply skip the image.
    try:
        wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)
    except ValueError:
        return text, None

    # Reserve a unique .png path and close the handle before writing to it.
    # delete=False keeps the file on disk so it can still be read after this
    # function returns.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmpfile:
        image_path = tmpfile.name
    wordcloud.to_file(image_path)
    return text, image_path

# User-facing copy shown by the interface.
_TITLE = "SAUTI_V1"
_DESCRIPTION = (
    "Upload an audio file to transcribe it to text and view a word cloud of the text. "
    "Disclaimer: This might run slow due to it been hosted on free tier"
)

# Wire transcribe() into a Gradio interface: one audio input, and a text
# output plus an image output for the (transcript, word cloud) pair.
demo = gr.Interface(
    fn=transcribe,
    # type="filepath": the callback receives the audio as a path on disk
    inputs=gr.Audio(type="filepath"),
    # the image output is likewise given as a path to the file
    outputs=["text", gr.Image(type="filepath")],
    title=_TITLE,
    description=_DESCRIPTION,
)

# Start the app only when this file is executed directly
if __name__ == "__main__":
    demo.launch()