import os
import tempfile

import gradio as gr
import whisper
from wordcloud import WordCloud

# Load the Whisper model once at import time so every request reuses it.
model = whisper.load_model("base")


def transcribe(audio):
    """Transcribe an audio file and render a word cloud of the transcript.

    Args:
        audio: Path to the audio file to transcribe (the interface below is
            configured with ``gr.Audio(type="filepath")``). ``None`` is
            tolerated and treated as "nothing to transcribe".

    Returns:
        A ``(text, image_path)`` tuple: the transcript and the path of a PNG
        word cloud written to a temporary file. ``image_path`` is ``None``
        when there is no audio or the transcript has no words to plot.
    """
    # Presumably Gradio passes None when the form is submitted without audio;
    # return empty outputs instead of failing inside Whisper.
    if audio is None:
        return "", None

    # Transcribe the audio file
    result = model.transcribe(audio)
    text = result["text"]

    # Generate a word cloud. WordCloud raises ValueError when the text has no
    # usable words (e.g. silent audio), so fall back to text-only output.
    try:
        wordcloud = WordCloud(
            width=800,
            height=400,
            background_color="white",
        ).generate(text)
    except ValueError:
        return text, None

    # Reserve a temporary path and close our handle before WordCloud reopens
    # the file by name; writing to a still-open NamedTemporaryFile fails on
    # Windows. The file is deliberately left on disk because the caller reads
    # it after this function returns.
    fd, image_path = tempfile.mkstemp(suffix=".png")
    os.close(fd)
    wordcloud.to_file(image_path)

    return text, image_path


# Create the Gradio interface
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),  # Allow file uploads
    outputs=["text", gr.Image(type="filepath")],  # Output both text and image
    title="SAUTI_V1",
    description="Upload an audio file to transcribe it to text and view a word cloud of the text. Disclaimer: This might run slow due to it been hosted on free tier",
)

# Launch the app
if __name__ == "__main__":
    demo.launch()