File size: 1,063 Bytes
1411414
d3df9be
2de1bff
 
 
1411414
 
 
 
d3df9be
1411414
d3df9be
2de1bff
 
 
 
 
 
 
 
 
1411414
d3df9be
 
 
2de1bff
 
dd4750a
 
1411414
 
d3df9be
2de1bff
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import gradio as gr
import whisper
from wordcloud import WordCloud
import tempfile
import os

# Load the Whisper "base" checkpoint once, at import time; the module-level
# instance is then shared by every call that transcribes audio.
model = whisper.load_model(name="base")

def transcribe(audio):
    """Transcribe an audio file and render a word cloud of the transcript.

    Args:
        audio: Path of the audio file to transcribe. May be ``None`` or empty
            when the request was submitted without any audio.

    Returns:
        A ``(text, image_path)`` tuple. ``text`` is the transcript and
        ``image_path`` is the path of a PNG word cloud written to a temporary
        file. ``image_path`` is ``None`` when no word cloud can be drawn
        (no audio was given, or the transcript contains no usable words);
        when no audio was given, ``text`` is the empty string.
    """
    # Nothing to transcribe: return empty outputs instead of handing a
    # missing path to the model.
    if not audio:
        return "", None

    # Transcribe the audio file
    result = model.transcribe(audio)
    text = result['text']

    # Generate a word cloud. WordCloud raises ValueError when the text yields
    # no words (e.g. silence or only stop words); the transcript is still
    # worth returning in that case, just without an image.
    try:
        wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)
    except ValueError:
        return text, None

    # Reserve a unique .png path, then close the handle before writing to it:
    # a file that is still open cannot be reopened by name on Windows.
    # delete=False keeps the file on disk so the returned path stays valid.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmpfile:
        image_path = tmpfile.name
    wordcloud.to_file(image_path)
    return text, image_path

# Create the Gradio interface: one audio input, with the transcript text and
# the word-cloud image as outputs.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),  # fn receives the audio as a file path
    outputs=["text", gr.Image(type="filepath")],  # Transcript text, then the image file path
    title="SAUTI_V1",
    description=(
        "Upload an audio file to transcribe it to text and view a word cloud of the text. "
        "Disclaimer: This might run slowly because it is hosted on the free tier."
    ),
)

# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()