Spaces:

fffiloni
/

whisper-to-stable-diffusion

Paused

fffiloni committed on Sep 22, 2022

Commit

99cb9ec

•

1 Parent(s): 9f29da8

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -20,7 +20,7 @@ def get_transcribe(audio):
     mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
-    #_, probs = whisper_model.detect_language(mel)
     options = whisper.DecodingOptions(task="translate", fp16 = False)
     result = whisper.decode(whisper_model, mel, options)
@@ -41,10 +41,19 @@ def get_images(audio):
         else:
             images.append(image)
-    return images
 #inputs
-audio = gr.Audio(label="Input Audio", show_label=False, source="microphone", type="filepath")
 #outputs
-gallery = gr.Gallery(label="Generated images", show_label=False, elem_id="gallery").style(grid=[2], height="auto")
-gr.Interface(fn=get_images, inputs=audio, outputs=gallery).queue(max_size=10).launch(enable_queue=True)

     mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
+    _, probs = whisper_model.detect_language(mel)
     options = whisper.DecodingOptions(task="translate", fp16 = False)
     result = whisper.decode(whisper_model, mel, options)
         else:
             images.append(image)
+    return prompt, images
 #inputs
+audio = gr.Audio(label="Input Audio of an image description", show_label=True, source="microphone", type="filepath")
 #outputs
+translated_prompt = gr.Textbox(label="Translated audio", lines=6)
+gallery = gr.Gallery(label="Generated images", show_label=False, elem_id="gallery").style(grid=[1], height="auto")
+title="Whisper to Stable Diffusion"
+description="""
+<p style='text-align: center;'>
+This demo is running on CPU. Build by Sylvain <a href='https://twitter.com/fffiloni' target='_blank'>@fffiloni</a> • <img id='visitor-badge' alt='visitor badge' src='https://visitor-badge.glitch.me/badge?page_id=gradio-blocks.sd-img-variations' style='display: inline-block' />
+Record an audio description of an image, stop recording, then hit the Submit button to get 2 images from Stable Diffusion.
+Your audio will be translated to English, then sent as a prompt to stable diffusion.
+Try it in French ! ;)
+</p>
+"""
+gr.Interface(fn=get_images, inputs=audio, outputs=[translated_prompt, gallery]).queue(max_size=1000).launch(enable_queue=True)