Update app.py
Browse files
app.py
CHANGED
@@ -20,7 +20,7 @@ def get_transcribe(audio):
|
|
20 |
|
21 |
mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
|
22 |
|
23 |
-
|
24 |
|
25 |
options = whisper.DecodingOptions(task="translate", fp16 = False)
|
26 |
result = whisper.decode(whisper_model, mel, options)
|
@@ -41,10 +41,19 @@ def get_images(audio):
|
|
41 |
else:
|
42 |
images.append(image)
|
43 |
|
44 |
-
return images
|
45 |
#inputs
|
46 |
-
audio = gr.Audio(label="Input Audio", show_label=
|
47 |
#outputs
|
48 |
-
|
49 |
-
|
50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
|
21 |
mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
|
22 |
|
23 |
+
_, probs = whisper_model.detect_language(mel)
|
24 |
|
25 |
options = whisper.DecodingOptions(task="translate", fp16 = False)
|
26 |
result = whisper.decode(whisper_model, mel, options)
|
|
|
41 |
else:
|
42 |
images.append(image)
|
43 |
|
44 |
+
return prompt, images
|
45 |
#inputs
|
46 |
+
# Input component: a labelled audio widget that records from the microphone
# and is configured with type="filepath".
audio = gr.Audio(
    label="Input Audio of an image description",
    show_label=True,
    source="microphone",
    type="filepath",
)
|
47 |
#outputs
|
48 |
+
# Output component: a six-line text box labelled "Translated audio".
translated_prompt = gr.Textbox(
    label="Translated audio",
    lines=6,
)
|
49 |
+
# Output component: an image gallery with its label hidden, styled with
# grid=[1] and automatic height.
gallery = (
    gr.Gallery(
        label="Generated images",
        show_label=False,
        elem_id="gallery",
    )
    .style(grid=[1], height="auto")
)
|
50 |
+
title="Whisper to Stable Diffusion"
|
51 |
+
description="""
|
52 |
+
<p style='text-align: center;'>
|
53 |
+
This demo is running on CPU. Build by Sylvain <a href='https://twitter.com/fffiloni' target='_blank'>@fffiloni</a> • <img id='visitor-badge' alt='visitor badge' src='https://visitor-badge.glitch.me/badge?page_id=gradio-blocks.sd-img-variations' style='display: inline-block' />
|
54 |
+
Record an audio description of an image, stop recording, then hit the Submit button to get 2 images from Stable Diffusion.
|
55 |
+
Your audio will be translated to English, then sent as a prompt to stable diffusion.
|
56 |
+
Try it in French ! ;)
|
57 |
+
</p>
|
58 |
+
"""
|
59 |
+
# Wire get_images into a Gradio Interface: one audio input, two outputs (the
# translated prompt text and the image gallery). title and description are
# passed through so the header text assigned above is actually rendered on the
# page instead of being left as unused variables. Requests are queued
# (max_size=1000) before the app is launched.
gr.Interface(
    fn=get_images,
    inputs=audio,
    outputs=[translated_prompt, gallery],
    title=title,
    description=description,
).queue(max_size=1000).launch(enable_queue=True)
|