fffiloni committed on
Commit
f5b40d5
·
1 Parent(s): 69e8714

added whisper load_audio

Browse files
Files changed (1) hide show
  1. app.py +4 -13
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import gradio as gr
2
  import torch
3
-
4
  from diffusers import DiffusionPipeline
5
  from transformers import (
6
  WhisperForConditionalGeneration,
@@ -37,22 +37,13 @@ Community examples consist of both inference and training examples that have bee
37
  <a href='https://github.com/huggingface/diffusers/tree/main/examples/community#speech-to-image' target='_blank'> Click here for more information about community pipelines </a>
38
  </p>
39
  """
40
- audio_input = gr.Audio(source="microphone", type="numpy")
41
  image_output = gr.Image()
42
 
43
  def speech_to_text(audio_sample):
44
- #text = audio_sample["text"].lower()
45
- #print(text)
46
- #speech_data = audio_sample["audio"]["array"]
47
-
48
- print(f"""
49
- ————————
50
- audio sample: {audio_sample}
51
- audio array: {audio_sample[1]}
52
- ————————
53
- """)
54
 
55
- output = diffuser_pipeline(audio_sample[1])
 
56
 
57
  print(f"""
58
  ————————
 
1
  import gradio as gr
2
  import torch
3
+ from whisper import load_audio
4
  from diffusers import DiffusionPipeline
5
  from transformers import (
6
  WhisperForConditionalGeneration,
 
37
  <a href='https://github.com/huggingface/diffusers/tree/main/examples/community#speech-to-image' target='_blank'> Click here for more information about community pipelines </a>
38
  </p>
39
  """
40
+ audio_input = gr.Audio(source="microphone", type="filepath")
41
  image_output = gr.Image()
42
 
43
  def speech_to_text(audio_sample):
 
 
 
 
 
 
 
 
 
 
44
 
45
+ process_audio = whisper.load_audio(audio_sample)
46
+ output = diffuser_pipeline(process_audio)
47
 
48
  print(f"""
49
  ————————