# Page header captured along with the file (not part of the program):
# truongghieu's picture | Update app.py | 9cc7412 | raw | history blame | 869 Bytes
import speech_recognition as sr
import gradio as gr
import numpy as np
def _to_pcm16_bytes(samples):
    """Return *samples* as mono, 16-bit little-endian PCM bytes."""
    pcm = np.asarray(samples)
    # Remember the input dtype before down-mixing: mean() always yields floats.
    is_float = np.issubdtype(pcm.dtype, np.floating)
    if pcm.ndim > 1:
        # Treated as (samples, channels); average the channels down to mono
        # because the AudioData below is built as a single-channel clip.
        pcm = pcm.mean(axis=1)
    if is_float:
        # Assumes float audio is normalised to [-1.0, 1.0] -- TODO confirm
        # against the Gradio version in use.
        pcm = np.clip(pcm, -1.0, 1.0) * 32767
    # NOTE(review): integer inputs wider than 16 bits are cast, not rescaled.
    return pcm.astype("<i2").tobytes()


def recognize_speech(audio_data) -> str:
    """Transcribe a Gradio audio clip with Google Speech Recognition.

    Args:
        audio_data: ``(sample_rate, samples)`` tuple, where ``samples`` is a
            numpy array, or ``None`` when nothing was recorded or uploaded.

    Returns:
        The recognised text prefixed with ``"Recognized Speech: "``, or a
        human-readable message when there is no audio, the speech could not
        be understood, or the recognition service request failed.
    """
    no_audio_message = "No audio received. Please record or upload a clip."

    # Guard before indexing: the value is None when the form is submitted
    # without any audio.
    if audio_data is None:
        return no_audio_message

    sample_rate, samples = audio_data
    if np.asarray(samples).size == 0:
        return no_audio_message

    # AudioData wants raw PCM bytes whose width matches sample_width, so the
    # numpy array is converted explicitly instead of being passed through.
    clip = sr.AudioData(
        _to_pcm16_bytes(samples),
        sample_rate=int(sample_rate),
        sample_width=2,
    )
    recognizer = sr.Recognizer()
    try:
        text = recognizer.recognize_google(clip)
    except sr.UnknownValueError:
        return "Speech Recognition could not understand audio."
    except sr.RequestError as e:
        return f"Could not request results from Google Speech Recognition service; {e}"
    return f"Recognized Speech: {text}"
# Audio widget; type="numpy" makes Gradio pass the clip to the callback as a
# (sample_rate, numpy array) tuple.
audio_input = gr.Audio(type="numpy")

# Wire the widget to the recogniser and show the result as plain text.
iface = gr.Interface(
    fn=recognize_speech,
    inputs=audio_input,
    outputs="text",
)

# Start the web UI.
iface.launch()