File size: 869 Bytes
b887071
adfbf42
e37f379
 
 
b887071
4a60c8d
3f8ba92
 
 
 
 
 
 
a266d09
b887071
 
 
 
 
 
 
 
 
9cc7412
9035dc4
3f8ba92
b887071
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import speech_recognition as sr
import gradio as gr
import numpy as np


_NO_AUDIO_MESSAGE = "No audio received. Please record or upload audio and try again."


def _to_pcm16_bytes(samples):
    """Return *samples* as mono, little-endian, 16-bit PCM bytes."""
    pcm = np.asarray(samples)
    if np.issubdtype(pcm.dtype, np.floating):
        # Assumes float audio is normalised to [-1.0, 1.0]; rescale it to the
        # int16 range so it matches the sample_width=2 declared to AudioData.
        pcm = np.clip(pcm, -1.0, 1.0) * 32767.0
    if pcm.ndim > 1:
        # Assumes a (samples, channels) layout; average the channels so the
        # byte stream is not read as interleaved garbage by a mono consumer.
        pcm = pcm.mean(axis=1)
    return pcm.astype("<i2").tobytes()


def recognize_speech(audio_data):
    """Transcribe a recording with the Google Web Speech API.

    Args:
        audio_data: ``(sample_rate, samples)`` pair where ``samples`` is a
            NumPy array of audio samples, or ``None`` when nothing was
            recorded or uploaded.

    Returns:
        A human-readable string: the recognized text on success, otherwise a
        message explaining why no transcription is available.
    """
    # Submitting the form without a recording passes None; indexing it would
    # raise TypeError instead of giving the user a readable message.
    if audio_data is None:
        return _NO_AUDIO_MESSAGE

    sample_rate = int(audio_data[0])
    samples = np.asarray(audio_data[1])
    if samples.size == 0:
        return _NO_AUDIO_MESSAGE

    # AudioData expects raw PCM bytes, not an ndarray, so convert explicitly.
    clip = sr.AudioData(
        _to_pcm16_bytes(samples),
        sample_rate=sample_rate,
        sample_width=2,
    )

    recognizer = sr.Recognizer()
    try:
        text = recognizer.recognize_google(clip)
    except sr.UnknownValueError:
        return "Speech Recognition could not understand audio."
    except sr.RequestError as e:
        return f"Could not request results from Google Speech Recognition service; {e}"
    return f"Recognized Speech: {text}"

# Audio widget configured with type="numpy", the input format that
# recognize_speech consumes.
audio_input = gr.Audio(type="numpy")

# Wire the widget to the recognizer; its returned string is shown as text.
iface = gr.Interface(
    fn=recognize_speech,
    inputs=audio_input,
    outputs="text",
)
iface.launch()