pedropauletti committed
Commit • f90f3f5
1 Parent(s): 740c9e6
Update app.py
app.py
CHANGED
@@ -1,4 +1,49 @@
 import gradio as gr
+from transformers import pipeline
+from helpers import load_model_file, load_wav_16k_mono_librosa, initialize_text_to_speech_model, load_label_mapping, predict_yamnet, classify, classify_realtime
+from helpers import interface, interface_realtime, updateHistory, clearHistory, clear, format_dictionary, format_json
+from helpers import generate_audio, TTS, TTS_ASR, TTS_chatbot, transcribe_speech, transcribe_speech_realtime, transcribe_realtime, translate_enpt
+from helpers import chatbot_response, add_text
+
+history = ""
+last_answer = ""
+
+examples_audio_classification = [
+    "content/crowd_laughing.mp3",
+    "content/nature-ambient-sound.mp3",
+    "content/talking-people.mp3",
+    "content/miaow_16k.wav",
+]
+
+examples_speech_recognition_en = [
+    "content/speech1-en.wav",
+    "content/speech2-en.wav",
+]
+examples_speech_recognition_ptbr = [
+    "content/speech1-ptbr.wav",
+    "content/speech2-ptbr.wav",
+    "content/speech3-ptbr.wav",
+]
+
+examples_chatbot_en = [
+    ['How does SocialEar assist people with hearing disabilities?'],
+    ['Give me suggestions on how to use SocialEar'],
+    ['How does SocialEar work?'],
+    ['Are SocialEar results accurate?'],
+    ['What accessibility features does SocialEar offer?'],
+    ['Does SocialEar collect personal data?'],
+    ['Can I use SocialEar to identify songs and artists from recorded audio?'],
+]
+
+examples_chatbot_ptbr = [
+    ['Como o SocialEar auxilia pessoas com deficiência auditiva?'],
+    ['Dê-me sugestões sobre como usar o SocialEar'],
+    ['Como funciona o SocialEar?'],
+    ['Os resultados do SocialEar são precisos?'],
+    ['Quais recursos de acessibilidade o SocialEar oferece?'],
+    ['O SocialEar coleta dados pessoais?'],
+    ['Posso usar o SocialEar para identificar músicas e artistas de áudio gravado?'],
+]
 
 def to_audioClassification():
     return {
@@ -7,7 +52,7 @@ def to_audioClassification():
         speech_recognition: gr.Row(visible=False),
         chatbot_qa: gr.Row(visible=False),
     }
-
+
 def to_realtimeAudioClassification():
     return {
         audio_classification: gr.Row(visible=False),
@@ -39,10 +84,10 @@ with gr.Blocks() as demo:
     language = gr.Radio(["en-us", "pt-br"], label="Language", info="Choose the language to display the classification result and audio", value='en-us', interactive=True)
 
     with gr.Row():
-        btn0 = gr.Button("Audio Classification", scale=1,
-        btn1 = gr.Button("Realtime Audio Classification", scale=1,size='lg')
-        btn2 = gr.Button("Speech Recognition", scale=1, size='lg')
-        btn3 = gr.Button("Help", scale=1, size='lg')
+        btn0 = gr.Button("Audio Classification", scale=1, icon='content/Audio Classification.png', size='lg')
+        btn1 = gr.Button("Realtime Audio Classification", scale=1, icon='content/Realtime Audio Classification.png', size='lg')
+        btn2 = gr.Button("Speech Recognition", scale=1, icon='content/Speech Recognition.png', size='lg')
+        btn3 = gr.Button("Help", scale=1, icon='content/Chatbot.png', size='lg')
 
     with gr.Row(visible=False) as audio_classification:
         with gr.Column(min_width=700):
@@ -57,6 +102,12 @@
             audioOutput = gr.Audio(label="Audio Output", interactive=False)
 
 
+    inputRecord.stop_recording(interface, [inputRecord, language], [output])
+    inputUpload.upload(interface, [inputUpload, language], [output])
+    btn.click(fn=TTS, inputs=[output, language], outputs=audioOutput)
+
+    examples = gr.Examples(fn=interface, examples=examples_audio_classification, inputs=[inputRecord], outputs=[output], run_on_click=True)
+
     with gr.Row(visible=False) as realtime_classification:
         with gr.Column(min_width=700):
             input = gr.Audio(label="Audio Input", source="microphone", type="filepath",streaming=True, every=10)
@@ -65,6 +116,11 @@
         with gr.Column(min_width=700):
             output = gr.Label(label="Audio Classification")
 
+    input.change(interface_realtime, [input, language], output)
+    input.change(updateHistory, None, historyOutput)
+    input.start_recording(clearHistory, None, historyOutput)
+
+
     with gr.Row(visible=False) as speech_recognition:
         with gr.Column(min_width=700):
             with gr.Accordion("Record an Audio", open=True):
@@ -75,13 +131,19 @@
         with gr.Column(min_width=700):
             output = gr.Label(label="Transcription")
 
-
+
+    inputRecord.stop_recording(transcribe_speech, [inputRecord, language], [output])
+    inputUpload.upload(transcribe_speech, [inputUpload, language], [output])
+
+    examplesSpeechEn = gr.Examples(fn=transcribe_speech, examples=examples_speech_recognition_en, inputs=[inputRecord], outputs=[output], run_on_click=True, label="English Examples")
+    # examplesSpeechPtbr = gr.Examples(fn=transcribe_speech, examples=examples_speech_recognition_ptbr, inputs=[inputRecord], outputs=[output], run_on_click=True, label="Portuguese Examples")
+
     with gr.Row(visible=False) as chatbot_qa:
         chatbot = gr.Chatbot(
             [],
             elem_id="chatbot",
             bubble_full_width=False,
-
+            avatar_images=(None, "content/avatar-socialear.png"),
             min_width=2000
         )
         with gr.Row(min_width=2000):
@@ -92,7 +154,24 @@
                 container=False,
                 min_width=1000
             )
-            submit = gr.Button(value="", size='sm', scale=1)
+            submit = gr.Button(value="", size='sm', scale=1, icon='content/send-icon.png')
+
+
+        inputRecord = gr.Audio(label="Record a question", source="microphone", type="filepath", min_width=600)
+        btn = gr.Button(value="Listen the answer")
+        audioOutput = gr.Audio(interactive=False, min_width=600)
+
+        txt_msg = txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
+            chatbot_response, [chatbot, language], chatbot)
+        txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)
+        submit.click(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
+            chatbot_response, [chatbot, language], chatbot).then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)
+        inputRecord.stop_recording(transcribe_speech, [inputRecord, language], [txt])
+        btn.click(fn=TTS_chatbot, inputs=[language], outputs=audioOutput)
+
+        with gr.Row(min_width=2000):
+            examplesChatbotEn = gr.Examples(examples=examples_chatbot_en, inputs=[txt], label="English Examples")
+            examplesChatbotPtbr = gr.Examples(examples=examples_chatbot_ptbr, inputs=[txt], label="Portuguese Examples")
 
 
     btn0.click(fn=to_audioClassification, outputs=[audio_classification, realtime_classification, speech_recognition, chatbot_qa])
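Note on the pattern this commit relies on: the navigation buttons call handlers that return a dict of gr.Row(visible=...) updates, and the chatbot wiring chains handlers with .then(). The snippet below is a minimal, self-contained sketch of that pattern, not part of app.py; the names (section_a, section_b, btn_a, btn_b, txt, echo) are hypothetical, and it assumes a Gradio version that, like this app, accepts component constructors such as gr.Row(visible=False) as update values.

import gradio as gr

with gr.Blocks() as demo:
    with gr.Row():
        btn_a = gr.Button("Section A")
        btn_b = gr.Button("Section B")

    # Two feature sections, hidden until their button is clicked
    # (same idea as audio_classification / realtime_classification / ... above).
    with gr.Row(visible=False) as section_a:
        txt = gr.Textbox(label="Say something")
        echo = gr.Textbox(label="Echo", interactive=False)
    with gr.Row(visible=False) as section_b:
        gr.Markdown("Section B content")

    def show_a():
        # Returning a dict keyed by output components updates each of them.
        return {section_a: gr.Row(visible=True), section_b: gr.Row(visible=False)}

    def show_b():
        return {section_a: gr.Row(visible=False), section_b: gr.Row(visible=True)}

    btn_a.click(fn=show_a, outputs=[section_a, section_b])
    btn_b.click(fn=show_b, outputs=[section_a, section_b])

    # .then() runs a second handler after the first one finishes, mirroring
    # txt.submit(add_text, ...).then(chatbot_response, ...) in the commit.
    txt.submit(lambda s: s, [txt], [echo], queue=False).then(
        lambda s: s.upper(), [echo], [echo])

demo.launch()

Clicking btn_a shows section_a and hides section_b, which is the same switching behavior that btn0.click(fn=to_audioClassification, outputs=[audio_classification, realtime_classification, speech_recognition, chatbot_qa]) gives the four feature rows in app.py.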