Beomseok-LEE committed
Commit: c6ba3b6
1 Parent(s): b3f8d37

Update example table front-end and logic, adding cache example
app.py CHANGED
```diff
@@ -1,6 +1,7 @@
 import gradio as gr
 import numpy as np
 import librosa
+import os
 
 from asr.run_asr import run_asr_inference, load_asr_model
 from nlu.run_nlu import run_nlu_inference, load_nlu_model
@@ -18,6 +19,8 @@ description=[
     f"""For more details on the implementation, check our {blog_post_link}.""",
 ]
 
+CACHE_EXAMPLES = os.getenv("CACHE_EXAMPLES") == "1"
+
 examples = [
     "resources/audios/utt_286.wav",
     "resources/audios/utt_2414.wav",
@@ -59,6 +62,7 @@ slots = [
     [ 'Other', 'Other', 'Other', 'Other', 'Other', 'Other', 'Other', 'media_type', 'media_type', 'media_type']
 ]
 
+example_list = [[example, transcription, slot, intent] for example, transcription, slot, intent in zip(examples, transcriptions, slots, intents)]
 
 utter_ack_text = """This is an output of the European Project UTTER (Unified Transcription and Translation for Extended Reality) funded by European Union’s Horizon Europe Research and Innovation programme under grant agreement number 101070631.
 For more information please visit https://he-utter.eu/"""
@@ -71,19 +75,6 @@ nle_logo = """<a href="https://europe.naverlabs.com/" target="_blank"><img src="
 fbk_logo = """<a href="https://mt.fbk.eu/" target="_blank"><img src="https://huggingface.co/spaces/naver/French-SLU-DEMO-Interspeech2024/resolve/main/resources/logos/FBK_logo.png" width="100" height="100"></a>"""
 
 
-table = f"""
-| File | Transcription | Slots | Intent |
-| ------------ | ------------------- | ---------- | -----------|
-| {examples[0].split("/")[-1]} | {transcriptions[0]} | {slots[0]} | {intents[0]} |
-| {examples[1].split("/")[-1]} | {transcriptions[1]} | {slots[1]} | {intents[1]} |
-| {examples[2].split("/")[-1]} | {transcriptions[2]} | {slots[2]} | {intents[2]} |
-| {examples[3].split("/")[-1]} | {transcriptions[3]} | {slots[3]} | {intents[3]} |
-| {examples[4].split("/")[-1]} | {transcriptions[4]} | {slots[4]} | {intents[4]} |
-| {examples[5].split("/")[-1]} | {transcriptions[5]} | {slots[5]} | {intents[5]} |
-| {examples[6].split("/")[-1]} | {transcriptions[6]} | {slots[6]} | {intents[6]} |
-| {examples[7].split("/")[-1]} | {transcriptions[7]} | {slots[7]} | {intents[7]} |
-""".strip()
-
 ############### calls
 
 def run_inference(audio_file):
@@ -126,6 +117,10 @@ with demo:
         show_share_button=False,
         max_length=20,
     )
+    with gr.Row(visible=False):
+        _transcription = gr.Textbox(label="Transcription")
+        _slot = gr.Textbox(label="Slots")
+        _intent = gr.Textbox(label="Intent")
 
     output = gr.HighlightedText(label="ASR result + NLU result")
 
@@ -137,8 +132,12 @@ with demo:
     )
 
     with gr.Row():
-        gr.Examples(
-
+        gr.Examples(
+            label="Examples(Speech-MASSIVE test utterances):",
+            examples=example_list,
+            inputs=[audio_file, _transcription, _slot, _intent],
+            cache_examples=CACHE_EXAMPLES,
+        )
 
     gr.Markdown("# Aknowledgments")
     gr.Markdown(utter_ack_text)
```
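The commit replaces a hand-built Markdown table with two Gradio techniques: hidden `gr.Textbox` components registered as example inputs (so the examples table gains Transcription/Slots/Intent columns), and example caching gated behind a `CACHE_EXAMPLES` environment variable. Below is a minimal self-contained sketch of that pattern, assuming a recent Gradio 4.x API. The data lists, the `run_inference` stub, and the button wiring are illustrative placeholders, not the Space's actual pipeline; note that `fn` and `outputs` are added here because `gr.Examples` requires them when caching is enabled.

```python
import os

import gradio as gr

# Hypothetical stand-in data; in the real app.py these lists hold
# Speech-MASSIVE test utterances and their annotations.
examples = [
    "resources/audios/utt_286.wav",
    "resources/audios/utt_2414.wav",
]
transcriptions = ["example transcription 1", "example transcription 2"]
intents = ["intent_1", "intent_2"]

# Caching pre-computes the output for every example at startup, so it is
# opt-in via an environment variable, as in the commit.
CACHE_EXAMPLES = os.getenv("CACHE_EXAMPLES") == "1"


def run_inference(audio_file, transcription=None, intent=None):
    # Placeholder for the Space's real ASR + NLU pipeline; returns
    # (token, label) pairs for gr.HighlightedText.
    return [("stub transcription", "stub_label")]


with gr.Blocks() as demo:
    audio_file = gr.Audio(type="filepath")

    # Hidden row: these components are never shown, but registering them
    # as example inputs adds Transcription and Intent columns to the
    # rendered examples table.
    with gr.Row(visible=False):
        _transcription = gr.Textbox(label="Transcription")
        _intent = gr.Textbox(label="Intent")

    output = gr.HighlightedText(label="ASR result + NLU result")

    gr.Examples(
        examples=[list(row) for row in zip(examples, transcriptions, intents)],
        inputs=[audio_file, _transcription, _intent],
        fn=run_inference,      # required (with outputs) when caching is on
        outputs=output,
        cache_examples=CACHE_EXAMPLES,
    )

    run_button = gr.Button("Run inference")
    run_button.click(run_inference, inputs=audio_file, outputs=output)

if __name__ == "__main__":
    demo.launch()
```

The design trade-off: with `CACHE_EXAMPLES=1`, startup pays the cost of running inference on every example so clicking an example returns instantly; with it unset, the Space starts fast and examples merely populate the inputs.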