Spaces:
Paused
Paused
MikeTangoEcho
committed on
Commit
•
79c8857
1
Parent(s):
11efa99
feat: update app
Browse files
app.py
CHANGED
@@ -47,41 +47,56 @@ tc = pipeline(
|
|
47 |
# - or a bytes object (recommended for streaming),
|
48 |
# - or a tuple of (sample rate in Hz, audio data as numpy array)
|
49 |
def transcribe(audio: str | Path | bytes | tuple[int, np.ndarray] | None):
|
|
|
|
|
50 |
if audio is None:
|
51 |
return "..."
|
52 |
# TODO Manage str/Path
|
53 |
|
54 |
-
logger.debug("====> Transcribe")
|
55 |
-
|
56 |
text = ""
|
|
|
57 |
# https://huggingface.co/docs/transformers/main_classes/pipelines#transformers.AutomaticSpeechRecognitionPipeline.__call__
|
58 |
# Whisper input format for tuple differ from output provided by gradio audio component
|
59 |
-
if asr_model.startswith("openai/whisper"):
|
60 |
sampling_rate, raw = audio
|
61 |
|
62 |
# Convert to mono if stereo
|
63 |
if raw.ndim > 1:
|
64 |
raw = raw.mean(axis=1)
|
65 |
|
66 |
-
|
|
|
67 |
raw /= np.max(np.abs(raw))
|
68 |
|
69 |
-
inputs = {"sampling_rate": sampling_rate, "raw": raw}
|
70 |
|
71 |
logger.debug(inputs)
|
72 |
|
73 |
transcript = asr(inputs)
|
74 |
text = transcript['text']
|
75 |
|
76 |
-
logger.debug(
|
|
|
|
|
|
|
|
|
|
|
77 |
|
78 |
entities = tc(text)
|
79 |
|
80 |
-
|
81 |
|
82 |
# TODO Add Text Classification for sentiment analysis
|
83 |
return {"text": text, "entities": entities}
|
84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
# ---
|
86 |
|
87 |
# Gradio
|
@@ -96,8 +111,8 @@ input_audio = gr.Audio(
|
|
96 |
|
97 |
## App
|
98 |
|
99 |
-
|
100 |
-
|
101 |
inputs=[
|
102 |
input_audio
|
103 |
],
|
@@ -111,5 +126,28 @@ gradio_app = gr.Interface(
|
|
111 |
flagging_mode="never"
|
112 |
)
|
113 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
## Start!
|
115 |
gradio_app.launch()
|
|
|
47 |
# - or a bytes object (recommended for streaming),
|
48 |
# - or a tuple of (sample rate in Hz, audio data as numpy array)
|
49 |
def transcribe(audio: str | Path | bytes | tuple[int, np.ndarray] | None):
    """Transcribe an audio input to text using the module-level ASR pipeline.

    Relies on ``asr``, ``asr_model``, ``asr_torch_dtype`` and ``logger``,
    which are defined elsewhere in this file.

    Args:
        audio: One of
            - ``None``: nothing was recorded/uploaded,
            - a ``(sample rate in Hz, numpy array)`` tuple,
            - a ``str``/``Path``/``bytes`` value, forwarded to the pipeline
              (``Path`` is converted to ``str`` first).

    Returns:
        The ``'text'`` field of the pipeline result, or the placeholder
        ``"..."`` when ``audio`` is ``None``.
    """
    logger.debug(">Transcribe")

    if audio is None:
        return "..."

    # https://huggingface.co/docs/transformers/main_classes/pipelines#transformers.AutomaticSpeechRecognitionPipeline.__call__
    # Whisper input format for tuple differ from output provided by gradio audio component
    if asr_model.startswith("openai/whisper") and isinstance(audio, tuple):
        sampling_rate, raw = audio

        # Convert to mono if stereo
        if raw.ndim > 1:
            raw = raw.mean(axis=1)

        # Convert according to asr_torch_dtype.
        # Compare the dtype itself: type(torch.float16) is torch.dtype, so the
        # previous `type(asr_torch_dtype) is torch.float16` test was never true.
        # NOTE(review): assumes asr_torch_dtype is a torch.dtype - confirm.
        use_half = asr_torch_dtype == torch.float16
        raw = raw.astype(np.float16 if use_half else np.float32)

        # Peak-normalise, skipping empty or all-zero (silent) buffers so we do
        # not raise on an empty array or fill the signal with NaN.
        peak = np.max(np.abs(raw)) if raw.size else 0
        if peak > 0:
            raw /= peak

        inputs = {"sampling_rate": sampling_rate, "raw": raw}
    else:
        # Previously `inputs` was left unbound on this path (NameError).
        # Forward the value to the pipeline; Path is turned into a plain str.
        # NOTE(review): tuples for non-Whisper models are passed through
        # unchanged - confirm the target pipeline accepts them.
        inputs = str(audio) if isinstance(audio, Path) else audio

    logger.debug(inputs)

    transcript = asr(inputs)
    text = transcript['text']

    logger.debug(text)

    return text
|
81 |
+
|
82 |
+
def tokenize(text: str):
    """Run the module-level token-classification pipeline ``tc`` on ``text``.

    Args:
        text: The text to analyse.

    Returns:
        A dict with the input under ``"text"`` and the pipeline output under
        ``"entities"``.
    """
    logger.debug(">Tokenize")

    result = {"text": text}
    result["entities"] = tc(text)

    logger.debug(result["entities"])

    # TODO Add Text Classification for sentiment analysis
    return result
|
91 |
|
92 |
+
def classify(text: str):
    """Placeholder for text classification; currently only logs the call.

    Args:
        text: The text to classify (unused for now).

    Returns:
        Always ``None``.
    """
    logger.debug(">Classify")

    # Not implemented yet: no classification result is produced.
    outcome = None
    return outcome
|
96 |
+
|
97 |
+
def transcribe_tokenize(*arg):
    """Transcribe the given audio, then tokenize the resulting text.

    Args:
        *arg: The positional arguments for ``transcribe`` (the audio value).

    Returns:
        The result of ``tokenize`` applied to the transcription.
    """
    # Unpack the varargs: passing `arg` itself handed transcribe() the wrapper
    # tuple `(audio,)`, which bypassed its None check and broke the
    # `(sampling_rate, raw)` unpacking.
    return tokenize(transcribe(*arg))
|
99 |
+
|
100 |
# ---
|
101 |
|
102 |
# Gradio
|
|
|
111 |
|
112 |
## App
|
113 |
|
114 |
+
asrner_app = gr.Interface(
|
115 |
+
transcribe_tokenize,
|
116 |
inputs=[
|
117 |
input_audio
|
118 |
],
|
|
|
126 |
flagging_mode="never"
|
127 |
)
|
128 |
|
129 |
+
# Text-only interface: a textbox feeds tokenize() and its result is rendered
# as highlighted text.
ner_app = gr.Interface(
    tokenize,
    inputs=[gr.Textbox()],
    outputs=[gr.HighlightedText()],
    title="NERSBX",
    description="Tokenize, Classify",
    flagging_mode="never",
)
|
143 |
+
|
144 |
+
# Combine both interfaces into a single tabbed application.
gradio_app = gr.TabbedInterface(
    interface_list=[asrner_app, ner_app],
    title="ASRNERSBX",
)

## Start!
gradio_app.launch()
|