MikeTangoEcho committed on
Commit
79c8857
1 Parent(s): 11efa99

feat: update app

Browse files
Files changed (1) hide show
  1. app.py +47 -9
app.py CHANGED
@@ -47,41 +47,56 @@ tc = pipeline(
47
  # - or a bytes object (recommended for streaming),
48
  # - or a tuple of (sample rate in Hz, audio data as numpy array)
49
  def transcribe(audio: str | Path | bytes | tuple[int, np.ndarray] | None):
 
 
50
  if audio is None:
51
  return "..."
52
  # TODO Manage str/Path
53
 
54
- logger.debug("====> Transcribe")
55
-
56
  text = ""
 
57
  # https://huggingface.co/docs/transformers/main_classes/pipelines#transformers.AutomaticSpeechRecognitionPipeline.__call__
58
  # Whisper input format for tuple differ from output provided by gradio audio component
59
- if asr_model.startswith("openai/whisper"):
60
  sampling_rate, raw = audio
61
 
62
  # Convert to mono if stereo
63
  if raw.ndim > 1:
64
  raw = raw.mean(axis=1)
65
 
66
- raw = raw.astype(np.float32) # Convert to asr_torch_dtype
 
67
  raw /= np.max(np.abs(raw))
68
 
69
- inputs = {"sampling_rate": sampling_rate, "raw": raw} # if type(audio) is tuple else audio
70
 
71
  logger.debug(inputs)
72
 
73
  transcript = asr(inputs)
74
  text = transcript['text']
75
 
76
- logger.debug("====> Tokenize:[" + text + "]")
 
 
 
 
 
77
 
78
  entities = tc(text)
79
 
80
- #logger.debug("Classify:[" + entities + "]")
81
 
82
  # TODO Add Text Classification for sentiment analysis
83
  return {"text": text, "entities": entities}
84
 
 
 
 
 
 
 
 
 
85
  # ---
86
 
87
  # Gradio
@@ -96,8 +111,8 @@ input_audio = gr.Audio(
96
 
97
  ## App
98
 
99
- gradio_app = gr.Interface(
100
- transcribe,
101
  inputs=[
102
  input_audio
103
  ],
@@ -111,5 +126,28 @@ gradio_app = gr.Interface(
111
  flagging_mode="never"
112
  )
113
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  ## Start!
115
  gradio_app.launch()
 
47
  # - or a bytes object (recommended for streaming),
48
  # - or a tuple of (sample rate in Hz, audio data as numpy array)
49
def transcribe(audio: str | Path | bytes | tuple[int, np.ndarray] | None):
    """Transcribe an audio input to text using the module-level ASR pipeline.

    Args:
        audio: Input from the gradio audio component. May be a file path
            (str/Path), raw bytes, a ``(sample_rate_hz, samples)`` tuple,
            or ``None`` when no audio was provided.

    Returns:
        The transcribed text, ``"..."`` when no audio was provided, or ""
        when the input shape is not handled (non-whisper model / non-tuple
        input).
    """
    logger.debug(">Transcribe")

    if audio is None:
        return "..."
    # TODO Manage str/Path

    text = ""

    # https://huggingface.co/docs/transformers/main_classes/pipelines#transformers.AutomaticSpeechRecognitionPipeline.__call__
    # Whisper input format for tuple differ from output provided by gradio audio component
    if asr_model.startswith("openai/whisper") and isinstance(audio, tuple):
        sampling_rate, raw = audio

        # Convert to mono if stereo
        if raw.ndim > 1:
            raw = raw.mean(axis=1)

        # Convert according to asr_torch_dtype.
        # BUG FIX: the original tested `type(asr_torch_dtype) is torch.float16`,
        # which is always False because torch.float16 is a torch.dtype
        # *instance*, not a type — the float16 branch was dead. Compare the
        # dtype value directly.
        raw = raw.astype(np.float16 if asr_torch_dtype == torch.float16 else np.float32)

        # Normalize to [-1, 1]; guard against all-zero (silent) input which
        # would otherwise divide by zero and produce NaNs.
        peak = np.max(np.abs(raw))
        if peak > 0:
            raw /= peak

        inputs = {"sampling_rate": sampling_rate, "raw": raw}

        logger.debug(inputs)

        transcript = asr(inputs)
        text = transcript['text']

    logger.debug(text)

    return text
81
+
82
def tokenize(text: str):
    """Run token classification (NER) over *text*.

    Returns:
        A dict with the original text and the recognized entities, in the
        format expected by ``gr.HighlightedText``.
    """
    logger.debug(">Tokenize")

    recognized = tc(text)
    logger.debug(recognized)

    # TODO Add Text Classification for sentiment analysis
    return {"text": text, "entities": recognized}
91
 
92
def classify(text: str):
    """Placeholder for text classification (e.g. sentiment analysis).

    Currently a stub: it only logs the call and returns ``None``.
    """
    logger.debug(">Classify")
    return None
96
+
97
def transcribe_tokenize(*arg):
    """Gradio entry point: transcribe the audio input, then tag entities.

    BUG FIX: the original called ``transcribe(arg)``, forwarding the whole
    varargs *tuple* — wrapping the gradio ``(sample_rate, data)`` audio
    tuple in a 1-tuple and breaking the ``sampling_rate, raw = audio``
    unpacking inside transcribe(). Unpack the positional args instead.
    """
    return tokenize(transcribe(*arg))
99
+
100
  # ---
101
 
102
  # Gradio
 
111
 
112
  ## App
113
 
114
+ asrner_app = gr.Interface(
115
+ transcribe_tokenize,
116
  inputs=[
117
  input_audio
118
  ],
 
126
  flagging_mode="never"
127
  )
128
 
129
# NER-only interface: paste text, see the recognized entities highlighted.
ner_app = gr.Interface(
    tokenize,
    inputs=[gr.Textbox()],
    outputs=[gr.HighlightedText()],
    title="NERSBX",
    description="Tokenize, Classify",
    flagging_mode="never",
)
143
+
144
# Combine the ASR+NER app and the NER-only app into a single tabbed UI.
gradio_app = gr.TabbedInterface(
    interface_list=[asrner_app, ner_app],
    title="ASRNERSBX",
)
151
+
152
  ## Start!
153
  gradio_app.launch()