Spaces:

MikeTangoEcho
/

asrnersbx

Paused

MikeTangoEcho committed on Nov 10

Commit

11efa99

•

1 Parent(s): c092255

fix: app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -51,7 +51,7 @@ def transcribe(audio: str | Path | bytes | tuple[int, np.ndarray] | None):
         return "..."
     # TODO Manage str/Path
-    logger.debug("Transcribe")
     text = ""
     # https://huggingface.co/docs/transformers/main_classes/pipelines#transformers.AutomaticSpeechRecognitionPipeline.__call__
@@ -63,14 +63,17 @@ def transcribe(audio: str | Path | bytes | tuple[int, np.ndarray] | None):
         if raw.ndim > 1:
             raw = raw.mean(axis=1)
-        raw = raw.astype(np.float32)
         raw /= np.max(np.abs(raw))
-        inputs = {"sampling_rate": sampling_rate, "raw": raw} if type(audio) is tuple else audio
         transcript = asr(inputs)
         text = transcript['text']
-    logger.debug("Tokenize:[" + text + "]")
     entities = tc(text)

         return "..."
     # TODO Manage str/Path
+    logger.debug("====> Transcribe")
     text = ""
     # https://huggingface.co/docs/transformers/main_classes/pipelines#transformers.AutomaticSpeechRecognitionPipeline.__call__
         if raw.ndim > 1:
             raw = raw.mean(axis=1)
+        raw = raw.astype(np.float32) # Convert to asr_torch_dtype
         raw /= np.max(np.abs(raw))
+        inputs = {"sampling_rate": sampling_rate, "raw": raw} # if type(audio) is tuple else audio
+        logger.debug(inputs)
         transcript = asr(inputs)
         text = transcript['text']
+    logger.debug("====> Tokenize:[" + text + "]")
     entities = tc(text)