MikeTangoEcho committed on
Commit
11efa99
1 Parent(s): c092255

fix: app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -4
app.py CHANGED
@@ -51,7 +51,7 @@ def transcribe(audio: str | Path | bytes | tuple[int, np.ndarray] | None):
51
  return "..."
52
  # TODO Manage str/Path
53
 
54
- logger.debug("Transcribe")
55
 
56
  text = ""
57
  # https://huggingface.co/docs/transformers/main_classes/pipelines#transformers.AutomaticSpeechRecognitionPipeline.__call__
@@ -63,14 +63,17 @@ def transcribe(audio: str | Path | bytes | tuple[int, np.ndarray] | None):
63
  if raw.ndim > 1:
64
  raw = raw.mean(axis=1)
65
 
66
- raw = raw.astype(np.float32)
67
  raw /= np.max(np.abs(raw))
68
 
69
- inputs = {"sampling_rate": sampling_rate, "raw": raw} if type(audio) is tuple else audio
 
 
 
70
  transcript = asr(inputs)
71
  text = transcript['text']
72
 
73
- logger.debug("Tokenize:[" + text + "]")
74
 
75
  entities = tc(text)
76
 
 
51
  return "..."
52
  # TODO Manage str/Path
53
 
54
+ logger.debug("====> Transcribe")
55
 
56
  text = ""
57
  # https://huggingface.co/docs/transformers/main_classes/pipelines#transformers.AutomaticSpeechRecognitionPipeline.__call__
 
63
  if raw.ndim > 1:
64
  raw = raw.mean(axis=1)
65
 
66
+ raw = raw.astype(np.float32) # Convert to asr_torch_dtype
67
  raw /= np.max(np.abs(raw))
68
 
69
+ inputs = {"sampling_rate": sampling_rate, "raw": raw} # if type(audio) is tuple else audio
70
+
71
+ logger.debug(inputs)
72
+
73
  transcript = asr(inputs)
74
  text = transcript['text']
75
 
76
+ logger.debug("====> Tokenize:[" + text + "]")
77
 
78
  entities = tc(text)
79