Spaces:
Paused
Paused
MikeTangoEcho
committed on
Commit
•
c092255
1
Parent(s):
75b7975
fix: app.py
Browse files
app.py
CHANGED
@@ -57,7 +57,16 @@ def transcribe(audio: str | Path | bytes | tuple[int, np.ndarray] | None):
|
|
57 |
# https://huggingface.co/docs/transformers/main_classes/pipelines#transformers.AutomaticSpeechRecognitionPipeline.__call__
|
58 |
# Whisper input format for tuple differ from output provided by gradio audio component
|
59 |
if asr_model.startswith("openai/whisper"):
|
60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
transcript = asr(inputs)
|
62 |
text = transcript['text']
|
63 |
|
|
|
57 |
# https://huggingface.co/docs/transformers/main_classes/pipelines#transformers.AutomaticSpeechRecognitionPipeline.__call__
|
58 |
# Whisper input format for tuple differ from output provided by gradio audio component
|
59 |
if asr_model.startswith("openai/whisper"):
|
60 |
+
sampling_rate, raw = audio
|
61 |
+
|
62 |
+
# Convert to mono if stereo
|
63 |
+
if raw.ndim > 1:
|
64 |
+
raw = raw.mean(axis=1)
|
65 |
+
|
66 |
+
raw = raw.astype(np.float32)
|
67 |
+
raw /= np.max(np.abs(raw))
|
68 |
+
|
69 |
+
inputs = {"sampling_rate": sampling_rate, "raw": raw} if type(audio) is tuple else audio
|
70 |
transcript = asr(inputs)
|
71 |
text = transcript['text']
|
72 |
|