Spaces:
Sleeping
Sleeping
txya900619
committed on
Commit
•
4902da8
1
Parent(s):
7b59ebe
fix: multi-channel audio could not be recognized
Browse files
app.py
CHANGED
@@ -16,6 +16,9 @@ models_config = OmegaConf.to_object(OmegaConf.load("configs/models.yaml"))
|
|
16 |
def automatic_speech_recognition(model_id: str, dialect_id: str, audio_data: str):
|
17 |
model = models_config[model_id]["model"][dialect_id]
|
18 |
sample_rate, audio_array = audio_data
|
|
|
|
|
|
|
19 |
audio_bytes = audio_array.tobytes()
|
20 |
|
21 |
rec = KaldiRecognizer(model, sample_rate)
|
@@ -100,8 +103,8 @@ with demo:
|
|
100 |
# 臺灣客語語音辨識系統
|
101 |
### Taiwanese Hakka Automatic-Speech-Recognition System
|
102 |
### 研發
|
103 |
-
- **[李鴻欣 Hung-Shin Lee](mailto:hungshinlee@gmail.com)
|
104 |
-
- **[陳力瑋 Li-Wei Chen](mailto:wayne900619@gmail.com)
|
105 |
"""
|
106 |
)
|
107 |
gr.Interface(
|
@@ -112,6 +115,7 @@ with demo:
|
|
112 |
gr.Audio(
|
113 |
label="上傳或錄音",
|
114 |
type="numpy",
|
|
|
115 |
waveform_options=gr.WaveformOptions(
|
116 |
sample_rate=16000,
|
117 |
),
|
|
|
16 |
def automatic_speech_recognition(model_id: str, dialect_id: str, audio_data: str):
|
17 |
model = models_config[model_id]["model"][dialect_id]
|
18 |
sample_rate, audio_array = audio_data
|
19 |
+
if audio_array.ndim == 2:
|
20 |
+
audio_array = audio_array[:, 0]
|
21 |
+
|
22 |
audio_bytes = audio_array.tobytes()
|
23 |
|
24 |
rec = KaldiRecognizer(model, sample_rate)
|
|
|
103 |
# 臺灣客語語音辨識系統
|
104 |
### Taiwanese Hakka Automatic-Speech-Recognition System
|
105 |
### 研發
|
106 |
+
- **[李鴻欣 Hung-Shin Lee](mailto:hungshinlee@gmail.com)(聯和科創 United Link Co., Ltd.)**
|
107 |
+
- **[陳力瑋 Li-Wei Chen](mailto:wayne900619@gmail.com)(聯和科創 United Link Co., Ltd.)**
|
108 |
"""
|
109 |
)
|
110 |
gr.Interface(
|
|
|
115 |
gr.Audio(
|
116 |
label="上傳或錄音",
|
117 |
type="numpy",
|
118 |
+
format="wav",
|
119 |
waveform_options=gr.WaveformOptions(
|
120 |
sample_rate=16000,
|
121 |
),
|