txya900619 committed on
Commit
4902da8
1 Parent(s): 7b59ebe

fix: multi-channel audio could not be recognized

Browse files
Files changed (1) hide show
  1. app.py +6 -2
app.py CHANGED
@@ -16,6 +16,9 @@ models_config = OmegaConf.to_object(OmegaConf.load("configs/models.yaml"))
16
  def automatic_speech_recognition(model_id: str, dialect_id: str, audio_data: str):
17
  model = models_config[model_id]["model"][dialect_id]
18
  sample_rate, audio_array = audio_data
 
 
 
19
  audio_bytes = audio_array.tobytes()
20
 
21
  rec = KaldiRecognizer(model, sample_rate)
@@ -100,8 +103,8 @@ with demo:
100
  # 臺灣客語語音辨識系統
101
  ### Taiwanese Hakka Automatic-Speech-Recognition System
102
  ### 研發
103
- - **[李鴻欣 Hung-Shin Lee](mailto:hungshinlee@gmail.com)(諾思資訊 North Co., Ltd.)**
104
- - **[陳力瑋 Li-Wei Chen](mailto:wayne900619@gmail.com)(諾思資訊 North Co., Ltd.)**
105
  """
106
  )
107
  gr.Interface(
@@ -112,6 +115,7 @@ with demo:
112
  gr.Audio(
113
  label="上傳或錄音",
114
  type="numpy",
 
115
  waveform_options=gr.WaveformOptions(
116
  sample_rate=16000,
117
  ),
 
16
  def automatic_speech_recognition(model_id: str, dialect_id: str, audio_data: str):
17
  model = models_config[model_id]["model"][dialect_id]
18
  sample_rate, audio_array = audio_data
19
+ if audio_array.ndim == 2:
20
+ audio_array = audio_array[:, 0]
21
+
22
  audio_bytes = audio_array.tobytes()
23
 
24
  rec = KaldiRecognizer(model, sample_rate)
 
103
  # 臺灣客語語音辨識系統
104
  ### Taiwanese Hakka Automatic-Speech-Recognition System
105
  ### 研發
106
+ - **[李鴻欣 Hung-Shin Lee](mailto:hungshinlee@gmail.com)(聯和科創 United Link Co., Ltd.)**
107
+ - **[陳力瑋 Li-Wei Chen](mailto:wayne900619@gmail.com)(聯和科創 United Link Co., Ltd.)**
108
  """
109
  )
110
  gr.Interface(
 
115
  gr.Audio(
116
  label="上傳或錄音",
117
  type="numpy",
118
+ format="wav",
119
  waveform_options=gr.WaveformOptions(
120
  sample_rate=16000,
121
  ),