txya900619 committed on
Commit
7b59ebe
1 Parent(s): c26bd10

feat: init upload

Browse files
Files changed (3) hide show
  1. app.py +126 -0
  2. configs/models.yaml +15 -0
  3. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+
3
+ import gradio as gr
4
+ from omegaconf import OmegaConf
5
+ from huggingface_hub import snapshot_download
6
+ from vosk import Model, KaldiRecognizer
7
+
8
def load_vosk(model_id: str):
    """Fetch a model repository and open it as a Vosk ``Model``.

    Args:
        model_id: Repository identifier handed to ``snapshot_download``.

    Returns:
        A ``vosk.Model`` built from the directory ``snapshot_download``
        returned.
    """
    return Model(model_path=snapshot_download(model_id))
11
+
12
# Make ``${load_vosk:<repo>}`` interpolations in the YAML resolve through
# ``load_vosk``; this has to be registered before the config is converted.
OmegaConf.register_new_resolver("load_vosk", load_vosk)

# ``to_object`` resolves the interpolations and yields plain Python containers.
_models_yaml = OmegaConf.load("configs/models.yaml")
models_config = OmegaConf.to_object(_models_yaml)
15
+
16
def automatic_speech_recognition(model_id: str, dialect_id: str, audio_data: tuple):
    """Transcribe a recording with the Vosk model selected in the UI.

    Args:
        model_id: Top-level key of ``models_config``.
        dialect_id: Key into that entry's ``"model"`` table.
        audio_data: ``(sample_rate, samples)`` pair, or ``None`` when no
            audio was supplied. ``samples`` must offer the array methods
            used below (``ndim``, ``mean``, ``astype``, ``tobytes``).

    Returns:
        The recognised segments with spaces removed, joined by "," and
        terminated by "。". An empty string is returned when there is no
        audio or when nothing was recognised.
    """
    # Nothing recorded or uploaded: return early instead of failing on unpack.
    if audio_data is None:
        return ""

    model = models_config[model_id]["model"][dialect_id]
    sample_rate, audio_array = audio_data

    # A 2-D array is taken to be (samples, channels). Serialising it directly
    # would interleave the channels, so average them down to one channel and
    # keep the original sample dtype.
    if audio_array.ndim > 1:
        audio_array = audio_array.mean(axis=1).astype(audio_array.dtype)

    # NOTE(review): the raw bytes are passed on without checking the sample
    # width; this assumes the recogniser's expected format (16-bit PCM per the
    # Vosk examples) — confirm for uploads with other bit depths.
    audio_bytes = audio_array.tobytes()

    rec = KaldiRecognizer(model, sample_rate)
    rec.SetWords(True)

    chunk_size = 4000  # bytes handed to the recogniser per call
    results = []
    for start in range(0, len(audio_bytes), chunk_size):
        # Slicing clamps at the end of the buffer, so no explicit bound check.
        if rec.AcceptWaveform(audio_bytes[start : start + chunk_size]):
            results.append(json.loads(rec.Result()))
    # Collect whatever is still pending after the last chunk.
    results.append(json.loads(rec.FinalResult()))

    # Strip the spaces between tokens and drop segments that end up empty.
    segments = [result.get("text", "").replace(" ", "") for result in results]
    segments = [segment for segment in segments if segment]

    if not segments:
        # Avoid answering with a lone full stop when nothing was recognised.
        return ""
    return ",".join(segments) + "。"
46
+
47
+
48
def when_model_selected(model_id: str):
    """Build the dialect dropdown update for the chosen model.

    Args:
        model_id: Top-level key of ``models_config``.

    Returns:
        A ``gr.update`` whose choices are the ``(label, dialect_id)`` pairs
        from the model's ``"dialect_mapping"`` and whose value is the
        dialect id of the first pair.
    """
    dialect_pairs = list(models_config[model_id]["dialect_mapping"].items())
    _, first_dialect_id = dialect_pairs[0]
    return gr.update(choices=dialect_pairs, value=first_dialect_id)
59
+
60
+
61
# Font fallback chain for the theme; "tauhu-oo" is presumably provided by the
# stylesheet imported through ``css`` below.
_font_stack = (
    "tauhu-oo",
    gr.themes.GoogleFont("Source Sans Pro"),
    "ui-sans-serif",
    "system-ui",
    "sans-serif",
)

# Header text rendered between the dropdowns and the recognition form.
_header_markdown = """
# 臺灣客語語音辨識系統
### Taiwanese Hakka Automatic-Speech-Recognition System
### 研發
- **[李鴻欣 Hung-Shin Lee](mailto:hungshinlee@gmail.com)(諾思資訊 North Co., Ltd.)**
- **[陳力瑋 Li-Wei Chen](mailto:wayne900619@gmail.com)(諾思資訊 North Co., Ltd.)**
"""

demo = gr.Blocks(
    title="臺灣客語語音辨識系統",
    css="@import url(https://tauhu.tw/tauhu-oo.css);",
    theme=gr.themes.Default(font=_font_stack),
)

with demo:
    # The first configured model is preselected, together with its first dialect.
    default_model_id = list(models_config)[0]
    _default_dialect_pairs = list(
        models_config[default_model_id]["dialect_mapping"].items()
    )

    model_drop_down = gr.Dropdown(
        choices=models_config.keys(),
        value=default_model_id,
        label="模型",
    )

    dialect_drop_down = gr.Dropdown(
        choices=_default_dialect_pairs,
        value=_default_dialect_pairs[0][1],
        label="腔調",
    )

    # Refresh the dialect list whenever the user picks another model.
    model_drop_down.input(
        when_model_selected,
        inputs=[model_drop_down],
        outputs=[dialect_drop_down],
    )

    gr.Markdown(_header_markdown)

    # Components are created in the same order as the inline original:
    # audio input first, then the text output, then the Interface itself.
    _audio_input = gr.Audio(
        label="上傳或錄音",
        type="numpy",
        waveform_options=gr.WaveformOptions(sample_rate=16000),
    )
    _text_output = gr.Text(interactive=False, label="客語漢字")

    gr.Interface(
        fn=automatic_speech_recognition,
        inputs=[model_drop_down, dialect_drop_down, _audio_input],
        outputs=[_text_output],
        allow_flagging="auto",
    )

demo.launch()
configs/models.yaml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ htia-0.1:
2
+ model:
3
+ htia_sixian: ${load_vosk:formospeech/kaldi-taiwanese-hakka-sixian}
4
+ htia_hailu: ${load_vosk:formospeech/kaldi-taiwanese-hakka-hailu}
5
+ htia_dapu: ${load_vosk:formospeech/kaldi-taiwanese-hakka-dapu}
6
+ htia_raoping: ${load_vosk:formospeech/kaldi-taiwanese-hakka-raoping}
7
+ htia_zhaoan: ${load_vosk:formospeech/kaldi-taiwanese-hakka-zhaoan}
8
+ htia_nansixian: ${load_vosk:formospeech/kaldi-taiwanese-hakka-nansixian}
9
+ dialect_mapping:
10
+ 四縣: htia_sixian
11
+ 海陸: htia_hailu
12
+ 大埔: htia_dapu
13
+ 饒平: htia_raoping
14
+ 詔安: htia_zhaoan
15
+ 南四縣: htia_nansixian
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ vosk
2
+ omegaconf