mychen76 committed on
Commit
03fa999
1 Parent(s): 962202b
Files changed (2) hide show
  1. README.md +9 -0
  2. available-vosk-models.json +415 -0
README.md CHANGED
@@ -1,3 +1,12 @@
1
  ---
2
  license: apache-2.0
3
  ---
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: apache-2.0
3
  ---
4
+
5
+ ## Model list
6
+
7
+ This is the list of models compatible with Vosk-API.
8
+
9
+ There are two types of models: big and small. Small models are ideal for limited tasks in mobile applications; they can run on smartphones and Raspberry Pis, and they are also recommended for desktop applications. A small model is typically around 50 MB in size and requires about 300 MB of memory at runtime. Big models are intended for high-accuracy transcription on a server. Most small models allow dynamic vocabulary reconfiguration. Big models are static: their vocabulary cannot be modified at runtime.
10
+
11
+ ## Credits:
12
+ alphacephei
available-vosk-models.json ADDED
@@ -0,0 +1,415 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "english": [
3
+ {
4
+ "vosk_model_id": "vosk-model-small-en-us-0.15",
5
+ "vosk_model_language": "en",
6
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip",
7
+ "vosk_model_size": "40M",
8
+ "vosk_model_word_error_rate_and_speed": "9.85 (librispeech test-clean) 10.38 (tedlium)",
9
+ "vosk_model_notes": "Lightweight wideband model for Android and RPi",
10
+ "vosk_model_licenses": "Apache 2.0"
11
+ },
12
+ {
13
+ "vosk_model_id": "vosk-model-en-us-0.22",
14
+ "vosk_model_language": "en",
15
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-en-us-0.22.zip",
16
+ "vosk_model_size": "1.8G",
17
+ "vosk_model_word_error_rate_and_speed": "5.69 (librispeech test-clean) 6.05 (tedlium) 29.78 (callcenter)",
18
+ "vosk_model_notes": "Accurate generic US English model",
19
+ "vosk_model_licenses": "Apache 2.0"
20
+ },
21
+ {
22
+ "vosk_model_id": "vosk-model-en-us-0.22-lgraph",
23
+ "vosk_model_language": "en",
24
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-en-us-0.22-lgraph.zip",
25
+ "vosk_model_size": "128M",
26
+ "vosk_model_word_error_rate_and_speed": "7.82 (librispeech) 8.20 (tedlium)",
27
+ "vosk_model_notes": "Big US English model with dynamic graph",
28
+ "vosk_model_licenses": "Apache 2.0"
29
+ },
30
+ {
31
+ "vosk_model_id": "vosk-model-en-us-0.42-gigaspeech",
32
+ "vosk_model_language": "en",
33
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-en-us-0.42-gigaspeech.zip",
34
+ "vosk_model_size": "2.3G",
35
+ "vosk_model_word_error_rate_and_speed": "5.64 (librispeech test-clean) 6.24 (tedlium) 30.17 (callcenter)",
36
+ "vosk_model_notes": "Accurate generic US English model trained by Kaldi on Gigaspeech. Mostly for podcasts, not for telephony",
37
+ "vosk_model_licenses": "Apache 2.0"
38
+ },
39
+ {
40
+ "vosk_model_id": "vosk-model-en-in-0.5",
41
+ "vosk_model_language": "en-indian",
42
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-en-in-0.5.zip",
43
+ "vosk_model_size": "1G",
44
+ "vosk_model_word_error_rate_and_speed": "36.12 (NPTEL Pure)",
45
+ "vosk_model_notes": "Generic Indian English model for telecom and broadcast",
46
+ "vosk_model_licenses": "Apache 2.0"
47
+ },
48
+ {
49
+ "vosk_model_id": "vosk-model-small-en-in-0.4",
50
+ "vosk_model_language": "en-indian",
51
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-en-in-0.4.zip",
52
+ "vosk_model_size": "36M",
53
+ "vosk_model_word_error_rate_and_speed": "49.05 (NPTEL Pure)",
54
+ "vosk_model_notes": "Lightweight Indian English model for mobile applications",
55
+ "vosk_model_licenses": "Apache 2.0"
56
+ }
57
+ ],
58
+ "chinese": [
59
+ {
60
+ "vosk_model_id": "vosk-model-small-cn-0.22",
61
+ "vosk_model_language": "cn",
62
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-cn-0.22.zip",
63
+ "vosk_model_size": "42M",
64
+ "vosk_model_word_error_rate_and_speed": "23.54 (SpeechIO-02) 38.29 (SpeechIO-06) 17.15 (THCHS)",
65
+ "vosk_model_notes": "Lightweight model for Android and RPi",
66
+ "vosk_model_licenses": "Apache 2.0"
67
+ },
68
+ {
69
+ "vosk_model_id": "vosk-model-cn-0.22",
70
+ "vosk_model_language": "cn",
71
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-cn-0.22.zip",
72
+ "vosk_model_size": "1.3G",
73
+ "vosk_model_word_error_rate_and_speed": "13.98 (SpeechIO-02) 27.30 (SpeechIO-06) 7.43 (THCHS)",
74
+ "vosk_model_notes": "Big generic Chinese model for server processing",
75
+ "vosk_model_licenses": "Apache 2.0"
76
+ },
77
+ {
78
+ "vosk_model_id": "vosk-model-cn-kaldi-multicn-0.15",
79
+ "vosk_model_language": "cn",
80
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-cn-kaldi-multicn-0.15.zip",
81
+ "vosk_model_size": "1.5G",
82
+ "vosk_model_word_error_rate_and_speed": "17.44 (SpeechIO-02) 9.56 (THCHS)",
83
+ "vosk_model_notes": "Original Wideband Kaldi multi-cn model from Kaldi with Vosk LM",
84
+ "vosk_model_licenses": "Apache 2.0"
85
+ }
86
+ ],
87
+ "french": [
88
+ {
89
+ "vosk_model_id": "vosk-model-small-fr-0.22",
90
+ "vosk_model_language": "fr",
91
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-fr-0.22.zip",
92
+ "vosk_model_size": "41M",
93
+ "vosk_model_word_error_rate_and_speed": "23.95 (cv test) 19.30 (mtedx) 27.25 (podcast)",
94
+ "vosk_model_notes": "Lightweight wideband model for Android/iOS and RPi",
95
+ "vosk_model_licenses": "Apache 2.0"
96
+ },
97
+ {
98
+ "vosk_model_id": "vosk-model-fr-0.22",
99
+ "vosk_model_language": "fr",
100
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-fr-0.22.zip",
101
+ "vosk_model_size": "1.4G",
102
+ "vosk_model_word_error_rate_and_speed": "14.72 (cv test) 11.64 (mls) 13.10 (mtedx) 21.61 (podcast) 13.22 (voxpopuli)",
103
+ "vosk_model_notes": "Big accurate model for servers",
104
+ "vosk_model_licenses": "Apache 2.0"
105
+ }
106
+ ],
107
+ "spanish": [
108
+ {
109
+ "vosk_model_id": "vosk-model-small-es-0.42",
110
+ "vosk_model_language": "es",
111
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-es-0.42.zip",
112
+ "vosk_model_size": "39M",
113
+ "vosk_model_word_error_rate_and_speed": "16.02 (cv test) 16.72 (mtedx test) 11.21 (mls)",
114
+ "vosk_model_notes": "Lightweight wideband model for Android/iOS and RPi",
115
+ "vosk_model_licenses": "Apache 2.0"
116
+ },
117
+ {
118
+ "vosk_model_id": "vosk-model-es-0.42",
119
+ "vosk_model_language": "es",
120
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-es-0.42.zip",
121
+ "vosk_model_size": "1.4G",
122
+ "vosk_model_word_error_rate_and_speed": "7.50 (cv test) 10.05 (mtedx test) 5.84 (mls)",
123
+ "vosk_model_notes": "Big model for Spanish",
124
+ "vosk_model_licenses": "Apache 2.0"
125
+ }
126
+ ],
127
+ "german": [
128
+ {
129
+ "vosk_model_id": "vosk-model-de-0.21",
130
+ "vosk_model_language": "de",
131
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-de-0.21.zip",
132
+ "vosk_model_size": "1.9G",
133
+ "vosk_model_word_error_rate_and_speed": "9.83 (Tuda-de test), 24.00 (podcast) 12.82 (cv-test) 12.42 (mls) 33.26 (mtedx)",
134
+ "vosk_model_notes": "Big German model for telephony and server",
135
+ "vosk_model_licenses": "Apache 2.0"
136
+ },
137
+ {
138
+ "vosk_model_id": "vosk-model-small-de-zamia-0.3",
139
+ "vosk_model_language": "de",
140
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-de-zamia-0.3.zip",
141
+ "vosk_model_size": "49M",
142
+ "vosk_model_word_error_rate_and_speed": "14.81 (Tuda-de test), 37.46 (podcast)",
143
+ "vosk_model_notes": "Zamia f_250 small model repackaged (not recommended)",
144
+ "vosk_model_licenses": "Apache 2.0"
145
+ },
146
+ {
147
+ "vosk_model_id": "vosk-model-small-de-0.15",
148
+ "vosk_model_language": "de",
149
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-de-0.15.zip",
150
+ "vosk_model_size": "45M",
151
+ "vosk_model_word_error_rate_and_speed": "13.75 (Tuda-de test), 30.67 (podcast)",
152
+ "vosk_model_notes": "Lightweight wideband model for Android and RPi",
153
+ "vosk_model_licenses": "Apache 2.0"
154
+ }
155
+ ],
156
+ "portuguese": [
157
+ {
158
+ "vosk_model_id": "vosk-model-small-pt-0.3",
159
+ "vosk_model_language": "pt",
160
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-pt-0.3.zip",
161
+ "vosk_model_size": "31M",
162
+ "vosk_model_word_error_rate_and_speed": "68.92 (coraa dev) 32.60 (cv test)",
163
+ "vosk_model_notes": "Lightweight wideband model for Android and RPi",
164
+ "vosk_model_licenses": "Apache 2.0"
165
+ },
166
+ {
167
+ "vosk_model_id": "vosk-model-pt-fb-v0.1.1-20220516_2113",
168
+ "vosk_model_language": "pt",
169
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-pt-fb-v0.1.1-20220516_2113.zip",
170
+ "vosk_model_size": "1.6G",
171
+ "vosk_model_word_error_rate_and_speed": "54.34 (coraa dev) 27.70 (cv test)",
172
+ "vosk_model_notes": "Big model from FalaBrazil",
173
+ "vosk_model_licenses": "Apache 2.0"
174
+ }
175
+ ],
176
+ "greek": [
177
+ {
178
+ "vosk_model_id": "vosk-model-el-gr-0.7",
179
+ "vosk_model_language": "gr",
180
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-el-gr-0.7.zip",
181
+ "vosk_model_size": "1.1G",
182
+ "vosk_model_word_error_rate_and_speed": "TBD",
183
+ "vosk_model_notes": "Big narrowband Greek model for server processing, not extremely accurate though",
184
+ "vosk_model_licenses": "Apache 2.0"
185
+ }
186
+ ],
187
+ "vietnamese": [
188
+ {
189
+ "vosk_model_id": "vosk-model-small-vn-0.4",
190
+ "vosk_model_language": "vn",
191
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-vn-0.4.zip",
192
+ "vosk_model_size": "32M",
193
+ "vosk_model_word_error_rate_and_speed": "15.70 (Vivos test)",
194
+ "vosk_model_notes": "Lightweight Vietnamese model",
195
+ "vosk_model_licenses": "Apache 2.0"
196
+ },
197
+ {
198
+ "vosk_model_id": "vosk-model-vn-0.4",
199
+ "vosk_model_language": "vn",
200
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-vn-0.4.zip",
201
+ "vosk_model_size": "78M",
202
+ "vosk_model_word_error_rate_and_speed": "15.70 (Vivos test)",
203
+ "vosk_model_notes": "Bigger Vietnamese model for server",
204
+ "vosk_model_licenses": "Apache 2.0"
205
+ }
206
+ ],
207
+ "italian": [
208
+ {
209
+ "vosk_model_id": "vosk-model-small-it-0.22",
210
+ "vosk_model_language": "it",
211
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-it-0.22.zip",
212
+ "vosk_model_size": "48M",
213
+ "vosk_model_word_error_rate_and_speed": "16.88 (cv test) 25.87 (mls) 17.01 (mtedx)",
214
+ "vosk_model_notes": "Lightweight model for Android and RPi",
215
+ "vosk_model_licenses": "Apache 2.0"
216
+ },
217
+ {
218
+ "vosk_model_id": "vosk-model-it-0.22",
219
+ "vosk_model_language": "it",
220
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-it-0.22.zip",
221
+ "vosk_model_size": "1.2G",
222
+ "vosk_model_word_error_rate_and_speed": "8.10 (cv test) 15.68 (mls) 11.23 (mtedx)",
223
+ "vosk_model_notes": "Big generic Italian model for servers",
224
+ "vosk_model_licenses": "Apache 2.0"
225
+ }
226
+ ],
227
+ "dutch": [
228
+ {
229
+ "vosk_model_id": "vosk-model-small-nl-0.22",
230
+ "vosk_model_language": "nl",
231
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-nl-0.22.zip",
232
+ "vosk_model_size": "39M",
233
+ "vosk_model_word_error_rate_and_speed": "22.45 (cv test) 26.80 (tv) 25.84 (mls) 24.09 (voxpopuli)",
234
+ "vosk_model_notes": "Lightweight model for Dutch",
235
+ "vosk_model_licenses": "Apache 2.0"
236
+ }
237
+ ],
238
+ "arabic": [
239
+ {
240
+ "vosk_model_id": "vosk-model-ar-mgb2-0.4",
241
+ "vosk_model_language": "ar",
242
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-ar-mgb2-0.4.zip",
243
+ "vosk_model_size": "318M",
244
+ "vosk_model_word_error_rate_and_speed": "16.40 (MGB-2 dev set)",
245
+ "vosk_model_notes": "Repackaged Arabic model trained on MGB2 dataset from Kaldi",
246
+ "vosk_model_licenses": "Apache 2.0"
247
+ },
248
+ {
249
+ "vosk_model_id": "vosk-model-ar-0.22-linto-1.1.0",
250
+ "vosk_model_language": "ar",
251
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-ar-0.22-linto-1.1.0.zip",
252
+ "vosk_model_size": "1.3G",
253
+ "vosk_model_word_error_rate_and_speed": "52.87 (cv test) 28.50 (MGB-2 dev set) 1.0xRT",
254
+ "vosk_model_notes": "Big model from LINTO project",
255
+ "vosk_model_licenses": "Apache 2.0"
256
+ }
257
+ ],
258
+ "farsi": [
259
+ {
260
+ "vosk_model_id": "vosk-model-small-fa-0.4",
261
+ "vosk_model_language": "fa",
262
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-fa-0.4.zip",
263
+ "vosk_model_size": "47M",
264
+ "vosk_model_word_error_rate_and_speed": "TBD",
265
+ "vosk_model_notes": "Lightweight wideband model for Android and RPi for Farsi (Persian)",
266
+ "vosk_model_licenses": "Apache 2.0"
267
+ },
268
+ {
269
+ "vosk_model_id": "vosk-model-fa-0.5",
270
+ "vosk_model_language": "fa",
271
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-fa-0.5.zip",
272
+ "vosk_model_size": "1G",
273
+ "vosk_model_word_error_rate_and_speed": "TBD",
274
+ "vosk_model_notes": "Model with large vocabulary, not yet accurate but better than before (Persian)",
275
+ "vosk_model_licenses": "Apache 2.0"
276
+ }
277
+ ],
278
+ "filipino": [
279
+ {
280
+ "vosk_model_id": "vosk-model-tl-ph-generic-0.6",
281
+ "vosk_model_language": "ph",
282
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-tl-ph-generic-0.6.zip",
283
+ "vosk_model_size": "320M",
284
+ "vosk_model_word_error_rate_and_speed": "TBD",
285
+ "vosk_model_notes": "Medium wideband model for Filipino (Tagalog) by feddybear",
286
+ "vosk_model_licenses": "CC-BY-NC-SA 4.0"
287
+ }
288
+ ],
289
+ "ukrainian": [
290
+ {
291
+ "vosk_model_id": "vosk-model-small-uk-v3-small",
292
+ "vosk_model_language": "uk",
293
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-uk-v3-small.zip",
294
+ "vosk_model_size": "133M",
295
+ "vosk_model_word_error_rate_and_speed": "TBD",
296
+ "vosk_model_notes": "Small model from Speech Recognition for Ukrainian",
297
+ "vosk_model_licenses": "Apache 2.0"
298
+ },
299
+ {
300
+ "vosk_model_id": "vosk-model-uk-v3",
301
+ "vosk_model_language": "uk",
302
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-uk-v3.zip",
303
+ "vosk_model_size": "343M",
304
+ "vosk_model_word_error_rate_and_speed": "TBD",
305
+ "vosk_model_notes": "Bigger model from Speech Recognition for Ukrainian",
306
+ "vosk_model_licenses": "Apache 2.0"
307
+ }
308
+ ],
309
+ "swedish": [
310
+ {
311
+ "vosk_model_id": "vosk-model-small-sv-rhasspy-0.15",
312
+ "vosk_model_language": "sv",
313
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-sv-rhasspy-0.15.zip",
314
+ "vosk_model_size": "289M",
315
+ "vosk_model_word_error_rate_and_speed": "TBD",
316
+ "vosk_model_notes": "Repackaged model from Rhasspy project",
317
+ "vosk_model_licenses": "Apache 2.0"
318
+ }
319
+ ],
320
+ "japanese": [
321
+ {
322
+ "vosk_model_id": "vosk-model-small-ja-0.22",
323
+ "vosk_model_language": "ja",
324
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-ja-0.22.zip",
325
+ "vosk_model_size": "48M",
326
+ "vosk_model_word_error_rate_and_speed": "9.52(csj CER) 17.07(ted10k CER)",
327
+ "vosk_model_notes": "Lightweight wideband model for Japanese",
328
+ "vosk_model_licenses": "Apache 2.0"
329
+ },
330
+ {
331
+ "vosk_model_id": "vosk-model-ja-0.22",
332
+ "vosk_model_language": "ja",
333
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-ja-0.22.zip",
334
+ "vosk_model_size": "1G",
335
+ "vosk_model_word_error_rate_and_speed": "8.40(csj CER) 13.91(ted10k CER)",
336
+ "vosk_model_notes": "Big model for Japanese",
337
+ "vosk_model_licenses": "Apache 2.0"
338
+ }
339
+ ],
340
+ "hindi": [
341
+ {
342
+ "vosk_model_id": "vosk-model-small-hi-0.22",
343
+ "vosk_model_language": "hi",
344
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-hi-0.22.zip",
345
+ "vosk_model_size": "42M",
346
+ "vosk_model_word_error_rate_and_speed": "20.89 (IITM Challenge) 24.72 (MUCS Challenge)",
347
+ "vosk_model_notes": "Lightweight model for Hindi",
348
+ "vosk_model_licenses": "Apache 2.0"
349
+ },
350
+ {
351
+ "vosk_model_id": "vosk-model-hi-0.22",
352
+ "vosk_model_language": "hi",
353
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-hi-0.22.zip",
354
+ "vosk_model_size": "1.5G",
355
+ "vosk_model_word_error_rate_and_speed": "14.85 (CV Test) 14.83 (IITM Challenge) 13.11 (MUCS Challenge)",
356
+ "vosk_model_notes": "Big accurate model for servers",
357
+ "vosk_model_licenses": "Apache 2.0"
358
+ }
359
+ ],
360
+ "czech": [
361
+ {
362
+ "vosk_model_id": "vosk-model-small-cs-0.4-rhasspy",
363
+ "vosk_model_language": "cs",
364
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-cs-0.4-rhasspy.zip",
365
+ "vosk_model_size": "44M",
366
+ "vosk_model_word_error_rate_and_speed": "21.29 (CV Test)",
367
+ "vosk_model_notes": "Lightweight model for Czech from Rhasspy project",
368
+ "vosk_model_licenses": "Apache 2.0"
369
+ }
370
+ ],
371
+ "polish": [
372
+ {
373
+ "vosk_model_id": "vosk-model-small-pl-0.22",
374
+ "vosk_model_language": "pl",
375
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-pl-0.22.zip",
376
+ "vosk_model_size": "50M",
377
+ "vosk_model_word_error_rate_and_speed": "18.36 (CV Test) 16.88 (MLS Test) 11.55 (Voxpopuli Test)",
378
+ "vosk_model_notes": "Lightweight model for Polish",
379
+ "vosk_model_licenses": "Apache 2.0"
380
+ }
381
+ ],
382
+ "uzbek": [
383
+ {
384
+ "vosk_model_id": "vosk-model-small-uz-0.22",
385
+ "vosk_model_language": "uz",
386
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-uz-0.22.zip",
387
+ "vosk_model_size": "82M",
388
+ "vosk_model_word_error_rate_and_speed": "13.54 (CV Test) 12.92 (IS2AI USC test)",
389
+ "vosk_model_notes": "Lightweight model for Uzbek",
390
+ "vosk_model_licenses": "Apache 2.0"
391
+ }
392
+ ],
393
+ "korean": [
394
+ {
395
+ "vosk_model_id": "vosk-model-small-ko-0.22",
396
+ "vosk_model_language": "ko",
397
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-ko-0.22.zip",
398
+ "vosk_model_size": "82M",
399
+ "vosk_model_word_error_rate_and_speed": "28.1 (Zeroth Test)",
400
+ "vosk_model_notes": "Lightweight model for Korean",
401
+ "vosk_model_licenses": "Apache 2.0"
402
+ }
403
+ ],
404
+ "speaker_identification": [
405
+ {
406
+ "vosk_model_id": "vosk-model-spk-0.4",
407
+ "vosk_model_language": "TBD",
408
+ "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-spk-0.4.zip",
409
+ "vosk_model_size": "13M",
410
+ "vosk_model_word_error_rate_and_speed": "TBD",
411
+ "vosk_model_notes": "Model for speaker identification, should work for all languages",
412
+ "vosk_model_licenses": "Apache 2.0"
413
+ }
414
+ ]
415
+ }