|
{ |
|
"english": [ |
|
{ |
|
"vosk_model_id": "vosk-model-small-en-us-0.15", |
|
"vosk_model_language": "en", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip", |
|
"vosk_model_size": "40M", |
|
"vosk_model_word_error_rate_and_speed": "9.85 (librispeech test-clean) 10.38 (tedlium)", |
|
"vosk_model_notes": "Lightweight wideband model for Android and RPi", |
|
"vosk_model_licenses": "Apache 2.0" |
|
}, |
|
{ |
|
"vosk_model_id": "vosk-model-en-us-0.22", |
|
"vosk_model_language": "en", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-en-us-0.22.zip", |
|
"vosk_model_size": "1.8G", |
|
"vosk_model_word_error_rate_and_speed": "5.69 (librispeech test-clean) 6.05 (tedlium) 29.78(callcenter)", |
|
"vosk_model_notes": "Accurate generic US English model", |
|
"vosk_model_licenses": "Apache 2.0" |
|
}, |
|
{ |
|
"vosk_model_id": "vosk-model-en-us-0.22-lgraph", |
|
"vosk_model_language": "en", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-en-us-0.22-lgraph.zip", |
|
"vosk_model_size": "128M", |
|
"vosk_model_word_error_rate_and_speed": "7.82 (librispeech) 8.20 (tedlium)", |
|
"vosk_model_notes": "Big US English model with dynamic graph", |
|
"vosk_model_licenses": "Apache 2.0" |
|
}, |
|
{ |
|
"vosk_model_id": "vosk-model-en-us-0.42-gigaspeech", |
|
"vosk_model_language": "en", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-en-us-0.42-gigaspeech.zip", |
|
"vosk_model_size": "2.3G", |
|
"vosk_model_word_error_rate_and_speed": "5.64 (librispeech test-clean) 6.24 (tedlium) 30.17 (callcenter)", |
|
"vosk_model_notes": "Accurate generic US English model trained by Kaldi on Gigaspeech. Mostly for podcasts, not for telephony", |
|
"vosk_model_licenses": "Apache 2.0" |
|
}, |
|
{ |
|
"vosk_model_id": "vosk-model-en-in-0.5", |
|
"vosk_model_language": "en-indian", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-en-in-0.5.zip", |
|
"vosk_model_size": "1G", |
|
"vosk_model_word_error_rate_and_speed": "36.12 (NPTEL Pure)", |
|
"vosk_model_notes": "Generic Indian English model for telecom and broadcast", |
|
"vosk_model_licenses": "Apache 2.0" |
|
}, |
|
{ |
|
"vosk_model_id": "vosk-model-small-en-in-0.4", |
|
"vosk_model_language": "en-indian", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-en-in-0.4.zip", |
|
"vosk_model_size": "36M", |
|
"vosk_model_word_error_rate_and_speed": "49.05 (NPTEL Pure)", |
|
"vosk_model_notes": "Lightweight Indian English model for mobile applications", |
|
"vosk_model_licenses": "Apache 2.0" |
|
} |
|
], |
|
"chinese": [ |
|
{ |
|
"vosk_model_id": "vosk-model-small-cn-0.22", |
|
"vosk_model_language": "cn", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-cn-0.22.zip", |
|
"vosk_model_size": "42M", |
|
"vosk_model_word_error_rate_and_speed": "23.54 (SpeechIO-02) 38.29 (SpeechIO-06) 17.15 (THCHS)", |
|
"vosk_model_notes": "Lightweight model for Android and RPi", |
|
"vosk_model_licenses": "Apache 2.0" |
|
}, |
|
{ |
|
"vosk_model_id": "vosk-model-cn-0.22", |
|
"vosk_model_language": "cn", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-cn-0.22.zip", |
|
"vosk_model_size": "1.3G", |
|
"vosk_model_word_error_rate_and_speed": "13.98 (SpeechIO-02) 27.30 (SpeechIO-06) 7.43 (THCHS)", |
|
"vosk_model_notes": "Big generic Chinese model for server processing", |
|
"vosk_model_licenses": "Apache 2.0" |
|
}, |
|
{ |
|
"vosk_model_id": "vosk-model-cn-kaldi-multicn-0.15", |
|
"vosk_model_language": "cn", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-cn-kaldi-multicn-0.15.zip", |
|
"vosk_model_size": "1.5G", |
|
"vosk_model_word_error_rate_and_speed": "17.44 (SpeechIO-02) 9.56 (THCHS)", |
|
"vosk_model_notes": "Original Wideband Kaldi multi-cn model from Kaldi with Vosk LM", |
|
"vosk_model_licenses": "Apache 2.0" |
|
} |
|
], |
|
"french": [ |
|
{ |
|
"vosk_model_id": "vosk-model-small-fr-0.22", |
|
"vosk_model_language": "fr", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-fr-0.22.zip", |
|
"vosk_model_size": "41M", |
|
"vosk_model_word_error_rate_and_speed": "23.95 (cv test) 19.30 (mtedx) 27.25 (podcast)", |
|
"vosk_model_notes": "Lightweight wideband model for Android/iOS and RPi", |
|
"vosk_model_licenses": "Apache 2.0" |
|
}, |
|
{ |
|
"vosk_model_id": "vosk-model-fr-0.22", |
|
"vosk_model_language": "fr", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-fr-0.22.zip", |
|
"vosk_model_size": "1.4G", |
|
"vosk_model_word_error_rate_and_speed": "14.72 (cv test) 11.64 (mls) 13.10 (mtedx) 21.61 (podcast) 13.22 (voxpopuli)", |
|
"vosk_model_notes": "Big accurate model for servers", |
|
"vosk_model_licenses": "Apache 2.0" |
|
} |
|
], |
|
"spanish": [ |
|
{ |
|
"vosk_model_id": "vosk-model-small-es-0.42", |
|
"vosk_model_language": "es", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-es-0.42.zip", |
|
"vosk_model_size": "39M", |
|
"vosk_model_word_error_rate_and_speed": "16.02 (cv test) 16.72 (mtedx test) 11.21 (mls)", |
|
"vosk_model_notes": "Lightweight wideband model for Android/iOS and RPi", |
|
"vosk_model_licenses": "Apache 2.0" |
|
}, |
|
{ |
|
"vosk_model_id": "vosk-model-es-0.42", |
|
"vosk_model_language": "es", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-es-0.42.zip", |
|
"vosk_model_size": "1.4G", |
|
"vosk_model_word_error_rate_and_speed": "7.50 (cv test) 10.05 (mtedx test) 5.84 (mls)", |
|
"vosk_model_notes": "Big model for Spanish", |
|
"vosk_model_licenses": "Apache 2.0" |
|
} |
|
], |
|
"german": [ |
|
{ |
|
"vosk_model_id": "vosk-model-de-0.21", |
|
"vosk_model_language": "de", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-de-0.21.zip", |
|
"vosk_model_size": "1.9G", |
|
"vosk_model_word_error_rate_and_speed": "9.83 (Tuda-de test), 24.00 (podcast) 12.82 (cv-test) 12.42 (mls) 33.26 (mtedx)", |
|
"vosk_model_notes": "Big German model for telephony and server", |
|
"vosk_model_licenses": "Apache 2.0" |
|
}, |
|
{ |
|
"vosk_model_id": "vosk-model-small-de-zamia-0.3", |
|
"vosk_model_language": "de", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-de-zamia-0.3.zip", |
|
"vosk_model_size": "49M", |
|
"vosk_model_word_error_rate_and_speed": "14.81 (Tuda-de test), 37.46 (podcast)", |
|
"vosk_model_notes": "Zamia f_250 small model repackaged (not recommended)", |
|
"vosk_model_licenses": "Apache 2.0" |
|
}, |
|
{ |
|
"vosk_model_id": "vosk-model-small-de-0.15", |
|
"vosk_model_language": "de", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-de-0.15.zip", |
|
"vosk_model_size": "45M", |
|
"vosk_model_word_error_rate_and_speed": "13.75 (Tuda-de test), 30.67 (podcast)", |
|
"vosk_model_notes": "Lightweight wideband model for Android and RPi", |
|
"vosk_model_licenses": "Apache 2.0" |
|
} |
|
], |
|
"portuguese": [ |
|
{ |
|
"vosk_model_id": "vosk-model-small-pt-0.3", |
|
"vosk_model_language": "pt", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-pt-0.3.zip", |
|
"vosk_model_size": "31M", |
|
"vosk_model_word_error_rate_and_speed": "68.92 (coraa dev) 32.60 (cv test)", |
|
"vosk_model_notes": "Lightweight wideband model for Android and RPi", |
|
"vosk_model_licenses": "Apache 2.0" |
|
}, |
|
{ |
|
"vosk_model_id": "vosk-model-pt-fb-v0.1.1-20220516_2113", |
|
"vosk_model_language": "pt", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-pt-fb-v0.1.1-20220516_2113.zip", |
|
"vosk_model_size": "1.6G", |
|
"vosk_model_word_error_rate_and_speed": "54.34 (coraa dev) 27.70 (cv test)", |
|
"vosk_model_notes": "Big model from FalaBrazil", |
|
"vosk_model_licenses": "Apache 2.0" |
|
} |
|
], |
|
"greek": [ |
|
{ |
|
"vosk_model_id": "vosk-model-el-gr-0.7", |
|
"vosk_model_language": "gr", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-el-gr-0.7.zip", |
|
"vosk_model_size": "1.1G", |
|
"vosk_model_word_error_rate_and_speed": "TBD", |
|
"vosk_model_notes": "Big narrowband Greek model for server processing, not extremely accurate though", |
|
"vosk_model_licenses": "Apache 2.0" |
|
} |
|
], |
|
"vietnamese": [ |
|
{ |
|
"vosk_model_id": "vosk-model-small-vn-0.4", |
|
"vosk_model_language": "vn", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-vn-0.4.zip", |
|
"vosk_model_size": "32M", |
|
"vosk_model_word_error_rate_and_speed": "15.70 (Vivos test)", |
|
"vosk_model_notes": "Lightweight Vietnamese model", |
|
"vosk_model_licenses": "Apache 2.0" |
|
}, |
|
{ |
|
"vosk_model_id": "vosk-model-vn-0.4", |
|
"vosk_model_language": "vn", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-vn-0.4.zip", |
|
"vosk_model_size": "78M", |
|
"vosk_model_word_error_rate_and_speed": "15.70 (Vivos test)", |
|
"vosk_model_notes": "Bigger Vietnamese model for server", |
|
"vosk_model_licenses": "Apache 2.0" |
|
} |
|
], |
|
"italian": [ |
|
{ |
|
"vosk_model_id": "vosk-model-small-it-0.22", |
|
"vosk_model_language": "it", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-it-0.22.zip", |
|
"vosk_model_size": "48M", |
|
"vosk_model_word_error_rate_and_speed": "16.88 (cv test) 25.87 (mls) 17.01 (mtedx)", |
|
"vosk_model_notes": "Lightweight model for Android and RPi", |
|
"vosk_model_licenses": "Apache 2.0" |
|
}, |
|
{ |
|
"vosk_model_id": "vosk-model-it-0.22", |
|
"vosk_model_language": "it", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-it-0.22.zip", |
|
"vosk_model_size": "1.2G", |
|
"vosk_model_word_error_rate_and_speed": "8.10 (cv test) 15.68 (mls) 11.23 (mtedx)", |
|
"vosk_model_notes": "Big generic Italian model for servers", |
|
"vosk_model_licenses": "Apache 2.0" |
|
} |
|
], |
|
"dutch": [ |
|
{ |
|
"vosk_model_id": "vosk-model-small-nl-0.22", |
|
"vosk_model_language": "nl", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-nl-0.22.zip", |
|
"vosk_model_size": "39M", |
|
"vosk_model_word_error_rate_and_speed": "22.45 (cv test) 26.80 (tv) 25.84 (mls) 24.09 (voxpopuli)", |
|
"vosk_model_notes": "Lightweight model for Dutch", |
|
"vosk_model_licenses": "Apache 2.0" |
|
} |
|
], |
|
"arabic": [ |
|
{ |
|
"vosk_model_id": "vosk-model-ar-mgb2-0.4", |
|
"vosk_model_language": "ar", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-ar-mgb2-0.4.zip", |
|
"vosk_model_size": "318M", |
|
"vosk_model_word_error_rate_and_speed": "16.40 (MGB-2 dev set)", |
|
"vosk_model_notes": "Repackaged Arabic model trained on MGB2 dataset from Kaldi", |
|
"vosk_model_licenses": "Apache 2.0" |
|
}, |
|
{ |
|
"vosk_model_id": "vosk-model-ar-0.22-linto-1.1.0", |
|
"vosk_model_language": "ar", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-ar-0.22-linto-1.1.0.zip", |
|
"vosk_model_size": "1.3G", |
|
"vosk_model_word_error_rate_and_speed": "52.87 (cv test) 28.50 (MGB-2 dev set) 1.0xRT", |
|
"vosk_model_notes": "Big model from LINTO project", |
|
"vosk_model_licenses": "Apache 2.0" |
|
} |
|
], |
|
"farsi": [ |
|
{ |
|
"vosk_model_id": "vosk-model-small-fa-0.4", |
|
"vosk_model_language": "fa", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-fa-0.4.zip", |
|
"vosk_model_size": "47M", |
|
"vosk_model_word_error_rate_and_speed": "TBD", |
|
"vosk_model_notes": "Lightweight wideband model for Android and RPi for Farsi (Persian)", |
|
"vosk_model_licenses": "Apache 2.0" |
|
}, |
|
{ |
|
"vosk_model_id": "vosk-model-fa-0.5", |
|
"vosk_model_language": "fa", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-fa-0.5.zip", |
|
"vosk_model_size": "1G", |
|
"vosk_model_word_error_rate_and_speed": "TBD", |
|
"vosk_model_notes": "Model with large vocabulary, not yet accurate but better than before (Persian)", |
|
"vosk_model_licenses": "Apache 2.0" |
|
} |
|
], |
|
"filipino": [ |
|
{ |
|
"vosk_model_id": "vosk-model-tl-ph-generic-0.6", |
|
"vosk_model_language": "ph", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-tl-ph-generic-0.6.zip", |
|
"vosk_model_size": "320M", |
|
"vosk_model_word_error_rate_and_speed": "TBD", |
|
"vosk_model_notes": "Medium wideband model for Filipino (Tagalog) by feddybear", |
|
"vosk_model_licenses": "CC-BY-NC-SA 4.0" |
|
} |
|
], |
|
"ukrainian": [ |
|
{ |
|
"vosk_model_id": "vosk-model-small-uk-v3-small", |
|
"vosk_model_language": "uk", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-uk-v3-small.zip", |
|
"vosk_model_size": "133M", |
|
"vosk_model_word_error_rate_and_speed": "TBD", |
|
"vosk_model_notes": "Small model from Speech Recognition for Ukrainian", |
|
"vosk_model_licenses": "Apache 2.0" |
|
}, |
|
{ |
|
"vosk_model_id": "vosk-model-uk-v3", |
|
"vosk_model_language": "uk", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-uk-v3.zip", |
|
"vosk_model_size": "343M", |
|
"vosk_model_word_error_rate_and_speed": "TBD", |
|
"vosk_model_notes": "Bigger model from Speech Recognition for Ukrainian", |
|
"vosk_model_licenses": "Apache 2.0" |
|
} |
|
], |
|
"swedish": [ |
|
{ |
|
"vosk_model_id": "vosk-model-small-sv-rhasspy-0.15", |
|
"vosk_model_language": "sv", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-sv-rhasspy-0.15.zip", |
|
"vosk_model_size": "289M", |
|
"vosk_model_word_error_rate_and_speed": "TBD", |
|
"vosk_model_notes": "Repackaged model from Rhasspy project", |
|
"vosk_model_licenses": "Apache 2.0" |
|
} |
|
], |
|
"japanese": [ |
|
{ |
|
"vosk_model_id": "vosk-model-small-ja-0.22", |
|
"vosk_model_language": "ja", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-ja-0.22.zip", |
|
"vosk_model_size": "48M", |
|
"vosk_model_word_error_rate_and_speed": "9.52(csj CER) 17.07(ted10k CER)", |
|
"vosk_model_notes": "Lightweight wideband model for Japanese", |
|
"vosk_model_licenses": "Apache 2.0" |
|
}, |
|
{ |
|
"vosk_model_id": "vosk-model-ja-0.22", |
|
"vosk_model_language": "ja", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-ja-0.22.zip", |
|
"vosk_model_size": "1G", |
|
"vosk_model_word_error_rate_and_speed": "8.40(csj CER) 13.91(ted10k CER)", |
|
"vosk_model_notes": "Big model for Japanese", |
|
"vosk_model_licenses": "Apache 2.0" |
|
} |
|
], |
|
"hindi": [ |
|
{ |
|
"vosk_model_id": "vosk-model-small-hi-0.22", |
|
"vosk_model_language": "hi", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-hi-0.22.zip", |
|
"vosk_model_size": "42M", |
|
"vosk_model_word_error_rate_and_speed": "20.89 (IITM Challenge) 24.72 (MUCS Challenge)", |
|
"vosk_model_notes": "Lightweight model for Hindi", |
|
"vosk_model_licenses": "Apache 2.0" |
|
}, |
|
{ |
|
"vosk_model_id": "vosk-model-hi-0.22", |
|
"vosk_model_language": "hi", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-hi-0.22.zip", |
|
"vosk_model_size": "1.5G", |
|
"vosk_model_word_error_rate_and_speed": "14.85 (CV Test) 14.83 (IITM Challenge) 13.11 (MUCS Challenge)", |
|
"vosk_model_notes": "Big accurate model for servers", |
|
"vosk_model_licenses": "Apache 2.0" |
|
} |
|
], |
|
"czech": [ |
|
{ |
|
"vosk_model_id": "vosk-model-small-cs-0.4-rhasspy", |
|
"vosk_model_language": "cs", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-cs-0.4-rhasspy.zip", |
|
"vosk_model_size": "44M", |
|
"vosk_model_word_error_rate_and_speed": "21.29 (CV Test)", |
|
"vosk_model_notes": "Lightweight model for Czech from Rhasspy project", |
|
"vosk_model_licenses": "Apache 2.0" |
|
} |
|
], |
|
"polish": [ |
|
{ |
|
"vosk_model_id": "vosk-model-small-pl-0.22", |
|
"vosk_model_language": "pl", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-pl-0.22.zip", |
|
"vosk_model_size": "50M", |
|
"vosk_model_word_error_rate_and_speed": "18.36 (CV Test) 16.88 (MLS Test) 11.55 (Voxpopuli Test)", |
|
"vosk_model_notes": "Lightweight model for Polish", |
|
"vosk_model_licenses": "Apache 2.0" |
|
} |
|
], |
|
"uzbek": [ |
|
{ |
|
"vosk_model_id": "vosk-model-small-uz-0.22", |
|
"vosk_model_language": "uz", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-uz-0.22.zip", |
|
"vosk_model_size": "82M", |
|
"vosk_model_word_error_rate_and_speed": "13.54 (CV Test) 12.92 (IS2AI USC test)", |
|
"vosk_model_notes": "Lightweight model for Uzbek", |
|
"vosk_model_licenses": "Apache 2.0" |
|
} |
|
], |
|
"korean": [ |
|
{ |
|
"vosk_model_id": "vosk-model-small-ko-0.22", |
|
"vosk_model_language": "ko", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-ko-0.22.zip", |
|
"vosk_model_size": "82M", |
|
"vosk_model_word_error_rate_and_speed": "28.1 (Zeroth Test)", |
|
"vosk_model_notes": "Lightweight model for Korean", |
|
"vosk_model_licenses": "Apache 2.0" |
|
} |
|
], |
|
"speaker_identification": [ |
|
{ |
|
"vosk_model_id": "vosk-model-spk-0.4", |
|
"vosk_model_language": "TBD", |
|
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-spk-0.4.zip", |
|
"vosk_model_size": "13M", |
|
"vosk_model_word_error_rate_and_speed": "TBD", |
|
"vosk_model_notes": "Model for speaker identification, should work for all languages", |
|
"vosk_model_licenses": "Apache 2.0" |
|
} |
|
] |
|
} |