vosk-models / available-vosk-models.json
mychen76's picture
initial
03fa999
raw
history blame
20.1 kB
{
"english": [
{
"vosk_model_id": "vosk-model-small-en-us-0.15",
"vosk_model_language": "en",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip",
"vosk_model_size": "40M",
"vosk_model_word_error_rate_and_speed": "9.85 (librispeech test-clean) 10.38 (tedlium)",
"vosk_model_notes": "Lightweight wideband model for Android and RPi",
"vosk_model_licenses": "Apache 2.0"
},
{
"vosk_model_id": "vosk-model-en-us-0.22",
"vosk_model_language": "en",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-en-us-0.22.zip",
"vosk_model_size": "1.8G",
"vosk_model_word_error_rate_and_speed": "5.69 (librispeech test-clean) 6.05 (tedlium) 29.78 (callcenter)",
"vosk_model_notes": "Accurate generic US English model",
"vosk_model_licenses": "Apache 2.0"
},
{
"vosk_model_id": "vosk-model-en-us-0.22-lgraph",
"vosk_model_language": "en",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-en-us-0.22-lgraph.zip",
"vosk_model_size": "128M",
"vosk_model_word_error_rate_and_speed": "7.82 (librispeech) 8.20 (tedlium)",
"vosk_model_notes": "Big US English model with dynamic graph",
"vosk_model_licenses": "Apache 2.0"
},
{
"vosk_model_id": "vosk-model-en-us-0.42-gigaspeech",
"vosk_model_language": "en",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-en-us-0.42-gigaspeech.zip",
"vosk_model_size": "2.3G",
"vosk_model_word_error_rate_and_speed": "5.64 (librispeech test-clean) 6.24 (tedlium) 30.17 (callcenter)",
"vosk_model_notes": "Accurate generic US English model trained by Kaldi on Gigaspeech. Mostly for podcasts, not for telephony",
"vosk_model_licenses": "Apache 2.0"
},
{
"vosk_model_id": "vosk-model-en-in-0.5",
"vosk_model_language": "en-indian",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-en-in-0.5.zip",
"vosk_model_size": "1G",
"vosk_model_word_error_rate_and_speed": "36.12 (NPTEL Pure)",
"vosk_model_notes": "Generic Indian English model for telecom and broadcast",
"vosk_model_licenses": "Apache 2.0"
},
{
"vosk_model_id": "vosk-model-small-en-in-0.4",
"vosk_model_language": "en-indian",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-en-in-0.4.zip",
"vosk_model_size": "36M",
"vosk_model_word_error_rate_and_speed": "49.05 (NPTEL Pure)",
"vosk_model_notes": "Lightweight Indian English model for mobile applications",
"vosk_model_licenses": "Apache 2.0"
}
],
"chinese": [
{
"vosk_model_id": "vosk-model-small-cn-0.22",
"vosk_model_language": "cn",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-cn-0.22.zip",
"vosk_model_size": "42M",
"vosk_model_word_error_rate_and_speed": "23.54 (SpeechIO-02) 38.29 (SpeechIO-06) 17.15 (THCHS)",
"vosk_model_notes": "Lightweight model for Android and RPi",
"vosk_model_licenses": "Apache 2.0"
},
{
"vosk_model_id": "vosk-model-cn-0.22",
"vosk_model_language": "cn",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-cn-0.22.zip",
"vosk_model_size": "1.3G",
"vosk_model_word_error_rate_and_speed": "13.98 (SpeechIO-02) 27.30 (SpeechIO-06) 7.43 (THCHS)",
"vosk_model_notes": "Big generic Chinese model for server processing",
"vosk_model_licenses": "Apache 2.0"
},
{
"vosk_model_id": "vosk-model-cn-kaldi-multicn-0.15",
"vosk_model_language": "cn",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-cn-kaldi-multicn-0.15.zip",
"vosk_model_size": "1.5G",
"vosk_model_word_error_rate_and_speed": "17.44 (SpeechIO-02) 9.56 (THCHS)",
"vosk_model_notes": "Original Wideband Kaldi multi-cn model from Kaldi with Vosk LM",
"vosk_model_licenses": "Apache 2.0"
}
],
"french": [
{
"vosk_model_id": "vosk-model-small-fr-0.22",
"vosk_model_language": "fr",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-fr-0.22.zip",
"vosk_model_size": "41M",
"vosk_model_word_error_rate_and_speed": "23.95 (cv test) 19.30 (mtedx) 27.25 (podcast)",
"vosk_model_notes": "Lightweight wideband model for Android/iOS and RPi",
"vosk_model_licenses": "Apache 2.0"
},
{
"vosk_model_id": "vosk-model-fr-0.22",
"vosk_model_language": "fr",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-fr-0.22.zip",
"vosk_model_size": "1.4G",
"vosk_model_word_error_rate_and_speed": "14.72 (cv test) 11.64 (mls) 13.10 (mtedx) 21.61 (podcast) 13.22 (voxpopuli)",
"vosk_model_notes": "Big accurate model for servers",
"vosk_model_licenses": "Apache 2.0"
}
],
"spanish": [
{
"vosk_model_id": "vosk-model-small-es-0.42",
"vosk_model_language": "es",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-es-0.42.zip",
"vosk_model_size": "39M",
"vosk_model_word_error_rate_and_speed": "16.02 (cv test) 16.72 (mtedx test) 11.21 (mls)",
"vosk_model_notes": "Lightweight wideband model for Android/iOS and RPi",
"vosk_model_licenses": "Apache 2.0"
},
{
"vosk_model_id": "vosk-model-es-0.42",
"vosk_model_language": "es",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-es-0.42.zip",
"vosk_model_size": "1.4G",
"vosk_model_word_error_rate_and_speed": "7.50 (cv test) 10.05 (mtedx test) 5.84 (mls)",
"vosk_model_notes": "Big model for Spanish",
"vosk_model_licenses": "Apache 2.0"
}
],
"german": [
{
"vosk_model_id": "vosk-model-de-0.21",
"vosk_model_language": "de",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-de-0.21.zip",
"vosk_model_size": "1.9G",
"vosk_model_word_error_rate_and_speed": "9.83 (Tuda-de test), 24.00 (podcast) 12.82 (cv-test) 12.42 (mls) 33.26 (mtedx)",
"vosk_model_notes": "Big German model for telephony and server",
"vosk_model_licenses": "Apache 2.0"
},
{
"vosk_model_id": "vosk-model-small-de-zamia-0.3",
"vosk_model_language": "de",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-de-zamia-0.3.zip",
"vosk_model_size": "49M",
"vosk_model_word_error_rate_and_speed": "14.81 (Tuda-de test), 37.46 (podcast)",
"vosk_model_notes": "Zamia f_250 small model repackaged (not recommended)",
"vosk_model_licenses": "Apache 2.0"
},
{
"vosk_model_id": "vosk-model-small-de-0.15",
"vosk_model_language": "de",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-de-0.15.zip",
"vosk_model_size": "45M",
"vosk_model_word_error_rate_and_speed": "13.75 (Tuda-de test), 30.67 (podcast)",
"vosk_model_notes": "Lightweight wideband model for Android and RPi",
"vosk_model_licenses": "Apache 2.0"
}
],
"portuguese": [
{
"vosk_model_id": "vosk-model-small-pt-0.3",
"vosk_model_language": "pt",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-pt-0.3.zip",
"vosk_model_size": "31M",
"vosk_model_word_error_rate_and_speed": "68.92 (coraa dev) 32.60 (cv test)",
"vosk_model_notes": "Lightweight wideband model for Android and RPi",
"vosk_model_licenses": "Apache 2.0"
},
{
"vosk_model_id": "vosk-model-pt-fb-v0.1.1-20220516_2113",
"vosk_model_language": "pt",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-pt-fb-v0.1.1-20220516_2113.zip",
"vosk_model_size": "1.6G",
"vosk_model_word_error_rate_and_speed": "54.34 (coraa dev) 27.70 (cv test)",
"vosk_model_notes": "Big model from FalaBrazil",
"vosk_model_licenses": "Apache 2.0"
}
],
"greek": [
{
"vosk_model_id": "vosk-model-el-gr-0.7",
"vosk_model_language": "gr",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-el-gr-0.7.zip",
"vosk_model_size": "1.1G",
"vosk_model_word_error_rate_and_speed": "TBD",
"vosk_model_notes": "Big narrowband Greek model for server processing, not extremely accurate though",
"vosk_model_licenses": "Apache 2.0"
}
],
"vietnamese": [
{
"vosk_model_id": "vosk-model-small-vn-0.4",
"vosk_model_language": "vn",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-vn-0.4.zip",
"vosk_model_size": "32M",
"vosk_model_word_error_rate_and_speed": "15.70 (Vivos test)",
"vosk_model_notes": "Lightweight Vietnamese model",
"vosk_model_licenses": "Apache 2.0"
},
{
"vosk_model_id": "vosk-model-vn-0.4",
"vosk_model_language": "vn",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-vn-0.4.zip",
"vosk_model_size": "78M",
"vosk_model_word_error_rate_and_speed": "15.70 (Vivos test)",
"vosk_model_notes": "Bigger Vietnamese model for server",
"vosk_model_licenses": "Apache 2.0"
}
],
"italian": [
{
"vosk_model_id": "vosk-model-small-it-0.22",
"vosk_model_language": "it",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-it-0.22.zip",
"vosk_model_size": "48M",
"vosk_model_word_error_rate_and_speed": "16.88 (cv test) 25.87 (mls) 17.01 (mtedx)",
"vosk_model_notes": "Lightweight model for Android and RPi",
"vosk_model_licenses": "Apache 2.0"
},
{
"vosk_model_id": "vosk-model-it-0.22",
"vosk_model_language": "it",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-it-0.22.zip",
"vosk_model_size": "1.2G",
"vosk_model_word_error_rate_and_speed": "8.10 (cv test) 15.68 (mls) 11.23 (mtedx)",
"vosk_model_notes": "Big generic Italian model for servers",
"vosk_model_licenses": "Apache 2.0"
}
],
"dutch": [
{
"vosk_model_id": "vosk-model-small-nl-0.22",
"vosk_model_language": "nl",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-nl-0.22.zip",
"vosk_model_size": "39M",
"vosk_model_word_error_rate_and_speed": "22.45 (cv test) 26.80 (tv) 25.84 (mls) 24.09 (voxpopuli)",
"vosk_model_notes": "Lightweight model for Dutch",
"vosk_model_licenses": "Apache 2.0"
}
],
"arabic": [
{
"vosk_model_id": "vosk-model-ar-mgb2-0.4",
"vosk_model_language": "ar",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-ar-mgb2-0.4.zip",
"vosk_model_size": "318M",
"vosk_model_word_error_rate_and_speed": "16.40 (MGB-2 dev set)",
"vosk_model_notes": "Repackaged Arabic model trained on MGB2 dataset from Kaldi",
"vosk_model_licenses": "Apache 2.0"
},
{
"vosk_model_id": "vosk-model-ar-0.22-linto-1.1.0",
"vosk_model_language": "ar",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-ar-0.22-linto-1.1.0.zip",
"vosk_model_size": "1.3G",
"vosk_model_word_error_rate_and_speed": "52.87 (cv test) 28.50 (MGB-2 dev set) 1.0xRT",
"vosk_model_notes": "Big model from LINTO project",
"vosk_model_licenses": "Apache 2.0"
}
],
"farsi": [
{
"vosk_model_id": "vosk-model-small-fa-0.4",
"vosk_model_language": "fa",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-fa-0.4.zip",
"vosk_model_size": "47M",
"vosk_model_word_error_rate_and_speed": "TBD",
"vosk_model_notes": "Lightweight wideband model for Android and RPi for Farsi (Persian)",
"vosk_model_licenses": "Apache 2.0"
},
{
"vosk_model_id": "vosk-model-fa-0.5",
"vosk_model_language": "fa",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-fa-0.5.zip",
"vosk_model_size": "1G",
"vosk_model_word_error_rate_and_speed": "TBD",
"vosk_model_notes": "Model with large vocabulary, not yet accurate but better than before (Persian)",
"vosk_model_licenses": "Apache 2.0"
}
],
"filipino": [
{
"vosk_model_id": "vosk-model-tl-ph-generic-0.6",
"vosk_model_language": "ph",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-tl-ph-generic-0.6.zip",
"vosk_model_size": "320M",
"vosk_model_word_error_rate_and_speed": "TBD",
"vosk_model_notes": "Medium wideband model for Filipino (Tagalog) by feddybear",
"vosk_model_licenses": "CC-BY-NC-SA 4.0"
}
],
"ukrainian": [
{
"vosk_model_id": "vosk-model-small-uk-v3-small",
"vosk_model_language": "uk",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-uk-v3-small.zip",
"vosk_model_size": "133M",
"vosk_model_word_error_rate_and_speed": "TBD",
"vosk_model_notes": "Small model from Speech Recognition for Ukrainian",
"vosk_model_licenses": "Apache 2.0"
},
{
"vosk_model_id": "vosk-model-uk-v3",
"vosk_model_language": "uk",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-uk-v3.zip",
"vosk_model_size": "343M",
"vosk_model_word_error_rate_and_speed": "TBD",
"vosk_model_notes": "Bigger model from Speech Recognition for Ukrainian",
"vosk_model_licenses": "Apache 2.0"
}
],
"swedish": [
{
"vosk_model_id": "vosk-model-small-sv-rhasspy-0.15",
"vosk_model_language": "sv",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-sv-rhasspy-0.15.zip",
"vosk_model_size": "289M",
"vosk_model_word_error_rate_and_speed": "TBD",
"vosk_model_notes": "Repackaged model from Rhasspy project",
"vosk_model_licenses": "Apache 2.0"
}
],
"japanese": [
{
"vosk_model_id": "vosk-model-small-ja-0.22",
"vosk_model_language": "ja",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-ja-0.22.zip",
"vosk_model_size": "48M",
"vosk_model_word_error_rate_and_speed": "9.52(csj CER) 17.07(ted10k CER)",
"vosk_model_notes": "Lightweight wideband model for Japanese",
"vosk_model_licenses": "Apache 2.0"
},
{
"vosk_model_id": "vosk-model-ja-0.22",
"vosk_model_language": "ja",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-ja-0.22.zip",
"vosk_model_size": "1G",
"vosk_model_word_error_rate_and_speed": "8.40(csj CER) 13.91(ted10k CER)",
"vosk_model_notes": "Big model for Japanese",
"vosk_model_licenses": "Apache 2.0"
}
],
"hindi": [
{
"vosk_model_id": "vosk-model-small-hi-0.22",
"vosk_model_language": "hi",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-hi-0.22.zip",
"vosk_model_size": "42M",
"vosk_model_word_error_rate_and_speed": "20.89 (IITM Challenge) 24.72 (MUCS Challenge)",
"vosk_model_notes": "Lightweight model for Hindi",
"vosk_model_licenses": "Apache 2.0"
},
{
"vosk_model_id": "vosk-model-hi-0.22",
"vosk_model_language": "hi",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-hi-0.22.zip",
"vosk_model_size": "1.5G",
"vosk_model_word_error_rate_and_speed": "14.85 (CV Test) 14.83 (IITM Challenge) 13.11 (MUCS Challenge)",
"vosk_model_notes": "Big accurate model for servers",
"vosk_model_licenses": "Apache 2.0"
}
],
"czech": [
{
"vosk_model_id": "vosk-model-small-cs-0.4-rhasspy",
"vosk_model_language": "cs",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-cs-0.4-rhasspy.zip",
"vosk_model_size": "44M",
"vosk_model_word_error_rate_and_speed": "21.29 (CV Test)",
"vosk_model_notes": "Lightweight model for Czech from Rhasspy project",
"vosk_model_licenses": "Apache 2.0"
}
],
"polish": [
{
"vosk_model_id": "vosk-model-small-pl-0.22",
"vosk_model_language": "pl",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-pl-0.22.zip",
"vosk_model_size": "50M",
"vosk_model_word_error_rate_and_speed": "18.36 (CV Test) 16.88 (MLS Test) 11.55 (Voxpopuli Test)",
"vosk_model_notes": "Lightweight model for Polish",
"vosk_model_licenses": "Apache 2.0"
}
],
"uzbek": [
{
"vosk_model_id": "vosk-model-small-uz-0.22",
"vosk_model_language": "uz",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-uz-0.22.zip",
"vosk_model_size": "82M",
"vosk_model_word_error_rate_and_speed": "13.54 (CV Test) 12.92 (IS2AI USC test)",
"vosk_model_notes": "Lightweight model for Uzbek",
"vosk_model_licenses": "Apache 2.0"
}
],
"korean": [
{
"vosk_model_id": "vosk-model-small-ko-0.22",
"vosk_model_language": "ko",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-ko-0.22.zip",
"vosk_model_size": "82M",
"vosk_model_word_error_rate_and_speed": "28.1 (Zeroth Test)",
"vosk_model_notes": "Lightweight model for Korean",
"vosk_model_licenses": "Apache 2.0"
}
],
"speaker_identification": [
{
"vosk_model_id": "vosk-model-spk-0.4",
"vosk_model_language": "TBD",
"vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-spk-0.4.zip",
"vosk_model_size": "13M",
"vosk_model_word_error_rate_and_speed": "TBD",
"vosk_model_notes": "Model for speaker identification, should work for all languages",
"vosk_model_licenses": "Apache 2.0"
}
]
}