e2-tts-hakka-test / tokenizer.json
txya900619's picture
Upload tokenizer.json with huggingface_hub
6ff48ca verified
raw
history blame
16.1 kB
{
"version": "1.0",
"truncation": null,
"padding": {
"strategy": "BatchLongest",
"direction": "Right",
"pad_to_multiple_of": null,
"pad_id": 0,
"pad_type_id": 0,
"pad_token": "<pad>"
},
"added_tokens": [
{
"id": 0,
"content": "<pad>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "<sil>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": null,
"post_processor": null,
"decoder": null,
"model": {
"type": "BPE",
"dropout": null,
"unk_token": null,
"continuing_subword_prefix": null,
"end_of_word_suffix": null,
"fuse_unk": false,
"byte_fallback": false,
"ignore_merges": false,
"vocab": {
"<pad>": 0,
"<sil>": 1,
" ": 2,
"1": 3,
"2": 4,
"3": 5,
"4": 6,
"5": 7,
"_": 8,
"a": 9,
"b": 10,
"e": 11,
"f": 12,
"h": 13,
"i": 14,
"k": 15,
"l": 16,
"m": 17,
"n": 18,
"o": 19,
"p": 20,
"s": 21,
"t": 22,
"u": 23,
"v": 24,
"ŋ": 25,
"ɕ": 26,
"ɛ": 27,
"ɨ": 28,
"ʊ": 29,
"ʰ": 30,
"̩": 31,
"͡": 32,
"_5": 33,
"_55": 34,
"_2": 35,
"_24": 36,
"11": 37,
"_11": 38,
"31": 39,
"_31": 40,
"_55 ": 41,
"t͡": 42,
"i_55": 43,
"t͡s": 44,
"_24 ": 45,
"_11 ": 46,
"_31 ": 47,
"oŋ": 48,
"t_2": 49,
"tʰ": 50,
"i_24": 51,
"e_55 ": 52,
"i_11": 53,
"en": 54,
"t͡ɕ": 55,
"i_55 ": 56,
"in": 57,
"i_31": 58,
"o_55": 59,
"ke_55 ": 60,
"uŋ": 61,
"t͡sʰ": 62,
"t͡ɕʰ": 63,
"a_55": 64,
"aŋ": 65,
"u_55": 66,
"k_2": 67,
"sɨ": 68,
"a_24": 69,
"kʰ": 70,
"an": 71,
"e_55": 72,
"it_2": 73,
"i_11 ": 74,
"o_11": 75,
"u_24": 76,
"un": 77,
"on": 78,
"to": 79,
"e_31": 80,
"et_2": 81,
"t_5": 82,
"a_31": 83,
"u_31": 84,
"u_24 ": 85,
"pʰ": 86,
"o_55i_55 ": 87,
"u_55 ": 88,
"a_11": 89,
"am": 90,
"ŋin": 91,
"t͡ɕʰi_55": 92,
"k_5": 93,
"i_2": 94,
"a_24 ": 95,
"u_11": 96,
"̩_11": 97,
"ak_2": 98,
"o_31": 99,
"e_24": 100,
"e_31 ": 101,
"i_11en": 102,
"m̩_11": 103,
"e_11": 104,
"lo_11": 105,
"a_55i_55": 106,
"oŋ_24": 107,
"oŋ_31": 108,
"he_55 ": 109,
"uk_2": 110,
"uŋ_24": 111,
"o_24": 112,
"en_24": 113,
"ke_55": 114,
"i_24 ": 115,
"ki_24": 116,
"ŋa_11": 117,
"i_55en": 118,
"p_5": 119,
"i_5": 120,
"a_55 ": 121,
"ki_11 ": 122,
"et_2 ": 123,
"oŋ_55": 124,
"ok_5": 125,
"ŋin_11": 126,
"aŋ_24": 127,
"i_24u_24 ": 128,
"u_31 ": 129,
"i_31 ": 130,
"o_55i_55": 131,
"it_2 ": 132,
"tʰuŋ": 133,
"oŋ_55 ": 134,
"to_55 ": 135,
"im": 136,
"tʰe_11": 137,
"i_11en_11": 138,
"ŋin_11 ": 139,
"t͡sɨ": 140,
"ok_2": 141,
"mo_11": 142,
"vo_55i_55 ": 143,
"o_31 ": 144,
"ŋa_11i_11 ": 145,
"o_55 ": 146,
"sɨt_5": 147,
"i_55en_55": 148,
"ti_55 ": 149,
"han": 150,
"ten": 151,
"lo_11i_11 ": 152,
"tʰa_55i_55": 153,
"to_31 ": 154,
"t͡ɕʰi_55u_55 ": 155,
"oŋ_11": 156,
"ut_2": 157,
"t͡ɕʰin": 158,
"ak_2 ": 159,
"oŋ_24 ": 160,
"i_31a_31": 161,
"t͡so_55": 162,
"a_31 ": 163,
"kin": 164,
"u_11 ": 165,
"pun": 166,
"se_55": 167,
"an_24": 168,
"et_5": 169,
"on_55": 170,
"te_24": 171,
"o_11 ": 172,
"i_31oŋ_31": 173,
"ŋ̩": 174,
"su_31": 175,
"oŋ_31 ": 176,
"aŋ_11": 177,
"tʰe_11u_11": 178,
"i_11en_11 ": 179,
"i_31a_31 ": 180,
"a_55i_55 ": 181,
"t͡ɕʰi_55u_55": 182,
"p_2": 183,
"aŋ_55": 184,
"lo_11i_11": 185,
"i_2et_2": 186,
"i_24en_24": 187,
"i_55uŋ": 188,
"ho_31": 189,
"koŋ_31": 190,
"sɨn": 191,
"he_55": 192,
"am_24": 193,
"at_2": 194,
"hi_55 ": 195,
"ap_5": 196,
"i_55a_55": 197,
"ten_31 ": 198,
"i_24u_24": 199,
"on_24": 200,
"t͡sʰut_2": 201,
"en_24 ": 202,
"t͡sʰɨ": 203,
"an_31": 204,
"t͡suŋ": 205,
"im_24": 206,
"ki_31": 207,
"ko_55": 208,
"te_24u_24 ": 209,
"aŋ_24 ": 210,
"ho_31 ": 211,
"ka_24": 212,
"ŋi_11": 213,
"tet_2 ": 214,
"an_55": 215,
"ki_11": 216,
"ka_24 ": 217,
"tu_55": 218,
"sɨ_55": 219,
"ŋ̩_11 ": 220,
"oŋ_11 ": 221,
"i_5et_5": 222,
"han_11": 223,
"kʰon_55": 224,
"ɕi_24": 225,
"sɨ_11": 226,
"in_24": 227,
"ap_2": 228,
"ko_55 ": 229,
"ha_24": 230,
"ti_24": 231,
"in_55": 232,
"kʰi_11": 233,
"uk_5": 234,
"i_55oŋ_55": 235,
"li_11": 236,
"mo_11 ": 237,
"ki_2": 238,
"uŋ_11": 239,
"e_31u_31": 240,
"ki_55": 241,
"un_11": 242,
"tʰuŋ_11 ": 243,
"hi_55": 244,
"i_31en": 245,
"t͡sak_2 ": 246,
"toŋ_24": 247,
"saŋ_24": 248,
"ku_24": 249,
"vuk_2": 250,
"e_24u_24": 251,
"aŋ_55 ": 252,
"e_55u_55": 253,
"pun_24 ": 254,
"pa_31": 255
},
"merges": [
[
"_",
"5"
],
[
"_5",
"5"
],
[
"_",
"2"
],
[
"_2",
"4"
],
[
"1",
"1"
],
[
"_",
"11"
],
[
"3",
"1"
],
[
"_",
"31"
],
[
"_55",
" "
],
[
"t",
"͡"
],
[
"i",
"_55"
],
[
"t͡",
"s"
],
[
"_24",
" "
],
[
"_11",
" "
],
[
"_31",
" "
],
[
"o",
"ŋ"
],
[
"t",
"_2"
],
[
"t",
"ʰ"
],
[
"i",
"_24"
],
[
"e",
"_55 "
],
[
"i",
"_11"
],
[
"e",
"n"
],
[
"t͡",
"ɕ"
],
[
"i",
"_55 "
],
[
"i",
"n"
],
[
"i",
"_31"
],
[
"o",
"_55"
],
[
"k",
"e_55 "
],
[
"u",
"ŋ"
],
[
"t͡s",
"ʰ"
],
[
"t͡ɕ",
"ʰ"
],
[
"a",
"_55"
],
[
"a",
"ŋ"
],
[
"u",
"_55"
],
[
"k",
"_2"
],
[
"s",
"ɨ"
],
[
"a",
"_24"
],
[
"k",
"ʰ"
],
[
"a",
"n"
],
[
"e",
"_55"
],
[
"i",
"t_2"
],
[
"i",
"_11 "
],
[
"o",
"_11"
],
[
"u",
"_24"
],
[
"u",
"n"
],
[
"o",
"n"
],
[
"t",
"o"
],
[
"e",
"_31"
],
[
"e",
"t_2"
],
[
"t",
"_5"
],
[
"a",
"_31"
],
[
"u",
"_31"
],
[
"u",
"_24 "
],
[
"p",
"ʰ"
],
[
"o_55",
"i_55 "
],
[
"u",
"_55 "
],
[
"a",
"_11"
],
[
"a",
"m"
],
[
"ŋ",
"in"
],
[
"t͡ɕʰ",
"i_55"
],
[
"k",
"_5"
],
[
"i",
"_2"
],
[
"a",
"_24 "
],
[
"u",
"_11"
],
[
"̩",
"_11"
],
[
"a",
"k_2"
],
[
"o",
"_31"
],
[
"e",
"_24"
],
[
"e",
"_31 "
],
[
"i_11",
"en"
],
[
"m",
"̩_11"
],
[
"e",
"_11"
],
[
"l",
"o_11"
],
[
"a_55",
"i_55"
],
[
"oŋ",
"_24"
],
[
"oŋ",
"_31"
],
[
"h",
"e_55 "
],
[
"u",
"k_2"
],
[
"uŋ",
"_24"
],
[
"o",
"_24"
],
[
"en",
"_24"
],
[
"k",
"e_55"
],
[
"i",
"_24 "
],
[
"k",
"i_24"
],
[
"ŋ",
"a_11"
],
[
"i_55",
"en"
],
[
"p",
"_5"
],
[
"i",
"_5"
],
[
"a",
"_55 "
],
[
"k",
"i_11 "
],
[
"et_2",
" "
],
[
"oŋ",
"_55"
],
[
"o",
"k_5"
],
[
"ŋin",
"_11"
],
[
"aŋ",
"_24"
],
[
"i_24",
"u_24 "
],
[
"u",
"_31 "
],
[
"i",
"_31 "
],
[
"o_55",
"i_55"
],
[
"it_2",
" "
],
[
"tʰ",
"uŋ"
],
[
"oŋ",
"_55 "
],
[
"to",
"_55 "
],
[
"i",
"m"
],
[
"tʰ",
"e_11"
],
[
"i_11en",
"_11"
],
[
"ŋin",
"_11 "
],
[
"t͡s",
"ɨ"
],
[
"o",
"k_2"
],
[
"m",
"o_11"
],
[
"v",
"o_55i_55 "
],
[
"o",
"_31 "
],
[
"ŋa_11",
"i_11 "
],
[
"o",
"_55 "
],
[
"sɨ",
"t_5"
],
[
"i_55en",
"_55"
],
[
"t",
"i_55 "
],
[
"h",
"an"
],
[
"t",
"en"
],
[
"lo_11",
"i_11 "
],
[
"tʰ",
"a_55i_55"
],
[
"to",
"_31 "
],
[
"t͡ɕʰi_55",
"u_55 "
],
[
"oŋ",
"_11"
],
[
"u",
"t_2"
],
[
"t͡ɕʰ",
"in"
],
[
"ak_2",
" "
],
[
"oŋ",
"_24 "
],
[
"i_31",
"a_31"
],
[
"t͡s",
"o_55"
],
[
"a",
"_31 "
],
[
"k",
"in"
],
[
"u",
"_11 "
],
[
"p",
"un"
],
[
"s",
"e_55"
],
[
"an",
"_24"
],
[
"e",
"t_5"
],
[
"on",
"_55"
],
[
"t",
"e_24"
],
[
"o",
"_11 "
],
[
"i_31",
"oŋ_31"
],
[
"ŋ",
"̩"
],
[
"s",
"u_31"
],
[
"oŋ",
"_31 "
],
[
"aŋ",
"_11"
],
[
"tʰe_11",
"u_11"
],
[
"i_11en",
"_11 "
],
[
"i_31",
"a_31 "
],
[
"a_55",
"i_55 "
],
[
"t͡ɕʰi_55",
"u_55"
],
[
"p",
"_2"
],
[
"aŋ",
"_55"
],
[
"lo_11",
"i_11"
],
[
"i_2",
"et_2"
],
[
"i_24",
"en_24"
],
[
"i_55",
"uŋ"
],
[
"h",
"o_31"
],
[
"k",
"oŋ_31"
],
[
"sɨ",
"n"
],
[
"h",
"e_55"
],
[
"am",
"_24"
],
[
"a",
"t_2"
],
[
"h",
"i_55 "
],
[
"a",
"p_5"
],
[
"i_55",
"a_55"
],
[
"ten",
"_31 "
],
[
"i_24",
"u_24"
],
[
"on",
"_24"
],
[
"t͡sʰ",
"ut_2"
],
[
"en",
"_24 "
],
[
"t͡sʰ",
"ɨ"
],
[
"an",
"_31"
],
[
"t͡s",
"uŋ"
],
[
"im",
"_24"
],
[
"k",
"i_31"
],
[
"k",
"o_55"
],
[
"te_24",
"u_24 "
],
[
"aŋ",
"_24 "
],
[
"h",
"o_31 "
],
[
"k",
"a_24"
],
[
"ŋ",
"i_11"
],
[
"t",
"et_2 "
],
[
"an",
"_55"
],
[
"k",
"i_11"
],
[
"k",
"a_24 "
],
[
"t",
"u_55"
],
[
"sɨ",
"_55"
],
[
"ŋ̩",
"_11 "
],
[
"oŋ",
"_11 "
],
[
"i_5",
"et_5"
],
[
"han",
"_11"
],
[
"kʰ",
"on_55"
],
[
"ɕ",
"i_24"
],
[
"sɨ",
"_11"
],
[
"in",
"_24"
],
[
"a",
"p_2"
],
[
"k",
"o_55 "
],
[
"h",
"a_24"
],
[
"t",
"i_24"
],
[
"in",
"_55"
],
[
"kʰ",
"i_11"
],
[
"u",
"k_5"
],
[
"i_55",
"oŋ_55"
],
[
"l",
"i_11"
],
[
"m",
"o_11 "
],
[
"k",
"i_2"
],
[
"uŋ",
"_11"
],
[
"e_31",
"u_31"
],
[
"k",
"i_55"
],
[
"un",
"_11"
],
[
"tʰuŋ",
"_11 "
],
[
"h",
"i_55"
],
[
"i_31",
"en"
],
[
"t͡s",
"ak_2 "
],
[
"t",
"oŋ_24"
],
[
"s",
"aŋ_24"
],
[
"k",
"u_24"
],
[
"v",
"uk_2"
],
[
"e_24",
"u_24"
],
[
"aŋ",
"_55 "
],
[
"e_55",
"u_55"
],
[
"pun",
"_24 "
],
[
"p",
"a_31"
]
]
}
}