e2-tts-hakka-test / tokenizer.json
txya900619's picture
Upload tokenizer.json with huggingface_hub
8b2fe6b verified
{
"version": "1.0",
"truncation": null,
"padding": {
"strategy": "BatchLongest",
"direction": "Right",
"pad_to_multiple_of": null,
"pad_id": 0,
"pad_type_id": 0,
"pad_token": "<pad>"
},
"added_tokens": [
{
"id": 0,
"content": "<pad>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "<sil>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": null,
"post_processor": null,
"decoder": null,
"model": {
"type": "BPE",
"dropout": null,
"unk_token": null,
"continuing_subword_prefix": null,
"end_of_word_suffix": null,
"fuse_unk": false,
"byte_fallback": false,
"ignore_merges": false,
"vocab": {
"<pad>": 0,
"<sil>": 1,
" ": 2,
"1": 3,
"2": 4,
"3": 5,
"4": 6,
"5": 7,
"_": 8,
"a": 9,
"b": 10,
"d": 11,
"e": 12,
"f": 13,
"h": 14,
"i": 15,
"j": 16,
"k": 17,
"l": 18,
"m": 19,
"n": 20,
"o": 21,
"p": 22,
"s": 23,
"t": 24,
"u": 25,
"v": 26,
"w": 27,
"z": 28,
"æ": 29,
"ð": 30,
"ŋ": 31,
"ɑ": 32,
"ɔ": 33,
"ɕ": 34,
"ə": 35,
"ɚ": 36,
"ɛ": 37,
"ɡ": 38,
"ɨ": 39,
"ɪ": 40,
"ɹ": 41,
"ʃ": 42,
"ʊ": 43,
"ʌ": 44,
"ʒ": 45,
"ʰ": 46,
"̩": 47,
"͡": 48,
"θ": 49,
"_5": 50,
"_55": 51,
"_2": 52,
"_24": 53,
"11": 54,
"_11": 55,
"31": 56,
"_31": 57,
"_55 ": 58,
"t͡": 59,
"t͡s": 60,
"_24 ": 61,
"i_55": 62,
"_11 ": 63,
"_31 ": 64,
"tʰ": 65,
"t_2": 66,
"i_24": 67,
"i_11": 68,
"oŋ": 69,
"en": 70,
"i_55 ": 71,
"e_55 ": 72,
"uŋ": 73,
"in": 74,
"i_31": 75,
"t͡sʰ": 76,
"sɨ": 77,
"t͡ɕ": 78,
"o_55": 79,
"kʰ": 80,
"ke_55 ": 81,
"an": 82,
"u_55": 83,
"k_2": 84,
"aŋ": 85,
"t͡ɕʰ": 86,
"a_55": 87,
"t_2 ": 88,
"a_24": 89,
"un": 90,
"_5 ": 91,
"o_11": 92,
"u_24": 93,
"u_55 ": 94,
"am": 95,
"u_24 ": 96,
"e_55": 97,
"pʰ": 98,
"on": 99,
"i_11 ": 100,
"u_31": 101,
"to": 102,
"o_55i_55 ": 103,
"i_11en": 104,
"a_31": 105,
"it_2": 106,
"i_2": 107,
"e_31": 108,
"i_5": 109,
"uŋ_24": 110,
"k_2 ": 111,
"ŋin": 112,
"t͡sɨ": 113,
"i_55en": 114,
"ho": 115,
"a_24 ": 116,
"et_2 ": 117,
"i_24 ": 118,
"t_5": 119,
"ko": 120,
"a_11": 121,
"e_24": 122,
"u_11": 123,
"et_2": 124,
"tʰuŋ": 125,
"ap": 126,
"oŋ_24": 127,
"lo_11": 128,
"k_5": 129,
"a_55i_55": 130,
"he_55 ": 131,
"i_24u_24 ": 132,
"oŋ_31": 133,
"u_31 ": 134,
"t͡ɕʰi_55": 135,
"i_24en": 136,
"t_5 ": 137,
"it_2 ": 138,
"i_11en_11 ": 139,
"im": 140,
"i_31 ": 141,
"a_55 ": 142,
"e_31 ": 143,
"̩_11": 144,
"e_11": 145,
"in_24": 146,
"m̩_11": 147,
"sɨp": 148,
"oŋ_55 ": 149,
"i_55en_55": 150,
"i_11en_11": 151,
"vo_55i_55 ": 152,
"k_5 ": 153,
"an_11": 154,
"to_55 ": 155,
"o_24": 156,
"ti_55 ": 157,
"uk_2": 158,
"ki_24": 159,
"u_11 ": 160,
"ke_55": 161,
"oŋ_55": 162,
"oŋ_31 ": 163,
"oŋ_24 ": 164,
"sɨ_55": 165,
"o_55i_55": 166,
"ak_2": 167,
"o_31": 168,
"ŋin_11": 169,
"lo_11i_11 ": 170,
"ŋin_11 ": 171,
"tʰa_55i_55": 172,
"tʰo_11": 173,
"aŋ_24": 174,
"ŋi_5": 175,
"ak_2 ": 176,
"i_31a_31": 177,
"uŋ_24 ": 178,
"i_55uŋ": 179,
"sɨn": 180,
"to_31 ": 181,
"t͡so_55": 182,
"t͡sʰɨ": 183,
"ŋ̩": 184,
"in_11": 185,
"in_55": 186,
"i_55en_55 ": 187,
"ŋi_55": 188,
"oŋ_11 ": 189,
"sɨp_5": 190,
"oŋ_11": 191,
"ki_31": 192,
"i_24en_24": 193,
"a_31 ": 194,
"aŋ_11": 195,
"am_24": 196,
"tʰe_11": 197,
"ten": 198,
"t͡sʰu": 199,
"han_11": 200,
"pun": 201,
"at_2": 202,
"a_55i_55 ": 203,
"ki_11 ": 204,
"u_55i_55 ": 205,
"ku": 206,
"i_31oŋ_31": 207,
"e_24u_24": 208,
"e_31u_31": 209,
"an_24": 210,
"ka_24": 211,
"ka_24 ": 212,
"kʰi_11": 213,
"an_24 ": 214,
"tʰo_11i_11": 215,
"mo_11": 216,
"ki_55": 217,
"ku_2": 218,
"ko_55 ": 219,
"ŋa_11": 220,
"e_55u_55": 221,
"t͡sɨn": 222,
"t͡so": 223,
"kin": 224,
"im_24": 225,
"ŋi_11en_11 ": 226,
"e_24u_24 ": 227,
"tet_2 ": 228,
"i_24en_24 ": 229,
"ip": 230,
"on_24": 231,
"un_11": 232,
"kuŋ_24": 233,
"su_31": 234,
"ap_2": 235,
"sɨ_55 ": 236,
"aŋ_11 ": 237,
"on_55": 238,
"tʰi_55": 239,
"et_5 ": 240,
"tʰuŋ_11 ": 241,
"an_55": 242,
"o_11 ": 243,
"uk_2 ": 244,
"t͡suŋ": 245,
"ho_31": 246,
"i_2et_2": 247,
"i_31en": 248,
"ku_24": 249,
"i_24u_24": 250,
"aŋ_55": 251,
"ŋi_11": 252,
"an_55 ": 253,
"an_31": 254,
"hi_55 ": 255
},
"merges": [
[
"_",
"5"
],
[
"_5",
"5"
],
[
"_",
"2"
],
[
"_2",
"4"
],
[
"1",
"1"
],
[
"_",
"11"
],
[
"3",
"1"
],
[
"_",
"31"
],
[
"_55",
" "
],
[
"t",
"͡"
],
[
"t͡",
"s"
],
[
"_24",
" "
],
[
"i",
"_55"
],
[
"_11",
" "
],
[
"_31",
" "
],
[
"t",
"ʰ"
],
[
"t",
"_2"
],
[
"i",
"_24"
],
[
"i",
"_11"
],
[
"o",
"ŋ"
],
[
"e",
"n"
],
[
"i",
"_55 "
],
[
"e",
"_55 "
],
[
"u",
"ŋ"
],
[
"i",
"n"
],
[
"i",
"_31"
],
[
"t͡s",
"ʰ"
],
[
"s",
"ɨ"
],
[
"t͡",
"ɕ"
],
[
"o",
"_55"
],
[
"k",
"ʰ"
],
[
"k",
"e_55 "
],
[
"a",
"n"
],
[
"u",
"_55"
],
[
"k",
"_2"
],
[
"a",
"ŋ"
],
[
"t͡ɕ",
"ʰ"
],
[
"a",
"_55"
],
[
"t_2",
" "
],
[
"a",
"_24"
],
[
"u",
"n"
],
[
"_5",
" "
],
[
"o",
"_11"
],
[
"u",
"_24"
],
[
"u",
"_55 "
],
[
"a",
"m"
],
[
"u",
"_24 "
],
[
"e",
"_55"
],
[
"p",
"ʰ"
],
[
"o",
"n"
],
[
"i",
"_11 "
],
[
"u",
"_31"
],
[
"t",
"o"
],
[
"o_55",
"i_55 "
],
[
"i_11",
"en"
],
[
"a",
"_31"
],
[
"i",
"t_2"
],
[
"i",
"_2"
],
[
"e",
"_31"
],
[
"i",
"_5"
],
[
"uŋ",
"_24"
],
[
"k_2",
" "
],
[
"ŋ",
"in"
],
[
"t͡s",
"ɨ"
],
[
"i_55",
"en"
],
[
"h",
"o"
],
[
"a",
"_24 "
],
[
"e",
"t_2 "
],
[
"i",
"_24 "
],
[
"t",
"_5"
],
[
"k",
"o"
],
[
"a",
"_11"
],
[
"e",
"_24"
],
[
"u",
"_11"
],
[
"e",
"t_2"
],
[
"tʰ",
"uŋ"
],
[
"a",
"p"
],
[
"oŋ",
"_24"
],
[
"l",
"o_11"
],
[
"k",
"_5"
],
[
"a_55",
"i_55"
],
[
"h",
"e_55 "
],
[
"i_24",
"u_24 "
],
[
"oŋ",
"_31"
],
[
"u",
"_31 "
],
[
"t͡ɕʰ",
"i_55"
],
[
"i_24",
"en"
],
[
"t",
"_5 "
],
[
"i",
"t_2 "
],
[
"i_11en",
"_11 "
],
[
"i",
"m"
],
[
"i",
"_31 "
],
[
"a",
"_55 "
],
[
"e",
"_31 "
],
[
"̩",
"_11"
],
[
"e",
"_11"
],
[
"in",
"_24"
],
[
"m",
"̩_11"
],
[
"sɨ",
"p"
],
[
"oŋ",
"_55 "
],
[
"i_55en",
"_55"
],
[
"i_11en",
"_11"
],
[
"v",
"o_55i_55 "
],
[
"k",
"_5 "
],
[
"an",
"_11"
],
[
"to",
"_55 "
],
[
"o",
"_24"
],
[
"t",
"i_55 "
],
[
"u",
"k_2"
],
[
"k",
"i_24"
],
[
"u",
"_11 "
],
[
"k",
"e_55"
],
[
"oŋ",
"_55"
],
[
"oŋ",
"_31 "
],
[
"oŋ",
"_24 "
],
[
"sɨ",
"_55"
],
[
"o_55",
"i_55"
],
[
"a",
"k_2"
],
[
"o",
"_31"
],
[
"ŋin",
"_11"
],
[
"lo_11",
"i_11 "
],
[
"ŋin",
"_11 "
],
[
"tʰ",
"a_55i_55"
],
[
"tʰ",
"o_11"
],
[
"aŋ",
"_24"
],
[
"ŋ",
"i_5"
],
[
"a",
"k_2 "
],
[
"i_31",
"a_31"
],
[
"uŋ",
"_24 "
],
[
"i_55",
"uŋ"
],
[
"sɨ",
"n"
],
[
"to",
"_31 "
],
[
"t͡s",
"o_55"
],
[
"t͡sʰ",
"ɨ"
],
[
"ŋ",
"̩"
],
[
"in",
"_11"
],
[
"in",
"_55"
],
[
"i_55en",
"_55 "
],
[
"ŋ",
"i_55"
],
[
"oŋ",
"_11 "
],
[
"sɨp",
"_5"
],
[
"oŋ",
"_11"
],
[
"k",
"i_31"
],
[
"i_24en",
"_24"
],
[
"a",
"_31 "
],
[
"aŋ",
"_11"
],
[
"am",
"_24"
],
[
"tʰ",
"e_11"
],
[
"t",
"en"
],
[
"t͡sʰ",
"u"
],
[
"h",
"an_11"
],
[
"p",
"un"
],
[
"a",
"t_2"
],
[
"a_55",
"i_55 "
],
[
"k",
"i_11 "
],
[
"u_55",
"i_55 "
],
[
"k",
"u"
],
[
"i_31",
"oŋ_31"
],
[
"e_24",
"u_24"
],
[
"e_31",
"u_31"
],
[
"an",
"_24"
],
[
"k",
"a_24"
],
[
"k",
"a_24 "
],
[
"kʰ",
"i_11"
],
[
"an",
"_24 "
],
[
"tʰo_11",
"i_11"
],
[
"m",
"o_11"
],
[
"k",
"i_55"
],
[
"ku",
"_2"
],
[
"ko",
"_55 "
],
[
"ŋ",
"a_11"
],
[
"e_55",
"u_55"
],
[
"t͡sɨ",
"n"
],
[
"t͡s",
"o"
],
[
"k",
"in"
],
[
"im",
"_24"
],
[
"ŋ",
"i_11en_11 "
],
[
"e_24",
"u_24 "
],
[
"t",
"et_2 "
],
[
"i_24en",
"_24 "
],
[
"i",
"p"
],
[
"on",
"_24"
],
[
"un",
"_11"
],
[
"k",
"uŋ_24"
],
[
"s",
"u_31"
],
[
"ap",
"_2"
],
[
"sɨ",
"_55 "
],
[
"aŋ",
"_11 "
],
[
"on",
"_55"
],
[
"tʰ",
"i_55"
],
[
"e",
"t_5 "
],
[
"tʰuŋ",
"_11 "
],
[
"an",
"_55"
],
[
"o",
"_11 "
],
[
"u",
"k_2 "
],
[
"t͡s",
"uŋ"
],
[
"ho",
"_31"
],
[
"i_2",
"et_2"
],
[
"i_31",
"en"
],
[
"k",
"u_24"
],
[
"i_24",
"u_24"
],
[
"aŋ",
"_55"
],
[
"ŋ",
"i_11"
],
[
"an",
"_55 "
],
[
"an",
"_31"
],
[
"h",
"i_55 "
]
]
}
}