{ "version": "1.0", "truncation": null, "padding": { "strategy": "BatchLongest", "direction": "Right", "pad_to_multiple_of": null, "pad_id": 0, "pad_type_id": 0, "pad_token": "" }, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": null, "post_processor": null, "decoder": null, "model": { "type": "BPE", "dropout": null, "unk_token": null, "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": { "": 0, "": 1, " ": 2, "1": 3, "2": 4, "3": 5, "4": 6, "5": 7, "_": 8, "a": 9, "b": 10, "e": 11, "f": 12, "h": 13, "i": 14, "k": 15, "l": 16, "m": 17, "n": 18, "o": 19, "p": 20, "s": 21, "t": 22, "u": 23, "v": 24, "ŋ": 25, "ɕ": 26, "ɛ": 27, "ɨ": 28, "ʊ": 29, "ʰ": 30, "̩": 31, "͡": 32, "_5": 33, "_55": 34, "_2": 35, "_24": 36, "11": 37, "_11": 38, "31": 39, "_31": 40, "_55 ": 41, "t͡": 42, "i_55": 43, "t͡s": 44, "_24 ": 45, "_11 ": 46, "_31 ": 47, "oŋ": 48, "t_2": 49, "tʰ": 50, "i_24": 51, "e_55 ": 52, "i_11": 53, "en": 54, "t͡ɕ": 55, "i_55 ": 56, "in": 57, "i_31": 58, "o_55": 59, "ke_55 ": 60, "uŋ": 61, "t͡sʰ": 62, "t͡ɕʰ": 63, "a_55": 64, "aŋ": 65, "u_55": 66, "k_2": 67, "sɨ": 68, "a_24": 69, "kʰ": 70, "an": 71, "e_55": 72, "it_2": 73, "i_11 ": 74, "o_11": 75, "u_24": 76, "un": 77, "on": 78, "to": 79, "e_31": 80, "et_2": 81, "t_5": 82, "a_31": 83, "u_31": 84, "u_24 ": 85, "pʰ": 86, "o_55i_55 ": 87, "u_55 ": 88, "a_11": 89, "am": 90, "ŋin": 91, "t͡ɕʰi_55": 92, "k_5": 93, "i_2": 94, "a_24 ": 95, "u_11": 96, "̩_11": 97, "ak_2": 98, "o_31": 99, "e_24": 100, "e_31 ": 101, "i_11en": 102, "m̩_11": 103, "e_11": 104, "lo_11": 105, "a_55i_55": 106, "oŋ_24": 107, "oŋ_31": 108, "he_55 ": 109, "uk_2": 110, "uŋ_24": 111, "o_24": 112, "en_24": 113, "ke_55": 114, "i_24 ": 115, "ki_24": 116, "ŋa_11": 117, "i_55en": 118, "p_5": 119, "i_5": 120, "a_55 ": 121, "ki_11 ": 122, "et_2 ": 123, "oŋ_55": 124, "ok_5": 125, "ŋin_11": 126, "aŋ_24": 127, "i_24u_24 ": 128, "u_31 ": 129, "i_31 ": 130, "o_55i_55": 131, "it_2 ": 132, "tʰuŋ": 133, "oŋ_55 ": 134, "to_55 ": 135, "im": 136, "tʰe_11": 137, "i_11en_11": 138, "ŋin_11 ": 139, "t͡sɨ": 140, "ok_2": 141, "mo_11": 142, "vo_55i_55 ": 143, "o_31 ": 144, "ŋa_11i_11 ": 145, "o_55 ": 146, "sɨt_5": 147, "i_55en_55": 148, "ti_55 ": 149, "han": 150, "ten": 151, "lo_11i_11 ": 152, "tʰa_55i_55": 153, "to_31 ": 154, "t͡ɕʰi_55u_55 ": 155, "oŋ_11": 156, "ut_2": 157, "t͡ɕʰin": 158, "ak_2 ": 159, "oŋ_24 ": 160, "i_31a_31": 161, "t͡so_55": 162, "a_31 ": 163, "kin": 164, "u_11 ": 165, "pun": 166, "se_55": 167, "an_24": 168, "et_5": 169, "on_55": 170, "te_24": 171, "o_11 ": 172, "i_31oŋ_31": 173, "ŋ̩": 174, "su_31": 175, "oŋ_31 ": 176, "aŋ_11": 177, "tʰe_11u_11": 178, "i_11en_11 ": 179, "i_31a_31 ": 180, "a_55i_55 ": 181, "t͡ɕʰi_55u_55": 182, "p_2": 183, "aŋ_55": 184, "lo_11i_11": 185, "i_2et_2": 186, "i_24en_24": 187, "i_55uŋ": 188, "ho_31": 189, "koŋ_31": 190, "sɨn": 191, "he_55": 192, "am_24": 193, "at_2": 194, "hi_55 ": 195, "ap_5": 196, "i_55a_55": 197, "ten_31 ": 198, "i_24u_24": 199, "on_24": 200, "t͡sʰut_2": 201, "en_24 ": 202, "t͡sʰɨ": 203, "an_31": 204, "t͡suŋ": 205, "im_24": 206, "ki_31": 207, "ko_55": 208, "te_24u_24 ": 209, "aŋ_24 ": 210, "ho_31 ": 211, "ka_24": 212, "ŋi_11": 213, "tet_2 ": 214, "an_55": 215, "ki_11": 216, "ka_24 ": 217, "tu_55": 218, "sɨ_55": 219, "ŋ̩_11 ": 220, "oŋ_11 ": 221, "i_5et_5": 222, "han_11": 223, "kʰon_55": 224, "ɕi_24": 225, "sɨ_11": 226, "in_24": 227, "ap_2": 228, "ko_55 ": 229, "ha_24": 230, "ti_24": 231, "in_55": 232, "kʰi_11": 233, "uk_5": 234, "i_55oŋ_55": 235, "li_11": 236, "mo_11 ": 237, "ki_2": 238, "uŋ_11": 239, "e_31u_31": 240, "ki_55": 241, "un_11": 242, "tʰuŋ_11 ": 243, "hi_55": 244, "i_31en": 245, "t͡sak_2 ": 246, "toŋ_24": 247, "saŋ_24": 248, "ku_24": 249, "vuk_2": 250, "e_24u_24": 251, "aŋ_55 ": 252, "e_55u_55": 253, "pun_24 ": 254, "pa_31": 255 }, "merges": [ [ "_", "5" ], [ "_5", "5" ], [ "_", "2" ], [ "_2", "4" ], [ "1", "1" ], [ "_", "11" ], [ "3", "1" ], [ "_", "31" ], [ "_55", " " ], [ "t", "͡" ], [ "i", "_55" ], [ "t͡", "s" ], [ "_24", " " ], [ "_11", " " ], [ "_31", " " ], [ "o", "ŋ" ], [ "t", "_2" ], [ "t", "ʰ" ], [ "i", "_24" ], [ "e", "_55 " ], [ "i", "_11" ], [ "e", "n" ], [ "t͡", "ɕ" ], [ "i", "_55 " ], [ "i", "n" ], [ "i", "_31" ], [ "o", "_55" ], [ "k", "e_55 " ], [ "u", "ŋ" ], [ "t͡s", "ʰ" ], [ "t͡ɕ", "ʰ" ], [ "a", "_55" ], [ "a", "ŋ" ], [ "u", "_55" ], [ "k", "_2" ], [ "s", "ɨ" ], [ "a", "_24" ], [ "k", "ʰ" ], [ "a", "n" ], [ "e", "_55" ], [ "i", "t_2" ], [ "i", "_11 " ], [ "o", "_11" ], [ "u", "_24" ], [ "u", "n" ], [ "o", "n" ], [ "t", "o" ], [ "e", "_31" ], [ "e", "t_2" ], [ "t", "_5" ], [ "a", "_31" ], [ "u", "_31" ], [ "u", "_24 " ], [ "p", "ʰ" ], [ "o_55", "i_55 " ], [ "u", "_55 " ], [ "a", "_11" ], [ "a", "m" ], [ "ŋ", "in" ], [ "t͡ɕʰ", "i_55" ], [ "k", "_5" ], [ "i", "_2" ], [ "a", "_24 " ], [ "u", "_11" ], [ "̩", "_11" ], [ "a", "k_2" ], [ "o", "_31" ], [ "e", "_24" ], [ "e", "_31 " ], [ "i_11", "en" ], [ "m", "̩_11" ], [ "e", "_11" ], [ "l", "o_11" ], [ "a_55", "i_55" ], [ "oŋ", "_24" ], [ "oŋ", "_31" ], [ "h", "e_55 " ], [ "u", "k_2" ], [ "uŋ", "_24" ], [ "o", "_24" ], [ "en", "_24" ], [ "k", "e_55" ], [ "i", "_24 " ], [ "k", "i_24" ], [ "ŋ", "a_11" ], [ "i_55", "en" ], [ "p", "_5" ], [ "i", "_5" ], [ "a", "_55 " ], [ "k", "i_11 " ], [ "et_2", " " ], [ "oŋ", "_55" ], [ "o", "k_5" ], [ "ŋin", "_11" ], [ "aŋ", "_24" ], [ "i_24", "u_24 " ], [ "u", "_31 " ], [ "i", "_31 " ], [ "o_55", "i_55" ], [ "it_2", " " ], [ "tʰ", "uŋ" ], [ "oŋ", "_55 " ], [ "to", "_55 " ], [ "i", "m" ], [ "tʰ", "e_11" ], [ "i_11en", "_11" ], [ "ŋin", "_11 " ], [ "t͡s", "ɨ" ], [ "o", "k_2" ], [ "m", "o_11" ], [ "v", "o_55i_55 " ], [ "o", "_31 " ], [ "ŋa_11", "i_11 " ], [ "o", "_55 " ], [ "sɨ", "t_5" ], [ "i_55en", "_55" ], [ "t", "i_55 " ], [ "h", "an" ], [ "t", "en" ], [ "lo_11", "i_11 " ], [ "tʰ", "a_55i_55" ], [ "to", "_31 " ], [ "t͡ɕʰi_55", "u_55 " ], [ "oŋ", "_11" ], [ "u", "t_2" ], [ "t͡ɕʰ", "in" ], [ "ak_2", " " ], [ "oŋ", "_24 " ], [ "i_31", "a_31" ], [ "t͡s", "o_55" ], [ "a", "_31 " ], [ "k", "in" ], [ "u", "_11 " ], [ "p", "un" ], [ "s", "e_55" ], [ "an", "_24" ], [ "e", "t_5" ], [ "on", "_55" ], [ "t", "e_24" ], [ "o", "_11 " ], [ "i_31", "oŋ_31" ], [ "ŋ", "̩" ], [ "s", "u_31" ], [ "oŋ", "_31 " ], [ "aŋ", "_11" ], [ "tʰe_11", "u_11" ], [ "i_11en", "_11 " ], [ "i_31", "a_31 " ], [ "a_55", "i_55 " ], [ "t͡ɕʰi_55", "u_55" ], [ "p", "_2" ], [ "aŋ", "_55" ], [ "lo_11", "i_11" ], [ "i_2", "et_2" ], [ "i_24", "en_24" ], [ "i_55", "uŋ" ], [ "h", "o_31" ], [ "k", "oŋ_31" ], [ "sɨ", "n" ], [ "h", "e_55" ], [ "am", "_24" ], [ "a", "t_2" ], [ "h", "i_55 " ], [ "a", "p_5" ], [ "i_55", "a_55" ], [ "ten", "_31 " ], [ "i_24", "u_24" ], [ "on", "_24" ], [ "t͡sʰ", "ut_2" ], [ "en", "_24 " ], [ "t͡sʰ", "ɨ" ], [ "an", "_31" ], [ "t͡s", "uŋ" ], [ "im", "_24" ], [ "k", "i_31" ], [ "k", "o_55" ], [ "te_24", "u_24 " ], [ "aŋ", "_24 " ], [ "h", "o_31 " ], [ "k", "a_24" ], [ "ŋ", "i_11" ], [ "t", "et_2 " ], [ "an", "_55" ], [ "k", "i_11" ], [ "k", "a_24 " ], [ "t", "u_55" ], [ "sɨ", "_55" ], [ "ŋ̩", "_11 " ], [ "oŋ", "_11 " ], [ "i_5", "et_5" ], [ "han", "_11" ], [ "kʰ", "on_55" ], [ "ɕ", "i_24" ], [ "sɨ", "_11" ], [ "in", "_24" ], [ "a", "p_2" ], [ "k", "o_55 " ], [ "h", "a_24" ], [ "t", "i_24" ], [ "in", "_55" ], [ "kʰ", "i_11" ], [ "u", "k_5" ], [ "i_55", "oŋ_55" ], [ "l", "i_11" ], [ "m", "o_11 " ], [ "k", "i_2" ], [ "uŋ", "_11" ], [ "e_31", "u_31" ], [ "k", "i_55" ], [ "un", "_11" ], [ "tʰuŋ", "_11 " ], [ "h", "i_55" ], [ "i_31", "en" ], [ "t͡s", "ak_2 " ], [ "t", "oŋ_24" ], [ "s", "aŋ_24" ], [ "k", "u_24" ], [ "v", "uk_2" ], [ "e_24", "u_24" ], [ "aŋ", "_55 " ], [ "e_55", "u_55" ], [ "pun", "_24 " ], [ "p", "a_31" ] ] } }