{ "version": "1.0", "truncation": null, "padding": { "strategy": "BatchLongest", "direction": "Right", "pad_to_multiple_of": null, "pad_id": 0, "pad_type_id": 0, "pad_token": "" }, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": null, "post_processor": null, "decoder": null, "model": { "type": "BPE", "dropout": null, "unk_token": null, "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": { "": 0, "": 1, " ": 2, "1": 3, "2": 4, "3": 5, "4": 6, "5": 7, "_": 8, "a": 9, "b": 10, "d": 11, "e": 12, "f": 13, "h": 14, "i": 15, "j": 16, "k": 17, "l": 18, "m": 19, "n": 20, "o": 21, "p": 22, "s": 23, "t": 24, "u": 25, "v": 26, "w": 27, "z": 28, "æ": 29, "ð": 30, "ŋ": 31, "ɑ": 32, "ɔ": 33, "ɕ": 34, "ə": 35, "ɚ": 36, "ɛ": 37, "ɡ": 38, "ɨ": 39, "ɪ": 40, "ɹ": 41, "ʃ": 42, "ʊ": 43, "ʌ": 44, "ʒ": 45, "ʰ": 46, "̩": 47, "͡": 48, "θ": 49, "_5": 50, "_55": 51, "_2": 52, "_24": 53, "11": 54, "_11": 55, "31": 56, "_31": 57, "_55 ": 58, "t͡": 59, "t͡s": 60, "_24 ": 61, "i_55": 62, "_11 ": 63, "_31 ": 64, "tʰ": 65, "t_2": 66, "i_24": 67, "i_11": 68, "oŋ": 69, "en": 70, "i_55 ": 71, "e_55 ": 72, "uŋ": 73, "in": 74, "i_31": 75, "t͡sʰ": 76, "sɨ": 77, "t͡ɕ": 78, "o_55": 79, "kʰ": 80, "ke_55 ": 81, "an": 82, "u_55": 83, "k_2": 84, "aŋ": 85, "t͡ɕʰ": 86, "a_55": 87, "t_2 ": 88, "a_24": 89, "un": 90, "_5 ": 91, "o_11": 92, "u_24": 93, "u_55 ": 94, "am": 95, "u_24 ": 96, "e_55": 97, "pʰ": 98, "on": 99, "i_11 ": 100, "u_31": 101, "to": 102, "o_55i_55 ": 103, "i_11en": 104, "a_31": 105, "it_2": 106, "i_2": 107, "e_31": 108, "i_5": 109, "uŋ_24": 110, "k_2 ": 111, "ŋin": 112, "t͡sɨ": 113, "i_55en": 114, "ho": 115, "a_24 ": 116, "et_2 ": 117, "i_24 ": 118, "t_5": 119, "ko": 120, "a_11": 121, "e_24": 122, "u_11": 123, "et_2": 124, "tʰuŋ": 125, "ap": 126, "oŋ_24": 127, "lo_11": 128, "k_5": 129, "a_55i_55": 130, "he_55 ": 131, "i_24u_24 ": 132, "oŋ_31": 133, "u_31 ": 134, "t͡ɕʰi_55": 135, "i_24en": 136, "t_5 ": 137, "it_2 ": 138, "i_11en_11 ": 139, "im": 140, "i_31 ": 141, "a_55 ": 142, "e_31 ": 143, "̩_11": 144, "e_11": 145, "in_24": 146, "m̩_11": 147, "sɨp": 148, "oŋ_55 ": 149, "i_55en_55": 150, "i_11en_11": 151, "vo_55i_55 ": 152, "k_5 ": 153, "an_11": 154, "to_55 ": 155, "o_24": 156, "ti_55 ": 157, "uk_2": 158, "ki_24": 159, "u_11 ": 160, "ke_55": 161, "oŋ_55": 162, "oŋ_31 ": 163, "oŋ_24 ": 164, "sɨ_55": 165, "o_55i_55": 166, "ak_2": 167, "o_31": 168, "ŋin_11": 169, "lo_11i_11 ": 170, "ŋin_11 ": 171, "tʰa_55i_55": 172, "tʰo_11": 173, "aŋ_24": 174, "ŋi_5": 175, "ak_2 ": 176, "i_31a_31": 177, "uŋ_24 ": 178, "i_55uŋ": 179, "sɨn": 180, "to_31 ": 181, "t͡so_55": 182, "t͡sʰɨ": 183, "ŋ̩": 184, "in_11": 185, "in_55": 186, "i_55en_55 ": 187, "ŋi_55": 188, "oŋ_11 ": 189, "sɨp_5": 190, "oŋ_11": 191, "ki_31": 192, "i_24en_24": 193, "a_31 ": 194, "aŋ_11": 195, "am_24": 196, "tʰe_11": 197, "ten": 198, "t͡sʰu": 199, "han_11": 200, "pun": 201, "at_2": 202, "a_55i_55 ": 203, "ki_11 ": 204, "u_55i_55 ": 205, "ku": 206, "i_31oŋ_31": 207, "e_24u_24": 208, "e_31u_31": 209, "an_24": 210, "ka_24": 211, "ka_24 ": 212, "kʰi_11": 213, "an_24 ": 214, "tʰo_11i_11": 215, "mo_11": 216, "ki_55": 217, "ku_2": 218, "ko_55 ": 219, "ŋa_11": 220, "e_55u_55": 221, "t͡sɨn": 222, "t͡so": 223, "kin": 224, "im_24": 225, "ŋi_11en_11 ": 226, "e_24u_24 ": 227, "tet_2 ": 228, "i_24en_24 ": 229, "ip": 230, "on_24": 231, "un_11": 232, "kuŋ_24": 233, "su_31": 234, "ap_2": 235, "sɨ_55 ": 236, "aŋ_11 ": 237, "on_55": 238, "tʰi_55": 239, "et_5 ": 240, "tʰuŋ_11 ": 241, "an_55": 242, "o_11 ": 243, "uk_2 ": 244, "t͡suŋ": 245, "ho_31": 246, "i_2et_2": 247, "i_31en": 248, "ku_24": 249, "i_24u_24": 250, "aŋ_55": 251, "ŋi_11": 252, "an_55 ": 253, "an_31": 254, "hi_55 ": 255 }, "merges": [ [ "_", "5" ], [ "_5", "5" ], [ "_", "2" ], [ "_2", "4" ], [ "1", "1" ], [ "_", "11" ], [ "3", "1" ], [ "_", "31" ], [ "_55", " " ], [ "t", "͡" ], [ "t͡", "s" ], [ "_24", " " ], [ "i", "_55" ], [ "_11", " " ], [ "_31", " " ], [ "t", "ʰ" ], [ "t", "_2" ], [ "i", "_24" ], [ "i", "_11" ], [ "o", "ŋ" ], [ "e", "n" ], [ "i", "_55 " ], [ "e", "_55 " ], [ "u", "ŋ" ], [ "i", "n" ], [ "i", "_31" ], [ "t͡s", "ʰ" ], [ "s", "ɨ" ], [ "t͡", "ɕ" ], [ "o", "_55" ], [ "k", "ʰ" ], [ "k", "e_55 " ], [ "a", "n" ], [ "u", "_55" ], [ "k", "_2" ], [ "a", "ŋ" ], [ "t͡ɕ", "ʰ" ], [ "a", "_55" ], [ "t_2", " " ], [ "a", "_24" ], [ "u", "n" ], [ "_5", " " ], [ "o", "_11" ], [ "u", "_24" ], [ "u", "_55 " ], [ "a", "m" ], [ "u", "_24 " ], [ "e", "_55" ], [ "p", "ʰ" ], [ "o", "n" ], [ "i", "_11 " ], [ "u", "_31" ], [ "t", "o" ], [ "o_55", "i_55 " ], [ "i_11", "en" ], [ "a", "_31" ], [ "i", "t_2" ], [ "i", "_2" ], [ "e", "_31" ], [ "i", "_5" ], [ "uŋ", "_24" ], [ "k_2", " " ], [ "ŋ", "in" ], [ "t͡s", "ɨ" ], [ "i_55", "en" ], [ "h", "o" ], [ "a", "_24 " ], [ "e", "t_2 " ], [ "i", "_24 " ], [ "t", "_5" ], [ "k", "o" ], [ "a", "_11" ], [ "e", "_24" ], [ "u", "_11" ], [ "e", "t_2" ], [ "tʰ", "uŋ" ], [ "a", "p" ], [ "oŋ", "_24" ], [ "l", "o_11" ], [ "k", "_5" ], [ "a_55", "i_55" ], [ "h", "e_55 " ], [ "i_24", "u_24 " ], [ "oŋ", "_31" ], [ "u", "_31 " ], [ "t͡ɕʰ", "i_55" ], [ "i_24", "en" ], [ "t", "_5 " ], [ "i", "t_2 " ], [ "i_11en", "_11 " ], [ "i", "m" ], [ "i", "_31 " ], [ "a", "_55 " ], [ "e", "_31 " ], [ "̩", "_11" ], [ "e", "_11" ], [ "in", "_24" ], [ "m", "̩_11" ], [ "sɨ", "p" ], [ "oŋ", "_55 " ], [ "i_55en", "_55" ], [ "i_11en", "_11" ], [ "v", "o_55i_55 " ], [ "k", "_5 " ], [ "an", "_11" ], [ "to", "_55 " ], [ "o", "_24" ], [ "t", "i_55 " ], [ "u", "k_2" ], [ "k", "i_24" ], [ "u", "_11 " ], [ "k", "e_55" ], [ "oŋ", "_55" ], [ "oŋ", "_31 " ], [ "oŋ", "_24 " ], [ "sɨ", "_55" ], [ "o_55", "i_55" ], [ "a", "k_2" ], [ "o", "_31" ], [ "ŋin", "_11" ], [ "lo_11", "i_11 " ], [ "ŋin", "_11 " ], [ "tʰ", "a_55i_55" ], [ "tʰ", "o_11" ], [ "aŋ", "_24" ], [ "ŋ", "i_5" ], [ "a", "k_2 " ], [ "i_31", "a_31" ], [ "uŋ", "_24 " ], [ "i_55", "uŋ" ], [ "sɨ", "n" ], [ "to", "_31 " ], [ "t͡s", "o_55" ], [ "t͡sʰ", "ɨ" ], [ "ŋ", "̩" ], [ "in", "_11" ], [ "in", "_55" ], [ "i_55en", "_55 " ], [ "ŋ", "i_55" ], [ "oŋ", "_11 " ], [ "sɨp", "_5" ], [ "oŋ", "_11" ], [ "k", "i_31" ], [ "i_24en", "_24" ], [ "a", "_31 " ], [ "aŋ", "_11" ], [ "am", "_24" ], [ "tʰ", "e_11" ], [ "t", "en" ], [ "t͡sʰ", "u" ], [ "h", "an_11" ], [ "p", "un" ], [ "a", "t_2" ], [ "a_55", "i_55 " ], [ "k", "i_11 " ], [ "u_55", "i_55 " ], [ "k", "u" ], [ "i_31", "oŋ_31" ], [ "e_24", "u_24" ], [ "e_31", "u_31" ], [ "an", "_24" ], [ "k", "a_24" ], [ "k", "a_24 " ], [ "kʰ", "i_11" ], [ "an", "_24 " ], [ "tʰo_11", "i_11" ], [ "m", "o_11" ], [ "k", "i_55" ], [ "ku", "_2" ], [ "ko", "_55 " ], [ "ŋ", "a_11" ], [ "e_55", "u_55" ], [ "t͡sɨ", "n" ], [ "t͡s", "o" ], [ "k", "in" ], [ "im", "_24" ], [ "ŋ", "i_11en_11 " ], [ "e_24", "u_24 " ], [ "t", "et_2 " ], [ "i_24en", "_24 " ], [ "i", "p" ], [ "on", "_24" ], [ "un", "_11" ], [ "k", "uŋ_24" ], [ "s", "u_31" ], [ "ap", "_2" ], [ "sɨ", "_55 " ], [ "aŋ", "_11 " ], [ "on", "_55" ], [ "tʰ", "i_55" ], [ "e", "t_5 " ], [ "tʰuŋ", "_11 " ], [ "an", "_55" ], [ "o", "_11 " ], [ "u", "k_2 " ], [ "t͡s", "uŋ" ], [ "ho", "_31" ], [ "i_2", "et_2" ], [ "i_31", "en" ], [ "k", "u_24" ], [ "i_24", "u_24" ], [ "aŋ", "_55" ], [ "ŋ", "i_11" ], [ "an", "_55 " ], [ "an", "_31" ], [ "h", "i_55 " ] ] } }