{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "ByteLevel", "add_prefix_space": false, "trim_offsets": true, "use_regex": true }, "post_processor": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": false, "use_regex": true }, "decoder": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "model": { "type": "BPE", "dropout": null, "unk_token": null, "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "vocab": { "": 0, "": 1, "": 2, "\"": 3, "'": 4, ",": 5, "-": 6, ".": 7, "<": 8, ">": 9, "A": 10, "B": 11, "C": 12, "D": 13, "G": 14, "H": 15, "I": 16, "M": 17, "N": 18, "O": 19, "S": 20, "a": 21, "b": 22, "c": 23, "d": 24, "e": 25, "f": 26, "g": 27, "h": 28, "i": 29, "j": 30, "k": 31, "l": 32, "m": 33, "n": 34, "o": 35, "p": 36, "r": 37, "s": 38, "t": 39, "u": 40, "w": 41, "x": 42, "y": 43, "²": 44, "Ì": 45, "Ġ": 46, "st": 47, "nd": 48, "ar": 49, "end": 50, "Ġ<": 51, "star": 52, "start": 53, "hl": 54, "̲": 55, "wi": 56, "ii": 57, "Ġg": 58, "aa": 59, "oo": 60, "Ġn": 61, "Ġwi": 62, "Ġ'": 63, "Ġii": 64, "an": 65, "Ġy": 66, "Ġl": 67, "Ii": 68, "ĠIi": 69, "oohl": 70, "ee": 71, "im": 72, "Ġwil": 73, "Ġh": 74, "whl": 75, "Ġhl": 76, "ag": 77, "dii": 78, "nii": 79, "ts": 80, "xwi": 81, "Ġd": 82, "Ġha": 83, "uu": 84, "Ġnee": 85, "xs": 86, "Ġyu": 87, "Ġa": 88, "ip": 89, "kwhl": 90, "wihl": 91, "gi": 92, "Ġk": 93, "xw": 94, "'m": 95, "Ġxs": 96, "Ġdim": 97, "Ġneedii": 98, "igi": 99 }, "merges": [ "s t", "n d", "a r", "e nd", "Ġ <", "st ar", "star t", "h l", "Ì ²", "w i", "i i", "Ġ g", "a a", "o o", "Ġ n", "Ġ wi", "Ġ '", "Ġ ii", "a n", "Ġ y", "Ġ l", "I i", "Ġ Ii", "oo hl", "e e", "i m", "Ġwi l", "Ġ h", "w hl", "Ġ hl", "a g", "d ii", "n ii", "t s", "x wi", "Ġ d", "Ġh a", "u u", "Ġn ee", "x s", "Ġy u", "Ġ a", "i p", "k whl", "wi hl", "g i", "Ġ k", "x w", "' m", "Ġ xs", "Ġd im", "Ġnee dii", "i gi" ] } }