Upload tokenizer
5891ad3
|
{ |
|
"$": 1, |
|
"&": 2, |
|
"-": 3, |
|
"=": 4, |
|
"[PAD]": 219, |
|
"[UNK]": 218, |
|
"_": 5, |
|
"`": 6, |
|
"a": 7, |
|
"b": 8, |
|
"c": 9, |
|
"d": 10, |
|
"e": 11, |
|
"f": 12, |
|
"g": 13, |
|
"h": 14, |
|
"i": 15, |
|
"j": 16, |
|
"k": 17, |
|
"l": 18, |
|
"m": 19, |
|
"n": 20, |
|
"o": 21, |
|
"p": 22, |
|
"q": 23, |
|
"r": 24, |
|
"s": 25, |
|
"t": 26, |
|
"u": 27, |
|
"v": 28, |
|
"w": 29, |
|
"x": 30, |
|
"y": 31, |
|
"z": 32, |
|
"{": 33, |
|
"|": 0, |
|
"}": 35, |
|
"~": 36, |
|
"¨": 37, |
|
"ª": 38, |
|
"«": 39, |
|
"®": 40, |
|
"°": 41, |
|
"´": 42, |
|
"·": 43, |
|
"»": 44, |
|
"ß": 45, |
|
"à": 46, |
|
"á": 47, |
|
"â": 48, |
|
"ã": 49, |
|
"ä": 50, |
|
"å": 51, |
|
"æ": 52, |
|
"é": 53, |
|
"ê": 54, |
|
"ë": 55, |
|
"ì": 56, |
|
"í": 57, |
|
"î": 58, |
|
"ï": 59, |
|
"ð": 60, |
|
"ñ": 61, |
|
"ò": 62, |
|
"ó": 63, |
|
"ô": 64, |
|
"õ": 65, |
|
"ö": 66, |
|
"ø": 67, |
|
"ù": 68, |
|
"ú": 69, |
|
"û": 70, |
|
"ü": 71, |
|
"ý": 72, |
|
"þ": 73, |
|
"ā": 74, |
|
"ă": 75, |
|
"ć": 76, |
|
"č": 77, |
|
"đ": 78, |
|
"ė": 79, |
|
"ę": 80, |
|
"ě": 81, |
|
"ğ": 82, |
|
"ī": 83, |
|
"ı": 84, |
|
"ł": 85, |
|
"ń": 86, |
|
"ō": 87, |
|
"ő": 88, |
|
"œ": 89, |
|
"ř": 90, |
|
"ś": 91, |
|
"ş": 92, |
|
"š": 93, |
|
"ū": 94, |
|
"ź": 95, |
|
"ž": 96, |
|
"ș": 97, |
|
"ț": 98, |
|
"ə": 99, |
|
"ʷ": 100, |
|
"ʻ": 101, |
|
"ʽ": 102, |
|
"ʿ": 103, |
|
"ː": 104, |
|
"́": 105, |
|
"̇": 106, |
|
"ϙ": 107, |
|
"а": 108, |
|
"б": 109, |
|
"в": 110, |
|
"г": 111, |
|
"д": 112, |
|
"е": 113, |
|
"и": 114, |
|
"й": 115, |
|
"к": 116, |
|
"л": 117, |
|
"н": 118, |
|
"о": 119, |
|
"п": 120, |
|
"р": 121, |
|
"с": 122, |
|
"т": 123, |
|
"ч": 124, |
|
"ш": 125, |
|
"ы": 126, |
|
"ь": 127, |
|
"ю": 128, |
|
"я": 129, |
|
"ё": 130, |
|
"ү": 131, |
|
"ө": 132, |
|
"ְ": 133, |
|
"ִ": 134, |
|
"ֵ": 135, |
|
"ָ": 136, |
|
"ֹ": 137, |
|
"ּ": 138, |
|
"ב": 139, |
|
"ה": 140, |
|
"ו": 141, |
|
"י": 142, |
|
"כ": 143, |
|
"ל": 144, |
|
"ם": 145, |
|
"מ": 146, |
|
"נ": 147, |
|
"ס": 148, |
|
"ק": 149, |
|
"ר": 150, |
|
"ש": 151, |
|
"ת": 152, |
|
"ب": 153, |
|
"ة": 154, |
|
"ذ": 155, |
|
"ه": 156, |
|
"ي": 157, |
|
"ਆ": 158, |
|
"ਘ": 159, |
|
"ਤ": 160, |
|
"ਨ": 161, |
|
"ਮ": 162, |
|
"ਸ": 163, |
|
"ਾ": 164, |
|
"ਿ": 165, |
|
"ੰ": 166, |
|
"ṁ": 167, |
|
"ṃ": 168, |
|
"ṇ": 169, |
|
"ồ": 170, |
|
"‐": 171, |
|
"‑": 172, |
|
"–": 173, |
|
"—": 174, |
|
"―": 175, |
|
"’": 176, |
|
"„": 177, |
|
"…": 178, |
|
"‧": 179, |
|
"‹": 180, |
|
"›": 181, |
|
"→": 182, |
|
"≪": 183, |
|
"≫": 184, |
|
"し": 185, |
|
"の": 186, |
|
"ひ": 187, |
|
"ら": 188, |
|
"ゴ": 189, |
|
"ヒ": 190, |
|
"ミ": 191, |
|
"ム": 192, |
|
"ラ": 193, |
|
"㓁": 194, |
|
"口": 195, |
|
"周": 196, |
|
"山": 197, |
|
"戌": 198, |
|
"日": 199, |
|
"本": 200, |
|
"比": 201, |
|
"毵": 202, |
|
"消": 203, |
|
"生": 204, |
|
"申": 205, |
|
"真": 206, |
|
"箱": 207, |
|
"网": 208, |
|
"罒": 209, |
|
"罓": 210, |
|
"肋": 211, |
|
"肌": 212, |
|
"背": 213, |
|
"良": 214, |
|
"鮓": 215, |
|
"鮨": 216, |
|
"fi": 217, |
|
"": 218 |
|
} |
|
|