Den4ikAI commited on
Commit
52bcb83
1 Parent(s): 9c90035

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer_config.json +6 -0
  2. vocab.json +670 -0
tokenizer_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "name": "CharacterTokenizer",
3
+ "vocab_file": "vocab.json",
4
+ "model_max_length": 2048,
5
+ "size": 668
6
+ }
vocab.json ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "<pad>": 0,
3
+ "<s>": 1,
4
+ "</s>": 2,
5
+ "<unk>": 3,
6
+ "<sep>": 4,
7
+ "<cls>": 5,
8
+ "<mask>": 6,
9
+ "ఒ": 7,
10
+ "ţ": 8,
11
+ "ѳ": 9,
12
+ "̉": 10,
13
+ "Ф": 11,
14
+ "·": 12,
15
+ " ": 13,
16
+ "Й": 14,
17
+ "Ѣ": 15,
18
+ "ђ": 16,
19
+ "ὸ": 17,
20
+ "γ": 18,
21
+ "Ђ": 19,
22
+ "$": 20,
23
+ "¿": 21,
24
+ "έ": 22,
25
+ "Т": 23,
26
+ "я": 24,
27
+ "✔": 25,
28
+ "̈": 26,
29
+ "∠": 27,
30
+ "∫": 28,
31
+ "Z": 29,
32
+ "←": 30,
33
+ "ќ": 31,
34
+ "š": 32,
35
+ "♥": 33,
36
+ "ń": 34,
37
+ "щ": 35,
38
+ "Ъ": 36,
39
+ "ש": 37,
40
+ "Ј": 38,
41
+ "ů": 39,
42
+ "ת": 40,
43
+ "對": 41,
44
+ "м": 42,
45
+ "О": 43,
46
+ "΀": 44,
47
+ "¹": 45,
48
+ "ř": 46,
49
+ "„": 47,
50
+ "û": 48,
51
+ "А": 49,
52
+ "н": 50,
53
+ "ј": 51,
54
+ "Ц": 52,
55
+ ",": 53,
56
+ "ʼ": 54,
57
+ "∀": 55,
58
+ "舞": 56,
59
+ "ѝ": 57,
60
+ "Ў": 58,
61
+ "ャ": 59,
62
+ "(": 60,
63
+ "ѐ": 61,
64
+ "Ї": 62,
65
+ "@": 63,
66
+ "з": 64,
67
+ "ί": 65,
68
+ "ð": 66,
69
+ "H": 67,
70
+ "内": 68,
71
+ "'": 69,
72
+ "б": 70,
73
+ "Š": 71,
74
+ "Ä": 72,
75
+ "ī": 73,
76
+ "ύ": 74,
77
+ "♂": 75,
78
+ "​": 76,
79
+ "Ё": 77,
80
+ "‹": 78,
81
+ "?": 79,
82
+ "P": 80,
83
+ "ѵ": 81,
84
+ "算": 82,
85
+ "˙": 83,
86
+ "φ": 84,
87
+ "ґ": 85,
88
+ "К": 86,
89
+ "e": 87,
90
+ "ö": 88,
91
+ "Ж": 89,
92
+ "с": 90,
93
+ "д": 91,
94
+ "ゼ": 92,
95
+ "p": 93,
96
+ "Н": 94,
97
+ "ῦ": 95,
98
+ "W": 96,
99
+ "歓": 97,
100
+ "ľ": 98,
101
+ "/": 99,
102
+ "ψ": 100,
103
+ "": 101,
104
+ "1": 102,
105
+ "ὐ": 103,
106
+ "だ": 104,
107
+ "ר": 105,
108
+ "ῷ": 106,
109
+ "が": 107,
110
+ "─": 108,
111
+ "ル": 109,
112
+ "Ω": 110,
113
+ "加": 111,
114
+ "": 112,
115
+ "¸": 113,
116
+ "й": 114,
117
+ "%": 115,
118
+ "三": 116,
119
+ "✨": 117,
120
+ "ћ": 118,
121
+ "ȁ": 119,
122
+ "ŭ": 120,
123
+ "伎": 121,
124
+ "ἄ": 122,
125
+ "Р": 123,
126
+ "ç": 124,
127
+ "S": 125,
128
+ "̀": 126,
129
+ "Τ": 127,
130
+ "ό": 128,
131
+ "à": 129,
132
+ "\t": 130,
133
+ "▸": 131,
134
+ "4": 132,
135
+ "ŋ": 133,
136
+ "⅝": 134,
137
+ "ү": 135,
138
+ "q": 136,
139
+ "∪": 137,
140
+ "松": 138,
141
+ "!": 139,
142
+ "ן": 140,
143
+ "Ӂ": 141,
144
+ "+": 142,
145
+ "零": 143,
146
+ "έ": 144,
147
+ "ΰ": 145,
148
+ "⅛": 146,
149
+ "Ξ": 147,
150
+ "カ": 148,
151
+ "â": 149,
152
+ "Ч": 150,
153
+ "ж": 151,
154
+ "¬": 152,
155
+ "\"": 153,
156
+ "ϊ": 154,
157
+ "Ç": 155,
158
+ "–": 156,
159
+ "💥": 157,
160
+ "☺": 158,
161
+ "З": 159,
162
+ "˛": 160,
163
+ "fi": 161,
164
+ "\u0005": 162,
165
+ "Ì": 163,
166
+ "‚": 164,
167
+ "7": 165,
168
+ "行": 166,
169
+ "k": 167,
170
+ "ו": 168,
171
+ "½": 169,
172
+ "§": 170,
173
+ "す": 171,
174
+ "へ": 172,
175
+ "µ": 173,
176
+ "ü": 174,
177
+ "土": 175,
178
+ "Θ": 176,
179
+ "ë": 177,
180
+ "⅜": 178,
181
+ "‚": 179,
182
+ "һ": 180,
183
+ "き": 181,
184
+ "_": 182,
185
+ "ă": 183,
186
+ "ύ": 184,
187
+ "ї": 185,
188
+ "—": 186,
189
+ "·": 187,
190
+ "Ê": 188,
191
+ "ρ": 189,
192
+ ":": 190,
193
+ "8": 191,
194
+ "ú": 192,
195
+ "《": 193,
196
+ "A": 194,
197
+ "Υ": 195,
198
+ "\u0002": 196,
199
+ "ѓ": 197,
200
+ "Ż": 198,
201
+ "ә": 199,
202
+ "ό": 200,
203
+ "ス": 201,
204
+ ">": 202,
205
+ "ś": 203,
206
+ "K": 204,
207
+ "њ": 205,
208
+ "к": 206,
209
+ "²": 207,
210
+ "—": 208,
211
+ "Γ": 209,
212
+ "5": 210,
213
+ "●": 211,
214
+ "R": 212,
215
+ "ф": 213,
216
+ "Π": 214,
217
+ "ᾳ": 215,
218
+ "能": 216,
219
+ "Ρ": 217,
220
+ "Â": 218,
221
+ "č": 219,
222
+ "è": 220,
223
+ "–": 221,
224
+ "ἱ": 222,
225
+ "!": 223,
226
+ "M": 224,
227
+ "❄": 225,
228
+ "t": 226,
229
+ "も": 227,
230
+ "ξ": 228,
231
+ "ή": 229,
232
+ "Í": 230,
233
+ "I": 231,
234
+ "ы": 232,
235
+ "ё": 233,
236
+ "️": 234,
237
+ "É": 235,
238
+ "Ã": 236,
239
+ "ɪ": 237,
240
+ "o": 238,
241
+ "": 239,
242
+ "ώ": 240,
243
+ "ϑ": 241,
244
+ "*": 242,
245
+ "Я": 243,
246
+ "э": 244,
247
+ "Ē": 245,
248
+ "Ř": 246,
249
+ "˗": 247,
250
+ "": 248,
251
+ "″": 249,
252
+ "õ": 250,
253
+ "O": 251,
254
+ "Ϊ": 252,
255
+ "ー": 253,
256
+ "χ": 254,
257
+ "ю": 255,
258
+ "ν": 256,
259
+ "|": 257,
260
+ "た": 258,
261
+ "−": 259,
262
+ "う": 260,
263
+ "џ": 261,
264
+ "Č": 262,
265
+ "П": 263,
266
+ "√": 264,
267
+ "歪": 265,
268
+ " ": 266,
269
+ "l": 267,
270
+ "ò": 268,
271
+ "℅": 269,
272
+ "р": 270,
273
+ "þ": 271,
274
+ "ュ": 272,
275
+ "z": 273,
276
+ "”": 274,
277
+ "ş": 275,
278
+ "ê": 276,
279
+ "À": 277,
280
+ "α": 278,
281
+ "座": 279,
282
+ "→": 280,
283
+ "X": 281,
284
+ "⠀": 282,
285
+ "δ": 283,
286
+ "ὼ": 284,
287
+ "ὑ": 285,
288
+ "Ы": 286,
289
+ "‘": 287,
290
+ "ä": 288,
291
+ "ň": 289,
292
+ "": 290,
293
+ "b": 291,
294
+ "ė": 292,
295
+ "¡": 293,
296
+ "c": 294,
297
+ "•": 295,
298
+ "י": 296,
299
+ "ク": 297,
300
+ "ῖ": 298,
301
+ "め": 299,
302
+ "歌": 300,
303
+ "♡": 301,
304
+ "ħ": 302,
305
+ "w": 303,
306
+ "⅓": 304,
307
+ "ű": 305,
308
+ "Ý": 306,
309
+ "у": 307,
310
+ "n": 308,
311
+ "­": 309,
312
+ " ": 310,
313
+ "↔": 311,
314
+ "Ѕ": 312,
315
+ " ": 313,
316
+ "": 314,
317
+ "ά": 315,
318
+ "Ó": 316,
319
+ "j": 317,
320
+ "字": 318,
321
+ "子": 319,
322
+ "Δ": 320,
323
+ " ": 321,
324
+ "ï": 322,
325
+ "灯": 323,
326
+ "下": 324,
327
+ "r": 325,
328
+ "Ü": 326,
329
+ "g": 327,
330
+ "ῤ": 328,
331
+ "∙": 329,
332
+ "ң": 330,
333
+ "♠": 331,
334
+ "ő": 332,
335
+ "о": 333,
336
+ "⅞": 334,
337
+ "‡": 335,
338
+ "┴": 336,
339
+ "¼": 337,
340
+ "ø": 338,
341
+ "↑": 339,
342
+ "‰": 340,
343
+ "v": 341,
344
+ "{": 342,
345
+ "ш": 343,
346
+ "Λ": 344,
347
+ "口": 345,
348
+ "∥": 346,
349
+ "□": 347,
350
+ "ā": 348,
351
+ "を": 349,
352
+ "´": 350,
353
+ "Ќ": 351,
354
+ "€": 352,
355
+ "": 353,
356
+ "λ": 354,
357
+ "Å": 355,
358
+ "¾": 356,
359
+ "\b": 357,
360
+ "º": 358,
361
+ "ô": 359,
362
+ "№": 360,
363
+ "ž": 361,
364
+ "\n": 362,
365
+ "ę": 363,
366
+ "֠": 364,
367
+ "∧": 365,
368
+ "ż": 366,
369
+ "³": 367,
370
+ "′": 368,
371
+ "æ": 369,
372
+ "Φ": 370,
373
+ "ū": 371,
374
+ "ϕ": 372,
375
+ "́": 373,
376
+ "È": 374,
377
+ "ム": 375,
378
+ "ὁ": 376,
379
+ "`": 377,
380
+ "Ú": 378,
381
+ "™": 379,
382
+ "Б": 380,
383
+ ")": 381,
384
+ "ч": 382,
385
+ "ἶ": 383,
386
+ "►": 384,
387
+ "Ь": 385,
388
+ "6": 386,
389
+ "戦": 387,
390
+ "T": 388,
391
+ "Ν": 389,
392
+ "ま": 390,
393
+ "F": 391,
394
+ "≈": 392,
395
+ "⅔": 393,
396
+ "ý": 394,
397
+ "π": 395,
398
+ "ί": 396,
399
+ "Ε": 397,
400
+ "ì": 398,
401
+ "火": 399,
402
+ "ζ": 400,
403
+ "9": 401,
404
+ "&": 402,
405
+ "Q": 403,
406
+ "": 404,
407
+ "¶": 405,
408
+ "Χ": 406,
409
+ "Ѵ": 407,
410
+ "ὺ": 408,
411
+ "C": 409,
412
+ "Ѓ": 410,
413
+ "×": 411,
414
+ "₂": 412,
415
+ "U": 413,
416
+ "Ѳ": 414,
417
+ "Є": 415,
418
+ "ď": 416,
419
+ "¦": 417,
420
+ "Э": 418,
421
+ "β": 419,
422
+ "»": 420,
423
+ "ἀ": 421,
424
+ "±": 422,
425
+ "ω": 423,
426
+ "†": 424,
427
+ "«": 425,
428
+ "῾": 426,
429
+ "Κ": 427,
430
+ "ロ": 428,
431
+ "i": 429,
432
+ "Ѹ": 430,
433
+ "灸": 431,
434
+ "~": 432,
435
+ "Œ": 433,
436
+ "\u0010": 434,
437
+ "�": 435,
438
+ "ἡ": 436,
439
+ "Ө": 437,
440
+ "υ": 438,
441
+ "体": 439,
442
+ "に": 440,
443
+ "Х": 441,
444
+ "ὴ": 442,
445
+ "よ": 443,
446
+ "á": 444,
447
+ "撃": 445,
448
+ "\f": 446,
449
+ "Ω": 447,
450
+ ";": 448,
451
+ "І": 449,
452
+ "=": 450,
453
+ "ǐ": 451,
454
+ "シ": 452,
455
+ "ğ": 453,
456
+ "В": 454,
457
+ "מ": 455,
458
+ "≠": 456,
459
+ "#": 457,
460
+ "җ": 458,
461
+ "ө": 459,
462
+ "˝": 460,
463
+ "ἁ": 461,
464
+ "Ζ": 462,
465
+ "ÿ": 463,
466
+ "■": 464,
467
+ "΄": 465,
468
+ "ὶ": 466,
469
+ "L": 467,
470
+ "п": 468,
471
+ "®": 469,
472
+ "״": 470,
473
+ "2": 471,
474
+ "<": 472,
475
+ "…": 473,
476
+ "0": 474,
477
+ "x": 475,
478
+ "τ": 476,
479
+ "爪": 477,
480
+ "ὦ": 478,
481
+ "y": 479,
482
+ "ὕ": 480,
483
+ "і": 481,
484
+ "ь": 482,
485
+ "Д": 483,
486
+ "љ": 484,
487
+ "ϋ": 485,
488
+ "ε": 486,
489
+ "σ": 487,
490
+ "ñ": 488,
491
+ "ć": 489,
492
+ "Σ": 490,
493
+ "ц": 491,
494
+ "䋚": 492,
495
+ "í": 493,
496
+ "“": 494,
497
+ "Е": 495,
498
+ "°": 496,
499
+ "▲": 497,
500
+ "т": 498,
501
+ "ź": 499,
502
+ "и": 500,
503
+ "Љ": 501,
504
+ "ó": 502,
505
+ "-": 503,
506
+ "迎": 504,
507
+ "Ґ": 505,
508
+ "E": 506,
509
+ "Æ": 507,
510
+ "Β": 508,
511
+ "♦": 509,
512
+ "£": 510,
513
+ "Ѡ": 511,
514
+ " ": 512,
515
+ "ι": 513,
516
+ "Ά": 514,
517
+ "У": 515,
518
+ "ὰ": 516,
519
+ "Ô": 517,
520
+ "\u000e": 518,
521
+ "ה": 519,
522
+ "Ł": 520,
523
+ "η": 521,
524
+ "Έ": 522,
525
+ "а": 523,
526
+ "]": 524,
527
+ "ã": 525,
528
+ "G": 526,
529
+ "ъ": 527,
530
+ "θ": 528,
531
+ "х": 529,
532
+ "h": 530,
533
+ "א": 531,
534
+ "ἕ": 532,
535
+ "ϰ": 533,
536
+ "J": 534,
537
+ "}": 535,
538
+ "Ø": 536,
539
+ "り": 537,
540
+ "-": 538,
541
+ "г": 539,
542
+ "ἐ": 540,
543
+ "♀": 541,
544
+ "л": 542,
545
+ "面": 543,
546
+ "ĕ": 544,
547
+ "ο": 545,
548
+ "ל": 546,
549
+ ".": 547,
550
+ "a": 548,
551
+ "る": 549,
552
+ "μ": 550,
553
+ "": 551,
554
+ "ː": 552,
555
+ "ı": 553,
556
+ " ": 554,
557
+ "М": 555,
558
+ "ị": 556,
559
+ "―": 557,
560
+ "в": 558,
561
+ "᾽": 559,
562
+ "ў": 560,
563
+ "Г": 561,
564
+ "😇": 562,
565
+ "ť": 563,
566
+ "Л": 564,
567
+ "↓": 565,
568
+ "é": 566,
569
+ "え": 567,
570
+ "ς": 568,
571
+ "ʙ": 569,
572
+ "藁": 570,
573
+ "Ο": 571,
574
+ "チ": 572,
575
+ "ב": 573,
576
+ "Y": 574,
577
+ "m": 575,
578
+ "Ò": 576,
579
+ "С": 577,
580
+ "ē": 578,
581
+ "\\": 579,
582
+ "ホ": 580,
583
+ "い": 581,
584
+ "(": 582,
585
+ " ": 583,
586
+ "÷": 584,
587
+ "ˆ": 585,
588
+ "N": 586,
589
+ "Ш": 587,
590
+ "ą": 588,
591
+ "Ι": 589,
592
+ "¤": 590,
593
+ "ῥ": 591,
594
+ "抗": 592,
595
+ "ѣ": 593,
596
+ "Μ": 594,
597
+ "‎": 595,
598
+ "́": 596,
599
+ "Џ": 597,
600
+ "ł": 598,
601
+ "s": 599,
602
+ "╦": 600,
603
+ "ΐ": 601,
604
+ "B": 602,
605
+ "є": 603,
606
+ "": 604,
607
+ "育": 605,
608
+ "å": 606,
609
+ "ῆ": 607,
610
+ "u": 608,
611
+ " ": 609,
612
+ "^": 610,
613
+ "е": 611,
614
+ "ά": 612,
615
+ "3": 613,
616
+ "Ö": 614,
617
+ "Ю": 615,
618
+ "電": 616,
619
+ "Ž": 617,
620
+ "Α": 618,
621
+ "Щ": 619,
622
+ "ὔ": 620,
623
+ "ù": 621,
624
+ "[": 622,
625
+ "♣": 623,
626
+ "Ś": 624,
627
+ "ῳ": 625,
628
+ "Њ": 626,
629
+ "V": 627,
630
+ "œ": 628,
631
+ "Ş": 629,
632
+ "‑": 630,
633
+ "Η": 631,
634
+ "d": 632,
635
+ "ˈ": 633,
636
+ "∂": 634,
637
+ "Ћ": 635,
638
+ "ß": 636,
639
+ "đ": 637,
640
+ "И": 638,
641
+ "κ": 639,
642
+ " ": 640,
643
+ "の": 641,
644
+ "ῶ": 642,
645
+ ")": 643,
646
+ "ě": 644,
647
+ "ѕ": 645,
648
+ "î": 646,
649
+ "半": 647,
650
+ "≥": 648,
651
+ "\u0004": 649,
652
+ "⅟": 650,
653
+ "D": 651,
654
+ "‒": 652,
655
+ "’": 653,
656
+ "›": 654,
657
+ "f": 655,
658
+ "©": 656,
659
+ "ꐜ": 657,
660
+ "+а": 658,
661
+ "+е": 659,
662
+ "+ё": 660,
663
+ "+и": 661,
664
+ "+о": 662,
665
+ "+у": 663,
666
+ "+ы": 664,
667
+ "+э": 665,
668
+ "+ю": 666,
669
+ "+я": 667
670
+ }