michelleyunun commited on
Commit
4b4f58c
1 Parent(s): c2d0391

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer.json +719 -731
tokenizer.json CHANGED
@@ -62,762 +62,750 @@
62
  "<start>": 0,
63
  "<end>": 1,
64
  "<pad>": 2,
65
- "\"": 3,
66
- "'": 4,
67
- ",": 5,
68
- "-": 6,
69
- ".": 7,
70
  "<": 8,
71
  ">": 9,
72
  "A": 10,
73
  "B": 11,
74
  "C": 12,
75
  "D": 13,
76
- "G": 14,
77
- "H": 15,
78
- "I": 16,
79
- "M": 17,
80
- "N": 18,
81
- "O": 19,
82
- "S": 20,
83
- "a": 21,
84
- "b": 22,
85
- "c": 23,
86
- "d": 24,
87
- "e": 25,
88
- "f": 26,
89
- "g": 27,
90
- "h": 28,
91
- "i": 29,
92
- "j": 30,
93
- "k": 31,
94
- "l": 32,
95
- "m": 33,
96
- "n": 34,
97
- "o": 35,
98
- "p": 36,
99
- "r": 37,
100
- "s": 38,
101
- "t": 39,
102
- "u": 40,
103
- "w": 41,
104
- "x": 42,
105
- "y": 43,
106
- "²": 44,
107
- "Ì": 45,
108
- "Ġ": 46,
109
- "st": 47,
110
- "nd": 48,
111
- "ar": 49,
112
- "end": 50,
113
- "Ġ<": 51,
114
- "star": 52,
115
- "start": 53,
116
- "hl": 54,
117
- "̲": 55,
118
- "wi": 56,
119
- "ii": 57,
120
- "Ġg": 58,
121
- "aa": 59,
122
- "oo": 60,
123
- "Ġn": 61,
124
- "Ġwi": 62,
125
- "Ġ'": 63,
126
- "Ġii": 64,
127
- "an": 65,
128
- "Ġy": 66,
129
- "Ġl": 67,
130
- "Ii": 68,
131
- "ĠIi": 69,
132
- "oohl": 70,
133
- "ee": 71,
134
- "im": 72,
135
- "Ġwil": 73,
136
- "Ġh": 74,
137
- "whl": 75,
138
- "Ġhl": 76,
139
- "ag": 77,
140
- "dii": 78,
141
- "nii": 79,
142
- "ts": 80,
143
- "xwi": 81,
144
- "Ġd": 82,
145
- "Ġha": 83,
146
- "uu": 84,
147
- "Ġnee": 85,
148
- "xs": 86,
149
- "Ġyu": 87,
150
- "Ġa": 88,
151
- "ip": 89,
152
- "kwhl": 90,
153
- "wihl": 91,
154
- "gi": 92,
155
- "Ġk": 93,
156
- "xw": 94,
157
- "'m": 95,
158
- "Ġxs": 96,
159
- "Ġdim": 97,
160
- "Ġneedii": 98,
161
- "igi": 99,
162
- "Ġb": 100,
163
- "Ġligi": 101,
164
- "Ġwili": 102,
165
- "di": 103,
166
- "Ġj": 104,
167
- "Ġp": 105,
168
- "Ġt": 106,
169
- "Ġwihl": 107,
170
- "sxwi": 108,
171
- "Ġs": 109,
172
- "Ġya": 110,
173
- "in": 111,
174
- "Ġhlaa": 112,
175
- "Ġna": 113,
176
- "Ġan": 114,
177
- "ax": 115,
178
- "ay": 116,
179
- "ahl": 117,
180
- "oot": 118,
181
- "ni": 119,
182
- "ol": 120,
183
- "Ġyukwhl": 121,
184
- "Ġnii": 122,
185
- "Ġnaa": 123,
186
- "Ġwilp": 124,
187
- "ipe": 125,
188
- "Ġpipe": 126,
189
- "uxw": 127,
190
- "tshl": 128,
191
- "Ġyatshl": 129,
192
- "ĠS": 130,
193
- "na": 131,
194
- "hli": 132,
195
- "Ġaa": 133,
196
- "Ġneediit": 134,
197
- "Ġ\"": 135,
198
- "̲'": 136,
199
- "il": 137,
200
- "Ġw": 138,
201
- "Ġyee": 139,
202
- "Ġloot": 140,
203
- "at": 141,
204
- "ck": 142,
205
- "hol": 143,
206
- "ka": 144,
207
- "lhl": 145,
208
- "ock": 146,
209
- "tock": 147,
210
- "ya": 148,
211
- "wil": 149,
212
- "Ġgya": 150,
213
- "Ġiin": 151,
214
- "Ġluu": 152,
215
- "uuhl": 153,
216
- "ĠStock": 154,
217
- "holm": 155,
218
- "ĠStockholm": 156,
219
- "ad": 157,
220
- "ls": 158,
221
- "xu": 159,
222
- "Ġts": 160,
223
- "hla": 161,
224
- "Ġwina": 162,
225
- "Ġhlg": 163,
226
- "Ġhahla": 164,
227
- "uut": 165,
228
- "Ġbag": 166,
229
- "ayt": 167,
230
- "Ġwag": 168,
231
- "lsdi": 169,
232
- "as": 170,
233
- "ok": 171,
234
- "Ġhe": 172,
235
- "diit": 173,
236
- "ain": 174,
237
- "wit": 175,
238
- "Ġxsa": 176,
239
- "Ġxsi": 177,
240
- "Ġja": 178,
241
- "nit": 179,
242
- "xhl": 180,
243
- "xwhl": 181,
244
- "iihli": 182,
245
- "Ġgiihli": 183,
246
- "Ġlax": 184,
247
- "ak": 185,
248
- "̲.": 186,
249
- "eek": 187,
250
- "Ġap": 188,
251
- "Ġxseek": 189,
252
- "Ġji": 190,
253
- "Ġaats": 191,
254
- "hahl": 192,
255
- "un": 193,
256
- "waa": 194,
257
- "oos": 195,
258
- "Ġanhahl": 196,
259
- "Ġanhahla": 197,
260
- "ĠA": 198,
261
- "Ġneet": 199,
262
- "Ġam": 200,
263
- "akwhl": 201,
264
- "Ġak": 202,
265
- "--": 203,
266
- "Can": 204,
267
- "Dim": 205,
268
- "bi": 206,
269
- "da": 207,
270
- "fl": 208,
271
- "gwaa": 209,
272
- "isxwi": 210,
273
- "ika": 211,
274
- "ja": 212,
275
- "kst": 213,
276
- "lt": 214,
277
- "lst": 215,
278
- "nag": 216,
279
- "pja": 217,
280
- "rain": 218,
281
- "sii": 219,
282
- "ska": 220,
283
- "sgwaa": 221,
284
- "upja": 222,
285
- "yt": 223,
286
- "Ġag": 224,
287
- "ĠCan": 225,
288
- "Ġfl": 226,
289
- "Ġisxwi": 227,
290
- "Ġupja": 228,
291
- "ndoos": 229,
292
- "Ġgi": 230,
293
- "Ġgwil": 231,
294
- "Ġguuhl": 232,
295
- "aahli": 233,
296
- "oodi": 234,
297
- "Ġno": 235,
298
- "anhl": 236,
299
- "anwil": 237,
300
- "anuut": 238,
301
- "anska": 239,
302
- "Ġlip": 240,
303
- "imil": 241,
304
- "niig": 242,
305
- "niisgwaa": 243,
306
- "Ġyuwi": 244,
307
- "Ġandoos": 245,
308
- "gihl": 246,
309
- "Ġky": 247,
310
- "dilhl": 248,
311
- "Ġpol": 249,
312
- "Ġtun": 250,
313
- "Ġtrain": 251,
314
- "Ġsgihl": 252,
315
- "Ġsdilhl": 253,
316
- "Ġyalt": 254,
317
- "insxwi": 255,
318
- "Ġnakst": 256,
319
- "Ġant": 257,
320
- "Ġansii": 258,
321
- "ayoo": 259,
322
- "uxwt": 260,
323
- "Ġaam": 261,
324
- "adanska": 262,
325
- "Ġhlgu": 263,
326
- "Ġxsawi": 264,
327
- "Ġjabi": 265,
328
- "nagwit": 266,
329
- "Ġagwihl": 267,
330
- "ĠCanadanska": 268,
331
- "Ġflika": 269,
332
- "Ġgwila": 270,
333
- "aahlihl": 271,
334
- "anwilat": 272,
335
- "anuutxw": 273,
336
- "Ġandoosda": 274,
337
- "Ġpole": 275,
338
- "Ġyaltxu": 276,
339
- "Ġansiip": 277,
340
- "Hl": 278,
341
- "Nii": 279,
342
- "Oo": 280,
343
- "nim": 281,
344
- "wahl": 282,
345
- "yhl": 283,
346
- "ĠHl": 284,
347
- "ĠNii": 285,
348
- "wii": 286,
349
- "Ġguxw": 287,
350
- "Ġguut": 288,
351
- "aaxhl": 289,
352
- "Ġyuxwhl": 290,
353
- "Ġkw": 291,
354
- "Ġbas": 292,
355
- "inhl": 293,
356
- "ootxwi": 294,
357
- "nisxwi": 295,
358
- "uxwsxwi": 296,
359
- "ilx": 297,
360
- "adaaxhl": 298,
361
- "Ġhlguxwsxwi": 299,
362
- "Ġbagu": 300,
363
- "asinhl": 301,
364
- "Ġamhl": 302,
365
- "ĠHlaa": 303,
366
- "Ġguxws": 304,
367
- "Ġbasax": 305,
368
- ".\"": 306,
369
- "daa": 307,
370
- "ix": 308,
371
- "idaa": 309,
372
- "loohl": 310,
373
- "phl": 311,
374
- "pain": 312,
375
- "sx": 313,
376
- "yim": 314,
377
- "Ġxhl": 315,
378
- "aahl": 316,
379
- "aasx": 317,
380
- "aayim": 318,
381
- "ook": 319,
382
- "Ġhlag": 320,
383
- "Ġhlidaa": 321,
384
- "xwit": 322,
385
- "Ġdok": 323,
386
- "Ġdaayim": 324,
387
- "Ġyuxw": 325,
388
- "Ġaloohl": 326,
389
- "Ġbax": 327,
390
- "Ġbaasx": 328,
391
- "Ġligit": 329,
392
- "Ġjok": 330,
393
- "Ġsg": 331,
394
- "Ġsi": 332,
395
- "ĠSpain": 333,
396
- "nakwhl": 334,
397
- "Ġhehl": 335,
398
- "Ġhediit": 336,
399
- "diithl": 337,
400
- "witxwit": 338,
401
- "Ġjaphl": 339,
402
- "nithl": 340,
403
- "ytxwhl": 341,
404
- "Ġxhlii": 342,
405
- "Ġdaayimaahl": 343,
406
- "Ġyuxwdiithl": 344,
407
- "Ġbaasxi": 345,
408
- "Nakwhl": 346,
409
- "gwi": 347,
410
- "ukwhl": 348,
411
- "yukwhl": 349,
412
- "ĠAk": 350,
413
- "ĠAgwi": 351,
414
- "ĠAgwiyukwhl": 352,
415
- "BM": 353,
416
- "De": 354,
417
- "Gi": 355,
418
- "IBM": 356,
419
- "aw": 357,
420
- "ail": 358,
421
- "ce": 359,
422
- "ff": 360,
423
- "gee": 361,
424
- "it": 362,
425
- "iwaa": 363,
426
- "ice": 364,
427
- "jit": 365,
428
- "ljit": 366,
429
- "mar": 367,
430
- "mail": 368,
431
- "nmar": 369,
432
- "oxs": 370,
433
- "off": 371,
434
- "si": 372,
435
- "wan": 373,
436
- "way": 374,
437
- "yo": 375,
438
- "ĠDe": 376,
439
- "ĠGi": 377,
440
- "ĠIBM": 378,
441
- "Ġmail": 379,
442
- "Ġoff": 380,
443
- "niiwan": 381,
444
- "niiyo": 382,
445
- "xsiwaa": 383,
446
- "Ġsaw": 384,
447
- "nix": 385,
448
- "Ġwok": 386,
449
- "atdiit": 387,
450
- "̲.\"": 388,
451
- "oosun": 389,
452
- "ĠAp": 390,
453
- "Ġamxsiwaa": 391,
454
- "Ġaks": 392,
455
- "geenix": 393,
456
- "nmark": 394,
457
- "oxsxw": 395,
458
- "wayi": 396,
459
- "ĠDenmark": 397,
460
- "ĠGigeenix": 398,
461
- "Ġoffice": 399,
462
- "Ġsawatdiit": 400,
463
- "ytxw": 401
464
  },
465
  "merges": [
466
- "s t",
467
- "n d",
468
  "a r",
 
 
 
469
  "e nd",
470
  "Ġ <",
471
- "st ar",
472
- "star t",
473
- "h l",
474
- "Ì ²",
475
- "w i",
476
- "i i",
 
 
 
 
 
 
 
 
 
 
477
  "Ġ g",
478
- "a a",
479
- "o o",
480
- "Ġ n",
481
- "Ġ wi",
482
- "Ġ '",
483
- "Ġ ii",
484
- "a n",
485
- "Ġ y",
486
- "Ġ l",
487
- "I i",
488
- "Ġ Ii",
489
- "oo hl",
490
- "e e",
491
- "i m",
492
- "Ġwi l",
493
- "Ġ h",
494
- "w hl",
495
- "Ġ hl",
496
- "a g",
497
- "d ii",
498
- "n ii",
499
- "t s",
500
- "x wi",
501
- "Ġ d",
502
- "Ġh a",
503
- "u u",
504
- "Ġn ee",
505
- "x s",
506
- "Ġy u",
507
- "Ġ a",
508
- "i p",
509
- "k whl",
510
- "wi hl",
511
- "g i",
512
- "Ġ k",
513
- "x w",
514
- "' m",
515
- "Ġ xs",
516
- "Ġd im",
517
- "Ġnee dii",
518
- "i gi",
519
- "Ġ b",
520
- "Ġl igi",
521
- "Ġwil i",
522
- "d i",
523
- "Ġ j",
524
- "Ġ p",
525
- "Ġ t",
526
- "Ġwi hl",
527
- "s xwi",
528
  "Ġ s",
529
- "Ġy a",
530
  "i n",
531
- "Ġhl aa",
532
- "Ġn a",
533
- an",
534
- "a x",
535
- "a y",
536
- "a hl",
537
- "oo t",
538
- "n i",
539
- "o l",
540
- "Ġyu kwhl",
541
- "Ġn ii",
542
- "Ġn aa",
543
- "Ġwil p",
544
- "ip e",
545
- "Ġp ipe",
546
- "u xw",
547
- "ts hl",
548
- "Ġya tshl",
549
- "Ġ S",
550
- "n a",
551
- "hl i",
552
- "Ġ aa",
553
- "Ġneedii t",
554
- "Ġ \"",
555
- "̲ '",
556
- "i l",
557
  "Ġ w",
558
- "Ġy ee",
559
- "Ġl oot",
560
- "a t",
 
 
 
 
 
 
 
 
 
 
561
  "c k",
562
- "h ol",
563
- "k a",
564
- "l hl",
565
- "o ck",
566
- "t ock",
567
- "y a",
568
- "wi l",
569
- "Ġg ya",
570
- "Ġii n",
571
- "Ġl uu",
572
- "uu hl",
573
- "ĠS tock",
574
- "hol m",
575
- "ĠStock holm",
576
- "a d",
577
- "l s",
578
- "x u",
579
- "Ġ ts",
580
- "hl a",
581
- "Ġwi na",
582
- "Ġhl g",
583
- "Ġha hla",
584
- "uu t",
585
- "Ġb ag",
586
- "ay t",
587
- "Ġw ag",
588
- "ls di",
589
- "a s",
590
- "o k",
591
- "Ġh e",
592
- "dii t",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
593
  "a in",
594
- "wi t",
595
- "Ġxs a",
596
- "Ġxs i",
597
- "Ġj a",
598
- "ni t",
599
- "x hl",
600
- "x whl",
601
- "ii hli",
602
- "Ġg iihli",
603
- "Ġl ax",
604
- "a k",
605
- "̲ .",
606
- "ee k",
607
- "Ġa p",
608
- "Ġxs eek",
609
- "Ġj i",
610
- "Ġaa ts",
611
- "h ahl",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
612
  "u n",
613
- "w aa",
614
- "oo s",
615
- "Ġan hahl",
616
- "Ġanhahl a",
617
- "Ġ A",
618
- "Ġnee t",
619
- "Ġa m",
620
- "a kwhl",
621
- "Ġa k",
622
- "- -",
623
- "C an",
624
- "D im",
625
- "b i",
626
- "d a",
627
- "f l",
628
- "g waa",
629
- "i sxwi",
630
- "i ka",
631
- "j a",
632
- "k st",
633
- "l t",
634
- "l st",
635
- "n ag",
636
- "p ja",
637
- "r ain",
638
- "s ii",
639
- "s ka",
640
- "s gwaa",
641
- "u pja",
642
- "y t",
643
- "Ġ ag",
644
- "Ġ Can",
645
- "Ġ fl",
646
- "Ġ isxwi",
647
- "Ġ upja",
648
- "nd oos",
649
- "Ġg i",
650
- "Ġg wil",
651
- "Ġg uuhl",
652
- "aa hli",
653
- "oo di",
654
- "Ġn o",
655
- "an hl",
656
- "an wil",
657
- "an uut",
658
- "an ska",
659
- "Ġl ip",
660
- "im il",
661
- "nii g",
662
- "nii sgwaa",
663
- "Ġyu wi",
664
- "Ġa ndoos",
665
- "gi hl",
666
- "Ġk y",
667
- "di lhl",
668
- "Ġp ol",
669
- "Ġt un",
670
- "Ġt rain",
671
- "Ġs gihl",
672
- "Ġs dilhl",
673
- "Ġya lt",
674
- "in sxwi",
675
- "Ġna kst",
676
- "Ġan t",
677
- "Ġan sii",
678
- "ay oo",
679
- "uxw t",
680
- "Ġaa m",
681
- "ad anska",
682
- "Ġhlg u",
683
- "Ġxsa wi",
684
- "Ġja bi",
685
- "nag wit",
686
- "Ġag wihl",
687
- "ĠCan adanska",
688
- "Ġfl ika",
689
- "Ġgwil a",
690
- "aahli hl",
691
- "anwil at",
692
- "anuut xw",
693
- "Ġandoos da",
694
- "Ġpol e",
695
- "Ġyalt xu",
696
- "Ġansii p",
697
- "H l",
698
- "N ii",
699
- "O o",
700
- "n im",
701
- "w ahl",
702
- "y hl",
703
- "Ġ Hl",
704
- "Ġ Nii",
705
- "wi i",
706
- "Ġg uxw",
707
- "Ġg uut",
708
- "aa xhl",
709
- "Ġyu xwhl",
710
- "Ġk w",
711
- "Ġb as",
712
- "in hl",
713
- "oot xwi",
714
- "ni sxwi",
715
- "uxw sxwi",
716
- "il x",
717
- "ad aaxhl",
718
- "Ġhlg uxwsxwi",
719
- "Ġbag u",
720
- "as inhl",
721
- "Ġam hl",
722
- "ĠHl aa",
723
- "Ġguxw s",
724
- "Ġbas ax",
725
- ". \"",
726
- "d aa",
727
- "i x",
728
- "i daa",
729
- "l oohl",
730
- "p hl",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
731
  "p ain",
732
- "s x",
733
- "y im",
734
- "Ġ xhl",
735
- "aa hl",
736
- "aa sx",
737
- "aa yim",
738
- "oo k",
739
- "Ġhl ag",
740
- "Ġhl idaa",
741
- "xwi t",
742
- "Ġd ok",
743
- "Ġd aayim",
744
- "Ġyu xw",
745
- "Ġa loohl",
746
- "Ġb ax",
747
- "Ġb aasx",
748
- "Ġligi t",
749
- "Ġj ok",
750
- "Ġs g",
751
- "Ġs i",
752
  "ĠS pain",
753
- "na kwhl",
754
- "Ġhe hl",
755
- "Ġhe diit",
756
- "diit hl",
757
- "wit xwit",
758
- "Ġja phl",
759
- "nit hl",
760
- "yt xwhl",
761
- "Ġxhl ii",
762
- "Ġdaayim aahl",
763
- "Ġyuxw diithl",
764
- "Ġbaasx i",
765
- "N akwhl",
766
- "g wi",
767
- "u kwhl",
768
- "y ukwhl",
769
- "ĠA k",
770
- "ĠA gwi",
771
- "ĠAgwi yukwhl",
772
  "B M",
773
- "D e",
774
- "G i",
775
- "I BM",
776
- "a w",
777
- "a il",
778
  "c e",
 
 
779
  "f f",
780
- "g ee",
781
- "i t",
782
- "i waa",
783
  "i ce",
784
- "j it",
785
- "l jit",
786
  "m ar",
787
- "m ail",
788
- "n mar",
789
- "o xs",
790
- "o ff",
791
- "s i",
792
- "w an",
793
- "w ay",
794
- "y o",
795
- "Ġ De",
796
- "Ġ Gi",
797
- "Ġ IBM",
798
- "Ġ mail",
799
- "Ġ off",
800
- "nii wan",
801
- "nii yo",
802
- "xs iwaa",
803
- "Ġs aw",
804
- "ni x",
805
- "Ġw ok",
806
- "at diit",
807
- "̲. \"",
808
- "oos un",
809
- "ĠA p",
810
- "Ġam xsiwaa",
811
- "Ġak s",
812
- "gee nix",
813
- "nmar k",
814
- "oxs xw",
815
- "way i",
816
- "ĠDe nmark",
817
- "ĠGi geenix",
818
- "Ġoff ice",
819
- "Ġsaw atdiit",
820
- "yt xw"
821
  ]
822
  }
823
  }
 
62
  "<start>": 0,
63
  "<end>": 1,
64
  "<pad>": 2,
65
+ "-": 3,
66
+ ".": 4,
67
+ "1": 5,
68
+ "2": 6,
69
+ "3": 7,
70
  "<": 8,
71
  ">": 9,
72
  "A": 10,
73
  "B": 11,
74
  "C": 12,
75
  "D": 13,
76
+ "E": 14,
77
+ "F": 15,
78
+ "G": 16,
79
+ "I": 17,
80
+ "J": 18,
81
+ "L": 19,
82
+ "M": 20,
83
+ "N": 21,
84
+ "O": 22,
85
+ "P": 23,
86
+ "R": 24,
87
+ "S": 25,
88
+ "T": 26,
89
+ "U": 27,
90
+ "V": 28,
91
+ "W": 29,
92
+ "X": 30,
93
+ "Z": 31,
94
+ "a": 32,
95
+ "b": 33,
96
+ "c": 34,
97
+ "d": 35,
98
+ "e": 36,
99
+ "f": 37,
100
+ "g": 38,
101
+ "h": 39,
102
+ "i": 40,
103
+ "k": 41,
104
+ "l": 42,
105
+ "m": 43,
106
+ "n": 44,
107
+ "o": 45,
108
+ "p": 46,
109
+ "r": 47,
110
+ "s": 48,
111
+ "t": 49,
112
+ "u": 50,
113
+ "v": 51,
114
+ "w": 52,
115
+ "y": 53,
116
+ "Ġ": 54,
117
+ "ar": 55,
118
+ "nd": 56,
119
+ "st": 57,
120
+ "art": 58,
121
+ "end": 59,
122
+ "Ġ<": 60,
123
+ "start": 61,
124
+ "CN": 62,
125
+ "II": 63,
126
+ "ĠC": 64,
127
+ "CNJ": 65,
128
+ "ĠCCNJ": 66,
129
+ "SG": 67,
130
+ "ĠL": 68,
131
+ "OC": 69,
132
+ "ou": 70,
133
+ "ĠI": 71,
134
+ "on": 72,
135
+ "PL": 73,
136
+ "ĠP": 74,
137
+ "ĠLOC": 75,
138
+ "ho": 76,
139
+ "Ġg": 77,
140
+ "Ġs": 78,
141
+ "in": 79,
142
+ "AS": 80,
143
+ "Ġgo": 81,
144
+ "ĠN": 82,
145
+ "ĠD": 83,
146
+ "Ġw": 84,
147
+ "ĠPR": 85,
148
+ "Ġt": 86,
149
+ "ĠIN": 87,
150
+ "Ġ1": 88,
151
+ "Ġp": 89,
152
+ "SP": 90,
153
+ "MP": 91,
154
+ "OMP": 92,
155
+ "ĠCOMP": 93,
156
+ "Ġh": 94,
157
+ "TR": 95,
158
+ "EP": 96,
159
+ "le": 97,
160
+ "ck": 98,
161
+ "Ġl": 99,
162
+ "OSP": 100,
163
+ "ĠPROSP": 101,
164
+ "ke": 102,
165
+ "ll": 103,
166
+ "se": 104,
167
+ "ID": 105,
168
+ "re": 106,
169
+ "Ġar": 107,
170
+ "VB": 108,
171
+ "ĠLVB": 109,
172
+ "ay": 110,
173
+ "Ġin": 111,
174
+ "out": 112,
175
+ "CCNJ": 113,
176
+ "or": 114,
177
+ "CEP": 115,
178
+ "ĠINCEP": 116,
179
+ "EG": 117,
180
+ "FOC": 118,
181
+ "te": 119,
182
+ "ĠNEG": 120,
183
+ "ake": 121,
184
+ "ound": 122,
185
+ "Ġaround": 123,
186
+ "WID": 124,
187
+ "Ġf": 125,
188
+ "ĠDWID": 126,
189
+ "Ġon": 127,
190
+ "PAS": 128,
191
+ "me": 129,
192
+ "PASS": 130,
193
+ "an": 131,
194
+ "ee": 132,
195
+ "pe": 133,
196
+ "EAS": 134,
197
+ "REAS": 135,
198
+ "Ġb": 136,
199
+ "ĠREAS": 137,
200
+ "ouse": 138,
201
+ "Ġpi": 139,
202
+ "ĠS": 140,
203
+ "co": 141,
204
+ "LZ": 142,
205
+ "MLZ": 143,
206
+ "ĠNMLZ": 144,
207
+ "ork": 145,
208
+ "it": 146,
209
+ "Ġm": 147,
210
+ "BL": 148,
211
+ "FV": 149,
212
+ "OBL": 150,
213
+ "PFV": 151,
214
+ "ĠIPFV": 152,
215
+ "AN": 153,
216
+ "mp": 154,
217
+ "to": 155,
218
+ "Ġwho": 156,
219
+ "Ġhouse": 157,
220
+ "Ġpipe": 158,
221
+ "comp": 159,
222
+ "Ġhit": 160,
223
+ "ac": 161,
224
+ "ain": 162,
225
+ "all": 163,
226
+ "Ġtr": 164,
227
+ "ĠINS": 165,
228
+ "ear": 166,
229
+ "ow": 167,
230
+ "oll": 168,
231
+ "Ġtake": 169,
232
+ "Ġfoll": 170,
233
+ "Ġfollow": 171,
234
+ "RR": 172,
235
+ "Ġout": 173,
236
+ "AU": 174,
237
+ "ĠOBL": 175,
238
+ "AUS": 176,
239
+ "ri": 177,
240
+ "ong": 178,
241
+ "SX": 179,
242
+ "ack": 180,
243
+ "lm": 181,
244
+ "ve": 182,
245
+ "Ġo": 183,
246
+ "holm": 184,
247
+ "Ġsee": 185,
248
+ "ckholm": 186,
249
+ "ĠSto": 187,
250
+ "ĠStockholm": 188,
251
+ "CAUS": 189,
252
+ "IP": 190,
253
+ "TIP": 191,
254
+ "ly": 192,
255
+ "od": 193,
256
+ "par": 194,
257
+ "Ġcomp": 195,
258
+ "Ġgood": 196,
259
+ "Ġwork": 197,
260
+ "Ġth": 198,
261
+ "lete": 199,
262
+ "ANTIP": 200,
263
+ "Ġcomplete": 201,
264
+ "Ġcompletely": 202,
265
+ "ER": 203,
266
+ "VER": 204,
267
+ "ĠVER": 205,
268
+ "Ġsay": 206,
269
+ "DM": 207,
270
+ "IS": 208,
271
+ "he": 209,
272
+ "ir": 210,
273
+ "rn": 211,
274
+ "rt": 212,
275
+ "tu": 213,
276
+ "Ġ3": 214,
277
+ "III": 215,
278
+ "hort": 216,
279
+ "Ġshort": 217,
280
+ "Ġli": 218,
281
+ "Ġmake": 219,
282
+ "Ġtry": 220,
283
+ "turn": 221,
284
+ "ab": 222,
285
+ "un": 223,
286
+ "Ġlay": 224,
287
+ "able": 225,
288
+ "AR": 226,
289
+ "ca": 227,
290
+ "do": 228,
291
+ "way": 229,
292
+ "Ġdo": 230,
293
+ "ĠIRR": 231,
294
+ "ĠPAR": 232,
295
+ "SPT": 233,
296
+ "Ġdoor": 234,
297
+ "ĠPART": 235,
298
+ "DE": 236,
299
+ "OX": 237,
300
+ "PR": 238,
301
+ "work": 239,
302
+ "Ġre": 240,
303
+ "home": 241,
304
+ "Ġwh": 242,
305
+ "DEM": 243,
306
+ "PROX": 244,
307
+ "ate": 245,
308
+ "no": 246,
309
+ "so": 247,
310
+ "Ġno": 248,
311
+ "rive": 249,
312
+ "Ġnot": 250,
313
+ "Ġlong": 251,
314
+ "Ġlack": 252,
315
+ "PN": 253,
316
+ "AX": 254,
317
+ "EL": 255,
318
+ "EV": 256,
319
+ "IRR": 257,
320
+ "MAN": 258,
321
+ "ad": 259,
322
+ "ag": 260,
323
+ "at": 261,
324
+ "con": 262,
325
+ "de": 263,
326
+ "ike": 264,
327
+ "ian": 265,
328
+ "ide": 266,
329
+ "long": 267,
330
+ "like": 268,
331
+ "mall": 269,
332
+ "os": 270,
333
+ "ole": 271,
334
+ "pre": 272,
335
+ "pouse": 273,
336
+ "ros": 274,
337
+ "side": 275,
338
+ "tin": 276,
339
+ "uall": 277,
340
+ "year": 278,
341
+ "ĠCN": 279,
342
+ "Ġho": 280,
343
+ "Ġac": 281,
344
+ "ĠAX": 282,
345
+ "Ġcon": 283,
346
+ "Ġyear": 284,
347
+ "ĠCan": 285,
348
+ "ĠPCNJ": 286,
349
+ "Ġgir": 287,
350
+ "Ġsmall": 288,
351
+ "Ġspouse": 289,
352
+ "ĠDM": 290,
353
+ "ĠDIS": 291,
354
+ "Ġwee": 292,
355
+ "Ġpee": 293,
356
+ "Ġpole": 294,
357
+ "ree": 295,
358
+ "rest": 296,
359
+ "Ġinside": 297,
360
+ "any": 298,
361
+ "Ġpick": 299,
362
+ "ĠSEL": 300,
363
+ "Ġman": 301,
364
+ "company": 302,
365
+ "acros": 303,
366
+ "Ġtrain": 304,
367
+ "pare": 305,
368
+ "Ġthree": 306,
369
+ "heart": 307,
370
+ "Ġlie": 308,
371
+ "case": 309,
372
+ "Ġreturn": 310,
373
+ "Ġwhat": 311,
374
+ "EVID": 312,
375
+ "MANR": 313,
376
+ "adian": 314,
377
+ "again": 315,
378
+ "prepare": 316,
379
+ "tinuall": 317,
380
+ "ĠCNTR": 318,
381
+ "Ġhole": 319,
382
+ "Ġaccompany": 320,
383
+ "Ġcontinuall": 321,
384
+ "ĠCanadian": 322,
385
+ "Ġgirl": 323,
386
+ "ĠDISTR": 324,
387
+ "Ġweek": 325,
388
+ "ĠSELF": 326,
389
+ "across": 327,
390
+ "Ġcontinually": 328,
391
+ "ES": 329,
392
+ "ak": 330,
393
+ "eri": 331,
394
+ "epar": 332,
395
+ "gh": 333,
396
+ "ig": 334,
397
+ "ind": 335,
398
+ "Ġun": 336,
399
+ "ough": 337,
400
+ "Ġsepar": 338,
401
+ "ĠDES": 339,
402
+ "Ġperi": 340,
403
+ "Ġhear": 341,
404
+ "reak": 342,
405
+ "Ġarrive": 343,
406
+ "ter": 344,
407
+ "Ġfind": 345,
408
+ "Ġone": 346,
409
+ "meter": 347,
410
+ "Ġback": 348,
411
+ "Ġbig": 349,
412
+ "Ġbreak": 350,
413
+ "Ġoh": 351,
414
+ "Ġthough": 352,
415
+ "Ġunable": 353,
416
+ "Ġseparate": 354,
417
+ "Ġperimeter": 355,
418
+ "Ġthought": 356,
419
+ "fir": 357,
420
+ "ime": 358,
421
+ "lo": 359,
422
+ "pain": 360,
423
+ "run": 361,
424
+ "the": 362,
425
+ "time": 363,
426
+ "Ġco": 364,
427
+ "Ġall": 365,
428
+ "ĠSPT": 366,
429
+ "Ġrun": 367,
430
+ "ĠPREP": 368,
431
+ "EPIS": 369,
432
+ "Ġfear": 370,
433
+ "pen": 371,
434
+ "Ġblo": 372,
435
+ "ĠSpain": 373,
436
+ "Ġopen": 374,
437
+ "Ġlive": 375,
438
+ "first": 376,
439
+ "Ġcome": 377,
440
+ "Ġblock": 378,
441
+ "eca": 379,
442
+ "use": 380,
443
+ "Ġbeca": 381,
444
+ "Ġbecause": 382,
445
+ "AL": 383,
446
+ "BM": 384,
447
+ "VAL": 385,
448
+ "ai": 386,
449
+ "as": 387,
450
+ "ame": 388,
451
+ "ce": 389,
452
+ "en": 390,
453
+ "ep": 391,
454
+ "ff": 392,
455
+ "ite": 393,
456
+ "ice": 394,
457
+ "lac": 395,
458
+ "mar": 396,
459
+ "name": 397,
460
+ "prive": 398,
461
+ "rs": 399
 
 
462
  },
463
  "merges": [
 
 
464
  "a r",
465
+ "n d",
466
+ "s t",
467
+ "ar t",
468
  "e nd",
469
  "Ġ <",
470
+ "st art",
471
+ "C N",
472
+ "I I",
473
+ "Ġ C",
474
+ "CN J",
475
+ "ĠC CNJ",
476
+ "S G",
477
+ "Ġ L",
478
+ "O C",
479
+ "o u",
480
+ "Ġ I",
481
+ "o n",
482
+ "P L",
483
+ "Ġ P",
484
+ "ĠL OC",
485
+ "h o",
486
  "Ġ g",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
487
  "Ġ s",
 
488
  "i n",
489
+ "A S",
490
+ "Ġg o",
491
+ N",
492
+ "Ġ D",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
493
  "Ġ w",
494
+ "ĠP R",
495
+ "Ġ t",
496
+ "ĠI N",
497
+ "Ġ 1",
498
+ "Ġ p",
499
+ "S P",
500
+ "M P",
501
+ "O MP",
502
+ "ĠC OMP",
503
+ "Ġ h",
504
+ "T R",
505
+ "E P",
506
+ "l e",
507
  "c k",
508
+ "Ġ l",
509
+ "O SP",
510
+ "ĠPR OSP",
511
+ "k e",
512
+ "l l",
513
+ "s e",
514
+ "I D",
515
+ "r e",
516
+ "Ġ ar",
517
+ "V B",
518
+ "ĠL VB",
519
+ "a y",
520
+ "Ġ in",
521
+ "ou t",
522
+ "C CNJ",
523
+ "o r",
524
+ "C EP",
525
+ "ĠIN CEP",
526
+ "E G",
527
+ "F OC",
528
+ "t e",
529
+ "ĠN EG",
530
+ "a ke",
531
+ "ou nd",
532
+ "Ġar ound",
533
+ "W ID",
534
+ "Ġ f",
535
+ "ĠD WID",
536
+ "Ġ on",
537
+ "P AS",
538
+ "m e",
539
+ "PAS S",
540
+ "a n",
541
+ "e e",
542
+ "p e",
543
+ "E AS",
544
+ "R EAS",
545
+ "Ġ b",
546
+ "Ġ REAS",
547
+ "ou se",
548
+ "Ġp i",
549
+ "Ġ S",
550
+ "c o",
551
+ "L Z",
552
+ "M LZ",
553
+ "ĠN MLZ",
554
+ "or k",
555
+ "i t",
556
+ "Ġ m",
557
+ "B L",
558
+ "F V",
559
+ "O BL",
560
+ "P FV",
561
+ "ĠI PFV",
562
+ "A N",
563
+ "m p",
564
+ "t o",
565
+ "Ġw ho",
566
+ "Ġh ouse",
567
+ "Ġpi pe",
568
+ "co mp",
569
+ "Ġh it",
570
+ "a c",
571
  "a in",
572
+ "a ll",
573
+ "Ġt r",
574
+ "ĠIN S",
575
+ "e ar",
576
+ "o w",
577
+ "o ll",
578
+ "Ġt ake",
579
+ "Ġf oll",
580
+ "Ġfoll ow",
581
+ "R R",
582
+ "Ġ out",
583
+ "A U",
584
+ "Ġ OBL",
585
+ "AU S",
586
+ "r i",
587
+ "on g",
588
+ "S X",
589
+ "a ck",
590
+ "l m",
591
+ "v e",
592
+ "Ġ o",
593
+ "ho lm",
594
+ "Ġs ee",
595
+ "ck holm",
596
+ "ĠS to",
597
+ "ĠSto ckholm",
598
+ "C AUS",
599
+ "I P",
600
+ "T IP",
601
+ "l y",
602
+ "o d",
603
+ "p ar",
604
+ "Ġ comp",
605
+ "Ġgo od",
606
+ "Ġw ork",
607
+ "Ġt h",
608
+ "le te",
609
+ "AN TIP",
610
+ "Ġcomp lete",
611
+ "Ġcomplete ly",
612
+ "E R",
613
+ "V ER",
614
+ "Ġ VER",
615
+ "Ġs ay",
616
+ "D M",
617
+ "I S",
618
+ "h e",
619
+ "i r",
620
+ "r n",
621
+ "r t",
622
+ "t u",
623
+ "Ġ 3",
624
+ "II I",
625
+ "ho rt",
626
+ "Ġs hort",
627
+ "Ġl i",
628
+ "Ġm ake",
629
+ "Ġtr y",
630
+ "tu rn",
631
+ "a b",
632
  "u n",
633
+ "Ġl ay",
634
+ "ab le",
635
+ "A R",
636
+ "c a",
637
+ "d o",
638
+ "w ay",
639
+ "Ġ do",
640
+ "ĠI RR",
641
+ "ĠP AR",
642
+ "SP T",
643
+ "Ġdo or",
644
+ "ĠPAR T",
645
+ "D E",
646
+ "O X",
647
+ "P R",
648
+ "w ork",
649
+ "Ġ re",
650
+ "ho me",
651
+ "Ġw h",
652
+ "DE M",
653
+ "PR OX",
654
+ "a te",
655
+ "n o",
656
+ "s o",
657
+ "Ġ no",
658
+ "ri ve",
659
+ "Ġno t",
660
+ "Ġl ong",
661
+ "Ġl ack",
662
+ "P N",
663
+ "A X",
664
+ "E L",
665
+ "E V",
666
+ "I RR",
667
+ "M AN",
668
+ "a d",
669
+ "a g",
670
+ "a t",
671
+ "c on",
672
+ "d e",
673
+ "i ke",
674
+ "i an",
675
+ "i de",
676
+ "l ong",
677
+ "l ike",
678
+ "m all",
679
+ "o s",
680
+ "o le",
681
+ "p re",
682
+ "p ouse",
683
+ "r os",
684
+ "s ide",
685
+ "t in",
686
+ "u all",
687
+ "y ear",
688
+ "Ġ CN",
689
+ "Ġ ho",
690
+ "Ġ ac",
691
+ "Ġ AX",
692
+ "Ġ con",
693
+ "Ġ year",
694
+ "ĠC an",
695
+ "ĠP CNJ",
696
+ "Ġg ir",
697
+ "Ġs mall",
698
+ "Ġs pouse",
699
+ "ĠD M",
700
+ "ĠD IS",
701
+ "Ġw ee",
702
+ "Ġp ee",
703
+ "Ġp ole",
704
+ "re e",
705
+ "re st",
706
+ "Ġin side",
707
+ "an y",
708
+ "Ġpi ck",
709
+ "ĠS EL",
710
+ "Ġm an",
711
+ "comp any",
712
+ "ac ros",
713
+ "Ġtr ain",
714
+ "par e",
715
+ "Ġth ree",
716
+ "he art",
717
+ "Ġli e",
718
+ "ca se",
719
+ "Ġre turn",
720
+ "Ġwh at",
721
+ "EV ID",
722
+ "MAN R",
723
+ "ad ian",
724
+ "ag ain",
725
+ "pre pare",
726
+ "tin uall",
727
+ "ĠCN TR",
728
+ "Ġho le",
729
+ "Ġac company",
730
+ "Ġcon tinuall",
731
+ "ĠCan adian",
732
+ "Ġgir l",
733
+ "ĠDIS TR",
734
+ "Ġwee k",
735
+ "ĠSEL F",
736
+ "acros s",
737
+ "Ġcontinuall y",
738
+ "E S",
739
+ "a k",
740
+ "e ri",
741
+ "e par",
742
+ "g h",
743
+ "i g",
744
+ "i nd",
745
+ "Ġ un",
746
+ "ou gh",
747
+ "Ġs epar",
748
+ "ĠD ES",
749
+ "Ġp eri",
750
+ "Ġh ear",
751
+ "re ak",
752
+ "Ġar rive",
753
+ "te r",
754
+ "Ġf ind",
755
+ "Ġon e",
756
+ "me ter",
757
+ "Ġb ack",
758
+ "Ġb ig",
759
+ "Ġb reak",
760
+ "Ġo h",
761
+ "Ġth ough",
762
+ "Ġun able",
763
+ "Ġsepar ate",
764
+ "Ġperi meter",
765
+ "Ġthough t",
766
+ "f ir",
767
+ "i me",
768
+ "l o",
769
  "p ain",
770
+ "r un",
771
+ "t he",
772
+ "t ime",
773
+ "Ġ co",
774
+ "Ġ all",
775
+ "Ġ SPT",
776
+ "Ġ run",
777
+ "ĠPR EP",
778
+ "EP IS",
779
+ "Ġf ear",
780
+ "pe n",
781
+ "Ġb lo",
 
 
 
 
 
 
 
 
782
  "ĠS pain",
783
+ "Ġo pen",
784
+ "Ġli ve",
785
+ "fir st",
786
+ "Ġco me",
787
+ "Ġblo ck",
788
+ "e ca",
789
+ "u se",
790
+ "Ġb eca",
791
+ "Ġbeca use",
792
+ "A L",
 
 
 
 
 
 
 
 
 
793
  "B M",
794
+ "V AL",
795
+ "a i",
796
+ "a s",
797
+ "a me",
 
798
  "c e",
799
+ "e n",
800
+ "e p",
801
  "f f",
802
+ "i te",
 
 
803
  "i ce",
804
+ "l ac",
 
805
  "m ar",
806
+ "n ame",
807
+ "p rive",
808
+ "r s"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
809
  ]
810
  }
811
  }