michelleyunun committed on
Commit
802ec7e
1 Parent(s): 4b4f58c

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer.json +52 -2
tokenizer.json CHANGED
@@ -458,7 +458,32 @@
458
  "mar": 396,
459
  "name": 397,
460
  "prive": 398,
461
- "rs": 399
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
462
  },
463
  "merges": [
464
  "a r",
@@ -805,7 +830,32 @@
805
  "m ar",
806
  "n ame",
807
  "p rive",
808
- "r s"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
809
  ]
810
  }
811
  }
 
458
  "mar": 396,
459
  "name": 397,
460
  "prive": 398,
461
+ "rs": 399,
462
+ "sit": 400,
463
+ "uprive": 401,
464
+ "was": 402,
465
+ "ĠCAUS": 403,
466
+ "ĠIBM": 404,
467
+ "Ġsle": 405,
468
+ "ĠDen": 406,
469
+ "Ġwate": 407,
470
+ "Ġplac": 408,
471
+ "pers": 409,
472
+ "Ġmai": 410,
473
+ "ach": 411,
474
+ "Ġoff": 412,
475
+ "Ġreach": 413,
476
+ "Ġwhite": 414,
477
+ "mark": 415,
478
+ "upriver": 416,
479
+ "wash": 417,
480
+ "Ġsleep": 418,
481
+ "ĠDenmark": 419,
482
+ "Ġwater": 420,
483
+ "Ġplace": 421,
484
+ "person": 422,
485
+ "Ġmail": 423,
486
+ "Ġoffice": 424
487
  },
488
  "merges": [
489
  "a r",
 
830
  "m ar",
831
  "n ame",
832
  "p rive",
833
+ "r s",
834
+ "s it",
835
+ "u prive",
836
+ "w as",
837
+ "ĠC AUS",
838
+ "ĠI BM",
839
+ "Ġs le",
840
+ "ĠD en",
841
+ "Ġw ate",
842
+ "Ġp lac",
843
+ "pe rs",
844
+ "Ġm ai",
845
+ "ac h",
846
+ "Ġo ff",
847
+ "Ġre ach",
848
+ "Ġwh ite",
849
+ "mar k",
850
+ "uprive r",
851
+ "was h",
852
+ "Ġsle ep",
853
+ "ĠDen mark",
854
+ "Ġwate r",
855
+ "Ġplac e",
856
+ "pers on",
857
+ "Ġmai l",
858
+ "Ġoff ice"
859
  ]
860
  }
861
  }