sumanthd committed on
Commit
f2c149c
1 Parent(s): 57cb11b

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer.json +63 -1
tokenizer.json CHANGED
@@ -952,7 +952,69 @@
952
  "replacement": "▁",
953
  "add_prefix_space": true
954
  },
955
- "post_processor": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
956
  "decoder": {
957
  "type": "Metaspace",
958
  "replacement": "▁",
 
952
  "replacement": "▁",
953
  "add_prefix_space": true
954
  },
955
+ "post_processor": {
956
+ "type": "TemplateProcessing",
957
+ "single": [
958
+ {
959
+ "Sequence": {
960
+ "id": "A",
961
+ "type_id": 0
962
+ }
963
+ },
964
+ {
965
+ "SpecialToken": {
966
+ "id": "</s>",
967
+ "type_id": 0
968
+ }
969
+ }
970
+ ],
971
+ "pair": [
972
+ {
973
+ "Sequence": {
974
+ "id": "A",
975
+ "type_id": 0
976
+ }
977
+ },
978
+ {
979
+ "SpecialToken": {
980
+ "id": "<extra_id_98>",
981
+ "type_id": 0
982
+ }
983
+ },
984
+ {
985
+ "Sequence": {
986
+ "id": "B",
987
+ "type_id": 1
988
+ }
989
+ },
990
+ {
991
+ "SpecialToken": {
992
+ "id": "</s>",
993
+ "type_id": 1
994
+ }
995
+ }
996
+ ],
997
+ "special_tokens": {
998
+ "</s>": {
999
+ "id": "</s>",
1000
+ "ids": [
1001
+ 1
1002
+ ],
1003
+ "tokens": [
1004
+ "</s>"
1005
+ ]
1006
+ },
1007
+ "<extra_id_98>": {
1008
+ "id": "<extra_id_98>",
1009
+ "ids": [
1010
+ 128001
1011
+ ],
1012
+ "tokens": [
1013
+ "<extra_id_98>"
1014
+ ]
1015
+ }
1016
+ }
1017
+ },
1018
  "decoder": {
1019
  "type": "Metaspace",
1020
  "replacement": "▁",