Upload tokenizer
Browse files- tokenizer.json +63 -1
tokenizer.json
CHANGED
@@ -952,7 +952,69 @@
|
|
952 |
"replacement": "▁",
|
953 |
"add_prefix_space": true
|
954 |
},
|
955 |
-
"post_processor":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
956 |
"decoder": {
|
957 |
"type": "Metaspace",
|
958 |
"replacement": "▁",
|
|
|
952 |
"replacement": "▁",
|
953 |
"add_prefix_space": true
|
954 |
},
|
955 |
+
"post_processor": {
|
956 |
+
"type": "TemplateProcessing",
|
957 |
+
"single": [
|
958 |
+
{
|
959 |
+
"Sequence": {
|
960 |
+
"id": "A",
|
961 |
+
"type_id": 0
|
962 |
+
}
|
963 |
+
},
|
964 |
+
{
|
965 |
+
"SpecialToken": {
|
966 |
+
"id": "</s>",
|
967 |
+
"type_id": 0
|
968 |
+
}
|
969 |
+
}
|
970 |
+
],
|
971 |
+
"pair": [
|
972 |
+
{
|
973 |
+
"Sequence": {
|
974 |
+
"id": "A",
|
975 |
+
"type_id": 0
|
976 |
+
}
|
977 |
+
},
|
978 |
+
{
|
979 |
+
"SpecialToken": {
|
980 |
+
"id": "<extra_id_98>",
|
981 |
+
"type_id": 0
|
982 |
+
}
|
983 |
+
},
|
984 |
+
{
|
985 |
+
"Sequence": {
|
986 |
+
"id": "B",
|
987 |
+
"type_id": 1
|
988 |
+
}
|
989 |
+
},
|
990 |
+
{
|
991 |
+
"SpecialToken": {
|
992 |
+
"id": "</s>",
|
993 |
+
"type_id": 1
|
994 |
+
}
|
995 |
+
}
|
996 |
+
],
|
997 |
+
"special_tokens": {
|
998 |
+
"</s>": {
|
999 |
+
"id": "</s>",
|
1000 |
+
"ids": [
|
1001 |
+
1
|
1002 |
+
],
|
1003 |
+
"tokens": [
|
1004 |
+
"</s>"
|
1005 |
+
]
|
1006 |
+
},
|
1007 |
+
"<extra_id_98>": {
|
1008 |
+
"id": "<extra_id_98>",
|
1009 |
+
"ids": [
|
1010 |
+
128001
|
1011 |
+
],
|
1012 |
+
"tokens": [
|
1013 |
+
"<extra_id_98>"
|
1014 |
+
]
|
1015 |
+
}
|
1016 |
+
}
|
1017 |
+
},
|
1018 |
"decoder": {
|
1019 |
"type": "Metaspace",
|
1020 |
"replacement": "▁",
|