Upload 5 files
#1
by
Xenova
HF staff
- opened
- tokenizer.json +18 -10
tokenizer.json
CHANGED
@@ -51,17 +51,25 @@
|
|
51 |
],
|
52 |
"normalizer": null,
|
53 |
"pre_tokenizer": {
|
54 |
-
"type": "
|
55 |
-
"
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
},
|
|
|
65 |
"decoder": {
|
66 |
"type": "ByteLevel",
|
67 |
"add_prefix_space": true,
|
|
|
51 |     ],
52 |     "normalizer": null,
53 |     "pre_tokenizer": {
54 | +     "type": "Sequence",
55 | +     "pretokenizers": [
56 | +       {
57 | +         "type": "Split",
58 | +         "pattern": {
59 | +           "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
60 | +         },
61 | +         "behavior": "Removed",
62 | +         "invert": true
63 | +       },
64 | +       {
65 | +         "type": "ByteLevel",
66 | +         "add_prefix_space": false,
67 | +         "trim_offsets": true,
68 | +         "use_regex": false
69 | +       }
70 | +     ]
71 |     },
72 | +   "post_processor": null,
73 |     "decoder": {
74 |       "type": "ByteLevel",
75 |       "add_prefix_space": true,