Commit
·
b8d9e14
1
Parent(s):
dacea24
tokenizer improved
Browse files
- tokenizer.json +8 -2
tokenizer.json
CHANGED
@@ -49,7 +49,13 @@
|
|
49 |
"special": true
|
50 |
}
|
51 |
],
|
52 |
-
"normalizer": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
"pre_tokenizer": {
|
54 |
"type": "ByteLevel",
|
55 |
"add_prefix_space": false,
|
@@ -100352,4 +100358,4 @@
|
|
100352 |
"Ġg azed"
|
100353 |
]
|
100354 |
}
|
100355 |
-
}
|
|
|
49 |
"special": true
|
50 |
}
|
51 |
],
|
52 |
+
"normalizer": {
|
53 |
+
"type": "Replace",
|
54 |
+
"pattern": {
|
55 |
+
"String": "\u00a0"
|
56 |
+
},
|
57 |
+
"content": " "
|
58 |
+
},
|
59 |
"pre_tokenizer": {
|
60 |
"type": "ByteLevel",
|
61 |
"add_prefix_space": false,
|
|
|
100358 |
"Ġg azed"
|
100359 |
]
|
100360 |
}
|
100361 |
+
}
|