KoichiYasuoka committed on
Commit
b8d9e14
·
1 Parent(s): dacea24

tokenizer improved

Browse files
Files changed (1) hide show
  1. tokenizer.json +8 -2
tokenizer.json CHANGED
@@ -49,7 +49,13 @@
49
  "special": true
50
  }
51
  ],
52
- "normalizer": null,
 
 
 
 
 
 
53
  "pre_tokenizer": {
54
  "type": "ByteLevel",
55
  "add_prefix_space": false,
@@ -100352,4 +100358,4 @@
100352
  "Ġg azed"
100353
  ]
100354
  }
100355
- }
 
49
  "special": true
50
  }
51
  ],
52
+ "normalizer": {
53
+ "type": "Replace",
54
+ "pattern": {
55
+ "String": "\u00a0"
56
+ },
57
+ "content": " "
58
+ },
59
  "pre_tokenizer": {
60
  "type": "ByteLevel",
61
  "add_prefix_space": false,
 
100358
  "Ġg azed"
100359
  ]
100360
  }
100361
+ }