intfloat bryant1410 committed on
Commit
dca8b1a
1 Parent(s): 93da57d

Make the tokenizer config match that of the large and base versions (#11)

Browse files

- Make the tokenizer config match that of the large and base versions (f9611f088d69fc3157ff1878217feee72bda0145)


Co-authored-by: Santiago Castro <bryant1410@users.noreply.huggingface.co>

Files changed (1) hide show
  1. tokenizer_config.json +1 -3
tokenizer_config.json CHANGED
@@ -1,11 +1,9 @@
1
  {
2
  "clean_up_tokenization_spaces": true,
3
  "cls_token": "[CLS]",
4
- "do_basic_tokenize": true,
5
  "do_lower_case": true,
6
  "mask_token": "[MASK]",
7
- "model_max_length": 1000000000000000019884624838656,
8
- "never_split": null,
9
  "pad_token": "[PAD]",
10
  "sep_token": "[SEP]",
11
  "strip_accents": null,
 
1
  {
2
  "clean_up_tokenization_spaces": true,
3
  "cls_token": "[CLS]",
 
4
  "do_lower_case": true,
5
  "mask_token": "[MASK]",
6
+ "model_max_length": 512,
 
7
  "pad_token": "[PAD]",
8
  "sep_token": "[SEP]",
9
  "strip_accents": null,