rinabuoy committed on
Commit
39cd040
1 Parent(s): 25ff3e1

Upload tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +2 -0
  2. tokenizer.json +18 -0
  3. tokenizer_config.json +18 -0
special_tokens_map.json CHANGED
@@ -1,7 +1,9 @@
1
  {
2
  "bos_token": "<s>",
 
3
  "eos_token": "</s>",
4
  "mask_token": "<mask>",
5
  "pad_token": "<pad>",
 
6
  "unk_token": "<unk>"
7
  }
 
1
  {
2
  "bos_token": "<s>",
3
+ "cls_token": "<cls>",
4
  "eos_token": "</s>",
5
  "mask_token": "<mask>",
6
  "pad_token": "<pad>",
7
+ "sep_token": "<sep>",
8
  "unk_token": "<unk>"
9
  }
tokenizer.json CHANGED
@@ -47,6 +47,24 @@
47
  "rstrip": false,
48
  "normalized": false,
49
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  }
51
  ],
52
  "normalizer": null,
 
47
  "rstrip": false,
48
  "normalized": false,
49
  "special": true
50
+ },
51
+ {
52
+ "id": 2894,
53
+ "content": "<sep>",
54
+ "single_word": false,
55
+ "lstrip": false,
56
+ "rstrip": false,
57
+ "normalized": false,
58
+ "special": true
59
+ },
60
+ {
61
+ "id": 2895,
62
+ "content": "<cls>",
63
+ "single_word": false,
64
+ "lstrip": false,
65
+ "rstrip": false,
66
+ "normalized": false,
67
+ "special": true
68
  }
69
  ],
70
  "normalizer": null,
tokenizer_config.json CHANGED
@@ -39,15 +39,33 @@
39
  "rstrip": false,
40
  "single_word": false,
41
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  }
43
  },
44
  "bos_token": "<s>",
45
  "clean_up_tokenization_spaces": false,
 
46
  "eos_token": "</s>",
47
  "mask_token": "<mask>",
48
  "model_max_length": 1000000000000000019884624838656,
49
  "pad_token": "<pad>",
50
  "padding_side": "right",
 
51
  "tokenizer_class": "PreTrainedTokenizerFast",
52
  "truncation_side": "right",
53
  "unk_token": "<unk>"
 
39
  "rstrip": false,
40
  "single_word": false,
41
  "special": true
42
+ },
43
+ "2894": {
44
+ "content": "<sep>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "2895": {
52
+ "content": "<cls>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
  }
59
  },
60
  "bos_token": "<s>",
61
  "clean_up_tokenization_spaces": false,
62
+ "cls_token": "<cls>",
63
  "eos_token": "</s>",
64
  "mask_token": "<mask>",
65
  "model_max_length": 1000000000000000019884624838656,
66
  "pad_token": "<pad>",
67
  "padding_side": "right",
68
+ "sep_token": "<sep>",
69
  "tokenizer_class": "PreTrainedTokenizerFast",
70
  "truncation_side": "right",
71
  "unk_token": "<unk>"