emrgnt-cmplxty committed on
Commit
5fedfd8
1 Parent(s): 0ab3c12

Upload tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +1 -2
  2. tokenizer.json +12 -12
  3. tokenizer_config.json +13 -16
special_tokens_map.json CHANGED
@@ -17,8 +17,7 @@
17
  "[Irrelevant]",
18
  "[Utility:4]",
19
  "</paragraph>",
20
- "[Fully supported]",
21
- "<pad>"
22
  ],
23
  "bos_token": "<s>",
24
  "eos_token": "</s>",
 
17
  "[Irrelevant]",
18
  "[Utility:4]",
19
  "</paragraph>",
20
+ "[Fully supported]"
 
21
  ],
22
  "bos_token": "<s>",
23
  "eos_token": "</s>",
tokenizer.json CHANGED
@@ -7,27 +7,27 @@
7
  "id": 0,
8
  "content": "<unk>",
9
  "single_word": false,
10
- "lstrip": false,
11
- "rstrip": false,
12
- "normalized": true,
13
  "special": true
14
  },
15
  {
16
  "id": 1,
17
  "content": "<s>",
18
  "single_word": false,
19
- "lstrip": false,
20
- "rstrip": false,
21
- "normalized": true,
22
  "special": true
23
  },
24
  {
25
  "id": 2,
26
  "content": "</s>",
27
  "single_word": false,
28
- "lstrip": false,
29
- "rstrip": false,
30
- "normalized": true,
31
  "special": true
32
  },
33
  {
@@ -169,9 +169,9 @@
169
  "id": 32015,
170
  "content": "<pad>",
171
  "single_word": false,
172
- "lstrip": false,
173
- "rstrip": false,
174
- "normalized": true,
175
  "special": true
176
  }
177
  ],
 
7
  "id": 0,
8
  "content": "<unk>",
9
  "single_word": false,
10
+ "lstrip": true,
11
+ "rstrip": true,
12
+ "normalized": false,
13
  "special": true
14
  },
15
  {
16
  "id": 1,
17
  "content": "<s>",
18
  "single_word": false,
19
+ "lstrip": true,
20
+ "rstrip": true,
21
+ "normalized": false,
22
  "special": true
23
  },
24
  {
25
  "id": 2,
26
  "content": "</s>",
27
  "single_word": false,
28
+ "lstrip": true,
29
+ "rstrip": true,
30
+ "normalized": false,
31
  "special": true
32
  },
33
  {
 
169
  "id": 32015,
170
  "content": "<pad>",
171
  "single_word": false,
172
+ "lstrip": true,
173
+ "rstrip": true,
174
+ "normalized": false,
175
  "special": true
176
  }
177
  ],
tokenizer_config.json CHANGED
@@ -1,28 +1,26 @@
1
  {
2
- "add_bos_token": true,
3
- "add_eos_token": false,
4
  "added_tokens_decoder": {
5
  "0": {
6
  "content": "<unk>",
7
- "lstrip": false,
8
- "normalized": true,
9
- "rstrip": false,
10
  "single_word": false,
11
  "special": true
12
  },
13
  "1": {
14
  "content": "<s>",
15
- "lstrip": false,
16
- "normalized": true,
17
- "rstrip": false,
18
  "single_word": false,
19
  "special": true
20
  },
21
  "2": {
22
  "content": "</s>",
23
- "lstrip": false,
24
- "normalized": true,
25
- "rstrip": false,
26
  "single_word": false,
27
  "special": true
28
  },
@@ -148,9 +146,9 @@
148
  },
149
  "32015": {
150
  "content": "<pad>",
151
- "lstrip": false,
152
- "normalized": true,
153
- "rstrip": false,
154
  "single_word": false,
155
  "special": true
156
  }
@@ -173,8 +171,7 @@
173
  "[Irrelevant]",
174
  "[Utility:4]",
175
  "</paragraph>",
176
- "[Fully supported]",
177
- "<pad>"
178
  ],
179
  "bos_token": "<s>",
180
  "clean_up_tokenization_spaces": false,
 
1
  {
 
 
2
  "added_tokens_decoder": {
3
  "0": {
4
  "content": "<unk>",
5
+ "lstrip": true,
6
+ "normalized": false,
7
+ "rstrip": true,
8
  "single_word": false,
9
  "special": true
10
  },
11
  "1": {
12
  "content": "<s>",
13
+ "lstrip": true,
14
+ "normalized": false,
15
+ "rstrip": true,
16
  "single_word": false,
17
  "special": true
18
  },
19
  "2": {
20
  "content": "</s>",
21
+ "lstrip": true,
22
+ "normalized": false,
23
+ "rstrip": true,
24
  "single_word": false,
25
  "special": true
26
  },
 
146
  },
147
  "32015": {
148
  "content": "<pad>",
149
+ "lstrip": true,
150
+ "normalized": false,
151
+ "rstrip": true,
152
  "single_word": false,
153
  "special": true
154
  }
 
171
  "[Irrelevant]",
172
  "[Utility:4]",
173
  "</paragraph>",
174
+ "[Fully supported]"
 
175
  ],
176
  "bos_token": "<s>",
177
  "clean_up_tokenization_spaces": false,