MRNH committed on
Commit
20531d6
1 Parent(s): e42e46b

Upload tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +29 -10
  2. tokenizer.json +1 -1
  3. tokenizer_config.json +4 -3
special_tokens_map.json CHANGED
@@ -1,8 +1,33 @@
1
  {
2
  "additional_special_tokens": [
3
- "<question>",
4
- "<answer>",
5
- "<context>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  ],
7
  "bos_token": {
8
  "content": "<s>",
@@ -18,13 +43,7 @@
18
  "rstrip": false,
19
  "single_word": false
20
  },
21
- "pad_token": {
22
- "content": "</s>",
23
- "lstrip": false,
24
- "normalized": true,
25
- "rstrip": false,
26
- "single_word": false
27
- },
28
  "unk_token": {
29
  "content": "<unk>",
30
  "lstrip": false,
 
1
  {
2
  "additional_special_tokens": [
3
+ {
4
+ "content": "</context>",
5
+ "lstrip": true,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "<stopTMP>",
12
+ "lstrip": true,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
+ {
18
+ "content": "</question>",
19
+ "lstrip": true,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ {
25
+ "content": "</answer>",
26
+ "lstrip": true,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
  ],
32
  "bos_token": {
33
  "content": "<s>",
 
43
  "rstrip": false,
44
  "single_word": false
45
  },
46
+ "pad_token": "</s>",
 
 
 
 
 
 
47
  "unk_token": {
48
  "content": "<unk>",
49
  "lstrip": false,
tokenizer.json CHANGED
@@ -2,7 +2,7 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 600,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 500,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
tokenizer_config.json CHANGED
@@ -21330,9 +21330,10 @@
21330
  }
21331
  },
21332
  "additional_special_tokens": [
21333
- "<question>",
21334
- "<answer>",
21335
- "<context>"
 
21336
  ],
21337
  "bos_token": "<s>",
21338
  "clean_up_tokenization_spaces": false,
 
21330
  }
21331
  },
21332
  "additional_special_tokens": [
21333
+ "</context>",
21334
+ "<stopTMP>",
21335
+ "</question>",
21336
+ "</answer>"
21337
  ],
21338
  "bos_token": "<s>",
21339
  "clean_up_tokenization_spaces": false,