RonanMcGovern committed on
Commit
8ae563a
·
1 Parent(s): 3f46a53

Upload tokenizer

Browse files
special_tokens_map.json CHANGED
@@ -12,5 +12,12 @@
12
  ">>SUFFIX<<",
13
  ">>MIDDLE<<"
14
  ],
15
- "eos_token": "<|endoftext|>"
 
 
 
 
 
 
 
16
  }
 
12
  ">>SUFFIX<<",
13
  ">>MIDDLE<<"
14
  ],
15
+ "eos_token": "<|endoftext|>",
16
+ "pad_token": {
17
+ "content": "<pad>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
  }
tokenizer.json CHANGED
@@ -110,6 +110,15 @@
110
  "rstrip": false,
111
  "normalized": false,
112
  "special": true
 
 
 
 
 
 
 
 
 
113
  }
114
  ],
115
  "normalizer": null,
 
110
  "rstrip": false,
111
  "normalized": false,
112
  "special": true
113
+ },
114
+ {
115
+ "id": 65024,
116
+ "content": "<pad>",
117
+ "single_word": false,
118
+ "lstrip": false,
119
+ "rstrip": false,
120
+ "normalized": false,
121
+ "special": true
122
  }
123
  ],
124
  "normalizer": null,
tokenizer_config.json CHANGED
@@ -96,6 +96,14 @@
96
  "rstrip": false,
97
  "single_word": false,
98
  "special": true
 
 
 
 
 
 
 
 
99
  }
100
  },
101
  "additional_special_tokens": [
@@ -117,6 +125,7 @@
117
  "input_ids",
118
  "attention_mask"
119
  ],
120
- "model_max_length": 2048,
 
121
  "tokenizer_class": "PreTrainedTokenizerFast"
122
  }
 
96
  "rstrip": false,
97
  "single_word": false,
98
  "special": true
99
+ },
100
+ "65024": {
101
+ "content": "<pad>",
102
+ "lstrip": false,
103
+ "normalized": false,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": true
107
  }
108
  },
109
  "additional_special_tokens": [
 
125
  "input_ids",
126
  "attention_mask"
127
  ],
128
+ "model_max_length": 4096,
129
+ "pad_token": "<pad>",
130
  "tokenizer_class": "PreTrainedTokenizerFast"
131
  }