Prevent tokenizer calls from returning `token_type_ids`
Browse files
Hi there 👋 `transformers` maintainer here.
Adding the `model_input_names` field to the tokenizer config prevents the tokenizer from returning `token_type_ids`, which are unused by `SeedOss` 🤗. The default value for this field is `["input_ids", "token_type_ids", "attention_mask"]`.
- tokenizer_config.json +4 -0
tokenizer_config.json
CHANGED
|
@@ -1029,6 +1029,10 @@
|
|
| 1029 |
"clean_up_tokenization_spaces": false,
|
| 1030 |
"eos_token": "<seed:eos>",
|
| 1031 |
"extra_special_tokens": {},
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1032 |
"model_max_length": 1000000000000000019884624838656,
|
| 1033 |
"pad_token": "<seed:pad>",
|
| 1034 |
"tokenizer_class": "PreTrainedTokenizerFast"
|
|
|
|
| 1029 |
"clean_up_tokenization_spaces": false,
|
| 1030 |
"eos_token": "<seed:eos>",
|
| 1031 |
"extra_special_tokens": {},
|
| 1032 |
+
"model_input_names": [
|
| 1033 |
+
"input_ids",
|
| 1034 |
+
"attention_mask"
|
| 1035 |
+
],
|
| 1036 |
"model_max_length": 1000000000000000019884624838656,
|
| 1037 |
"pad_token": "<seed:pad>",
|
| 1038 |
"tokenizer_class": "PreTrainedTokenizerFast"
|