update tokenizer for compatibility with new transformers
#64
by katuni4ka · opened

tokenization_chatglm.py (+3 -0) CHANGED
```diff
@@ -271,6 +271,8 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
         padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
         pad_to_multiple_of: Optional[int] = None,
         return_attention_mask: Optional[bool] = None,
+        padding_side: Optional[bool] = None,
+        **kwargs
     ) -> dict:
         """
         Pad encoded inputs (on left/right and up to predefined length or max length in the batch)
@@ -297,6 +299,7 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
         """
         # Load from model defaults
         assert self.padding_side == "left"
+        assert padding_side is None or padding_side == "left"
 
         required_input = encoded_inputs[self.model_input_names[0]]
         seq_length = len(required_input)
```
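For context: recent transformers releases forward a `padding_side` keyword from `PreTrainedTokenizerBase.pad()` into the tokenizer's `_pad()` method, so a remote-code tokenizer whose `_pad()` override lacks that parameter fails with a `TypeError`. Accepting the argument (plus `**kwargs` for any future additions) and asserting it is `None` or `"left"` keeps the tokenizer working on both old and new versions. A minimal sketch of the affected call path follows; the checkpoint name is a placeholder for whichever ChatGLM repo this PR targets, not something stated in the diff.

```python
# Minimal sketch of the incompatibility this PR addresses, assuming a recent
# transformers release whose pad() forwards padding_side into _pad().
from transformers import AutoTokenizer

# Placeholder checkpoint name; substitute the repo this PR was opened against.
tok = AutoTokenizer.from_pretrained("THUDM/chatglm3-6b", trust_remote_code=True)

# Without the patch, batched padding crashes on newer transformers with:
#   TypeError: _pad() got an unexpected keyword argument 'padding_side'
# With the extra parameter in the signature, the call goes through, and the
# added assert still restricts the tokenizer to left padding (padding_side is
# a string such as "left" at runtime; the annotation is not enforced).
batch = tok(["hello", "a somewhat longer example"], padding=True)
print(batch["input_ids"])
```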