update tokenizer for compatibility with new transformers

#64
Files changed (1) hide show
  1. tokenization_chatglm.py +3 -0
tokenization_chatglm.py CHANGED
@@ -271,6 +271,8 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
271
  padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
272
  pad_to_multiple_of: Optional[int] = None,
273
  return_attention_mask: Optional[bool] = None,
 
 
274
  ) -> dict:
275
  """
276
  Pad encoded inputs (on left/right and up to predefined length or max length in the batch)
@@ -297,6 +299,7 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
297
  """
298
  # Load from model defaults
299
  assert self.padding_side == "left"
 
300
 
301
  required_input = encoded_inputs[self.model_input_names[0]]
302
  seq_length = len(required_input)
 
271
  padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
272
  pad_to_multiple_of: Optional[int] = None,
273
  return_attention_mask: Optional[bool] = None,
274
+ padding_side: Optional[str] = None,
275
+ **kwargs
276
  ) -> dict:
277
  """
278
  Pad encoded inputs (on left/right and up to predefined length or max length in the batch)
 
299
  """
300
  # Load from model defaults
301
  assert self.padding_side == "left"
302
+ assert padding_side is None or padding_side == "left"
303
 
304
  required_input = encoded_inputs[self.model_input_names[0]]
305
  seq_length = len(required_input)