katuni4ka committed on
Commit
3418503
1 Parent(s): c2354b9

Update tokenization_chatglm.py

Browse files
Files changed (1) hide show
  1. tokenization_chatglm.py +3 -0
tokenization_chatglm.py CHANGED
@@ -70,6 +70,9 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
70
 
71
  self.vocab_file = vocab_file
72
  self.tokenizer = SPTokenizer(vocab_file)
 
 
 
73
  super().__init__(padding_side=padding_side, clean_up_tokenization_spaces=clean_up_tokenization_spaces, **kwargs)
74
  self.special_tokens = {
75
  "<bos>": self.tokenizer.bos_id,
 
70
 
71
  self.vocab_file = vocab_file
72
  self.tokenizer = SPTokenizer(vocab_file)
73
+ kwargs.pop("eos_token")
74
+ kwargs.pop("pad_token")
75
+ kwargs.pop("unk_token")
76
  super().__init__(padding_side=padding_side, clean_up_tokenization_spaces=clean_up_tokenization_spaces, **kwargs)
77
  self.special_tokens = {
78
  "<bos>": self.tokenizer.bos_id,