fix bug in tokenizer `decode` method
Browse files
The `_decode` method of the `ChatGLMTokenizer` class should call the customized `decode` of `sp_tokenizer` (`self.sp_tokenizer.decode`) rather than the parent class's `_decode`; otherwise, negative ids are never filtered out.
- tokenization_chatglm.py +1 -1
tokenization_chatglm.py
CHANGED
@@ -282,7 +282,7 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
|
|
282 |
return ""
|
283 |
if self.pad_token_id in token_ids: # remove pad
|
284 |
token_ids = list(filter((self.pad_token_id).__ne__, token_ids))
|
285 |
-
return
|
286 |
|
287 |
def _convert_token_to_id(self, token):
|
288 |
""" Converts a token (str) in an id using the vocab. """
|
|
|
282 |
return ""
|
283 |
if self.pad_token_id in token_ids: # remove pad
|
284 |
token_ids = list(filter((self.pad_token_id).__ne__, token_ids))
|
285 |
+
return self.sp_tokenizer.decode(token_ids)
|
286 |
|
287 |
def _convert_token_to_id(self, token):
|
288 |
""" Converts a token (str) in an id using the vocab. """
|