ramkrithiks
committed on
Commit
•
154b411
1
Parent(s):
d61d475
Update _decode method to accept integer element and convert it to sequence
Browse files
#### Why did I create this pull request?
When I tried to run this model with TGI, the model warm-up step failed with the error `cannot convert 'int' to 'sequence'`.
#### What are the proposed changes?
Removed the `List[int]` type annotation from the `token_ids` parameter of `_decode` and added an explicit check that wraps `token_ids` in a list when it is not already one (for example, when a single integer is passed).
#### How did I test it?
I made the changes in a local copy of the repo on my machine and was able to launch the model successfully using TGI.
- tokenization_xgen.py +3 -1
tokenization_xgen.py
CHANGED
@@ -169,7 +169,9 @@ class XgenTokenizer(PreTrainedTokenizer):
|
|
169 |
"""Converts an index (integer) in a token (str) using the vocab."""
|
170 |
return self.encoder.decode_single_token_bytes(index).decode("utf-8")
|
171 |
|
172 |
-
def _decode(self, token_ids
|
|
|
|
|
173 |
if skip_special_tokens:
|
174 |
token_ids = [t for t in token_ids if t not in self.all_special_ids]
|
175 |
return self.encoder.decode(token_ids)
|
|
|
169 |
"""Converts an index (integer) in a token (str) using the vocab."""
|
170 |
return self.encoder.decode_single_token_bytes(index).decode("utf-8")
|
171 |
|
172 |
+
def _decode(self, token_ids, skip_special_tokens: bool = False, **kwargs):
|
173 |
+
if not isinstance(token_ids, list):
|
174 |
+
token_ids = [token_ids]
|
175 |
if skip_special_tokens:
|
176 |
token_ids = [t for t in token_ids if t not in self.all_special_ids]
|
177 |
return self.encoder.decode(token_ids)
|