fix convert token to id
Browse files
- sentencepiece_ja.py +3 -0
sentencepiece_ja.py
CHANGED
@@ -41,6 +41,9 @@ class SentencePieceJA(PreTrainedTokenizer):
         return self._tokenizer.encode(text).tokens
 
     def _convert_token_to_id(self, token):
+        ids = self._tokenizer.encode(token).ids
+        if len(ids) == 0:
+            return self.unk_token_id
         return self._tokenizer.encode(token).ids[0]
 
     def _convert_id_to_token(self, index: int) -> str: