from transformers import PreTrainedTokenizerFast


class ModernDecoderBERTTokenizer(PreTrainedTokenizerFast):
    # Strip the EOS token whenever special tokens are added to a sequence,
    # for one segment or for a pair of segments.
    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
        if token_ids_1 is None:
            return [token_id for token_id in token_ids_0 if token_id != self.eos_token_id]
        return [token_id for token_id in token_ids_0 if token_id != self.eos_token_id] + [
            token_id for token_id in token_ids_1 if token_id != self.eos_token_id
        ]
# Register the class so AutoTokenizer can resolve it. AutoTokenizer.register takes
# the matching config class first; ModernBertConfig is assumed here, and
# exist_ok=True replaces any tokenizer already mapped to that config.
from transformers import AutoTokenizer, ModernBertConfig

AutoTokenizer.register(ModernBertConfig, fast_tokenizer_class=ModernDecoderBERTTokenizer, exist_ok=True)
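

# --- Usage sketch ---
# A minimal check of the override, assuming the tokenizer files live at a
# placeholder path; replace "path/to/tokenizer" with a real checkpoint.
if __name__ == "__main__":
    tokenizer = ModernDecoderBERTTokenizer.from_pretrained("path/to/tokenizer")

    # Tokenize without special tokens, then add them via the overridden method.
    ids = tokenizer("Hello world", add_special_tokens=False)["input_ids"]
    with_special = tokenizer.build_inputs_with_special_tokens(ids)

    # The EOS id should never appear in the ids built by the override.
    assert tokenizer.eos_token_id not in with_special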