from transformers import AutoTokenizer, PreTrainedTokenizerFast
class ModernDecoderBERTTokenizer(PreTrainedTokenizerFast):
    """Fast tokenizer whose input assembly drops every EOS token id."""

    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
        """Return the given token ids with all EOS ids filtered out.

        Args:
            token_ids_0: Token ids of the first sequence.
            token_ids_1: Optional token ids of a second sequence. When
                provided, its filtered ids are appended after those of
                ``token_ids_0``.

        Returns:
            A new list holding the ids of ``token_ids_0`` (followed by the
            ids of ``token_ids_1`` when given), omitting every id equal to
            ``self.eos_token_id``. This method adds no tokens of its own.
        """
        # Read the attribute once; it is the same value for every element.
        eos_id = self.eos_token_id
        kept = [tok for tok in token_ids_0 if tok != eos_id]
        if token_ids_1 is not None:
            kept += [tok for tok in token_ids_1 if tok != eos_id]
        return kept
# Register the tokenizer class with AutoTokenizer when this module is imported.
# NOTE(review): the transformers documentation describes
# AutoTokenizer.register as taking a config class first, plus
# slow_tokenizer_class= and/or fast_tokenizer_class=. Here the tokenizer class
# itself is passed as the only argument. Confirm the intended config class and
# call signature against the installed transformers version.
AutoTokenizer.register(ModernDecoderBERTTokenizer)