oweller2 committed on
Commit
f64965c
1 Parent(s): 0157fcc
Files changed (1) hide show
  1. tokenizer.py +4 -4
tokenizer.py CHANGED
@@ -1,11 +1,11 @@
1
  from transformers import PreTrainedTokenizerFast
2
 
3
  class ModernDecoderBERTTokenizer(PreTrainedTokenizerFast):
4
- def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
 
5
  breakpoint()
6
- if token_ids_1 is None:
7
- return [id for id in token_ids_0 if id != self.eos_token_id]
8
- return [id for id in token_ids_0 if id != self.eos_token_id] + [id for id in token_ids_1 if id != self.eos_token_id]
9
 
10
  # Register the class
11
  from transformers import AutoTokenizer
 
1
  from transformers import PreTrainedTokenizerFast
2
 
3
  class ModernDecoderBERTTokenizer(PreTrainedTokenizerFast):
4
+
5
+ def prepare_for_model(self, ids, *args, **kwargs):
6
  breakpoint()
7
+ filtered_ids = [id for id in ids if id != self.eos_token_id]
8
+ return super().prepare_for_model(filtered_ids, *args, **kwargs)
 
9
 
10
  # Register the class
11
  from transformers import AutoTokenizer