oweller2 committed on
Commit
8a083e2
1 Parent(s): f64965c
Files changed (1) hide show
  1. tokenizer.py +4 -3
tokenizer.py CHANGED
@@ -2,10 +2,11 @@ from transformers import PreTrainedTokenizerFast
2
 
3
  class ModernDecoderBERTTokenizer(PreTrainedTokenizerFast):
4
 
5
- def prepare_for_model(self, ids, *args, **kwargs):
6
  breakpoint()
7
- filtered_ids = [id for id in ids if id != self.eos_token_id]
8
- return super().prepare_for_model(filtered_ids, *args, **kwargs)
 
9
 
10
  # Register the class
11
  from transformers import AutoTokenizer
 
2
 
3
  class ModernDecoderBERTTokenizer(PreTrainedTokenizerFast):
4
 
5
+ def _batch_encode_plus(self, *args, **kwargs):
6
  breakpoint()
7
+ outputs = super()._batch_encode_plus(*args, **kwargs)
8
+ outputs['input_ids'] = [[id for id in ids if id != self.eos_token_id] for ids in outputs['input_ids']]
9
+ return outputs
10
 
11
  # Register the class
12
  from transformers import AutoTokenizer