oweller2 committed on
Commit
6d20d8a
1 Parent(s): 3608e05
Files changed (1) hide show
  1. tokenizer.py +2 -13
tokenizer.py CHANGED
@@ -1,18 +1,7 @@
1
- from transformers import PreTrainedTokenizer, AutoTokenizer
2
-
3
- class ModernDecoderBERTTokenizer(PreTrainedTokenizer):
4
-
5
- def __init__(self, *args, **kwargs):
6
- super().__init__(*args, **kwargs)
7
 
 
8
  def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
9
- breakpoint()
10
  if token_ids_1 is None:
11
  return [id for id in token_ids_0 if id != self.eos_token_id]
12
  return [id for id in token_ids_0 if id != self.eos_token_id] + [id for id in token_ids_1 if id != self.eos_token_id]
13
-
14
- def get_vocab(self):
15
- breakpoint()
16
- return dict(self.vocab.items())
17
-
18
- AutoTokenizer.register("ModernDecoderBERTTokenizer", ModernDecoderBERTTokenizer)
 
1
+ from transformers import PreTrainedTokenizerFast
 
 
 
 
 
2
 
3
+ class ModernDecoderBERTTokenizer(PreTrainedTokenizerFast):
4
  def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
 
5
  if token_ids_1 is None:
6
  return [id for id in token_ids_0 if id != self.eos_token_id]
7
  return [id for id in token_ids_0 if id != self.eos_token_id] + [id for id in token_ids_1 if id != self.eos_token_id]