oweller2
committed on
Commit
·
7b38d2c
1
Parent(s):
8a083e2
done
Browse files
- tokenizer.py +3 -2
tokenizer.py
CHANGED
@@ -3,9 +3,10 @@ from transformers import PreTrainedTokenizerFast
|
|
3 |
class ModernDecoderBERTTokenizer(PreTrainedTokenizerFast):
|
4 |
|
5 |
def _batch_encode_plus(self, *args, **kwargs):
|
6 |
-
breakpoint()
|
7 |
outputs = super()._batch_encode_plus(*args, **kwargs)
|
8 |
-
|
|
|
|
|
9 |
return outputs
|
10 |
|
11 |
# Register the class
|
|
|
3 |
class ModernDecoderBERTTokenizer(PreTrainedTokenizerFast):
|
4 |
|
5 |
def _batch_encode_plus(self, *args, **kwargs):
|
|
|
6 |
outputs = super()._batch_encode_plus(*args, **kwargs)
|
7 |
+
del outputs["token_type_ids"]
|
8 |
+
for key in ['input_ids', 'attention_mask']:
|
9 |
+
outputs[key] = [sequence[:-1] for sequence in outputs[key]]
|
10 |
return outputs
|
11 |
|
12 |
# Register the class
|