oweller2
committed on
Commit
•
f64965c
1
Parent(s):
0157fcc
update
Browse files - tokenizer.py +4 -4
tokenizer.py
CHANGED
@@ -1,11 +1,11 @@
|
|
1 |
from transformers import PreTrainedTokenizerFast
|
2 |
|
3 |
class ModernDecoderBERTTokenizer(PreTrainedTokenizerFast):
|
4 |
-
|
|
|
5 |
breakpoint()
|
6 |
-
if
|
7 |
-
|
8 |
-
return [id for id in token_ids_0 if id != self.eos_token_id] + [id for id in token_ids_1 if id != self.eos_token_id]
|
9 |
|
10 |
# Register the class
|
11 |
from transformers import AutoTokenizer
|
|
|
1 |
from transformers import PreTrainedTokenizerFast
|
2 |
|
3 |
class ModernDecoderBERTTokenizer(PreTrainedTokenizerFast):
|
4 |
+
|
5 |
+
def prepare_for_model(self, ids, *args, **kwargs):
|
6 |
breakpoint()
|
7 |
+
filtered_ids = [id for id in ids if id != self.eos_token_id]
|
8 |
+
return super().prepare_for_model(filtered_ids, *args, **kwargs)
|
|
|
9 |
|
10 |
# Register the class
|
11 |
from transformers import AutoTokenizer
|