matt
committed on
Commit
•
66223de
1
Parent(s):
e83c7ca
Update tokenizer + config
Browse files
- config.json +1 -0
- tokenization_hyena.py +1 -1
config.json
CHANGED
@@ -24,6 +24,7 @@
|
|
24 |
"model_type": "hyenadna",
|
25 |
"n_layer": 2,
|
26 |
"num_inner_mlps": 2,
|
|
|
27 |
"pad_vocab_size_multiple": 8,
|
28 |
"short_filter_order": 3,
|
29 |
"tie_word_embeddings": false,
|
|
|
24 |
"model_type": "hyenadna",
|
25 |
"n_layer": 2,
|
26 |
"num_inner_mlps": 2,
|
27 |
+
"pad_token_id": 4,
|
28 |
"pad_vocab_size_multiple": 8,
|
29 |
"short_filter_order": 3,
|
30 |
"tie_word_embeddings": false,
|
tokenization_hyena.py
CHANGED
@@ -6,7 +6,7 @@ import os
|
|
6 |
|
7 |
|
8 |
class HyenaDNATokenizer(PreTrainedTokenizer):
|
9 |
-
model_input_names = ["input_ids"
|
10 |
|
11 |
def __init__(self,
|
12 |
model_max_length: int,
|
|
|
6 |
|
7 |
|
8 |
class HyenaDNATokenizer(PreTrainedTokenizer):
|
9 |
+
model_input_names = ["input_ids"]
|
10 |
|
11 |
def __init__(self,
|
12 |
model_max_length: int,
|