Upload tokenizer
- tokenization_phylogpn.py +1 -3
- tokenizer_config.json +1 -1
tokenization_phylogpn.py
@@ -9,7 +9,7 @@ class PhyloGPNTokenizer(PreTrainedTokenizer):
         self._vocab = {k: v for v, k in enumerate("ACGTN-")}
 
         add_prefix_space = kwargs.pop("add_prefix_space", False)
-        padding_side = kwargs.pop("padding_side", "
+        padding_side = kwargs.pop("padding_side", "right")
         super().__init__(
             model_max_length=model_max_length,
             unk_token=unk_token,
@@ -24,8 +24,6 @@ class PhyloGPNTokenizer(PreTrainedTokenizer):
             **kwargs,
         )
 
-        self._receptive_field_size = 1
-
     def _tokenize(self, seq: str) -> List[str]:
         assert len(seq) >= 481, "Input must be at least 481 bp long"
         return list(seq)
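For context, a minimal sketch of what the new "right" default means when loading this tokenizer from the Hub. The repo id below is a placeholder (not taken from this commit), and the custom tokenizer class requires trust_remote_code:

from transformers import AutoTokenizer

# Placeholder repo id; substitute the actual Hub repository.
tokenizer = AutoTokenizer.from_pretrained("user/PhyloGPN", trust_remote_code=True)

seqs = ["ACGT" * 125, "ACGT" * 130]  # 500 bp and 520 bp; _tokenize requires >= 481 bp
batch = tokenizer(seqs, padding=True, return_tensors="pt")

# With padding_side="right", the shorter sequence is padded at the end with
# the pad token "-" (id 5 in the "ACGTN-" vocabulary).
print(batch["input_ids"].shape)    # expected (2, 520), assuming no special tokens
print(batch["input_ids"][0, -5:])  # trailing pad ids (all 5) on the shorter sequence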
tokenizer_config.json
@@ -32,7 +32,7 @@
   "mask_token": null,
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "-",
-  "padding_side": "
+  "padding_side": "right",
   "sep_token": null,
   "tokenizer_class": "PhyloGPNTokenizer",
   "unk_token": "N"
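As a sanity check, a short sketch (same placeholder repo id as above) confirming that the serialized config and the in-memory tokenizer now agree on the padding side:

import json

from transformers import AutoTokenizer

# Placeholder repo id; substitute the actual Hub repository.
tokenizer = AutoTokenizer.from_pretrained("user/PhyloGPN", trust_remote_code=True)
tokenizer.save_pretrained("./phylogpn-tokenizer")

# save_pretrained writes tokenizer_config.json, including "padding_side".
with open("./phylogpn-tokenizer/tokenizer_config.json") as f:
    config = json.load(f)

assert config["padding_side"] == tokenizer.padding_side == "right"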