finnstrom3693 committed
Commit 1a0de96 • 1 Parent(s): 0bf0be0

change to numpy

Changed files: tokenizer_make2.py (+7 -6)

tokenizer_make2.py CHANGED
@@ -1,3 +1,4 @@
+# @title Model Tokenizer
 from transformers import BertTokenizerFast
 import os
 import tensorflow as tf
@@ -36,11 +37,11 @@ class MiniSunTokenizer:
             padding='max_length' if padding else False,
             truncation=truncation,
             return_attention_mask=True,
-            return_tensors='
+            return_tensors='np'
         )
         return {
-            'input_ids': encoded['input_ids']
-            'attention_mask': encoded['attention_mask']
+            'input_ids': encoded['input_ids'],
+            'attention_mask': encoded['attention_mask']
         }
 
     def _encode_batch(self, texts, max_length=512, padding=True, truncation=True):
@@ -52,11 +53,11 @@ class MiniSunTokenizer:
             padding='max_length' if padding else False,
             truncation=truncation,
             return_attention_mask=True,
-            return_tensors='
+            return_tensors='np'
         )
         return {
-            'input_ids': encoded_batch['input_ids']
-            'attention_mask': encoded_batch['attention_mask']
+            'input_ids': encoded_batch['input_ids'],
+            'attention_mask': encoded_batch['attention_mask']
         }
 
     def decode(self, token_ids):
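The effect of the change, in brief: with return_tensors='np' the wrapped BertTokenizerFast returns NumPy arrays rather than framework tensors. A minimal sketch of that behavior, not taken from this repo (the standalone tokenizer call and the 'bert-base-uncased' checkpoint are illustrative assumptions; MiniSunTokenizer's own vocabulary is not used here):

# Sketch only: demonstrates what return_tensors='np' yields from a
# BertTokenizerFast call like the ones patched in this commit.
from transformers import BertTokenizerFast
import numpy as np

tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')  # illustrative checkpoint
encoded = tokenizer(
    'hello world',
    max_length=512,
    padding='max_length',
    truncation=True,
    return_attention_mask=True,
    return_tensors='np',  # NumPy arrays instead of tf/pt tensors
)
assert isinstance(encoded['input_ids'], np.ndarray)
print(encoded['input_ids'].shape)       # (1, 512)
print(encoded['attention_mask'].shape)  # (1, 512)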