finnstrom3693 committed
Commit 1a0de96
1 Parent(s): 0bf0be0

change to numpy

Files changed (1)
  1. tokenizer_make2.py +7 -6
tokenizer_make2.py CHANGED
@@ -1,3 +1,4 @@
+# @title Model Tokenizer
 from transformers import BertTokenizerFast
 import os
 import tensorflow as tf
@@ -36,11 +37,11 @@ class MiniSunTokenizer:
             padding='max_length' if padding else False,
             truncation=truncation,
             return_attention_mask=True,
-            return_tensors='tf'
+            return_tensors='np'
         )
         return {
-            'input_ids': encoded['input_ids'].numpy().tolist(),
-            'attention_mask': encoded['attention_mask'].numpy().tolist()
+            'input_ids': encoded['input_ids'],
+            'attention_mask': encoded['attention_mask']
         }
 
     def _encode_batch(self, texts, max_length=512, padding=True, truncation=True):
@@ -52,11 +53,11 @@ class MiniSunTokenizer:
             padding='max_length' if padding else False,
             truncation=truncation,
             return_attention_mask=True,
-            return_tensors='tf'
+            return_tensors='np'
         )
         return {
-            'input_ids': encoded_batch['input_ids'].numpy().tolist(),
-            'attention_mask': encoded_batch['attention_mask'].numpy().tolist()
+            'input_ids': encoded_batch['input_ids'],
+            'attention_mask': encoded_batch['attention_mask']
         }
 
     def decode(self, token_ids):
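
For context, a minimal sketch of the behavioural difference this commit relies on, using BertTokenizerFast directly rather than the MiniSunTokenizer wrapper (the 'bert-base-uncased' checkpoint and the example arguments are assumptions for illustration; the repository may load its tokenizer from a local vocab instead):

    # Sketch: with return_tensors='np' the tokenizer already returns NumPy
    # arrays, so the old TensorFlow round-trip via .numpy().tolist() is no
    # longer needed.
    import numpy as np
    from transformers import BertTokenizerFast

    tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')  # assumed checkpoint

    encoded = tokenizer(
        "hello world",
        max_length=16,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='np',
    )

    # Values are NumPy arrays of shape (batch_size, max_length).
    assert isinstance(encoded['input_ids'], np.ndarray)
    print(encoded['input_ids'].shape)        # (1, 16)
    print(encoded['attention_mask'].shape)   # (1, 16)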