michaeljcliao committed on
Commit
61070b7
1 Parent(s): 429eef1

Upload processor

Browse files
preprocessor_config.json CHANGED
@@ -19,8 +19,8 @@
19
  "processor_class": "DonutProcessor",
20
  "resample": 2,
21
  "rescale_factor": 0.00392156862745098,
22
- "size": [
23
- 960,
24
- 550
25
- ]
26
  }
 
19
  "processor_class": "DonutProcessor",
20
  "resample": 2,
21
  "rescale_factor": 0.00392156862745098,
22
+ "size": {
23
+ "height": 550,
24
+ "width": 960
25
+ }
26
  }
special_tokens_map.json CHANGED
@@ -1,89 +1,17 @@
1
  {
2
  "additional_special_tokens": [
3
- {
4
- "content": "<s_untaxed_price>",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false
9
- },
10
- {
11
- "content": "</s_untaxed_price>",
12
- "lstrip": false,
13
- "normalized": false,
14
- "rstrip": false,
15
- "single_word": false
16
- },
17
- {
18
- "content": "<s_total_price>",
19
- "lstrip": false,
20
- "normalized": false,
21
- "rstrip": false,
22
- "single_word": false
23
- },
24
- {
25
- "content": "</s_total_price>",
26
- "lstrip": false,
27
- "normalized": false,
28
- "rstrip": false,
29
- "single_word": false
30
- },
31
- {
32
- "content": "<s_tax>",
33
- "lstrip": false,
34
- "normalized": false,
35
- "rstrip": false,
36
- "single_word": false
37
- },
38
- {
39
- "content": "</s_tax>",
40
- "lstrip": false,
41
- "normalized": false,
42
- "rstrip": false,
43
- "single_word": false
44
- },
45
- {
46
- "content": "<s_invoice_num>",
47
- "lstrip": false,
48
- "normalized": false,
49
- "rstrip": false,
50
- "single_word": false
51
- },
52
- {
53
- "content": "</s_invoice_num>",
54
- "lstrip": false,
55
- "normalized": false,
56
- "rstrip": false,
57
- "single_word": false
58
- },
59
- {
60
- "content": "<s_invoice_date>",
61
- "lstrip": false,
62
- "normalized": false,
63
- "rstrip": false,
64
- "single_word": false
65
- },
66
- {
67
- "content": "</s_invoice_date>",
68
- "lstrip": false,
69
- "normalized": false,
70
- "rstrip": false,
71
- "single_word": false
72
- },
73
- {
74
- "content": "<s>",
75
- "lstrip": false,
76
- "normalized": false,
77
- "rstrip": false,
78
- "single_word": false
79
- },
80
- {
81
- "content": "</s>",
82
- "lstrip": false,
83
- "normalized": false,
84
- "rstrip": false,
85
- "single_word": false
86
- }
87
  ],
88
  "bos_token": {
89
  "content": "<s>",
 
1
  {
2
  "additional_special_tokens": [
3
+ "<s_untaxed_price>",
4
+ "</s_untaxed_price>",
5
+ "<s_total_price>",
6
+ "</s_total_price>",
7
+ "<s_tax>",
8
+ "</s_tax>",
9
+ "<s_invoice_num>",
10
+ "</s_invoice_num>",
11
+ "<s_invoice_date>",
12
+ "</s_invoice_date>",
13
+ "<s>",
14
+ "</s>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  ],
16
  "bos_token": {
17
  "content": "<s>",
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -164,11 +164,18 @@
164
  "cls_token": "<s>",
165
  "eos_token": "</s>",
166
  "mask_token": "<mask>",
 
167
  "model_max_length": 1000000000000000019884624838656,
 
168
  "pad_token": "<pad>",
 
 
169
  "processor_class": "DonutProcessor",
170
  "sep_token": "</s>",
171
  "sp_model_kwargs": {},
 
172
  "tokenizer_class": "XLMRobertaTokenizer",
 
 
173
  "unk_token": "<unk>"
174
  }
 
164
  "cls_token": "<s>",
165
  "eos_token": "</s>",
166
  "mask_token": "<mask>",
167
+ "max_length": 512,
168
  "model_max_length": 1000000000000000019884624838656,
169
+ "pad_to_multiple_of": null,
170
  "pad_token": "<pad>",
171
+ "pad_token_type_id": 0,
172
+ "padding_side": "right",
173
  "processor_class": "DonutProcessor",
174
  "sep_token": "</s>",
175
  "sp_model_kwargs": {},
176
+ "stride": 0,
177
  "tokenizer_class": "XLMRobertaTokenizer",
178
+ "truncation_side": "right",
179
+ "truncation_strategy": "longest_first",
180
  "unk_token": "<unk>"
181
  }