# Viewer metadata: file size 3,480 Bytes, revision f2f1526
# Run-wide settings: device, schedule length, logging cadence, and the
# input/output paths used by this configuration.
Global:
  use_gpu: true
  epoch_num: 200
  log_smooth_window: 10
  print_batch_step: 10
  save_model_dir: ./output/ser_vi_layoutxlm_xfund_zh
  # NOTE(review): 2000 is larger than epoch_num (200); presumably no
  # periodic per-epoch checkpoint is ever written — confirm this is intended.
  save_epoch_step: 2000
  # NOTE(review): presumably [start_iteration, interval] — confirm.
  eval_batch_step: [0, 19]
  cal_metric_during_train: false
  save_inference_dir: null
  use_visualdl: false
  seed: 2022
  infer_img: ppstructure/docs/kie/input/zh_val_42.jpg
  # The 224 x 224 entries equal the Resize size used by the Train and Eval
  # transforms; the leading 3 is presumably the channel count.
  d2s_train_image_shape: [3, 224, 224]
  # NOTE(review): absolute path, and its directory name lacks the "_vi"
  # found in save_model_dir — confirm both are intentional.
  save_res_path: /content/PaddleOCR/output/ser_layoutxlm_xfund_zh/res
  kie_rec_model_dir: null
  kie_det_model_dir: null
  amp_custom_white_list: [scale, concat, elementwise_add]
# Model definition: a LayoutXLM model of type "kie" with no Transform stage
# and a LayoutXLMForSer backbone.
Architecture:
  model_type: kie
  algorithm: LayoutXLM
  Transform: null
  Backbone:
    name: LayoutXLMForSer
    pretrained: true
    # No checkpoint path is configured here.
    checkpoints: null
    mode: vi
    # Same value as Loss.num_classes (79); presumably the two must stay in
    # sync with the class list file — verify when changing the label set.
    num_classes: 79
# Loss configuration for the token-level SER task.
Loss:
  name: VQASerTokenLayoutLMLoss
  # Same value as Architecture.Backbone.num_classes (79).
  num_classes: 79
  # NOTE(review): presumably the key under which the model output is looked
  # up by the loss — confirm against the loss implementation.
  key: backbone_out
# Optimizer configuration: AdamW with a "Linear" learning-rate schedule and
# an L2 regularizer whose factor is zero.
Optimizer:
  name: AdamW
  beta1: 0.9
  beta2: 0.999
  lr:
    name: Linear
    learning_rate: 5.0e-05
    # Same value as Global.epoch_num (200).
    epochs: 200
    warmup_epoch: 2
  regularizer:
    name: L2
    # A factor of 0.0 presumably disables the L2 penalty — TODO confirm.
    factor: 0.0
# Post-processing of model output for the SER task.
PostProcess:
  name: VQASerTokenLayoutLMPostProcess
  # Same class list file as the VQATokenLabelEncode transforms in Train and
  # Eval. NOTE(review): absolute /content/... path — confirm it exists on the
  # machine running this config.
  class_path: /content/PaddleOCR/dataset/class_list_jmfunsd.txt
# Evaluation metric configuration.
Metric:
  name: VQASerTokenMetric
  # NOTE(review): presumably the metric used to pick the best checkpoint —
  # confirm against the training loop.
  main_indicator: hmean
# Training split: where the samples come from, the per-sample transform
# chain, and the batching settings.
Train:
  dataset:
    name: SimpleDataSet
    # NOTE(review): absolute /content/... paths — confirm they exist on the
    # machine running this config.
    data_dir: /content/PaddleOCR/dataset/training_data/images
    label_file_list:
    - /content/PaddleOCR/dataset/train.json
    ratio_list: [1.0]
    transforms:
    - DecodeImage:
        img_mode: RGB
        channel_first: false
    - VQATokenLabelEncode:
        contains_re: false
        algorithm: LayoutXLM
        # Same class list file as PostProcess.class_path.
        class_path: /content/PaddleOCR/dataset/class_list_jmfunsd.txt
        use_textline_bbox_info: true
        order_method: tb-yx
    - VQATokenPad:
        max_seq_len: 512
        return_attention_mask: true
    # Uses the same max_seq_len (512) as VQATokenPad above.
    - VQASerTokenChunk:
        max_seq_len: 512
    # 224 x 224, the same size listed in Global.d2s_train_image_shape.
    - Resize:
        size: [224, 224]
    # scale is 1 and mean/std are on a 0-255 scale; presumably applied as
    # (pixel * scale - mean) / std — TODO confirm.
    - NormalizeImage:
        scale: 1
        mean: [123.675, 116.28, 103.53]
        std: [58.395, 57.12, 57.375]
        order: hwc
    - ToCHWImage: null
    - KeepKeys:
        keep_keys:
          [input_ids, bbox, attention_mask, token_type_ids, image, labels]
  loader:
    shuffle: true
    drop_last: false
    batch_size_per_card: 8
    num_workers: 4
# Evaluation split. The transform chain lists the same steps and values as
# the Train section; the differences are the data paths, the absence of a
# ratio_list, and shuffle being off.
Eval:
  dataset:
    name: SimpleDataSet
    # NOTE(review): absolute /content/... paths — confirm they exist on the
    # machine running this config.
    data_dir: /content/PaddleOCR/dataset/testing_data/images
    label_file_list:
    - /content/PaddleOCR/dataset/test.json
    transforms:
    - DecodeImage:
        img_mode: RGB
        channel_first: false
    - VQATokenLabelEncode:
        contains_re: false
        algorithm: LayoutXLM
        # Same class list file as PostProcess.class_path.
        class_path: /content/PaddleOCR/dataset/class_list_jmfunsd.txt
        use_textline_bbox_info: true
        order_method: tb-yx
    - VQATokenPad:
        max_seq_len: 512
        return_attention_mask: true
    # Uses the same max_seq_len (512) as VQATokenPad above.
    - VQASerTokenChunk:
        max_seq_len: 512
    # 224 x 224, the same size listed in Global.d2s_train_image_shape.
    - Resize:
        size: [224, 224]
    # scale is 1 and mean/std are on a 0-255 scale; presumably applied as
    # (pixel * scale - mean) / std — TODO confirm.
    - NormalizeImage:
        scale: 1
        mean: [123.675, 116.28, 103.53]
        std: [58.395, 57.12, 57.375]
        order: hwc
    - ToCHWImage: null
    - KeepKeys:
        keep_keys:
          [input_ids, bbox, attention_mask, token_type_ids, image, labels]
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 8
    num_workers: 4
# No profiler options are set; presumably profiling is disabled — TODO confirm.
profiler_options: null