File size: 3,480 Bytes
f2f1526 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 |
# Run-wide settings: device, schedule, logging and output locations.
Global:
  use_gpu: true
  # Same value as the `epochs` entry of the optimizer's lr schedule (200);
  # change both together.
  epoch_num: 200
  log_smooth_window: 10
  print_batch_step: 10
  save_model_dir: ./output/ser_vi_layoutxlm_xfund_zh
  # NOTE(review): larger than epoch_num (200). If this is an interval in
  # epochs, no periodic checkpoint is ever written -- confirm it is intended.
  save_epoch_step: 2000
  # NOTE(review): presumably [start_iteration, interval] -- confirm against
  # the trainer.
  eval_batch_step:
    - 0
    - 19
  cal_metric_during_train: false
  save_inference_dir: null
  use_visualdl: false
  seed: 2022
  infer_img: ppstructure/docs/kie/input/zh_val_42.jpg
  # Presumably (channels, height, width); 224 x 224 is also the Resize size
  # used by the Train and Eval transforms.
  d2s_train_image_shape:
    - 3
    - 224
    - 224
  # NOTE(review): absolute path, and its directory name
  # (ser_layoutxlm_xfund_zh) differs from the one in save_model_dir
  # (ser_vi_layoutxlm_xfund_zh) -- confirm it is intended.
  save_res_path: /content/PaddleOCR/output/ser_layoutxlm_xfund_zh/res
  kie_rec_model_dir: null
  kie_det_model_dir: null
  amp_custom_white_list:
    - scale
    - concat
    - elementwise_add
# Model definition: model type, algorithm and backbone.
Architecture:
  model_type: kie
  algorithm: LayoutXLM
  Transform: null
  Backbone:
    name: LayoutXLMForSer
    pretrained: true
    checkpoints: null
    mode: vi
    # Same value as the loss section's num_classes (79); change both together.
    num_classes: 79
# Training loss.
Loss:
  name: VQASerTokenLayoutLMLoss
  # Same value as the backbone's num_classes (79); change both together.
  num_classes: 79
  key: backbone_out
# Optimizer, learning-rate schedule and weight regularization.
Optimizer:
  name: AdamW
  beta1: 0.9
  beta2: 0.999
  lr:
    name: Linear
    learning_rate: 5.0e-05
    # Same value as the global epoch_num (200); change both together.
    epochs: 200
    warmup_epoch: 2
  regularizer:
    name: L2
    # A factor of 0.0 leaves the L2 term with zero weight.
    factor: 0.0
# Post-processing of model output.
PostProcess:
  name: VQASerTokenLayoutLMPostProcess
  # Same class list file as the VQATokenLabelEncode transforms use.
  # NOTE(review): absolute path -- only valid on the machine that wrote it.
  class_path: /content/PaddleOCR/dataset/class_list_jmfunsd.txt
# Evaluation metric and the indicator reported as the main score.
Metric:
  name: VQASerTokenMetric
  main_indicator: hmean
# Training data: dataset location, per-sample transform pipeline, loader.
Train:
  dataset:
    name: SimpleDataSet
    # NOTE(review): absolute paths -- only valid on the machine that wrote them.
    data_dir: /content/PaddleOCR/dataset/training_data/images
    label_file_list:
      - /content/PaddleOCR/dataset/train.json
    ratio_list:
      - 1.0
    # Transforms are listed in the order they appear in the pipeline.
    transforms:
      - DecodeImage:
          img_mode: RGB
          channel_first: false
      - VQATokenLabelEncode:
          contains_re: false
          algorithm: LayoutXLM
          # Same class list file as PostProcess.class_path.
          class_path: /content/PaddleOCR/dataset/class_list_jmfunsd.txt
          use_textline_bbox_info: true
          order_method: tb-yx
      - VQATokenPad:
          max_seq_len: 512
          return_attention_mask: true
      - VQASerTokenChunk:
          # Same value as VQATokenPad.max_seq_len above.
          max_seq_len: 512
      - Resize:
          size:
            - 224
            - 224
      - NormalizeImage:
          # With scale 1 the mean/std below are on the 0-255 pixel range.
          scale: 1
          mean:
            - 123.675
            - 116.28
            - 103.53
          std:
            - 58.395
            - 57.12
            - 57.375
          order: hwc
      - ToCHWImage: null
      - KeepKeys:
          keep_keys:
            - input_ids
            - bbox
            - attention_mask
            - token_type_ids
            - image
            - labels
  loader:
    shuffle: true
    drop_last: false
    batch_size_per_card: 8
    num_workers: 4
# Evaluation data: same transform pipeline as Train, unshuffled loader.
Eval:
  dataset:
    name: SimpleDataSet
    # NOTE(review): absolute paths -- only valid on the machine that wrote them.
    data_dir: /content/PaddleOCR/dataset/testing_data/images
    label_file_list:
      - /content/PaddleOCR/dataset/test.json
    # Transforms are listed in the order they appear in the pipeline.
    transforms:
      - DecodeImage:
          img_mode: RGB
          channel_first: false
      - VQATokenLabelEncode:
          contains_re: false
          algorithm: LayoutXLM
          # Same class list file as PostProcess.class_path.
          class_path: /content/PaddleOCR/dataset/class_list_jmfunsd.txt
          use_textline_bbox_info: true
          order_method: tb-yx
      - VQATokenPad:
          max_seq_len: 512
          return_attention_mask: true
      - VQASerTokenChunk:
          # Same value as VQATokenPad.max_seq_len above.
          max_seq_len: 512
      - Resize:
          size:
            - 224
            - 224
      - NormalizeImage:
          # With scale 1 the mean/std below are on the 0-255 pixel range.
          scale: 1
          mean:
            - 123.675
            - 116.28
            - 103.53
          std:
            - 58.395
            - 57.12
            - 57.375
          order: hwc
      - ToCHWImage: null
      - KeepKeys:
          keep_keys:
            - input_ids
            - bbox
            - attention_mask
            - token_type_ids
            - image
            - labels
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 8
    num_workers: 4
# No profiler options are set.
profiler_options: null