---
# KIE training / evaluation configuration.
# Algorithm: LayoutXLM, backbone: LayoutXLMForSer (mode: vi).
# Dataset and class-list paths below are absolute (/content/PaddleOCR/...);
# adjust them when running from a different location.

Global:
  use_gpu: true
  epoch_num: 200
  log_smooth_window: 10
  print_batch_step: 10
  save_model_dir: ./output/ser_vi_layoutxlm_xfund_zh
  # NOTE(review): 2000 is larger than epoch_num (200). If this is an epoch
  # interval, no periodic checkpoint would be written - confirm intended.
  save_epoch_step: 2000
  # Two-element list; presumably [start_step, interval] - verify against the
  # training script before changing.
  eval_batch_step:
    - 0
    - 19
  cal_metric_during_train: false
  save_inference_dir: null
  use_visualdl: false
  seed: 2022
  # Relative path, unlike the absolute dataset paths used elsewhere here.
  infer_img: ppstructure/docs/kie/input/zh_val_42.jpg
  d2s_train_image_shape:
    - 3
    - 224
    - 224
  # NOTE(review): directory name (ser_layoutxlm_...) differs from
  # save_model_dir (ser_vi_layoutxlm_...) - confirm this is intended.
  save_res_path: /content/PaddleOCR/output/ser_layoutxlm_xfund_zh/res
  kie_rec_model_dir: null
  kie_det_model_dir: null
  amp_custom_white_list:
    - scale
    - concat
    - elementwise_add

Architecture:
  model_type: kie
  algorithm: LayoutXLM
  Transform: null
  Backbone:
    name: LayoutXLMForSer
    pretrained: true
    checkpoints: null
    mode: vi
    # Same value as Loss.num_classes; presumably derived from the class list
    # at class_path - keep the two in sync and verify when the list changes.
    num_classes: 79

Loss:
  name: VQASerTokenLayoutLMLoss
  # Same value as Architecture.Backbone.num_classes.
  num_classes: 79
  key: backbone_out

Optimizer:
  name: AdamW
  beta1: 0.9
  beta2: 0.999
  lr:
    name: Linear
    learning_rate: 5.0e-05
    # Same value as Global.epoch_num.
    epochs: 200
    warmup_epoch: 2
  regularizer:
    name: L2
    factor: 0.0

PostProcess:
  name: VQASerTokenLayoutLMPostProcess
  # Same class list file as the VQATokenLabelEncode transforms in Train/Eval.
  class_path: /content/PaddleOCR/dataset/class_list_jmfunsd.txt

Metric:
  name: VQASerTokenMetric
  main_indicator: hmean

Train:
  dataset:
    name: SimpleDataSet
    data_dir: /content/PaddleOCR/dataset/training_data/images
    label_file_list:
      - /content/PaddleOCR/dataset/train.json
    ratio_list:
      - 1.0
    transforms:
      - DecodeImage:
          img_mode: RGB
          channel_first: false
      - VQATokenLabelEncode:
          contains_re: false
          algorithm: LayoutXLM
          class_path: /content/PaddleOCR/dataset/class_list_jmfunsd.txt
          use_textline_bbox_info: true
          order_method: tb-yx
      - VQATokenPad:
          max_seq_len: 512
          return_attention_mask: true
      # max_seq_len matches VQATokenPad above.
      - VQASerTokenChunk:
          max_seq_len: 512
      # 224 x 224 matches the last two entries of Global.d2s_train_image_shape.
      - Resize:
          size:
            - 224
            - 224
      - NormalizeImage:
          scale: 1
          mean:
            - 123.675
            - 116.28
            - 103.53
          std:
            - 58.395
            - 57.12
            - 57.375
          order: hwc
      - ToCHWImage: null
      - KeepKeys:
          keep_keys:
            - input_ids
            - bbox
            - attention_mask
            - token_type_ids
            - image
            - labels
  loader:
    shuffle: true
    drop_last: false
    batch_size_per_card: 8
    num_workers: 4

# The Eval transforms list is identical to the Train one; only the data
# paths, the absence of ratio_list, and loader.shuffle differ.
Eval:
  dataset:
    name: SimpleDataSet
    data_dir: /content/PaddleOCR/dataset/testing_data/images
    label_file_list:
      - /content/PaddleOCR/dataset/test.json
    transforms:
      - DecodeImage:
          img_mode: RGB
          channel_first: false
      - VQATokenLabelEncode:
          contains_re: false
          algorithm: LayoutXLM
          class_path: /content/PaddleOCR/dataset/class_list_jmfunsd.txt
          use_textline_bbox_info: true
          order_method: tb-yx
      - VQATokenPad:
          max_seq_len: 512
          return_attention_mask: true
      - VQASerTokenChunk:
          max_seq_len: 512
      - Resize:
          size:
            - 224
            - 224
      - NormalizeImage:
          scale: 1
          mean:
            - 123.675
            - 116.28
            - 103.53
          std:
            - 58.395
            - 57.12
            - 57.375
          order: hwc
      - ToCHWImage: null
      - KeepKeys:
          keep_keys:
            - input_ids
            - bbox
            - attention_mask
            - token_type_ids
            - image
            - labels
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 8
    num_workers: 4

profiler_options: null