# temp_paddle_model / config.yml
# JoheunKang's picture
# Upload folder using huggingface_hub
# f2f1526 verified
# Run-wide settings: device, schedule length, logging cadence, output paths.
# Every key below belongs to the Global mapping and is therefore indented
# under it; at column 0 each one would become a separate top-level entry.
Global:
  use_gpu: true
  # Same value as Optimizer.lr.epochs (200).
  epoch_num: 200
  log_smooth_window: 10
  print_batch_step: 10
  save_model_dir: ./output/ser_vi_layoutxlm_xfund_zh
  # NOTE(review): 2000 exceeds epoch_num (200). If this is an epoch interval
  # for periodic checkpoints, none would be written -- confirm it is intended.
  save_epoch_step: 2000
  # NOTE(review): presumably [start_iteration, interval] -- confirm against
  # the trainer that consumes this pair.
  eval_batch_step:
    - 0
    - 19
  cal_metric_during_train: false
  save_inference_dir: null
  use_visualdl: false
  seed: 2022
  infer_img: ppstructure/docs/kie/input/zh_val_42.jpg
  # The trailing 224 x 224 matches the Resize size used in the Train and Eval
  # transforms. NOTE(review): presumably (channels, height, width) -- confirm.
  d2s_train_image_shape:
    - 3
    - 224
    - 224
  # NOTE(review): absolute path under ser_layoutxlm_xfund_zh, whereas
  # save_model_dir is relative and named ser_vi_layoutxlm_xfund_zh -- confirm
  # the two different output directories are intentional.
  save_res_path: /content/PaddleOCR/output/ser_layoutxlm_xfund_zh/res
  kie_rec_model_dir: null
  kie_det_model_dir: null
  amp_custom_white_list:
    - scale
    - concat
    - elementwise_add
# Model definition: model_type kie, algorithm LayoutXLM, Transform left null.
# Backbone is a nested mapping; its keys must sit one level deeper than
# Backbone itself so they do not collide with keys of other sections.
Architecture:
  model_type: kie
  algorithm: LayoutXLM
  Transform: null
  Backbone:
    name: LayoutXLMForSer
    pretrained: true
    checkpoints: null
    mode: vi
    # Same value as Loss.num_classes (79).
    num_classes: 79
# Loss configuration; its keys are nested under Loss.
Loss:
  name: VQASerTokenLayoutLMLoss
  # Same value as Architecture.Backbone.num_classes (79).
  num_classes: 79
  key: backbone_out
# Optimizer configuration. lr and regularizer are nested mappings, each with
# its own name key; flattening them would leave three conflicting name keys.
Optimizer:
  name: AdamW
  beta1: 0.9
  beta2: 0.999
  lr:
    name: Linear
    # Written with a decimal point and a signed exponent, so it is read as a
    # float by both YAML 1.1 and YAML 1.2 parsers.
    learning_rate: 5.0e-05
    # Same value as Global.epoch_num (200).
    epochs: 200
    warmup_epoch: 2
  regularizer:
    name: L2
    factor: 0.0
# Post-processing configuration; its keys are nested under PostProcess.
PostProcess:
  name: VQASerTokenLayoutLMPostProcess
  # Same file as the class_path given to VQATokenLabelEncode in the Train and
  # Eval transforms.
  class_path: /content/PaddleOCR/dataset/class_list_jmfunsd.txt
# Evaluation metric configuration; its keys are nested under Metric.
Metric:
  name: VQASerTokenMetric
  main_indicator: hmean
# Training data configuration: dataset source, the list of per-sample
# transforms, and loader settings. dataset and loader are sibling mappings
# under Train; each transform is a single-key mapping whose value holds that
# transform's arguments (or null when it takes none).
Train:
  dataset:
    name: SimpleDataSet
    data_dir: /content/PaddleOCR/dataset/training_data/images
    label_file_list:
      - /content/PaddleOCR/dataset/train.json
    # One ratio entry for the one label file listed above.
    ratio_list:
      - 1.0
    transforms:
      - DecodeImage:
          img_mode: RGB
          channel_first: false
      - VQATokenLabelEncode:
          contains_re: false
          algorithm: LayoutXLM
          # Same file as PostProcess.class_path.
          class_path: /content/PaddleOCR/dataset/class_list_jmfunsd.txt
          use_textline_bbox_info: true
          order_method: tb-yx
      - VQATokenPad:
          # Same value as VQASerTokenChunk.max_seq_len below (512).
          max_seq_len: 512
          return_attention_mask: true
      - VQASerTokenChunk:
          max_seq_len: 512
      - Resize:
          # Matches the trailing 224 x 224 of Global.d2s_train_image_shape.
          size:
            - 224
            - 224
      - NormalizeImage:
          # NOTE(review): scale is 1 and mean/std are on a 0-255 magnitude,
          # so pixels are presumably not rescaled to [0, 1] first -- confirm.
          scale: 1
          mean:
            - 123.675
            - 116.28
            - 103.53
          std:
            - 58.395
            - 57.12
            - 57.375
          order: hwc
      - ToCHWImage: null
      - KeepKeys:
          keep_keys:
            - input_ids
            - bbox
            - attention_mask
            - token_type_ids
            - image
            - labels
  loader:
    shuffle: true
    drop_last: false
    batch_size_per_card: 8
    num_workers: 4
# Evaluation data configuration. The transform list is identical to the one
# under Train; the differences are the data_dir / label file, the absence of
# ratio_list, and shuffle being false.
Eval:
  dataset:
    name: SimpleDataSet
    data_dir: /content/PaddleOCR/dataset/testing_data/images
    label_file_list:
      - /content/PaddleOCR/dataset/test.json
    transforms:
      - DecodeImage:
          img_mode: RGB
          channel_first: false
      - VQATokenLabelEncode:
          contains_re: false
          algorithm: LayoutXLM
          # Same file as PostProcess.class_path.
          class_path: /content/PaddleOCR/dataset/class_list_jmfunsd.txt
          use_textline_bbox_info: true
          order_method: tb-yx
      - VQATokenPad:
          # Same value as VQASerTokenChunk.max_seq_len below (512).
          max_seq_len: 512
          return_attention_mask: true
      - VQASerTokenChunk:
          max_seq_len: 512
      - Resize:
          # Matches the trailing 224 x 224 of Global.d2s_train_image_shape.
          size:
            - 224
            - 224
      - NormalizeImage:
          # NOTE(review): scale is 1 and mean/std are on a 0-255 magnitude,
          # so pixels are presumably not rescaled to [0, 1] first -- confirm.
          scale: 1
          mean:
            - 123.675
            - 116.28
            - 103.53
          std:
            - 58.395
            - 57.12
            - 57.375
          order: hwc
      - ToCHWImage: null
      - KeepKeys:
          keep_keys:
            - input_ids
            - bbox
            - attention_mask
            - token_type_ids
            - image
            - labels
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 8
    num_workers: 4
# Top-level key (not part of Eval); profiler options are unset (null) here.
profiler_options: null