wanderkid committed on Oct 11

Commit

2279299

•

1 Parent(s): c2a8540

Add PDF-Extract-Kit-1.0 models

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +2 -0
README.md +26 -3
models/Layout/LayoutLMv3/config.json +33 -0
models/Layout/LayoutLMv3/model_final.pth +3 -0
models/Layout/YOLO/yolov10l_ft.pt +3 -0
models/MFD/YOLO/yolo_v8_ft.pt +3 -0
models/MFR/UniMERNet/README.md +6 -0
models/MFR/UniMERNet/config.json +193 -0
models/MFR/UniMERNet/preprocessor_config.json +36 -0
models/MFR/UniMERNet/pytorch_model.bin +3 -0
models/MFR/UniMERNet/tokenizer.json +0 -0
models/MFR/UniMERNet/tokenizer_config.json +205 -0
models/MFR/unimernet_base/.mdl +0 -0
models/MFR/unimernet_base/.msc +0 -0
models/MFR/unimernet_base/.mv +1 -0
models/MFR/unimernet_base/README.md +48 -0
models/MFR/unimernet_base/config.json +193 -0
models/MFR/unimernet_base/configuration.json +1 -0
models/MFR/unimernet_base/preprocessor_config.json +36 -0
models/MFR/unimernet_base/pytorch_model.pth +3 -0
models/MFR/unimernet_base/tokenizer.json +0 -0
models/MFR/unimernet_base/tokenizer_config.json +205 -0
models/MFR/unimernet_base/unimernet_base.yaml +46 -0
models/MFR/unimernet_small/.mdl +0 -0
models/MFR/unimernet_small/.msc +0 -0
models/MFR/unimernet_small/.mv +1 -0
models/MFR/unimernet_small/README.md +47 -0
models/MFR/unimernet_small/config.json +193 -0
models/MFR/unimernet_small/configuration.json +1 -0
models/MFR/unimernet_small/preprocessor_config.json +36 -0
models/MFR/unimernet_small/pytorch_model.pth +3 -0
models/MFR/unimernet_small/tokenizer.json +0 -0
models/MFR/unimernet_small/tokenizer_config.json +205 -0
models/MFR/unimernet_small/unimernet_small.yaml +46 -0
models/MFR/unimernet_tiny/.mdl +0 -0
models/MFR/unimernet_tiny/.msc +0 -0
models/MFR/unimernet_tiny/.mv +1 -0
models/MFR/unimernet_tiny/README.md +48 -0
models/MFR/unimernet_tiny/config.json +193 -0
models/MFR/unimernet_tiny/configuration.json +1 -0
models/MFR/unimernet_tiny/preprocessor_config.json +36 -0
models/MFR/unimernet_tiny/pytorch_model.pth +3 -0
models/MFR/unimernet_tiny/tokenizer.json +0 -0
models/MFR/unimernet_tiny/tokenizer_config.json +205 -0
models/MFR/unimernet_tiny/unimernet_tiny.yaml +46 -0
models/README.md +75 -0
models/TabRec/StructEqTable/config.json +36 -0
models/TabRec/StructEqTable/generation_config.json +8 -0
models/TabRec/StructEqTable/model.safetensors +3 -0
models/TabRec/StructEqTable/preprocessor_config.json +12 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.pdiparams filter=lfs diff=lfs merge=lfs -text
+*.pdmodel filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,3 +1,26 @@
----
-license: apache-2.0
----

+[MinerU](https://github.com/opendatalab/MinerU)项目中使用的模型，欢迎下载使用。
+模型使用请参考[PDF-Extract-Kit](https://github.com/opendatalab/PDF-Extract-Kit)项目。
+### SDK Download
+```bash
+# First, install the ModelScope library using pip:
+pip install modelscope
+```
+```python
+# Use the following Python code to download the model using the ModelScope SDK:
+from modelscope import snapshot_download
+model_dir = snapshot_download('opendatalab/PDF-Extract-Kit')
+```
+### Git Download
+Alternatively, you can use Git to clone the model repository from ModelScope:
+```bash
+git clone https://www.modelscope.cn/opendatalab/PDF-Extract-Kit.git
+```
+---
+license: apache-2.0
+---

models/Layout/LayoutLMv3/config.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "coordinate_size": 128,
+  "eos_token_id": 2,
+  "has_relative_attention_bias": true,
+  "has_spatial_attention_bias": true,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "input_size": 224,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "max_2d_position_embeddings": 1024,
+  "max_position_embeddings": 514,
+  "max_rel_2d_pos": 256,
+  "max_rel_pos": 128,
+  "model_type": "layoutlmv3",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "rel_2d_pos_bins": 64,
+  "rel_pos_bins": 32,
+  "second_input_size": 112,
+  "shape_size": 128,
+  "torch_dtype": "float32",
+  "transformers_version": "4.12.5",
+  "type_vocab_size": 1,
+  "visual_embed": true,
+  "vocab_size": 250002
+}

models/Layout/LayoutLMv3/model_final.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cd2402ed4dd01de36c659a1e7dad35541adb2265df8599f0a071b1cb66bbbc5c
+size 564052519

models/Layout/YOLO/yolov10l_ft.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fad2c4c27428e5525460321a099327bb9810880934ee9e3e1a68d25d22887cd6
+size 52275810

models/MFD/YOLO/yolo_v8_ft.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:41029d5abb9b0b6df825ecaf8adf9151762ea0688ac0fc4ea0ea34ab9a5808fc
+size 349867002

models/MFR/UniMERNet/README.md ADDED Viewed

	@@ -0,0 +1,6 @@

+---
+license: apache-2.0
+---
+UniMERNet: A Universal Network for Mathematical Expression Recognition in Real-World Scenarios.
+Visit our GitHub repository at [unimernet](https://github.com/opendatalab/unimernet) for more information.

models/MFR/UniMERNet/config.json ADDED Viewed

	@@ -0,0 +1,193 @@

+{
+  "_name_or_path": "unimernet/checkpoint-180000",
+  "architectures": [
+    "VisionEncoderDecoderModel"
+  ],
+  "decoder": {
+    "_name_or_path": "",
+    "activation_dropout": 0.0,
+    "activation_function": "gelu",
+    "add_cross_attention": true,
+    "add_final_layer_norm": true,
+    "architectures": null,
+    "attention_dropout": 0.0,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": 0,
+    "chunk_size_feed_forward": 0,
+    "classifier_dropout": 0.0,
+    "cross_attention_hidden_size": null,
+    "d_model": 1024,
+    "decoder_attention_heads": 16,
+    "decoder_ffn_dim": 4096,
+    "decoder_layerdrop": 0.0,
+    "decoder_layers": 8,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "dropout": 0.1,
+    "early_stopping": false,
+    "encoder_attention_heads": 16,
+    "encoder_ffn_dim": 4096,
+    "encoder_layerdrop": 0.0,
+    "encoder_layers": 12,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": 2,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": 2,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "init_std": 0.02,
+    "is_decoder": true,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "max_position_embeddings": 1536,
+    "min_length": 0,
+    "model_type": "mbart",
+    "no_repeat_ngram_size": 0,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_hidden_layers": 12,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": 1,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "scale_embedding": true,
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": false,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "typical_p": 1.0,
+    "use_bfloat16": false,
+    "use_cache": true,
+    "vocab_size": 50000
+  },
+  "decoder_start_token_id": 0,
+  "encoder": {
+    "_name_or_path": "",
+    "add_cross_attention": false,
+    "architectures": null,
+    "attention_probs_dropout_prob": 0.0,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": null,
+    "chunk_size_feed_forward": 0,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "depths": [
+      2,
+      2,
+      14,
+      2
+    ],
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "drop_path_rate": 0.1,
+    "early_stopping": false,
+    "embed_dim": 128,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": null,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "hidden_act": "gelu",
+    "hidden_dropout_prob": 0.0,
+    "hidden_size": 1024,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "image_size": [
+      420,
+      420
+    ],
+    "initializer_range": 0.02,
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "layer_norm_eps": 1e-05,
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "min_length": 0,
+    "mlp_ratio": 4.0,
+    "model_type": "donut-swin",
+    "no_repeat_ngram_size": 0,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_channels": 3,
+    "num_heads": [
+      4,
+      8,
+      16,
+      32
+    ],
+    "num_layers": 4,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": null,
+    "patch_size": 4,
+    "path_norm": true,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "qkv_bias": true,
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "typical_p": 1.0,
+    "use_2d_embeddings": false,
+    "use_absolute_embeddings": false,
+    "use_bfloat16": false,
+    "window_size": 5
+  },
+  "is_encoder_decoder": true,
+  "model_type": "vision-encoder-decoder",
+  "pad_token_id": 1,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.40.0"
+}

models/MFR/UniMERNet/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "do_align_long_axis": false,
+  "do_normalize": false,
+  "do_pad": false,
+  "do_rescale": false,
+  "do_resize": false,
+  "do_thumbnail": false,
+  "feature_extractor_type": "DonutFeatureExtractor",
+  "image_mean": [
+    0.485,
+    0.456,
+    0.406
+  ],
+  "image_processor_type": "VariableDonutImageProcessor",
+  "image_std": [
+    0.229,
+    0.224,
+    0.225
+  ],
+  "max_size": {
+    "height": 192,
+    "width": 672
+  },
+  "patch_size": [
+    4,
+    4
+  ],
+  "processor_class": "VariableDonutProcessor",
+  "resample": 2,
+  "rescale_factor": 0.00392156862745098,
+  "size": [
+    192,
+    672
+  ],
+  "train": false
+}

models/MFR/UniMERNet/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6c80486e05b8cfbb48324a8802a2909221d219dd46aa6a936b92f2225555935e
+size 3750208149

models/MFR/UniMERNet/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

models/MFR/UniMERNet/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,205 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "[START_REF]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "5": {
+      "content": "[END_REF]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "6": {
+      "content": "[IMAGE]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "7": {
+      "content": "<fragments>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "8": {
+      "content": "</fragments>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "9": {
+      "content": "<work>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "10": {
+      "content": "</work>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "11": {
+      "content": "[START_SUP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "12": {
+      "content": "[END_SUP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "13": {
+      "content": "[START_SUB]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "14": {
+      "content": "[END_SUB]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "15": {
+      "content": "[START_DNA]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "16": {
+      "content": "[END_DNA]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "17": {
+      "content": "[START_AMINO]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "18": {
+      "content": "[END_AMINO]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "19": {
+      "content": "[START_SMILES]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "20": {
+      "content": "[END_SMILES]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "21": {
+      "content": "[START_I_SMILES]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "22": {
+      "content": "[END_I_SMILES]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [],
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "max_length": 4096,
+  "model_max_length": 768,
+  "pad_to_multiple_of": null,
+  "pad_token": "<pad>",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "processor_class": "VariableDonutProcessor",
+  "stride": 0,
+  "tokenizer_class": "NougatTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "<unk>",
+  "vocab_file": null
+}

models/MFR/unimernet_base/.mdl ADDED Viewed

Binary file (47 Bytes). View file

models/MFR/unimernet_base/.msc ADDED Viewed

Binary file (523 Bytes). View file

models/MFR/unimernet_base/.mv ADDED Viewed

	@@ -0,0 +1 @@


1	+ Revision:master,CreatedAt:1725608506

models/MFR/unimernet_base/README.md ADDED Viewed

	@@ -0,0 +1,48 @@

+---
+license: apache-2.0
+---
+## UniMERNet: A Universal Network for Mathematical Expression Recognition in Real-World Scenarios.
+Visit our GitHub repository at [UniMERNet](https://github.com/opendatalab/unimernet) for more information.
+## 引用
+```
+@misc{wang2024unimernetuniversalnetworkrealworld,
+      title={UniMERNet: A Universal Network for Real-World Mathematical Expression Recognition},
+      author={Bin Wang and Zhuangcheng Gu and Guang Liang and Chao Xu and Bo Zhang and Botian Shi and Conghui He},
+      year={2024},
+      eprint={2404.15254},
+      archivePrefix={arXiv},
+      primaryClass={cs.CV},
+      url={https://arxiv.org/abs/2404.15254},
+}
+@misc{wang2024cdmreliablemetricfair,
+      title={CDM: A Reliable Metric for Fair and Accurate Formula Recognition Evaluation},
+      author={Bin Wang and Fan Wu and Linke Ouyang and Zhuangcheng Gu and Rui Zhang and Renqiu Xia and Bo Zhang and Conghui He},
+      year={2024},
+      eprint={2409.03643},
+      archivePrefix={arXiv},
+      primaryClass={cs.CV},
+      url={https://arxiv.org/abs/2409.03643},
+}
+@misc{he2024opendatalabempoweringgeneralartificial,
+      title={OpenDataLab: Empowering General Artificial Intelligence with Open Datasets},
+      author={Conghui He and Wei Li and Zhenjiang Jin and Chao Xu and Bin Wang and Dahua Lin},
+      year={2024},
+      eprint={2407.13773},
+      archivePrefix={arXiv},
+      primaryClass={cs.DL},
+      url={https://arxiv.org/abs/2407.13773},
+}
+```
+## MD5 checksums
+```
+97f4867b4ff4e9a96c8daba8aaa793b4  tokenizer_config.json
+351652071425d3d36a634ccc8efb22e8  tokenizer.json
+ff4391872dad6688f21ed140009d817b  pytorch_model.pth
+```

models/MFR/unimernet_base/config.json ADDED Viewed

	@@ -0,0 +1,193 @@

+{
+  "_name_or_path": "unimernet/checkpoint-300000",
+  "architectures": [
+    "VisionEncoderDecoderModel"
+  ],
+  "decoder": {
+    "_name_or_path": "",
+    "activation_dropout": 0.0,
+    "activation_function": "gelu",
+    "add_cross_attention": true,
+    "add_final_layer_norm": true,
+    "architectures": null,
+    "attention_dropout": 0.0,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": 0,
+    "chunk_size_feed_forward": 0,
+    "classifier_dropout": 0.0,
+    "cross_attention_hidden_size": null,
+    "d_model": 1024,
+    "decoder_attention_heads": 16,
+    "decoder_ffn_dim": 4096,
+    "decoder_layerdrop": 0.0,
+    "decoder_layers": 8,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "dropout": 0.1,
+    "early_stopping": false,
+    "encoder_attention_heads": 16,
+    "encoder_ffn_dim": 4096,
+    "encoder_layerdrop": 0.0,
+    "encoder_layers": 12,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": 2,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": 2,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "init_std": 0.02,
+    "is_decoder": true,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "max_position_embeddings": 1536,
+    "min_length": 0,
+    "model_type": "mbart",
+    "no_repeat_ngram_size": 0,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_hidden_layers": 12,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": 1,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "scale_embedding": true,
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": false,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "typical_p": 1.0,
+    "use_bfloat16": false,
+    "use_cache": true,
+    "vocab_size": 50000
+  },
+  "decoder_start_token_id": 0,
+  "encoder": {
+    "_name_or_path": "",
+    "add_cross_attention": false,
+    "architectures": null,
+    "attention_probs_dropout_prob": 0.0,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": null,
+    "chunk_size_feed_forward": 0,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "depths": [
+      6,
+      6,
+      6,
+      6
+    ],
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "drop_path_rate": 0.1,
+    "early_stopping": false,
+    "embed_dim": 128,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": null,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "hidden_act": "gelu",
+    "hidden_dropout_prob": 0.0,
+    "hidden_size": 1024,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "image_size": [
+      420,
+      420
+    ],
+    "initializer_range": 0.02,
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "layer_norm_eps": 1e-05,
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "min_length": 0,
+    "mlp_ratio": 4.0,
+    "model_type": "donut-swin",
+    "no_repeat_ngram_size": 0,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_channels": 3,
+    "num_heads": [
+      4,
+      8,
+      16,
+      32
+    ],
+    "num_layers": 4,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": null,
+    "patch_size": 4,
+    "path_norm": true,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "qkv_bias": true,
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "typical_p": 1.0,
+    "use_2d_embeddings": false,
+    "use_absolute_embeddings": false,
+    "use_bfloat16": false,
+    "window_size": 5
+  },
+  "is_encoder_decoder": true,
+  "model_type": "vision-encoder-decoder",
+  "pad_token_id": 1,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float16",
+  "transformers_version": "4.36.0"
+}

models/MFR/unimernet_base/configuration.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"framework":"Pytorch","task":"ocr-recognition"}

models/MFR/unimernet_base/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "do_align_long_axis": false,
+  "do_normalize": false,
+  "do_pad": false,
+  "do_rescale": false,
+  "do_resize": false,
+  "do_thumbnail": false,
+  "feature_extractor_type": "DonutFeatureExtractor",
+  "image_mean": [
+    0.485,
+    0.456,
+    0.406
+  ],
+  "image_processor_type": "VariableDonutImageProcessor",
+  "image_std": [
+    0.229,
+    0.224,
+    0.225
+  ],
+  "max_size": {
+    "height": 192,
+    "width": 672
+  },
+  "patch_size": [
+    4,
+    4
+  ],
+  "processor_class": "VariableDonutProcessor",
+  "resample": 2,
+  "rescale_factor": 0.00392156862745098,
+  "size": [
+    192,
+    672
+  ],
+  "train": false
+}

models/MFR/unimernet_base/pytorch_model.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:16cd0891233cfee3c11215a7b87306f160f7e7f3f52091a6253751c149a8c180
+size 1300760949

models/MFR/unimernet_base/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

models/MFR/unimernet_base/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,205 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "[START_REF]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "5": {
+      "content": "[END_REF]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "6": {
+      "content": "[IMAGE]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "7": {
+      "content": "<fragments>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "8": {
+      "content": "</fragments>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "9": {
+      "content": "<work>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "10": {
+      "content": "</work>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "11": {
+      "content": "[START_SUP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "12": {
+      "content": "[END_SUP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "13": {
+      "content": "[START_SUB]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "14": {
+      "content": "[END_SUB]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "15": {
+      "content": "[START_DNA]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "16": {
+      "content": "[END_DNA]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "17": {
+      "content": "[START_AMINO]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "18": {
+      "content": "[END_AMINO]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "19": {
+      "content": "[START_SMILES]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "20": {
+      "content": "[END_SMILES]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "21": {
+      "content": "[START_I_SMILES]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "22": {
+      "content": "[END_I_SMILES]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [],
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "max_length": 4096,
+  "model_max_length": 768,
+  "pad_to_multiple_of": null,
+  "pad_token": "<pad>",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "processor_class": "VariableDonutProcessor",
+  "stride": 0,
+  "tokenizer_class": "NougatTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "<unk>",
+  "vocab_file": null
+}

models/MFR/unimernet_base/unimernet_base.yaml ADDED Viewed

	@@ -0,0 +1,46 @@

+model:
+  arch: unimernet
+  model_type: unimernet
+  model_config:
+    model_name: ./models/unimernet_base
+    max_seq_len: 1536
+  load_pretrained: True
+  pretrained: './models/unimernet_base/pytorch_model.pth'
+  tokenizer_config:
+    path: ./models/unimernet_base
+datasets:
+  formula_rec_eval:
+    vis_processor:
+      eval:
+        name: "formula_image_eval"
+        image_size:
+          - 192
+          - 672
+run:
+  runner: runner_iter
+  task: unimernet_train
+  batch_size_train: 64
+  batch_size_eval: 64
+  num_workers: 1
+  iters_per_inner_epoch: 2000
+  max_iters: 60000
+  seed: 42
+  output_dir: "../output/demo"
+  evaluate: True
+  test_splits: [ "eval" ]
+  device: "cuda"
+  world_size: 1
+  dist_url: "env://"
+  distributed: True
+  distributed_type: ddp  # or fsdp when training an LLM
+  generate_cfg:
+    temperature: 0.0

models/MFR/unimernet_small/.mdl ADDED Viewed

Binary file (48 Bytes). View file

models/MFR/unimernet_small/.msc ADDED Viewed

Binary file (524 Bytes). View file

models/MFR/unimernet_small/.mv ADDED Viewed

	@@ -0,0 +1 @@


1	+ Revision:master,CreatedAt:1725608428

models/MFR/unimernet_small/README.md ADDED Viewed

	@@ -0,0 +1,47 @@

+---
+license: apache-2.0
+---
+## UniMERNet: A Universal Network for Mathematical Expression Recognition in Real-World Scenarios.
+Visit our GitHub repository at [UniMERNet](https://github.com/opendatalab/unimernet) for more information.
+## 引用
+```
+@misc{wang2024unimernetuniversalnetworkrealworld,
+      title={UniMERNet: A Universal Network for Real-World Mathematical Expression Recognition},
+      author={Bin Wang and Zhuangcheng Gu and Guang Liang and Chao Xu and Bo Zhang and Botian Shi and Conghui He},
+      year={2024},
+      eprint={2404.15254},
+      archivePrefix={arXiv},
+      primaryClass={cs.CV},
+      url={https://arxiv.org/abs/2404.15254},
+}
+@misc{wang2024cdmreliablemetricfair,
+      title={CDM: A Reliable Metric for Fair and Accurate Formula Recognition Evaluation},
+      author={Bin Wang and Fan Wu and Linke Ouyang and Zhuangcheng Gu and Rui Zhang and Renqiu Xia and Bo Zhang and Conghui He},
+      year={2024},
+      eprint={2409.03643},
+      archivePrefix={arXiv},
+      primaryClass={cs.CV},
+      url={https://arxiv.org/abs/2409.03643},
+}
+@misc{he2024opendatalabempoweringgeneralartificial,
+      title={OpenDataLab: Empowering General Artificial Intelligence with Open Datasets},
+      author={Conghui He and Wei Li and Zhenjiang Jin and Chao Xu and Bin Wang and Dahua Lin},
+      year={2024},
+      eprint={2407.13773},
+      archivePrefix={arXiv},
+      primaryClass={cs.DL},
+      url={https://arxiv.org/abs/2407.13773},
+}
+```
+## MD5 checksums
+```
+97f4867b4ff4e9a96c8daba8aaa793b4  tokenizer_config.json
+351652071425d3d36a634ccc8efb22e8  tokenizer.json
+430e426354e71624fb096c5c7ad90a78  pytorch_model.pth
+```

models/MFR/unimernet_small/config.json ADDED Viewed

	@@ -0,0 +1,193 @@

+{
+  "_name_or_path": "unimernet/checkpoint-300000",
+  "architectures": [
+    "VisionEncoderDecoderModel"
+  ],
+  "decoder": {
+    "_name_or_path": "",
+    "activation_dropout": 0.0,
+    "activation_function": "gelu",
+    "add_cross_attention": true,
+    "add_final_layer_norm": true,
+    "architectures": null,
+    "attention_dropout": 0.0,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": 0,
+    "chunk_size_feed_forward": 0,
+    "classifier_dropout": 0.0,
+    "cross_attention_hidden_size": null,
+    "d_model": 768,
+    "decoder_attention_heads": 16,
+    "decoder_ffn_dim": 3072,
+    "decoder_layerdrop": 0.0,
+    "decoder_layers": 8,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "dropout": 0.1,
+    "early_stopping": false,
+    "encoder_attention_heads": 16,
+    "encoder_ffn_dim": 3072,
+    "encoder_layerdrop": 0.0,
+    "encoder_layers": 12,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": 2,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": 2,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "init_std": 0.02,
+    "is_decoder": true,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "max_position_embeddings": 1536,
+    "min_length": 0,
+    "model_type": "mbart",
+    "no_repeat_ngram_size": 0,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_hidden_layers": 12,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": 1,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "scale_embedding": true,
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": false,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "typical_p": 1.0,
+    "use_bfloat16": false,
+    "use_cache": true,
+    "vocab_size": 50000
+  },
+  "decoder_start_token_id": 0,
+  "encoder": {
+    "_name_or_path": "",
+    "add_cross_attention": false,
+    "architectures": null,
+    "attention_probs_dropout_prob": 0.0,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": null,
+    "chunk_size_feed_forward": 0,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "depths": [
+      6,
+      6,
+      6,
+      6
+    ],
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "drop_path_rate": 0.1,
+    "early_stopping": false,
+    "embed_dim": 96,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": null,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "hidden_act": "gelu",
+    "hidden_dropout_prob": 0.0,
+    "hidden_size": 768,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "image_size": [
+      420,
+      420
+    ],
+    "initializer_range": 0.02,
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "layer_norm_eps": 1e-05,
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "min_length": 0,
+    "mlp_ratio": 4.0,
+    "model_type": "donut-swin",
+    "no_repeat_ngram_size": 0,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_channels": 3,
+    "num_heads": [
+      3,
+      6,
+      12,
+      24
+    ],
+    "num_layers": 4,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": null,
+    "patch_size": 4,
+    "path_norm": true,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "qkv_bias": true,
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "typical_p": 1.0,
+    "use_2d_embeddings": false,
+    "use_absolute_embeddings": false,
+    "use_bfloat16": false,
+    "window_size": 5
+  },
+  "is_encoder_decoder": true,
+  "model_type": "vision-encoder-decoder",
+  "pad_token_id": 1,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float16",
+  "transformers_version": "4.36.0"
+}

models/MFR/unimernet_small/configuration.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"framework":"Pytorch","task":"ocr-recognition"}

models/MFR/unimernet_small/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "do_align_long_axis": false,
+  "do_normalize": false,
+  "do_pad": false,
+  "do_rescale": false,
+  "do_resize": false,
+  "do_thumbnail": false,
+  "feature_extractor_type": "DonutFeatureExtractor",
+  "image_mean": [
+    0.485,
+    0.456,
+    0.406
+  ],
+  "image_processor_type": "VariableDonutImageProcessor",
+  "image_std": [
+    0.229,
+    0.224,
+    0.225
+  ],
+  "max_size": {
+    "height": 192,
+    "width": 672
+  },
+  "patch_size": [
+    4,
+    4
+  ],
+  "processor_class": "VariableDonutProcessor",
+  "resample": 2,
+  "rescale_factor": 0.00392156862745098,
+  "size": [
+    192,
+    672
+  ],
+  "train": false
+}

models/MFR/unimernet_small/pytorch_model.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fa54b0a8126bb60060bc90818ce20a5ca1b5dd5d7da5c0983579f5c3a2cc90ea
+size 810284404

models/MFR/unimernet_small/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

models/MFR/unimernet_small/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,205 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "[START_REF]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "5": {
+      "content": "[END_REF]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "6": {
+      "content": "[IMAGE]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "7": {
+      "content": "<fragments>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "8": {
+      "content": "</fragments>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "9": {
+      "content": "<work>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "10": {
+      "content": "</work>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "11": {
+      "content": "[START_SUP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "12": {
+      "content": "[END_SUP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "13": {
+      "content": "[START_SUB]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "14": {
+      "content": "[END_SUB]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "15": {
+      "content": "[START_DNA]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "16": {
+      "content": "[END_DNA]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "17": {
+      "content": "[START_AMINO]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "18": {
+      "content": "[END_AMINO]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "19": {
+      "content": "[START_SMILES]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "20": {
+      "content": "[END_SMILES]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "21": {
+      "content": "[START_I_SMILES]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "22": {
+      "content": "[END_I_SMILES]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [],
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "max_length": 4096,
+  "model_max_length": 768,
+  "pad_to_multiple_of": null,
+  "pad_token": "<pad>",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "processor_class": "VariableDonutProcessor",
+  "stride": 0,
+  "tokenizer_class": "NougatTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "<unk>",
+  "vocab_file": null
+}

models/MFR/unimernet_small/unimernet_small.yaml ADDED Viewed

	@@ -0,0 +1,46 @@

+model:
+  arch: unimernet
+  model_type: unimernet
+  model_config:
+    model_name: ./models/unimernet_small
+    max_seq_len: 1536
+  load_pretrained: True
+  pretrained: './models/unimernet_small/pytorch_model.pth'
+  tokenizer_config:
+    path: ./models/unimernet_small
+datasets:
+  formula_rec_eval:
+    vis_processor:
+      eval:
+        name: "formula_image_eval"
+        image_size:
+          - 192
+          - 672
+run:
+  runner: runner_iter
+  task: unimernet_train
+  batch_size_train: 64
+  batch_size_eval: 64
+  num_workers: 1
+  iters_per_inner_epoch: 2000
+  max_iters: 60000
+  seed: 42
+  output_dir: "../output/demo"
+  evaluate: True
+  test_splits: [ "eval" ]
+  device: "cuda"
+  world_size: 1
+  dist_url: "env://"
+  distributed: True
+  distributed_type: ddp  # or fsdp when training an LLM
+  generate_cfg:
+    temperature: 0.0

models/MFR/unimernet_tiny/.mdl ADDED Viewed

Binary file (47 Bytes). View file

models/MFR/unimernet_tiny/.msc ADDED Viewed

Binary file (523 Bytes). View file

models/MFR/unimernet_tiny/.mv ADDED Viewed

	@@ -0,0 +1 @@


1	+ Revision:master,CreatedAt:1725608470

models/MFR/unimernet_tiny/README.md ADDED Viewed

	@@ -0,0 +1,48 @@

+---
+license: apache-2.0
+---
+## UniMERNet: A Universal Network for Mathematical Expression Recognition in Real-World Scenarios.
+Visit our GitHub repository at [UniMERNet](https://github.com/opendatalab/unimernet) for more information.
+## Citation
+```
+@misc{wang2024unimernetuniversalnetworkrealworld,
+      title={UniMERNet: A Universal Network for Real-World Mathematical Expression Recognition},
+      author={Bin Wang and Zhuangcheng Gu and Guang Liang and Chao Xu and Bo Zhang and Botian Shi and Conghui He},
+      year={2024},
+      eprint={2404.15254},
+      archivePrefix={arXiv},
+      primaryClass={cs.CV},
+      url={https://arxiv.org/abs/2404.15254},
+}
+@misc{wang2024cdmreliablemetricfair,
+      title={CDM: A Reliable Metric for Fair and Accurate Formula Recognition Evaluation},
+      author={Bin Wang and Fan Wu and Linke Ouyang and Zhuangcheng Gu and Rui Zhang and Renqiu Xia and Bo Zhang and Conghui He},
+      year={2024},
+      eprint={2409.03643},
+      archivePrefix={arXiv},
+      primaryClass={cs.CV},
+      url={https://arxiv.org/abs/2409.03643},
+}
+@misc{he2024opendatalabempoweringgeneralartificial,
+      title={OpenDataLab: Empowering General Artificial Intelligence with Open Datasets},
+      author={Conghui He and Wei Li and Zhenjiang Jin and Chao Xu and Bin Wang and Dahua Lin},
+      year={2024},
+      eprint={2407.13773},
+      archivePrefix={arXiv},
+      primaryClass={cs.DL},
+      url={https://arxiv.org/abs/2407.13773},
+}
+```
+## MD5 checksums
+```
+97f4867b4ff4e9a96c8daba8aaa793b4  tokenizer_config.json
+351652071425d3d36a634ccc8efb22e8  tokenizer.json
+72b53a2152af43a57f8d5eebf8e31562  pytorch_model.pth
+```

models/MFR/unimernet_tiny/config.json ADDED Viewed

	@@ -0,0 +1,193 @@

+{
+  "_name_or_path": "unimernet/checkpoint-300000",
+  "architectures": [
+    "VisionEncoderDecoderModel"
+  ],
+  "decoder": {
+    "_name_or_path": "",
+    "activation_dropout": 0.0,
+    "activation_function": "gelu",
+    "add_cross_attention": true,
+    "add_final_layer_norm": true,
+    "architectures": null,
+    "attention_dropout": 0.0,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": 0,
+    "chunk_size_feed_forward": 0,
+    "classifier_dropout": 0.0,
+    "cross_attention_hidden_size": null,
+    "d_model": 512,
+    "decoder_attention_heads": 16,
+    "decoder_ffn_dim": 2048,
+    "decoder_layerdrop": 0.0,
+    "decoder_layers": 8,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "dropout": 0.1,
+    "early_stopping": false,
+    "encoder_attention_heads": 16,
+    "encoder_ffn_dim": 2048,
+    "encoder_layerdrop": 0.0,
+    "encoder_layers": 12,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": 2,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": 2,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "init_std": 0.02,
+    "is_decoder": true,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "max_position_embeddings": 1536,
+    "min_length": 0,
+    "model_type": "mbart",
+    "no_repeat_ngram_size": 0,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_hidden_layers": 12,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": 1,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "scale_embedding": true,
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": false,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "typical_p": 1.0,
+    "use_bfloat16": false,
+    "use_cache": true,
+    "vocab_size": 50000
+  },
+  "decoder_start_token_id": 0,
+  "encoder": {
+    "_name_or_path": "",
+    "add_cross_attention": false,
+    "architectures": null,
+    "attention_probs_dropout_prob": 0.0,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": null,
+    "chunk_size_feed_forward": 0,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "depths": [
+      6,
+      6,
+      6,
+      6
+    ],
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "drop_path_rate": 0.1,
+    "early_stopping": false,
+    "embed_dim": 64,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": null,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "hidden_act": "gelu",
+    "hidden_dropout_prob": 0.0,
+    "hidden_size": 512,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "image_size": [
+      420,
+      420
+    ],
+    "initializer_range": 0.02,
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "layer_norm_eps": 1e-05,
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "min_length": 0,
+    "mlp_ratio": 4.0,
+    "model_type": "donut-swin",
+    "no_repeat_ngram_size": 0,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_channels": 3,
+    "num_heads": [
+      2,
+      4,
+      8,
+      16
+    ],
+    "num_layers": 4,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": null,
+    "patch_size": 4,
+    "path_norm": true,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "qkv_bias": true,
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "typical_p": 1.0,
+    "use_2d_embeddings": false,
+    "use_absolute_embeddings": false,
+    "use_bfloat16": false,
+    "window_size": 5
+  },
+  "is_encoder_decoder": true,
+  "model_type": "vision-encoder-decoder",
+  "pad_token_id": 1,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float16",
+  "transformers_version": "4.36.0"
+}

models/MFR/unimernet_tiny/configuration.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"framework":"Pytorch","task":"ocr-recognition"}

models/MFR/unimernet_tiny/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "do_align_long_axis": false,
+  "do_normalize": false,
+  "do_pad": false,
+  "do_rescale": false,
+  "do_resize": false,
+  "do_thumbnail": false,
+  "feature_extractor_type": "DonutFeatureExtractor",
+  "image_mean": [
+    0.485,
+    0.456,
+    0.406
+  ],
+  "image_processor_type": "VariableDonutImageProcessor",
+  "image_std": [
+    0.229,
+    0.224,
+    0.225
+  ],
+  "max_size": {
+    "height": 192,
+    "width": 672
+  },
+  "patch_size": [
+    4,
+    4
+  ],
+  "processor_class": "VariableDonutProcessor",
+  "resample": 2,
+  "rescale_factor": 0.00392156862745098,
+  "size": [
+    192,
+    672
+  ],
+  "train": false
+}

models/MFR/unimernet_tiny/pytorch_model.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6f7608624e2d7549c7f0f05fcfbe073ae521328cf70f1d46374d96f9881d7371
+size 430075701

models/MFR/unimernet_tiny/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

models/MFR/unimernet_tiny/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,205 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "[START_REF]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "5": {
+      "content": "[END_REF]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "6": {
+      "content": "[IMAGE]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "7": {
+      "content": "<fragments>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "8": {
+      "content": "</fragments>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "9": {
+      "content": "<work>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "10": {
+      "content": "</work>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "11": {
+      "content": "[START_SUP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "12": {
+      "content": "[END_SUP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "13": {
+      "content": "[START_SUB]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "14": {
+      "content": "[END_SUB]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "15": {
+      "content": "[START_DNA]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "16": {
+      "content": "[END_DNA]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "17": {
+      "content": "[START_AMINO]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "18": {
+      "content": "[END_AMINO]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "19": {
+      "content": "[START_SMILES]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "20": {
+      "content": "[END_SMILES]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "21": {
+      "content": "[START_I_SMILES]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "22": {
+      "content": "[END_I_SMILES]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [],
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "max_length": 4096,
+  "model_max_length": 768,
+  "pad_to_multiple_of": null,
+  "pad_token": "<pad>",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "processor_class": "VariableDonutProcessor",
+  "stride": 0,
+  "tokenizer_class": "NougatTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "<unk>",
+  "vocab_file": null
+}

models/MFR/unimernet_tiny/unimernet_tiny.yaml ADDED Viewed

	@@ -0,0 +1,46 @@

+model:
+  arch: unimernet
+  model_type: unimernet
+  model_config:
+    model_name: ./models/unimernet_tiny
+    max_seq_len: 1536
+  load_pretrained: True
+  pretrained: './models/unimernet_tiny/pytorch_model.pth'
+  tokenizer_config:
+    path: ./models/unimernet_tiny
+datasets:
+  formula_rec_eval:
+    vis_processor:
+      eval:
+        name: "formula_image_eval"
+        image_size:
+          - 192
+          - 672
+run:
+  runner: runner_iter
+  task: unimernet_train
+  batch_size_train: 64
+  batch_size_eval: 64
+  num_workers: 1
+  iters_per_inner_epoch: 2000
+  max_iters: 60000
+  seed: 42
+  output_dir: "../output/demo"
+  evaluate: True
+  test_splits: [ "eval" ]
+  device: "cuda"
+  world_size: 1
+  dist_url: "env://"
+  distributed: True
+  distributed_type: ddp  # or fsdp when training an LLM
+  generate_cfg:
+    temperature: 0.0

models/README.md ADDED Viewed

	@@ -0,0 +1,75 @@

+Put the model files here:
+```
+models
+├── Layout
+│   ├── LayoutLMv3
+│   │   ├── config.json
+│   │   └── model_final.pth
+│   └── YOLO
+│       └── yolov10l_ft.pt
+├── MFD
+│   └── YOLO
+│       └── yolo_v8_ft.pt
+├── MFR
+│   ├── UniMERNet
+│   │   ├── README.md
+│   │   ├── config.json
+│   │   ├── preprocessor_config.json
+│   │   ├── pytorch_model.bin
+│   │   ├── tokenizer.json
+│   │   └── tokenizer_config.json
+│   ├── unimernet_base
+│   │   ├── README.md
+│   │   ├── config.json
+│   │   ├── configuration.json
+│   │   ├── preprocessor_config.json
+│   │   ├── pytorch_model.pth
+│   │   ├── tokenizer.json
+│   │   ├── tokenizer_config.json
+│   │   └── unimernet_base.yaml
+│   ├── unimernet_small
+│   │   ├── README.md
+│   │   ├── config.json
+│   │   ├── configuration.json
+│   │   ├── preprocessor_config.json
+│   │   ├── pytorch_model.pth
+│   │   ├── tokenizer.json
+│   │   ├── tokenizer_config.json
+│   │   └── unimernet_small.yaml
+│   └── unimernet_tiny
+│       ├── README.md
+│       ├── config.json
+│       ├── configuration.json
+│       ├── preprocessor_config.json
+│       ├── pytorch_model.pth
+│       ├── tokenizer.json
+│       ├── tokenizer_config.json
+│       └── unimernet_tiny.yaml
+├── README.md
+└── TabRec
+    ├── StructEqTable
+    │   ├── config.json
+    │   ├── generation_config.json
+    │   ├── model.safetensors
+    │   ├── preprocessor_config.json
+    │   ├── special_tokens_map.json
+    │   ├── spiece.model
+    │   ├── tokenizer.json
+    │   └── tokenizer_config.json
+    └── TableMaster
+        ├── ch_PP-OCRv3_det_infer
+        │   ├── inference.pdiparams
+        │   ├── inference.pdiparams.info
+        │   └── inference.pdmodel
+        ├── ch_PP-OCRv3_rec_infer
+        │   ├── inference.pdiparams
+        │   ├── inference.pdiparams.info
+        │   └── inference.pdmodel
+        ├── ppocr_keys_v1.txt
+        ├── table_master_structure_dict.txt
+        └── table_structure_tablemaster_infer
+            ├── inference.pdiparams
+            ├── inference.pdiparams.info
+            └── inference.pdmodel
+```

models/TabRec/StructEqTable/config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "_name_or_path": "/cpfs01/user/zhouhongbin/code/StructEqTable-deepspeed/ckpt/pretrained/pix2struct-base-zh",
+  "architectures": [
+    "Pix2StructForConditionalGeneration"
+  ],
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "initializer_factor": 1.0,
+  "initializer_range": 0.02,
+  "is_encoder_decoder": true,
+  "is_vqa": false,
+  "model_type": "pix2struct",
+  "pad_token_id": 0,
+  "text_config": {
+    "dropout_rate": 0.2,
+    "encoder_hidden_size": 768,
+    "initializer_range": 0.02,
+    "model_type": "pix2struct_text_model",
+    "vocab_size": 77078
+  },
+  "tie_word_embeddings": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.37.2",
+  "use_cache": false,
+  "vision_config": {
+    "attention_dropout": 0.2,
+    "dropout_rate": 0.2,
+    "hidden_dropout_prob": 0.2,
+    "initializer_range": 0.02,
+    "layer_norm_bias": false,
+    "model_type": "pix2struct_vision_model",
+    "num_channels": 3,
+    "patch_size": 16,
+    "projection_dim": 768
+  }
+}

models/TabRec/StructEqTable/generation_config.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.37.2",
+  "use_cache": false
+}

models/TabRec/StructEqTable/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:66baeb5e3b8e13f7e30cdf998c6724af7a97e8a2a3c78ece70666b6e2af278ce
+size 1294046176

models/TabRec/StructEqTable/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "image_processor_type": "Pix2StructImageProcessor",
+  "is_vqa": false,
+  "max_patches": 4096,
+  "patch_size": {
+    "height": 16,
+    "width": 16
+  },
+  "processor_class": "Pix2StructProcessor"
+}