h2ovl-mississippi-benchmarks / 0926-OCRBench-opensource.csv
Shanshan Wang
updated default view and the model name
5c51e60
raw
history blame
1.71 kB
Rank,Model,Param (B),Language Model,Vision Model,OCRBench,Text Recognition,Scene Text-centric VQA,Document Oriented VQA,KIE,Handwritten Math Expression Recognition,
,minicpm_v2.6,8,Qwen2-7B,SigLIP-400M,836,259,184,169,182,42,
,MiniMonkey,2.2,,,792,250,178,126,171,67,
7,GLM-4v-9B,9,glm-4-9b,EVA-02-5B,,,,,,,
8,CogVLM2-19B-Chat,19,Llama-3-8B-Instruct,EVA2-CLIP-E,,,,,,,
5,InternVL2-4B,4,Phi-3,InternViT-300M,785,236,170,154,158,67,
10,MiniCPM-Llama3-V2.5,8,Llama-3-8B-Instruct,SigLIP-400M,725,221,171,125,155,53,
11,InternVL-Chat-V1.5,26,InternLM2-20B,InternViT-6B,722,236,181,149,153,3,
6,InternVL2-2B,2,InternLM2-1.8B,InternViT-300M,785,246,170,133,167,69,
,H2OVL-Mississippi-2B,2,H2O-Danube2 1.8B,InternViT-300M-448,782,252,171,140,166,53,
9,InternVL2-1B,0.8,Qwen2-0.5B,InternViT-300M,755,242,164,127,150,72,
17,WeMM,7,InternLM2-7B,SigLIP-400M,,,,,,,
18,IDEFICS2-8B,8,Mistral-7B,SigLIP-400M,,,,,,,
19,Cambrian-8B,8,Vicuna-v1.5-7B,CLIP ViT-L/14,,,,,,,
,H2OVL-Mississippi-0.8B,0.8,H2O-Danube3 0.5B,InternViT-300M-448,751,274,162,112,152,51,
14,Mini-InternVL-Chat-2B-V1.5,2,InternLM2-1.8B,InternViT-300M,652,222,161,126,139,4,
15,Mini-InternVL-Chat-4B-V1.5,4,Phi-3,InternViT-300M,640,193,160,146,135,6,
20,PaliGemma-3B-mix-448,3,Gemma-2B,SigLIP-400M,613,242,165,88,118,0,
21,MiniCPM-V-2,2.8,MiniCPM-2.4B,SigLIP-400M,596,243,168,100,85,0,
21,MiniCPM-V,3,MiniCPM-2.4B,SigLIP-400M,596,243,168,100,85,0,
16,Phi-3-Vision,4.2,Phi-3,CLIP ViT-L/14,640,196,159,137,148,0,
,Internvl2-26B,26,,,823,251,184,153,168,67,
,Qwen2-VL-2B-Instruct,2.1,Qwen2-1.5B,ViT-600M,812,265,172,146,174,55,
,GOT-OCR2.0,0.6,,,622,245,99,83,164,31,
,doctr-default,0.05,,,,177,,,,,
,doctr-v2m(best),0.05,,,,256,,,,,