{ "metadata": { "timestamp": "2025-01-30T14:08:26.760026", "vidore_benchmark_version": "4.0.3.dev20+g2d72668" }, "metrics": { "arxivqa_test_subsampled": { "ndcg_at_1": 0.83, "ndcg_at_3": 0.87855, "ndcg_at_5": 0.88517, "ndcg_at_10": 0.8947, "ndcg_at_20": 0.89981, "ndcg_at_50": 0.90189, "ndcg_at_100": 0.90285, "map_at_1": 0.83, "map_at_3": 0.86633, "map_at_5": 0.87003, "map_at_10": 0.87386, "map_at_20": 0.87529, "map_at_50": 0.87566, "map_at_100": 0.87574, "recall_at_1": 0.83, "recall_at_3": 0.914, "recall_at_5": 0.93, "recall_at_10": 0.96, "recall_at_20": 0.98, "recall_at_50": 0.99, "recall_at_100": 0.996, "precision_at_1": 0.83, "precision_at_3": 0.30467, "precision_at_5": 0.186, "precision_at_10": 0.096, "precision_at_20": 0.049, "precision_at_50": 0.0198, "precision_at_100": 0.00996, "mrr_at_1": 0.834, "mrr_at_3": 0.8696666666666666, "mrr_at_5": 0.8724666666666667, "mrr_at_10": 0.8768738095238096, "mrr_at_20": 0.8780439976689977, "mrr_at_50": 0.8784954053717398, "mrr_at_100": 0.8785747821152928, "naucs_at_1_max": 0.7814883612933191, "naucs_at_1_std": -0.014564346665118771, "naucs_at_1_diff1": 0.9431415800777861, "naucs_at_3_max": 0.7651293075369701, "naucs_at_3_std": 0.13162008989642363, "naucs_at_3_diff1": 0.876392417432091, "naucs_at_5_max": 0.7719887955182062, "naucs_at_5_std": 0.12731759370415013, "naucs_at_5_diff1": 0.8762838468720809, "naucs_at_10_max": 0.8234126984126983, "naucs_at_10_std": 0.03160597572361713, "naucs_at_10_diff1": 0.8921568627450972, "naucs_at_20_max": 0.8225957049486448, "naucs_at_20_std": -0.02110177404295123, "naucs_at_20_diff1": 0.9084967320261399, "naucs_at_50_max": 0.9738562091503188, "naucs_at_50_std": 0.3997198879551785, "naucs_at_50_diff1": 0.947712418300658, "naucs_at_100_max": 1.0, "naucs_at_100_std": 1.0, "naucs_at_100_diff1": 0.9346405228758466 }, "docvqa_test_subsampled": { "ndcg_at_1": 0.5255, "ndcg_at_3": 0.59582, "ndcg_at_5": 0.61852, "ndcg_at_10": 0.6347, "ndcg_at_20": 0.6492, "ndcg_at_50": 0.65706, "ndcg_at_100": 0.66361, "map_at_1": 0.5255, "map_at_3": 0.57871, "map_at_5": 0.59124, "map_at_10": 0.59815, "map_at_20": 0.6021, "map_at_50": 0.60333, "map_at_100": 0.60394, "recall_at_1": 0.5255, "recall_at_3": 0.64523, "recall_at_5": 0.70067, "recall_at_10": 0.74945, "recall_at_20": 0.8071, "recall_at_50": 0.84701, "recall_at_100": 0.88692, "precision_at_1": 0.5255, "precision_at_3": 0.21508, "precision_at_5": 0.14013, "precision_at_10": 0.07494, "precision_at_20": 0.04035, "precision_at_50": 0.01694, "precision_at_100": 0.00887, "mrr_at_1": 0.5254988913525499, "mrr_at_3": 0.5798226164079823, "mrr_at_5": 0.5909090909090909, "mrr_at_10": 0.5981936085594622, "mrr_at_20": 0.6018577072277826, "mrr_at_50": 0.603184631365057, "mrr_at_100": 0.6036950361798563, "naucs_at_1_max": 0.3862474572443775, "naucs_at_1_std": 0.6866980709008784, "naucs_at_1_diff1": 0.908866111398327, "naucs_at_3_max": 0.3232612877463118, "naucs_at_3_std": 0.7754733885431401, "naucs_at_3_diff1": 0.8488934990862631, "naucs_at_5_max": 0.26561580627789727, "naucs_at_5_std": 0.8294969802329596, "naucs_at_5_diff1": 0.843549438877749, "naucs_at_10_max": 0.21226897631460165, "naucs_at_10_std": 0.9004830786766392, "naucs_at_10_diff1": 0.8313389289681655, "naucs_at_20_max": 0.1442628756616487, "naucs_at_20_std": 0.9192991363569017, "naucs_at_20_diff1": 0.8284091873092273, "naucs_at_50_max": 0.04661475790032267, "naucs_at_50_std": 0.9356118889205624, "naucs_at_50_diff1": 0.8459735188892017, "naucs_at_100_max": -0.014764226166713695, "naucs_at_100_std": 0.9484296739649325, "naucs_at_100_diff1": 0.853153345512516 }, "syntheticDocQA_energy_test": { "ndcg_at_1": 0.95, "ndcg_at_3": 0.96131, "ndcg_at_5": 0.96131, "ndcg_at_10": 0.96487, "ndcg_at_20": 0.96757, "ndcg_at_50": 0.96952, "ndcg_at_100": 0.96952, "map_at_1": 0.95, "map_at_3": 0.95833, "map_at_5": 0.95833, "map_at_10": 0.96, "map_at_20": 0.96083, "map_at_50": 0.96113, "map_at_100": 0.96113, "recall_at_1": 0.95, "recall_at_3": 0.97, "recall_at_5": 0.97, "recall_at_10": 0.98, "recall_at_20": 0.99, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.95, "precision_at_3": 0.32333, "precision_at_5": 0.194, "precision_at_10": 0.098, "precision_at_20": 0.0495, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.95, "mrr_at_3": 0.9583333333333333, "mrr_at_5": 0.9603333333333333, "mrr_at_10": 0.9603333333333333, "mrr_at_20": 0.9612424242424242, "mrr_at_50": 0.9615549242424243, "mrr_at_100": 0.9615549242424243, "naucs_at_1_max": 0.5612511671335213, "naucs_at_1_std": -0.8497665732959815, "naucs_at_1_diff1": 0.9477124183006508, "naucs_at_3_max": 0.807812013694371, "naucs_at_3_std": -0.6484593837535022, "naucs_at_3_diff1": 0.9564270152505466, "naucs_at_5_max": 0.807812013694364, "naucs_at_5_std": -0.6484593837535011, "naucs_at_5_diff1": 0.9564270152505424, "naucs_at_10_max": 0.9346405228758136, "naucs_at_10_std": -0.10270774976657283, "naucs_at_10_diff1": 0.9346405228758136, "naucs_at_20_max": 0.8692810457516413, "naucs_at_20_std": -0.5634920634920767, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null }, "tatdqa_test": { "ndcg_at_1": 0.69502, "ndcg_at_3": 0.78919, "ndcg_at_5": 0.80833, "ndcg_at_10": 0.82129, "ndcg_at_20": 0.82519, "ndcg_at_50": 0.82963, "ndcg_at_100": 0.83159, "map_at_1": 0.69502, "map_at_3": 0.76701, "map_at_5": 0.77755, "map_at_10": 0.78302, "map_at_20": 0.7842, "map_at_50": 0.78491, "map_at_100": 0.78508, "recall_at_1": 0.69502, "recall_at_3": 0.85298, "recall_at_5": 0.89976, "recall_at_10": 0.93925, "recall_at_20": 0.95383, "recall_at_50": 0.97631, "recall_at_100": 0.98846, "precision_at_1": 0.69502, "precision_at_3": 0.28433, "precision_at_5": 0.17995, "precision_at_10": 0.09392, "precision_at_20": 0.04769, "precision_at_50": 0.01953, "precision_at_100": 0.00988, "mrr_at_1": 0.6950182260024301, "mrr_at_3": 0.7656946132037261, "mrr_at_5": 0.7770554880518429, "mrr_at_10": 0.7827320681208895, "mrr_at_20": 0.783837406855329, "mrr_at_50": 0.784510459247072, "mrr_at_100": 0.7846961562032659, "naucs_at_1_max": 0.2747145592665206, "naucs_at_1_std": -0.09557716491043182, "naucs_at_1_diff1": 0.8084384046681937, "naucs_at_3_max": 0.3330280541363154, "naucs_at_3_std": -0.0389562599865292, "naucs_at_3_diff1": 0.7240601862039753, "naucs_at_5_max": 0.352919194006459, "naucs_at_5_std": 0.0514244344972758, "naucs_at_5_diff1": 0.6583624352004738, "naucs_at_10_max": 0.43229730811413347, "naucs_at_10_std": 0.2815741957749473, "naucs_at_10_diff1": 0.6007108794001051, "naucs_at_20_max": 0.4241317350379727, "naucs_at_20_std": 0.2971486230084223, "naucs_at_20_diff1": 0.5874614197126973, "naucs_at_50_max": 0.5140385840003181, "naucs_at_50_std": 0.518664544400799, "naucs_at_50_diff1": 0.5976629425860989, "naucs_at_100_max": 0.4955447990957944, "naucs_at_100_std": 0.5556756488737464, "naucs_at_100_diff1": 0.6696787538618733 }, "infovqa_test_subsampled": { "ndcg_at_1": 0.88664, "ndcg_at_3": 0.91826, "ndcg_at_5": 0.92479, "ndcg_at_10": 0.93028, "ndcg_at_20": 0.93185, "ndcg_at_50": 0.93365, "ndcg_at_100": 0.93435, "map_at_1": 0.88664, "map_at_3": 0.91093, "map_at_5": 0.91447, "map_at_10": 0.91689, "map_at_20": 0.91735, "map_at_50": 0.91756, "map_at_100": 0.91763, "recall_at_1": 0.88664, "recall_at_3": 0.93927, "recall_at_5": 0.95547, "recall_at_10": 0.97166, "recall_at_20": 0.97773, "recall_at_50": 0.98785, "recall_at_100": 0.9919, "precision_at_1": 0.88664, "precision_at_3": 0.31309, "precision_at_5": 0.19109, "precision_at_10": 0.09717, "precision_at_20": 0.04889, "precision_at_50": 0.01976, "precision_at_100": 0.00992, "mrr_at_1": 0.8846153846153846, "mrr_at_3": 0.9092442645074225, "mrr_at_5": 0.9127867746288799, "mrr_at_10": 0.9150801683696421, "mrr_at_20": 0.9155341004025214, "mrr_at_50": 0.9157652608527469, "mrr_at_100": 0.9158356239262059, "naucs_at_1_max": 0.6077965938527005, "naucs_at_1_std": -0.034859963613718514, "naucs_at_1_diff1": 0.9382491207886892, "naucs_at_3_max": 0.7083430308645834, "naucs_at_3_std": 0.09173878193665044, "naucs_at_3_diff1": 0.941058293792039, "naucs_at_5_max": 0.828261285914772, "naucs_at_5_std": 0.25360809635439513, "naucs_at_5_diff1": 0.9374342352707031, "naucs_at_10_max": 0.8969865162710334, "naucs_at_10_std": 0.5367916133086486, "naucs_at_10_diff1": 0.9521757785748344, "naucs_at_20_max": 0.927259949634811, "naucs_at_20_std": 0.6312197158498455, "naucs_at_20_diff1": 0.9643814216187027, "naucs_at_50_max": 0.9129323639568211, "naucs_at_50_std": 0.5625809313845725, "naucs_at_50_diff1": 0.9564661819783937, "naucs_at_100_max": 0.9346992729676393, "naucs_at_100_std": 0.7480140525249407, "naucs_at_100_diff1": 0.9673496364838197 }, "syntheticDocQA_healthcare_industry_test": { "ndcg_at_1": 0.97, "ndcg_at_3": 0.98762, "ndcg_at_5": 0.98762, "ndcg_at_10": 0.98762, "ndcg_at_20": 0.98762, "ndcg_at_50": 0.98762, "ndcg_at_100": 0.98762, "map_at_1": 0.97, "map_at_3": 0.98333, "map_at_5": 0.98333, "map_at_10": 0.98333, "map_at_20": 0.98333, "map_at_50": 0.98333, "map_at_100": 0.98333, "recall_at_1": 0.97, "recall_at_3": 1.0, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.97, "precision_at_3": 0.33333, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.97, "mrr_at_3": 0.9833333333333333, "mrr_at_5": 0.9833333333333333, "mrr_at_10": 0.9833333333333333, "mrr_at_20": 0.9833333333333333, "mrr_at_50": 0.9833333333333333, "mrr_at_100": 0.9833333333333333, "naucs_at_1_max": 0.8202614379084989, "naucs_at_1_std": -0.2198879551820713, "naucs_at_1_diff1": 0.9564270152505465, "naucs_at_3_max": 1.0, "naucs_at_3_std": 1.0, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null }, "tabfquad_test_subsampled": { "ndcg_at_1": 0.83571, "ndcg_at_3": 0.88699, "ndcg_at_5": 0.89851, "ndcg_at_10": 0.90549, "ndcg_at_20": 0.91018, "ndcg_at_50": 0.9125, "ndcg_at_100": 0.9125, "map_at_1": 0.83571, "map_at_3": 0.875, "map_at_5": 0.88125, "map_at_10": 0.88416, "map_at_20": 0.88553, "map_at_50": 0.88598, "map_at_100": 0.88598, "recall_at_1": 0.83571, "recall_at_3": 0.92143, "recall_at_5": 0.95, "recall_at_10": 0.97143, "recall_at_20": 0.98929, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.83571, "precision_at_3": 0.30714, "precision_at_5": 0.19, "precision_at_10": 0.09714, "precision_at_20": 0.04946, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.8392857142857143, "mrr_at_3": 0.8767857142857143, "mrr_at_5": 0.8830357142857143, "mrr_at_10": 0.8858517573696145, "mrr_at_20": 0.8872690305726021, "mrr_at_50": 0.8877252707609851, "mrr_at_100": 0.8877252707609851, "naucs_at_1_max": 0.4880524536858767, "naucs_at_1_std": 0.1966050571171859, "naucs_at_1_diff1": 0.9065802421597565, "naucs_at_3_max": 0.5683515830574649, "naucs_at_3_std": 0.33952975129445734, "naucs_at_3_diff1": 0.8935786435786452, "naucs_at_5_max": 0.7507669734560504, "naucs_at_5_std": 0.5118714152327615, "naucs_at_5_diff1": 0.9109643857543043, "naucs_at_10_max": 0.8150093370681609, "naucs_at_10_std": 0.5556722689075682, "naucs_at_10_diff1": 0.934640522875815, "naucs_at_20_max": 0.9128540305011011, "naucs_at_20_std": 0.807812013694365, "naucs_at_20_diff1": 0.9564270152505505, "naucs_at_50_max": 1.0, "naucs_at_50_std": 1.0, "naucs_at_50_diff1": 1.0, "naucs_at_100_max": 1.0, "naucs_at_100_std": 1.0, "naucs_at_100_diff1": 1.0 }, "syntheticDocQA_government_reports_test": { "ndcg_at_1": 0.91, "ndcg_at_3": 0.94393, "ndcg_at_5": 0.95254, "ndcg_at_10": 0.9561, "ndcg_at_20": 0.9561, "ndcg_at_50": 0.9561, "ndcg_at_100": 0.9561, "map_at_1": 0.91, "map_at_3": 0.935, "map_at_5": 0.94, "map_at_10": 0.94167, "map_at_20": 0.94167, "map_at_50": 0.94167, "map_at_100": 0.94167, "recall_at_1": 0.91, "recall_at_3": 0.97, "recall_at_5": 0.99, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.91, "precision_at_3": 0.32333, "precision_at_5": 0.198, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.91, "mrr_at_3": 0.9383333333333332, "mrr_at_5": 0.9428333333333333, "mrr_at_10": 0.9428333333333333, "mrr_at_20": 0.9428333333333333, "mrr_at_50": 0.9428333333333333, "mrr_at_100": 0.9428333333333333, "naucs_at_1_max": 0.8246706089843343, "naucs_at_1_std": 0.4518103537711367, "naucs_at_1_diff1": 0.9564270152505436, "naucs_at_3_max": 0.7027699968876487, "naucs_at_3_std": 0.460939931528168, "naucs_at_3_diff1": 0.9564270152505466, "naucs_at_5_max": 0.5541549953314738, "naucs_at_5_std": -0.1713352007469681, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null }, "shiftproject_test": { "ndcg_at_1": 0.73, "ndcg_at_3": 0.85095, "ndcg_at_5": 0.85526, "ndcg_at_10": 0.86174, "ndcg_at_20": 0.86703, "ndcg_at_50": 0.87134, "ndcg_at_100": 0.87134, "map_at_1": 0.73, "map_at_3": 0.82333, "map_at_5": 0.82583, "map_at_10": 0.82851, "map_at_20": 0.83009, "map_at_50": 0.83092, "map_at_100": 0.83092, "recall_at_1": 0.73, "recall_at_3": 0.93, "recall_at_5": 0.94, "recall_at_10": 0.96, "recall_at_20": 0.98, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.73, "precision_at_3": 0.31, "precision_at_5": 0.188, "precision_at_10": 0.096, "precision_at_20": 0.049, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.75, "mrr_at_3": 0.8316666666666667, "mrr_at_5": 0.8341666666666667, "mrr_at_10": 0.8380238095238096, "mrr_at_20": 0.8386904761904762, "mrr_at_50": 0.8395600414078674, "mrr_at_100": 0.8395600414078674, "naucs_at_1_max": -0.0662092826672433, "naucs_at_1_std": -0.3356071477717276, "naucs_at_1_diff1": 0.7686999941024986, "naucs_at_3_max": -0.05822328931572497, "naucs_at_3_std": -0.2665732959850601, "naucs_at_3_diff1": 0.7789782579698524, "naucs_at_5_max": 0.22206660441954526, "naucs_at_5_std": -0.33146591970121353, "naucs_at_5_diff1": 0.8883442265795238, "naucs_at_10_max": 0.05240429505135564, "naucs_at_10_std": -0.4458450046685252, "naucs_at_10_diff1": 0.8651960784313721, "naucs_at_20_max": -0.5088702147525547, "naucs_at_20_std": -0.5929038281979383, "naucs_at_20_diff1": 0.9346405228758136, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null }, "syntheticDocQA_artificial_intelligence_test": { "ndcg_at_1": 0.98, "ndcg_at_3": 0.99262, "ndcg_at_5": 0.99262, "ndcg_at_10": 0.99262, "ndcg_at_20": 0.99262, "ndcg_at_50": 0.99262, "ndcg_at_100": 0.99262, "map_at_1": 0.98, "map_at_3": 0.99, "map_at_5": 0.99, "map_at_10": 0.99, "map_at_20": 0.99, "map_at_50": 0.99, "map_at_100": 0.99, "recall_at_1": 0.98, "recall_at_3": 1.0, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.98, "precision_at_3": 0.33333, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.98, "mrr_at_3": 0.99, "mrr_at_5": 0.99, "mrr_at_10": 0.99, "mrr_at_20": 0.99, "mrr_at_50": 0.99, "mrr_at_100": 0.99, "naucs_at_1_max": 0.3489729225023353, "naucs_at_1_std": -0.2987861811391249, "naucs_at_1_diff1": 1.0, "naucs_at_3_max": 1.0, "naucs_at_3_std": 1.0, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null } } }