dse-qwen2-2b-mrl-v1 / results.json
MrLight's picture
Update results.json
c6b9166 verified
raw
history blame
16.3 kB
{"vidore/arxivqa_test_subsampled": {"ndcg_at_1": 0.794, "ndcg_at_3": 0.84612, "ndcg_at_5": 0.85584, "ndcg_at_10": 0.86884, "ndcg_at_20": 0.87552, "ndcg_at_100": 0.88006, "ndcg_at_1000": 0.88061, "map_at_1": 0.794, "map_at_3": 0.83367, "map_at_5": 0.83897, "map_at_10": 0.84437, "map_at_20": 0.84626, "map_at_100": 0.84692, "map_at_1000": 0.84695, "recall_at_1": 0.794, "recall_at_3": 0.882, "recall_at_5": 0.906, "recall_at_10": 0.946, "recall_at_20": 0.972, "recall_at_100": 0.996, "recall_at_1000": 1.0, "precision_at_1": 0.794, "precision_at_3": 0.294, "precision_at_5": 0.1812, "precision_at_10": 0.0946, "precision_at_20": 0.0486, "precision_at_100": 0.00996, "precision_at_1000": 0.001, "mrr_at_1": 0.794, "mrr_at_3": 0.8333333333333334, "mrr_at_5": 0.8385333333333332, "mrr_at_10": 0.8440206349206347, "mrr_at_20": 0.8457624946948473, "mrr_at_100": 0.8464318523346234, "mrr_at_1000": 0.8464586755347477, "naucs_at_1_max": 0.0264204919913496, "naucs_at_1_std": -0.4432218159450506, "naucs_at_1_diff1": 0.8754399753955507, "naucs_at_3_max": -0.08298212469117683, "naucs_at_3_std": -0.5738264787094874, "naucs_at_3_diff1": 0.8379192300860663, "naucs_at_5_max": -0.07192522399030574, "naucs_at_5_std": -0.5486421518962216, "naucs_at_5_diff1": 0.8177285098436528, "naucs_at_10_max": -0.30577169139260696, "naucs_at_10_std": -0.7531728740879055, "naucs_at_10_diff1": 0.8244112459798737, "naucs_at_20_max": -0.2807456315859826, "naucs_at_20_std": -0.4205348806189298, "naucs_at_20_diff1": 0.882953181272511, "naucs_at_100_max": -1.7399626517274025, "naucs_at_100_std": 0.34897292250231704, "naucs_at_100_diff1": 0.8611111111111168, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "vidore/docvqa_test_subsampled": {"ndcg_at_1": 0.48337, "ndcg_at_3": 0.55403, "ndcg_at_5": 0.5713, "ndcg_at_10": 0.60081, "ndcg_at_20": 0.61311, "ndcg_at_100": 0.63676, "ndcg_at_1000": 0.64891, "map_at_1": 0.48337, "map_at_3": 0.53769, "map_at_5": 0.54723, "map_at_10": 0.55947, "map_at_20": 0.56283, "map_at_100": 0.56619, "map_at_1000": 0.56674, "recall_at_1": 0.48337, "recall_at_3": 0.60089, "recall_at_5": 0.64302, "recall_at_10": 0.73392, "recall_at_20": 0.78271, "recall_at_100": 0.90909, "recall_at_1000": 1.0, "precision_at_1": 0.48337, "precision_at_3": 0.2003, "precision_at_5": 0.1286, "precision_at_10": 0.07339, "precision_at_20": 0.03914, "precision_at_100": 0.00909, "precision_at_1000": 0.001, "mrr_at_1": 0.4878048780487805, "mrr_at_3": 0.5395417590539542, "mrr_at_5": 0.5487435328898742, "mrr_at_10": 0.5610539189807481, "mrr_at_20": 0.5648169761114504, "mrr_at_100": 0.5681408169734422, "mrr_at_1000": 0.5686621426973844, "naucs_at_1_max": -0.5212133012346877, "naucs_at_1_std": -0.09288577096523737, "naucs_at_1_diff1": 0.7503022682010474, "naucs_at_3_max": -0.6565880303875717, "naucs_at_3_std": -0.07460405832901369, "naucs_at_3_diff1": 0.6561082577501688, "naucs_at_5_max": -0.5943855392066872, "naucs_at_5_std": -0.05220343159264611, "naucs_at_5_diff1": 0.6238399743739096, "naucs_at_10_max": -0.6195908132701516, "naucs_at_10_std": 0.13900687126802166, "naucs_at_10_diff1": 0.558265195499329, "naucs_at_20_max": -0.6615869284465803, "naucs_at_20_std": 0.1387067044872028, "naucs_at_20_diff1": 0.5554270724438797, "naucs_at_100_max": -0.7684732285094673, "naucs_at_100_std": 0.45832384986805724, "naucs_at_100_diff1": 0.4903408194281384, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "vidore/infovqa_test_subsampled": {"ndcg_at_1": 0.81377, "ndcg_at_3": 0.86726, "ndcg_at_5": 0.88059, "ndcg_at_10": 0.89071, "ndcg_at_20": 0.89184, "ndcg_at_100": 0.89493, "ndcg_at_1000": 0.89651, "map_at_1": 0.81377, "map_at_3": 0.85425, "map_at_5": 0.86164, "map_at_10": 0.866, "map_at_20": 0.86637, "map_at_100": 0.86683, "map_at_1000": 0.86689, "recall_at_1": 0.81377, "recall_at_3": 0.90486, "recall_at_5": 0.93725, "recall_at_10": 0.96761, "recall_at_20": 0.97166, "recall_at_100": 0.98785, "recall_at_1000": 1.0, "precision_at_1": 0.81377, "precision_at_3": 0.30162, "precision_at_5": 0.18745, "precision_at_10": 0.09676, "precision_at_20": 0.04858, "precision_at_100": 0.00988, "precision_at_1000": 0.001, "mrr_at_1": 0.8137651821862348, "mrr_at_3": 0.8542510121457486, "mrr_at_5": 0.8617408906882587, "mrr_at_10": 0.866114002956108, "mrr_at_20": 0.8665832705306387, "mrr_at_100": 0.8669491238036686, "mrr_at_1000": 0.8670130381350266, "naucs_at_1_max": 0.08332768086513993, "naucs_at_1_std": -0.16693401490742565, "naucs_at_1_diff1": 0.9069340613475471, "naucs_at_3_max": 0.0011369168562785203, "naucs_at_3_std": -0.14976111225508304, "naucs_at_3_diff1": 0.8389370588773137, "naucs_at_5_max": -0.0749515972070914, "naucs_at_5_std": -0.08214044161222527, "naucs_at_5_diff1": 0.8372514892403252, "naucs_at_10_max": -0.1502345681826907, "naucs_at_10_std": 0.5637465376812698, "naucs_at_10_diff1": 0.7969878779414252, "naucs_at_20_max": -0.20287146290299748, "naucs_at_20_std": 0.6550388680835327, "naucs_at_20_diff1": 0.7878243417729163, "naucs_at_100_max": -0.6623458938523301, "naucs_at_100_std": 0.7639533363794858, "naucs_at_100_diff1": 0.8604151369738356, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "vidore/tabfquad_test_subsampled": {"ndcg_at_1": 0.875, "ndcg_at_3": 0.92487, "ndcg_at_5": 0.93087, "ndcg_at_10": 0.93426, "ndcg_at_20": 0.93601, "ndcg_at_100": 0.93739, "ndcg_at_1000": 0.93739, "map_at_1": 0.875, "map_at_3": 0.9125, "map_at_5": 0.91589, "map_at_10": 0.91725, "map_at_20": 0.91769, "map_at_100": 0.91791, "map_at_1000": 0.91791, "recall_at_1": 0.875, "recall_at_3": 0.96071, "recall_at_5": 0.975, "recall_at_10": 0.98571, "recall_at_20": 0.99286, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.875, "precision_at_3": 0.32024, "precision_at_5": 0.195, "precision_at_10": 0.09857, "precision_at_20": 0.04964, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.8678571428571429, "mrr_at_3": 0.9101190476190477, "mrr_at_5": 0.9135119047619048, "mrr_at_10": 0.9148653628117914, "mrr_at_20": 0.9153266723356009, "mrr_at_100": 0.9155495426855899, "mrr_at_1000": 0.9155495426855899, "naucs_at_1_max": 0.6341186138885083, "naucs_at_1_std": -0.33273524174770575, "naucs_at_1_diff1": 0.8733735104780164, "naucs_at_3_max": 0.7835922247686966, "naucs_at_3_std": -0.27565571683218926, "naucs_at_3_diff1": 0.8767082590612036, "naucs_at_5_max": 0.8832866479925309, "naucs_at_5_std": -0.041149793250630305, "naucs_at_5_diff1": 0.885620915032675, "naucs_at_10_max": 0.8978758169934754, "naucs_at_10_std": 0.022175536881420224, "naucs_at_10_diff1": 0.8692810457516408, "naucs_at_20_max": 0.8611111111111035, "naucs_at_20_std": 0.41433239962653884, "naucs_at_20_diff1": 0.8692810457516309, "naucs_at_100_max": 1.0, "naucs_at_100_std": 1.0, "naucs_at_100_diff1": 1.0, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/tatdqa_test": {"ndcg_at_1": 0.54192, "ndcg_at_3": 0.65843, "ndcg_at_5": 0.69378, "ndcg_at_10": 0.71916, "ndcg_at_20": 0.73058, "ndcg_at_100": 0.73824, "ndcg_at_1000": 0.73959, "map_at_1": 0.54192, "map_at_3": 0.63001, "map_at_5": 0.64967, "map_at_10": 0.66004, "map_at_20": 0.6632, "map_at_100": 0.66435, "map_at_1000": 0.66442, "recall_at_1": 0.54192, "recall_at_3": 0.74058, "recall_at_5": 0.82625, "recall_at_10": 0.90522, "recall_at_20": 0.95018, "recall_at_100": 0.99028, "recall_at_1000": 1.0, "precision_at_1": 0.54192, "precision_at_3": 0.24686, "precision_at_5": 0.16525, "precision_at_10": 0.09052, "precision_at_20": 0.04751, "precision_at_100": 0.0099, "precision_at_1000": 0.001, "mrr_at_1": 0.540097205346294, "mrr_at_3": 0.6291008505467812, "mrr_at_5": 0.6485722964763079, "mrr_at_10": 0.6587733128893529, "mrr_at_20": 0.6620815452578555, "mrr_at_100": 0.6632579676072735, "mrr_at_1000": 0.6633336632341336, "naucs_at_1_max": -0.058378738250013384, "naucs_at_1_std": -0.22221425489777502, "naucs_at_1_diff1": 0.7291748780182709, "naucs_at_3_max": -0.07682812054313384, "naucs_at_3_std": -0.24718525281474638, "naucs_at_3_diff1": 0.6115470895642744, "naucs_at_5_max": -0.07888770379939952, "naucs_at_5_std": -0.19457655229648088, "naucs_at_5_diff1": 0.5914062739597068, "naucs_at_10_max": 0.0029404715257986057, "naucs_at_10_std": -0.05548744311414097, "naucs_at_10_diff1": 0.5356727090689206, "naucs_at_20_max": -0.004305759775130589, "naucs_at_20_std": 0.2822864075893588, "naucs_at_20_diff1": 0.5157529604916903, "naucs_at_100_max": -0.04774510029921774, "naucs_at_100_std": 0.8056862206490103, "naucs_at_100_diff1": 0.5126658383723813, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/shiftproject_test": {"ndcg_at_1": 0.68, "ndcg_at_3": 0.79595, "ndcg_at_5": 0.8196, "ndcg_at_10": 0.83596, "ndcg_at_20": 0.83866, "ndcg_at_100": 0.84072, "ndcg_at_1000": 0.84072, "map_at_1": 0.68, "map_at_3": 0.77, "map_at_5": 0.7825, "map_at_10": 0.78935, "map_at_20": 0.79018, "map_at_100": 0.79054, "map_at_1000": 0.79054, "recall_at_1": 0.68, "recall_at_3": 0.87, "recall_at_5": 0.93, "recall_at_10": 0.98, "recall_at_20": 0.99, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.68, "precision_at_3": 0.29, "precision_at_5": 0.186, "precision_at_10": 0.098, "precision_at_20": 0.0495, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.7, "mrr_at_3": 0.775, "mrr_at_5": 0.7915000000000001, "mrr_at_10": 0.7985238095238096, "mrr_at_20": 0.799357142857143, "mrr_at_100": 0.7997142857142859, "mrr_at_1000": 0.7997142857142859, "naucs_at_1_max": 0.2058945477161214, "naucs_at_1_std": -0.3255476470181746, "naucs_at_1_diff1": 0.6658489392578257, "naucs_at_3_max": 0.26312671258238784, "naucs_at_3_std": -0.1116418573650315, "naucs_at_3_diff1": 0.5204769310523577, "naucs_at_5_max": 0.26463918900894084, "naucs_at_5_std": 0.08563425370148234, "naucs_at_5_diff1": 0.3650126717353644, "naucs_at_10_max": 0.7957516339869297, "naucs_at_10_std": 0.12278244631185727, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 0.7222222222222276, "naucs_at_20_std": 0.12278244631185926, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/syntheticDocQA_artificial_intelligence_test": {"ndcg_at_1": 0.94, "ndcg_at_3": 0.97024, "ndcg_at_5": 0.97454, "ndcg_at_10": 0.97454, "ndcg_at_20": 0.97454, "ndcg_at_100": 0.97454, "ndcg_at_1000": 0.97454, "map_at_1": 0.94, "map_at_3": 0.96333, "map_at_5": 0.96583, "map_at_10": 0.96583, "map_at_20": 0.96583, "map_at_100": 0.96583, "map_at_1000": 0.96583, "recall_at_1": 0.94, "recall_at_3": 0.99, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.94, "precision_at_3": 0.33, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.94, "mrr_at_3": 0.9633333333333333, "mrr_at_5": 0.9658333333333333, "mrr_at_10": 0.9658333333333333, "mrr_at_20": 0.9658333333333333, "mrr_at_100": 0.9658333333333333, "mrr_at_1000": 0.9658333333333333, "naucs_at_1_max": 0.28003423591658866, "naucs_at_1_std": -0.3201058201058171, "naucs_at_1_diff1": 0.9564270152505444, "naucs_at_3_max": 1.0, "naucs_at_3_std": -0.5634920634921204, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/syntheticDocQA_energy_test": {"ndcg_at_1": 0.87, "ndcg_at_3": 0.91655, "ndcg_at_5": 0.92859, "ndcg_at_10": 0.9316, "ndcg_at_20": 0.9316, "ndcg_at_100": 0.93378, "ndcg_at_1000": 0.93378, "map_at_1": 0.87, "map_at_3": 0.905, "map_at_5": 0.9115, "map_at_10": 0.91261, "map_at_20": 0.91261, "map_at_100": 0.91305, "map_at_1000": 0.91305, "recall_at_1": 0.87, "recall_at_3": 0.95, "recall_at_5": 0.98, "recall_at_10": 0.99, "recall_at_20": 0.99, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.87, "precision_at_3": 0.31667, "precision_at_5": 0.196, "precision_at_10": 0.099, "precision_at_20": 0.0495, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.87, "mrr_at_3": 0.91, "mrr_at_5": 0.9139999999999999, "mrr_at_10": 0.9152499999999999, "mrr_at_20": 0.9152499999999999, "mrr_at_100": 0.9157045454545454, "mrr_at_1000": 0.9157045454545454, "naucs_at_1_max": 0.2051766274161294, "naucs_at_1_std": -0.443160779086129, "naucs_at_1_diff1": 0.9218692142486855, "naucs_at_3_max": -0.14724556489262278, "naucs_at_3_std": -0.7007469654528471, "naucs_at_3_diff1": 0.9477124183006521, "naucs_at_5_max": -0.661531279178339, "naucs_at_5_std": -0.9556489262371534, "naucs_at_5_diff1": 0.9346405228758136, "naucs_at_10_max": -1.1517273576097316, "naucs_at_10_std": -1.7399626517273863, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": -1.1517273576097316, "naucs_at_20_std": -1.7399626517273863, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/syntheticDocQA_government_reports_test": {"ndcg_at_1": 0.92, "ndcg_at_3": 0.95655, "ndcg_at_5": 0.96042, "ndcg_at_10": 0.96398, "ndcg_at_20": 0.96398, "ndcg_at_100": 0.96398, "ndcg_at_1000": 0.96398, "map_at_1": 0.92, "map_at_3": 0.94833, "map_at_5": 0.95033, "map_at_10": 0.952, "map_at_20": 0.952, "map_at_100": 0.952, "map_at_1000": 0.952, "recall_at_1": 0.92, "recall_at_3": 0.98, "recall_at_5": 0.99, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.92, "precision_at_3": 0.32667, "precision_at_5": 0.198, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.94, "mrr_at_3": 0.9583333333333333, "mrr_at_5": 0.9603333333333333, "mrr_at_10": 0.9620000000000001, "mrr_at_20": 0.9620000000000001, "mrr_at_100": 0.9620000000000001, "mrr_at_1000": 0.9620000000000001, "naucs_at_1_max": 0.8082983193277313, "naucs_at_1_std": -0.12400793650793779, "naucs_at_1_diff1": 0.9325980392156855, "naucs_at_3_max": 0.7770774976657261, "naucs_at_3_std": -0.9556489262371661, "naucs_at_3_diff1": 0.8611111111111119, "naucs_at_5_max": 0.5541549953314738, "naucs_at_5_std": -1.7399626517273863, "naucs_at_5_diff1": 0.7222222222222276, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/syntheticDocQA_healthcare_industry_test": {"ndcg_at_1": 0.91, "ndcg_at_3": 0.96417, "ndcg_at_5": 0.96417, "ndcg_at_10": 0.96417, "ndcg_at_20": 0.96417, "ndcg_at_100": 0.96417, "ndcg_at_1000": 0.96417, "map_at_1": 0.91, "map_at_3": 0.95167, "map_at_5": 0.95167, "map_at_10": 0.95167, "map_at_20": 0.95167, "map_at_100": 0.95167, "map_at_1000": 0.95167, "recall_at_1": 0.91, "recall_at_3": 1.0, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.91, "precision_at_3": 0.33333, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.92, "mrr_at_3": 0.9583333333333335, "mrr_at_5": 0.9583333333333335, "mrr_at_10": 0.9583333333333335, "mrr_at_20": 0.9583333333333335, "mrr_at_100": 0.9583333333333335, "mrr_at_1000": 0.9583333333333335, "naucs_at_1_max": 0.6672891378773725, "naucs_at_1_std": -0.03304284676833719, "naucs_at_1_diff1": 0.9400871459694983, "naucs_at_3_max": 1.0, "naucs_at_3_std": 1.0, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}}