|
{ |
|
"metadata": { |
|
"timestamp": "2025-01-30T14:08:26.760026", |
|
"vidore_benchmark_version": "4.0.3.dev20+g2d72668" |
|
}, |
|
"metrics": { |
|
"arxivqa_test_subsampled": { |
|
"ndcg_at_1": 0.83, |
|
"ndcg_at_3": 0.87855, |
|
"ndcg_at_5": 0.88517, |
|
"ndcg_at_10": 0.8947, |
|
"ndcg_at_20": 0.89981, |
|
"ndcg_at_50": 0.90189, |
|
"ndcg_at_100": 0.90285, |
|
"map_at_1": 0.83, |
|
"map_at_3": 0.86633, |
|
"map_at_5": 0.87003, |
|
"map_at_10": 0.87386, |
|
"map_at_20": 0.87529, |
|
"map_at_50": 0.87566, |
|
"map_at_100": 0.87574, |
|
"recall_at_1": 0.83, |
|
"recall_at_3": 0.914, |
|
"recall_at_5": 0.93, |
|
"recall_at_10": 0.96, |
|
"recall_at_20": 0.98, |
|
"recall_at_50": 0.99, |
|
"recall_at_100": 0.996, |
|
"precision_at_1": 0.83, |
|
"precision_at_3": 0.30467, |
|
"precision_at_5": 0.186, |
|
"precision_at_10": 0.096, |
|
"precision_at_20": 0.049, |
|
"precision_at_50": 0.0198, |
|
"precision_at_100": 0.00996, |
|
"mrr_at_1": 0.834, |
|
"mrr_at_3": 0.8696666666666666, |
|
"mrr_at_5": 0.8724666666666667, |
|
"mrr_at_10": 0.8768738095238096, |
|
"mrr_at_20": 0.8780439976689977, |
|
"mrr_at_50": 0.8784954053717398, |
|
"mrr_at_100": 0.8785747821152928, |
|
"naucs_at_1_max": 0.7814883612933191, |
|
"naucs_at_1_std": -0.014564346665118771, |
|
"naucs_at_1_diff1": 0.9431415800777861, |
|
"naucs_at_3_max": 0.7651293075369701, |
|
"naucs_at_3_std": 0.13162008989642363, |
|
"naucs_at_3_diff1": 0.876392417432091, |
|
"naucs_at_5_max": 0.7719887955182062, |
|
"naucs_at_5_std": 0.12731759370415013, |
|
"naucs_at_5_diff1": 0.8762838468720809, |
|
"naucs_at_10_max": 0.8234126984126983, |
|
"naucs_at_10_std": 0.03160597572361713, |
|
"naucs_at_10_diff1": 0.8921568627450972, |
|
"naucs_at_20_max": 0.8225957049486448, |
|
"naucs_at_20_std": -0.02110177404295123, |
|
"naucs_at_20_diff1": 0.9084967320261399, |
|
"naucs_at_50_max": 0.9738562091503188, |
|
"naucs_at_50_std": 0.3997198879551785, |
|
"naucs_at_50_diff1": 0.947712418300658, |
|
"naucs_at_100_max": 1.0, |
|
"naucs_at_100_std": 1.0, |
|
"naucs_at_100_diff1": 0.9346405228758466 |
|
}, |
|
"docvqa_test_subsampled": { |
|
"ndcg_at_1": 0.5255, |
|
"ndcg_at_3": 0.59582, |
|
"ndcg_at_5": 0.61852, |
|
"ndcg_at_10": 0.6347, |
|
"ndcg_at_20": 0.6492, |
|
"ndcg_at_50": 0.65706, |
|
"ndcg_at_100": 0.66361, |
|
"map_at_1": 0.5255, |
|
"map_at_3": 0.57871, |
|
"map_at_5": 0.59124, |
|
"map_at_10": 0.59815, |
|
"map_at_20": 0.6021, |
|
"map_at_50": 0.60333, |
|
"map_at_100": 0.60394, |
|
"recall_at_1": 0.5255, |
|
"recall_at_3": 0.64523, |
|
"recall_at_5": 0.70067, |
|
"recall_at_10": 0.74945, |
|
"recall_at_20": 0.8071, |
|
"recall_at_50": 0.84701, |
|
"recall_at_100": 0.88692, |
|
"precision_at_1": 0.5255, |
|
"precision_at_3": 0.21508, |
|
"precision_at_5": 0.14013, |
|
"precision_at_10": 0.07494, |
|
"precision_at_20": 0.04035, |
|
"precision_at_50": 0.01694, |
|
"precision_at_100": 0.00887, |
|
"mrr_at_1": 0.5254988913525499, |
|
"mrr_at_3": 0.5798226164079823, |
|
"mrr_at_5": 0.5909090909090909, |
|
"mrr_at_10": 0.5981936085594622, |
|
"mrr_at_20": 0.6018577072277826, |
|
"mrr_at_50": 0.603184631365057, |
|
"mrr_at_100": 0.6036950361798563, |
|
"naucs_at_1_max": 0.3862474572443775, |
|
"naucs_at_1_std": 0.6866980709008784, |
|
"naucs_at_1_diff1": 0.908866111398327, |
|
"naucs_at_3_max": 0.3232612877463118, |
|
"naucs_at_3_std": 0.7754733885431401, |
|
"naucs_at_3_diff1": 0.8488934990862631, |
|
"naucs_at_5_max": 0.26561580627789727, |
|
"naucs_at_5_std": 0.8294969802329596, |
|
"naucs_at_5_diff1": 0.843549438877749, |
|
"naucs_at_10_max": 0.21226897631460165, |
|
"naucs_at_10_std": 0.9004830786766392, |
|
"naucs_at_10_diff1": 0.8313389289681655, |
|
"naucs_at_20_max": 0.1442628756616487, |
|
"naucs_at_20_std": 0.9192991363569017, |
|
"naucs_at_20_diff1": 0.8284091873092273, |
|
"naucs_at_50_max": 0.04661475790032267, |
|
"naucs_at_50_std": 0.9356118889205624, |
|
"naucs_at_50_diff1": 0.8459735188892017, |
|
"naucs_at_100_max": -0.014764226166713695, |
|
"naucs_at_100_std": 0.9484296739649325, |
|
"naucs_at_100_diff1": 0.853153345512516 |
|
}, |
|
"syntheticDocQA_energy_test": { |
|
"ndcg_at_1": 0.95, |
|
"ndcg_at_3": 0.96131, |
|
"ndcg_at_5": 0.96131, |
|
"ndcg_at_10": 0.96487, |
|
"ndcg_at_20": 0.96757, |
|
"ndcg_at_50": 0.96952, |
|
"ndcg_at_100": 0.96952, |
|
"map_at_1": 0.95, |
|
"map_at_3": 0.95833, |
|
"map_at_5": 0.95833, |
|
"map_at_10": 0.96, |
|
"map_at_20": 0.96083, |
|
"map_at_50": 0.96113, |
|
"map_at_100": 0.96113, |
|
"recall_at_1": 0.95, |
|
"recall_at_3": 0.97, |
|
"recall_at_5": 0.97, |
|
"recall_at_10": 0.98, |
|
"recall_at_20": 0.99, |
|
"recall_at_50": 1.0, |
|
"recall_at_100": 1.0, |
|
"precision_at_1": 0.95, |
|
"precision_at_3": 0.32333, |
|
"precision_at_5": 0.194, |
|
"precision_at_10": 0.098, |
|
"precision_at_20": 0.0495, |
|
"precision_at_50": 0.02, |
|
"precision_at_100": 0.01, |
|
"mrr_at_1": 0.95, |
|
"mrr_at_3": 0.9583333333333333, |
|
"mrr_at_5": 0.9603333333333333, |
|
"mrr_at_10": 0.9603333333333333, |
|
"mrr_at_20": 0.9612424242424242, |
|
"mrr_at_50": 0.9615549242424243, |
|
"mrr_at_100": 0.9615549242424243, |
|
"naucs_at_1_max": 0.5612511671335213, |
|
"naucs_at_1_std": -0.8497665732959815, |
|
"naucs_at_1_diff1": 0.9477124183006508, |
|
"naucs_at_3_max": 0.807812013694371, |
|
"naucs_at_3_std": -0.6484593837535022, |
|
"naucs_at_3_diff1": 0.9564270152505466, |
|
"naucs_at_5_max": 0.807812013694364, |
|
"naucs_at_5_std": -0.6484593837535011, |
|
"naucs_at_5_diff1": 0.9564270152505424, |
|
"naucs_at_10_max": 0.9346405228758136, |
|
"naucs_at_10_std": -0.10270774976657283, |
|
"naucs_at_10_diff1": 0.9346405228758136, |
|
"naucs_at_20_max": 0.8692810457516413, |
|
"naucs_at_20_std": -0.5634920634920767, |
|
"naucs_at_20_diff1": 1.0, |
|
"naucs_at_50_max": null, |
|
"naucs_at_50_std": null, |
|
"naucs_at_50_diff1": null, |
|
"naucs_at_100_max": null, |
|
"naucs_at_100_std": null, |
|
"naucs_at_100_diff1": null |
|
}, |
|
"tatdqa_test": { |
|
"ndcg_at_1": 0.69502, |
|
"ndcg_at_3": 0.78919, |
|
"ndcg_at_5": 0.80833, |
|
"ndcg_at_10": 0.82129, |
|
"ndcg_at_20": 0.82519, |
|
"ndcg_at_50": 0.82963, |
|
"ndcg_at_100": 0.83159, |
|
"map_at_1": 0.69502, |
|
"map_at_3": 0.76701, |
|
"map_at_5": 0.77755, |
|
"map_at_10": 0.78302, |
|
"map_at_20": 0.7842, |
|
"map_at_50": 0.78491, |
|
"map_at_100": 0.78508, |
|
"recall_at_1": 0.69502, |
|
"recall_at_3": 0.85298, |
|
"recall_at_5": 0.89976, |
|
"recall_at_10": 0.93925, |
|
"recall_at_20": 0.95383, |
|
"recall_at_50": 0.97631, |
|
"recall_at_100": 0.98846, |
|
"precision_at_1": 0.69502, |
|
"precision_at_3": 0.28433, |
|
"precision_at_5": 0.17995, |
|
"precision_at_10": 0.09392, |
|
"precision_at_20": 0.04769, |
|
"precision_at_50": 0.01953, |
|
"precision_at_100": 0.00988, |
|
"mrr_at_1": 0.6950182260024301, |
|
"mrr_at_3": 0.7656946132037261, |
|
"mrr_at_5": 0.7770554880518429, |
|
"mrr_at_10": 0.7827320681208895, |
|
"mrr_at_20": 0.783837406855329, |
|
"mrr_at_50": 0.784510459247072, |
|
"mrr_at_100": 0.7846961562032659, |
|
"naucs_at_1_max": 0.2747145592665206, |
|
"naucs_at_1_std": -0.09557716491043182, |
|
"naucs_at_1_diff1": 0.8084384046681937, |
|
"naucs_at_3_max": 0.3330280541363154, |
|
"naucs_at_3_std": -0.0389562599865292, |
|
"naucs_at_3_diff1": 0.7240601862039753, |
|
"naucs_at_5_max": 0.352919194006459, |
|
"naucs_at_5_std": 0.0514244344972758, |
|
"naucs_at_5_diff1": 0.6583624352004738, |
|
"naucs_at_10_max": 0.43229730811413347, |
|
"naucs_at_10_std": 0.2815741957749473, |
|
"naucs_at_10_diff1": 0.6007108794001051, |
|
"naucs_at_20_max": 0.4241317350379727, |
|
"naucs_at_20_std": 0.2971486230084223, |
|
"naucs_at_20_diff1": 0.5874614197126973, |
|
"naucs_at_50_max": 0.5140385840003181, |
|
"naucs_at_50_std": 0.518664544400799, |
|
"naucs_at_50_diff1": 0.5976629425860989, |
|
"naucs_at_100_max": 0.4955447990957944, |
|
"naucs_at_100_std": 0.5556756488737464, |
|
"naucs_at_100_diff1": 0.6696787538618733 |
|
}, |
|
"infovqa_test_subsampled": { |
|
"ndcg_at_1": 0.88664, |
|
"ndcg_at_3": 0.91826, |
|
"ndcg_at_5": 0.92479, |
|
"ndcg_at_10": 0.93028, |
|
"ndcg_at_20": 0.93185, |
|
"ndcg_at_50": 0.93365, |
|
"ndcg_at_100": 0.93435, |
|
"map_at_1": 0.88664, |
|
"map_at_3": 0.91093, |
|
"map_at_5": 0.91447, |
|
"map_at_10": 0.91689, |
|
"map_at_20": 0.91735, |
|
"map_at_50": 0.91756, |
|
"map_at_100": 0.91763, |
|
"recall_at_1": 0.88664, |
|
"recall_at_3": 0.93927, |
|
"recall_at_5": 0.95547, |
|
"recall_at_10": 0.97166, |
|
"recall_at_20": 0.97773, |
|
"recall_at_50": 0.98785, |
|
"recall_at_100": 0.9919, |
|
"precision_at_1": 0.88664, |
|
"precision_at_3": 0.31309, |
|
"precision_at_5": 0.19109, |
|
"precision_at_10": 0.09717, |
|
"precision_at_20": 0.04889, |
|
"precision_at_50": 0.01976, |
|
"precision_at_100": 0.00992, |
|
"mrr_at_1": 0.8846153846153846, |
|
"mrr_at_3": 0.9092442645074225, |
|
"mrr_at_5": 0.9127867746288799, |
|
"mrr_at_10": 0.9150801683696421, |
|
"mrr_at_20": 0.9155341004025214, |
|
"mrr_at_50": 0.9157652608527469, |
|
"mrr_at_100": 0.9158356239262059, |
|
"naucs_at_1_max": 0.6077965938527005, |
|
"naucs_at_1_std": -0.034859963613718514, |
|
"naucs_at_1_diff1": 0.9382491207886892, |
|
"naucs_at_3_max": 0.7083430308645834, |
|
"naucs_at_3_std": 0.09173878193665044, |
|
"naucs_at_3_diff1": 0.941058293792039, |
|
"naucs_at_5_max": 0.828261285914772, |
|
"naucs_at_5_std": 0.25360809635439513, |
|
"naucs_at_5_diff1": 0.9374342352707031, |
|
"naucs_at_10_max": 0.8969865162710334, |
|
"naucs_at_10_std": 0.5367916133086486, |
|
"naucs_at_10_diff1": 0.9521757785748344, |
|
"naucs_at_20_max": 0.927259949634811, |
|
"naucs_at_20_std": 0.6312197158498455, |
|
"naucs_at_20_diff1": 0.9643814216187027, |
|
"naucs_at_50_max": 0.9129323639568211, |
|
"naucs_at_50_std": 0.5625809313845725, |
|
"naucs_at_50_diff1": 0.9564661819783937, |
|
"naucs_at_100_max": 0.9346992729676393, |
|
"naucs_at_100_std": 0.7480140525249407, |
|
"naucs_at_100_diff1": 0.9673496364838197 |
|
}, |
|
"syntheticDocQA_healthcare_industry_test": { |
|
"ndcg_at_1": 0.97, |
|
"ndcg_at_3": 0.98762, |
|
"ndcg_at_5": 0.98762, |
|
"ndcg_at_10": 0.98762, |
|
"ndcg_at_20": 0.98762, |
|
"ndcg_at_50": 0.98762, |
|
"ndcg_at_100": 0.98762, |
|
"map_at_1": 0.97, |
|
"map_at_3": 0.98333, |
|
"map_at_5": 0.98333, |
|
"map_at_10": 0.98333, |
|
"map_at_20": 0.98333, |
|
"map_at_50": 0.98333, |
|
"map_at_100": 0.98333, |
|
"recall_at_1": 0.97, |
|
"recall_at_3": 1.0, |
|
"recall_at_5": 1.0, |
|
"recall_at_10": 1.0, |
|
"recall_at_20": 1.0, |
|
"recall_at_50": 1.0, |
|
"recall_at_100": 1.0, |
|
"precision_at_1": 0.97, |
|
"precision_at_3": 0.33333, |
|
"precision_at_5": 0.2, |
|
"precision_at_10": 0.1, |
|
"precision_at_20": 0.05, |
|
"precision_at_50": 0.02, |
|
"precision_at_100": 0.01, |
|
"mrr_at_1": 0.97, |
|
"mrr_at_3": 0.9833333333333333, |
|
"mrr_at_5": 0.9833333333333333, |
|
"mrr_at_10": 0.9833333333333333, |
|
"mrr_at_20": 0.9833333333333333, |
|
"mrr_at_50": 0.9833333333333333, |
|
"mrr_at_100": 0.9833333333333333, |
|
"naucs_at_1_max": 0.8202614379084989, |
|
"naucs_at_1_std": -0.2198879551820713, |
|
"naucs_at_1_diff1": 0.9564270152505465, |
|
"naucs_at_3_max": 1.0, |
|
"naucs_at_3_std": 1.0, |
|
"naucs_at_3_diff1": 1.0, |
|
"naucs_at_5_max": 1.0, |
|
"naucs_at_5_std": 1.0, |
|
"naucs_at_5_diff1": 1.0, |
|
"naucs_at_10_max": 1.0, |
|
"naucs_at_10_std": 1.0, |
|
"naucs_at_10_diff1": 1.0, |
|
"naucs_at_20_max": 1.0, |
|
"naucs_at_20_std": 1.0, |
|
"naucs_at_20_diff1": 1.0, |
|
"naucs_at_50_max": null, |
|
"naucs_at_50_std": null, |
|
"naucs_at_50_diff1": null, |
|
"naucs_at_100_max": null, |
|
"naucs_at_100_std": null, |
|
"naucs_at_100_diff1": null |
|
}, |
|
"tabfquad_test_subsampled": { |
|
"ndcg_at_1": 0.83571, |
|
"ndcg_at_3": 0.88699, |
|
"ndcg_at_5": 0.89851, |
|
"ndcg_at_10": 0.90549, |
|
"ndcg_at_20": 0.91018, |
|
"ndcg_at_50": 0.9125, |
|
"ndcg_at_100": 0.9125, |
|
"map_at_1": 0.83571, |
|
"map_at_3": 0.875, |
|
"map_at_5": 0.88125, |
|
"map_at_10": 0.88416, |
|
"map_at_20": 0.88553, |
|
"map_at_50": 0.88598, |
|
"map_at_100": 0.88598, |
|
"recall_at_1": 0.83571, |
|
"recall_at_3": 0.92143, |
|
"recall_at_5": 0.95, |
|
"recall_at_10": 0.97143, |
|
"recall_at_20": 0.98929, |
|
"recall_at_50": 1.0, |
|
"recall_at_100": 1.0, |
|
"precision_at_1": 0.83571, |
|
"precision_at_3": 0.30714, |
|
"precision_at_5": 0.19, |
|
"precision_at_10": 0.09714, |
|
"precision_at_20": 0.04946, |
|
"precision_at_50": 0.02, |
|
"precision_at_100": 0.01, |
|
"mrr_at_1": 0.8392857142857143, |
|
"mrr_at_3": 0.8767857142857143, |
|
"mrr_at_5": 0.8830357142857143, |
|
"mrr_at_10": 0.8858517573696145, |
|
"mrr_at_20": 0.8872690305726021, |
|
"mrr_at_50": 0.8877252707609851, |
|
"mrr_at_100": 0.8877252707609851, |
|
"naucs_at_1_max": 0.4880524536858767, |
|
"naucs_at_1_std": 0.1966050571171859, |
|
"naucs_at_1_diff1": 0.9065802421597565, |
|
"naucs_at_3_max": 0.5683515830574649, |
|
"naucs_at_3_std": 0.33952975129445734, |
|
"naucs_at_3_diff1": 0.8935786435786452, |
|
"naucs_at_5_max": 0.7507669734560504, |
|
"naucs_at_5_std": 0.5118714152327615, |
|
"naucs_at_5_diff1": 0.9109643857543043, |
|
"naucs_at_10_max": 0.8150093370681609, |
|
"naucs_at_10_std": 0.5556722689075682, |
|
"naucs_at_10_diff1": 0.934640522875815, |
|
"naucs_at_20_max": 0.9128540305011011, |
|
"naucs_at_20_std": 0.807812013694365, |
|
"naucs_at_20_diff1": 0.9564270152505505, |
|
"naucs_at_50_max": 1.0, |
|
"naucs_at_50_std": 1.0, |
|
"naucs_at_50_diff1": 1.0, |
|
"naucs_at_100_max": 1.0, |
|
"naucs_at_100_std": 1.0, |
|
"naucs_at_100_diff1": 1.0 |
|
}, |
|
"syntheticDocQA_government_reports_test": { |
|
"ndcg_at_1": 0.91, |
|
"ndcg_at_3": 0.94393, |
|
"ndcg_at_5": 0.95254, |
|
"ndcg_at_10": 0.9561, |
|
"ndcg_at_20": 0.9561, |
|
"ndcg_at_50": 0.9561, |
|
"ndcg_at_100": 0.9561, |
|
"map_at_1": 0.91, |
|
"map_at_3": 0.935, |
|
"map_at_5": 0.94, |
|
"map_at_10": 0.94167, |
|
"map_at_20": 0.94167, |
|
"map_at_50": 0.94167, |
|
"map_at_100": 0.94167, |
|
"recall_at_1": 0.91, |
|
"recall_at_3": 0.97, |
|
"recall_at_5": 0.99, |
|
"recall_at_10": 1.0, |
|
"recall_at_20": 1.0, |
|
"recall_at_50": 1.0, |
|
"recall_at_100": 1.0, |
|
"precision_at_1": 0.91, |
|
"precision_at_3": 0.32333, |
|
"precision_at_5": 0.198, |
|
"precision_at_10": 0.1, |
|
"precision_at_20": 0.05, |
|
"precision_at_50": 0.02, |
|
"precision_at_100": 0.01, |
|
"mrr_at_1": 0.91, |
|
"mrr_at_3": 0.9383333333333332, |
|
"mrr_at_5": 0.9428333333333333, |
|
"mrr_at_10": 0.9428333333333333, |
|
"mrr_at_20": 0.9428333333333333, |
|
"mrr_at_50": 0.9428333333333333, |
|
"mrr_at_100": 0.9428333333333333, |
|
"naucs_at_1_max": 0.8246706089843343, |
|
"naucs_at_1_std": 0.4518103537711367, |
|
"naucs_at_1_diff1": 0.9564270152505436, |
|
"naucs_at_3_max": 0.7027699968876487, |
|
"naucs_at_3_std": 0.460939931528168, |
|
"naucs_at_3_diff1": 0.9564270152505466, |
|
"naucs_at_5_max": 0.5541549953314738, |
|
"naucs_at_5_std": -0.1713352007469681, |
|
"naucs_at_5_diff1": 1.0, |
|
"naucs_at_10_max": 1.0, |
|
"naucs_at_10_std": 1.0, |
|
"naucs_at_10_diff1": 1.0, |
|
"naucs_at_20_max": 1.0, |
|
"naucs_at_20_std": 1.0, |
|
"naucs_at_20_diff1": 1.0, |
|
"naucs_at_50_max": null, |
|
"naucs_at_50_std": null, |
|
"naucs_at_50_diff1": null, |
|
"naucs_at_100_max": null, |
|
"naucs_at_100_std": null, |
|
"naucs_at_100_diff1": null |
|
}, |
|
"shiftproject_test": { |
|
"ndcg_at_1": 0.73, |
|
"ndcg_at_3": 0.85095, |
|
"ndcg_at_5": 0.85526, |
|
"ndcg_at_10": 0.86174, |
|
"ndcg_at_20": 0.86703, |
|
"ndcg_at_50": 0.87134, |
|
"ndcg_at_100": 0.87134, |
|
"map_at_1": 0.73, |
|
"map_at_3": 0.82333, |
|
"map_at_5": 0.82583, |
|
"map_at_10": 0.82851, |
|
"map_at_20": 0.83009, |
|
"map_at_50": 0.83092, |
|
"map_at_100": 0.83092, |
|
"recall_at_1": 0.73, |
|
"recall_at_3": 0.93, |
|
"recall_at_5": 0.94, |
|
"recall_at_10": 0.96, |
|
"recall_at_20": 0.98, |
|
"recall_at_50": 1.0, |
|
"recall_at_100": 1.0, |
|
"precision_at_1": 0.73, |
|
"precision_at_3": 0.31, |
|
"precision_at_5": 0.188, |
|
"precision_at_10": 0.096, |
|
"precision_at_20": 0.049, |
|
"precision_at_50": 0.02, |
|
"precision_at_100": 0.01, |
|
"mrr_at_1": 0.75, |
|
"mrr_at_3": 0.8316666666666667, |
|
"mrr_at_5": 0.8341666666666667, |
|
"mrr_at_10": 0.8380238095238096, |
|
"mrr_at_20": 0.8386904761904762, |
|
"mrr_at_50": 0.8395600414078674, |
|
"mrr_at_100": 0.8395600414078674, |
|
"naucs_at_1_max": -0.0662092826672433, |
|
"naucs_at_1_std": -0.3356071477717276, |
|
"naucs_at_1_diff1": 0.7686999941024986, |
|
"naucs_at_3_max": -0.05822328931572497, |
|
"naucs_at_3_std": -0.2665732959850601, |
|
"naucs_at_3_diff1": 0.7789782579698524, |
|
"naucs_at_5_max": 0.22206660441954526, |
|
"naucs_at_5_std": -0.33146591970121353, |
|
"naucs_at_5_diff1": 0.8883442265795238, |
|
"naucs_at_10_max": 0.05240429505135564, |
|
"naucs_at_10_std": -0.4458450046685252, |
|
"naucs_at_10_diff1": 0.8651960784313721, |
|
"naucs_at_20_max": -0.5088702147525547, |
|
"naucs_at_20_std": -0.5929038281979383, |
|
"naucs_at_20_diff1": 0.9346405228758136, |
|
"naucs_at_50_max": null, |
|
"naucs_at_50_std": null, |
|
"naucs_at_50_diff1": null, |
|
"naucs_at_100_max": null, |
|
"naucs_at_100_std": null, |
|
"naucs_at_100_diff1": null |
|
}, |
|
"syntheticDocQA_artificial_intelligence_test": { |
|
"ndcg_at_1": 0.98, |
|
"ndcg_at_3": 0.99262, |
|
"ndcg_at_5": 0.99262, |
|
"ndcg_at_10": 0.99262, |
|
"ndcg_at_20": 0.99262, |
|
"ndcg_at_50": 0.99262, |
|
"ndcg_at_100": 0.99262, |
|
"map_at_1": 0.98, |
|
"map_at_3": 0.99, |
|
"map_at_5": 0.99, |
|
"map_at_10": 0.99, |
|
"map_at_20": 0.99, |
|
"map_at_50": 0.99, |
|
"map_at_100": 0.99, |
|
"recall_at_1": 0.98, |
|
"recall_at_3": 1.0, |
|
"recall_at_5": 1.0, |
|
"recall_at_10": 1.0, |
|
"recall_at_20": 1.0, |
|
"recall_at_50": 1.0, |
|
"recall_at_100": 1.0, |
|
"precision_at_1": 0.98, |
|
"precision_at_3": 0.33333, |
|
"precision_at_5": 0.2, |
|
"precision_at_10": 0.1, |
|
"precision_at_20": 0.05, |
|
"precision_at_50": 0.02, |
|
"precision_at_100": 0.01, |
|
"mrr_at_1": 0.98, |
|
"mrr_at_3": 0.99, |
|
"mrr_at_5": 0.99, |
|
"mrr_at_10": 0.99, |
|
"mrr_at_20": 0.99, |
|
"mrr_at_50": 0.99, |
|
"mrr_at_100": 0.99, |
|
"naucs_at_1_max": 0.3489729225023353, |
|
"naucs_at_1_std": -0.2987861811391249, |
|
"naucs_at_1_diff1": 1.0, |
|
"naucs_at_3_max": 1.0, |
|
"naucs_at_3_std": 1.0, |
|
"naucs_at_3_diff1": 1.0, |
|
"naucs_at_5_max": 1.0, |
|
"naucs_at_5_std": 1.0, |
|
"naucs_at_5_diff1": 1.0, |
|
"naucs_at_10_max": 1.0, |
|
"naucs_at_10_std": 1.0, |
|
"naucs_at_10_diff1": 1.0, |
|
"naucs_at_20_max": 1.0, |
|
"naucs_at_20_std": 1.0, |
|
"naucs_at_20_diff1": 1.0, |
|
"naucs_at_50_max": null, |
|
"naucs_at_50_std": null, |
|
"naucs_at_50_diff1": null, |
|
"naucs_at_100_max": null, |
|
"naucs_at_100_std": null, |
|
"naucs_at_100_diff1": null |
|
} |
|
} |
|
} |