{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.15, "eval_steps": 100, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 0.06993763389097322, "learning_rate": 1.2553691692674561e-06, "loss": 3.0703, "step": 2 }, { "epoch": 0.0, "grad_norm": 0.06153174752924976, "learning_rate": 2.5107383385349122e-06, "loss": 2.9844, "step": 4 }, { "epoch": 0.0, "grad_norm": 0.2222815841415942, "learning_rate": 3.245082227117844e-06, "loss": 2.1152, "step": 6 }, { "epoch": 0.0, "grad_norm": 0.0537359374559032, "learning_rate": 3.7661075078023677e-06, "loss": 2.9844, "step": 8 }, { "epoch": 0.0, "grad_norm": 0.06405006802699156, "learning_rate": 4.170246112844972e-06, "loss": 2.6562, "step": 10 }, { "epoch": 0.0, "grad_norm": 0.11637142063571428, "learning_rate": 4.5004513963853e-06, "loss": 2.4043, "step": 12 }, { "epoch": 0.0, "grad_norm": 0.1892893466719824, "learning_rate": 4.779635985609814e-06, "loss": 3.4062, "step": 14 }, { "epoch": 0.0, "grad_norm": 0.048251497571099866, "learning_rate": 5.0214766770698244e-06, "loss": 2.1211, "step": 16 }, { "epoch": 0.0, "grad_norm": 0.0568415678322659, "learning_rate": 5.234795284968231e-06, "loss": 2.1914, "step": 18 }, { "epoch": 0.0, "grad_norm": 0.200681152700164, "learning_rate": 5.425615282112428e-06, "loss": 3.1094, "step": 20 }, { "epoch": 0.0, "grad_norm": 0.18051216338180223, "learning_rate": 5.598232966493732e-06, "loss": 3.5078, "step": 22 }, { "epoch": 0.0, "grad_norm": 0.06093213542225355, "learning_rate": 5.755820565652757e-06, "loss": 2.1797, "step": 24 }, { "epoch": 0.0, "grad_norm": 0.05166025684541596, "learning_rate": 5.900787104154539e-06, "loss": 1.8945, "step": 26 }, { "epoch": 0.0, "grad_norm": 0.07724068655290238, "learning_rate": 6.03500515487727e-06, "loss": 2.3125, "step": 28 }, { "epoch": 0.0, "grad_norm": 0.10396538948031947, "learning_rate": 6.159959170695358e-06, "loss": 2.0352, "step": 30 }, { "epoch": 0.0, "grad_norm": 0.054259556614586625, "learning_rate": 6.27684584633728e-06, "loss": 1.6758, "step": 32 }, { "epoch": 0.0, "grad_norm": 0.07654097884702263, "learning_rate": 6.386644000699491e-06, "loss": 2.6172, "step": 34 }, { "epoch": 0.0, "grad_norm": 0.13452991659941343, "learning_rate": 6.490164454235688e-06, "loss": 2.0039, "step": 36 }, { "epoch": 0.0, "grad_norm": 0.0511568875605143, "learning_rate": 6.5880864029275e-06, "loss": 3.2227, "step": 38 }, { "epoch": 0.0, "grad_norm": 0.05027966184401755, "learning_rate": 6.680984451379884e-06, "loss": 1.9648, "step": 40 }, { "epoch": 0.0, "grad_norm": 0.05870045244699116, "learning_rate": 6.769349043460203e-06, "loss": 1.418, "step": 42 }, { "epoch": 0.0, "grad_norm": 0.06159022656905628, "learning_rate": 6.853602135761187e-06, "loss": 4.0156, "step": 44 }, { "epoch": 0.0, "grad_norm": 0.13273234613359977, "learning_rate": 6.934109384172617e-06, "loss": 3.2891, "step": 46 }, { "epoch": 0.0, "grad_norm": 0.052052813270225366, "learning_rate": 7.011189734920213e-06, "loss": 2.6172, "step": 48 }, { "epoch": 0.0, "grad_norm": 0.15961881341722708, "learning_rate": 7.085123056422486e-06, "loss": 4.6094, "step": 50 }, { "epoch": 0.0, "grad_norm": 0.049703982396482015, "learning_rate": 7.156156273421995e-06, "loss": 2.9766, "step": 52 }, { "epoch": 0.0, "grad_norm": 0.1779622827984218, "learning_rate": 7.224508342818619e-06, "loss": 2.7109, "step": 54 }, { "epoch": 0.0, "grad_norm": 0.05820561642082821, "learning_rate": 7.290374324144728e-06, "loss": 4.0156, "step": 56 }, { "epoch": 0.0, "grad_norm": 0.1367064129848971, "learning_rate": 7.3539287354378455e-06, "loss": 2.0254, "step": 58 }, { "epoch": 0.0, "grad_norm": 0.07025650563122704, "learning_rate": 7.415328339962814e-06, "loss": 3.0938, "step": 60 }, { "epoch": 0.0, "grad_norm": 0.07864152211515654, "learning_rate": 7.474714475825724e-06, "loss": 3.2656, "step": 62 }, { "epoch": 0.0, "grad_norm": 0.10434333055982575, "learning_rate": 7.532215015604735e-06, "loss": 2.5859, "step": 64 }, { "epoch": 0.0, "grad_norm": 0.08468608134478554, "learning_rate": 7.587946024344118e-06, "loss": 2.9844, "step": 66 }, { "epoch": 0.0, "grad_norm": 0.06122282169051712, "learning_rate": 7.642013169966947e-06, "loss": 2.5859, "step": 68 }, { "epoch": 0.0, "grad_norm": 0.0865492074104283, "learning_rate": 7.69451292918733e-06, "loss": 3.1172, "step": 70 }, { "epoch": 0.0, "grad_norm": 0.10794311555188178, "learning_rate": 7.745533623503144e-06, "loss": 1.6191, "step": 72 }, { "epoch": 0.0, "grad_norm": 0.1030752468383858, "learning_rate": 7.795156313214624e-06, "loss": 2.3516, "step": 74 }, { "epoch": 0.0, "grad_norm": 0.059585575196562775, "learning_rate": 7.843455572194956e-06, "loss": 3.1016, "step": 76 }, { "epoch": 0.0, "grad_norm": 0.06094201801741937, "learning_rate": 7.890500162004926e-06, "loss": 3.7188, "step": 78 }, { "epoch": 0.0, "grad_norm": 0.06172587189145012, "learning_rate": 7.93635362064734e-06, "loss": 3.2188, "step": 80 }, { "epoch": 0.0, "grad_norm": 0.05842479147840705, "learning_rate": 7.981074778612054e-06, "loss": 2.5859, "step": 82 }, { "epoch": 0.0, "grad_norm": 0.08713550699538668, "learning_rate": 8.024718212727658e-06, "loss": 4.25, "step": 84 }, { "epoch": 0.0, "grad_norm": 0.05385791652102987, "learning_rate": 8.067334646603105e-06, "loss": 2.1328, "step": 86 }, { "epoch": 0.0, "grad_norm": 0.061292113145551747, "learning_rate": 8.108971305028645e-06, "loss": 2.7227, "step": 88 }, { "epoch": 0.0, "grad_norm": 0.04357968658567487, "learning_rate": 8.149672228545746e-06, "loss": 2.7344, "step": 90 }, { "epoch": 0.0, "grad_norm": 0.0604204079926121, "learning_rate": 8.189478553440074e-06, "loss": 3.0781, "step": 92 }, { "epoch": 0.0, "grad_norm": 0.05658025854347226, "learning_rate": 8.228428761620285e-06, "loss": 2.6641, "step": 94 }, { "epoch": 0.0, "grad_norm": 0.048768682059982936, "learning_rate": 8.266558904187668e-06, "loss": 1.9219, "step": 96 }, { "epoch": 0.0, "grad_norm": 0.049735516778038245, "learning_rate": 8.303902801952174e-06, "loss": 3.8281, "step": 98 }, { "epoch": 0.01, "grad_norm": 0.05805351434989718, "learning_rate": 8.340492225689944e-06, "loss": 3.6328, "step": 100 }, { "epoch": 0.01, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 44.3359375, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 91.2109375, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.80078125, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 68.66924285888672, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 13.6882, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 4.676, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.073, "step": 100 }, { "epoch": 0.01, "eval_specter_top15HN_validation.jsonl.gz_acc1": 8.59375, "eval_specter_top15HN_validation.jsonl.gz_acc3": 21.2890625, "eval_specter_top15HN_validation.jsonl.gz_loss": 1.484375, "eval_specter_top15HN_validation.jsonl.gz_mrr": 20.540794372558594, "eval_specter_top15HN_validation.jsonl.gz_runtime": 3.573, "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 17.912, "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.28, "step": 100 }, { "epoch": 0.01, "eval_nq_top15HN_validation.jsonl.gz_acc1": 42.578125, "eval_nq_top15HN_validation.jsonl.gz_acc3": 88.0859375, "eval_nq_top15HN_validation.jsonl.gz_loss": 0.87890625, "eval_nq_top15HN_validation.jsonl.gz_mrr": 66.20653533935547, "eval_nq_top15HN_validation.jsonl.gz_runtime": 12.4184, "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.154, "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.081, "step": 100 }, { "epoch": 0.01, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 38.4765625, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 80.46875, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.99609375, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 61.372718811035156, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 15.9617, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.01, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.063, "step": 100 }, { "epoch": 0.01, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 41.40625, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 84.9609375, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.0234375, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 64.66063690185547, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 11.8761, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.389, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.084, "step": 100 }, { "epoch": 0.01, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 47.0703125, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 94.53125, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.05279541015625, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 12.0374, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.317, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.083, "step": 100 }, { "epoch": 0.01, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 40.0390625, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 83.59375, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.7421875, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 63.44341278076172, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 11.9454, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.358, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.084, "step": 100 }, { "epoch": 0.01, "eval_fever_top15HN_validation.jsonl.gz_acc1": 41.40625, "eval_fever_top15HN_validation.jsonl.gz_acc3": 88.28125, "eval_fever_top15HN_validation.jsonl.gz_loss": 3.015625, "eval_fever_top15HN_validation.jsonl.gz_mrr": 66.08790588378906, "eval_fever_top15HN_validation.jsonl.gz_runtime": 17.8889, "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.578, "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.056, "step": 100 }, { "epoch": 0.01, "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 37.5, "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 79.1015625, "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.84375, "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 60.242340087890625, "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 6.6659, "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 9.601, "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.15, "step": 100 }, { "epoch": 0.01, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 45.5078125, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 92.578125, "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.078125, "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 69.9529800415039, "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 8.2652, "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 7.743, "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.121, "step": 100 }, { "epoch": 0.01, "eval_arguana_synthetic_validation.jsonl.gz_acc1": 43.9453125, "eval_arguana_synthetic_validation.jsonl.gz_acc3": 92.578125, "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, "eval_arguana_synthetic_validation.jsonl.gz_mrr": 68.82560729980469, "eval_arguana_synthetic_validation.jsonl.gz_runtime": 6.2525, "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 10.236, "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.16, "step": 100 }, { "epoch": 0.01, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 32.8125, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 73.828125, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.85546875, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 55.7899284362793, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 15.3397, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.172, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.065, "step": 100 }, { "epoch": 0.01, "grad_norm": 0.0555762154078443, "learning_rate": 8.376357058549878e-06, "loss": 3.8516, "step": 102 }, { "epoch": 0.01, "grad_norm": 0.09096721847690384, "learning_rate": 8.41152544268945e-06, "loss": 3.0312, "step": 104 }, { "epoch": 0.01, "grad_norm": 0.09138371442473803, "learning_rate": 8.446023911942528e-06, "loss": 3.2656, "step": 106 }, { "epoch": 0.01, "grad_norm": 0.05511478622620939, "learning_rate": 8.479877512086076e-06, "loss": 3.6562, "step": 108 }, { "epoch": 0.01, "grad_norm": 0.08097997634746824, "learning_rate": 8.513109910071246e-06, "loss": 3.375, "step": 110 }, { "epoch": 0.01, "grad_norm": 0.053618034298021276, "learning_rate": 8.545743493412182e-06, "loss": 2.5977, "step": 112 }, { "epoch": 0.01, "grad_norm": 0.048484076517767906, "learning_rate": 8.577799460777888e-06, "loss": 2.3438, "step": 114 }, { "epoch": 0.01, "grad_norm": 0.04762160994558407, "learning_rate": 8.609297904705302e-06, "loss": 2.5625, "step": 116 }, { "epoch": 0.01, "grad_norm": 0.056551775221808875, "learning_rate": 8.640257887241806e-06, "loss": 3.6562, "step": 118 }, { "epoch": 0.01, "grad_norm": 0.08650912291299868, "learning_rate": 8.67069750923027e-06, "loss": 3.0781, "step": 120 }, { "epoch": 0.01, "grad_norm": 0.05024595516846764, "learning_rate": 8.700633973867262e-06, "loss": 2.5312, "step": 122 }, { "epoch": 0.01, "grad_norm": 0.08139131343094162, "learning_rate": 8.73008364509318e-06, "loss": 2.9844, "step": 124 }, { "epoch": 0.01, "grad_norm": 0.05261944016574215, "learning_rate": 8.75906210131059e-06, "loss": 3.1719, "step": 126 }, { "epoch": 0.01, "grad_norm": 0.04990762703132796, "learning_rate": 8.787584184872193e-06, "loss": 2.8438, "step": 128 }, { "epoch": 0.01, "grad_norm": 0.11405115920346946, "learning_rate": 8.815664047732054e-06, "loss": 3.625, "step": 130 }, { "epoch": 0.01, "grad_norm": 0.046456081831816425, "learning_rate": 8.843315193611574e-06, "loss": 3.4141, "step": 132 }, { "epoch": 0.01, "grad_norm": 0.05027807580007265, "learning_rate": 8.870550516994724e-06, "loss": 2.5742, "step": 134 }, { "epoch": 0.01, "grad_norm": 0.04783365099303492, "learning_rate": 8.897382339234405e-06, "loss": 2.207, "step": 136 }, { "epoch": 0.01, "grad_norm": 0.053648309608052606, "learning_rate": 8.923822442023006e-06, "loss": 3.625, "step": 138 }, { "epoch": 0.01, "grad_norm": 0.05821291421967534, "learning_rate": 8.949882098454784e-06, "loss": 2.3711, "step": 140 }, { "epoch": 0.01, "grad_norm": 0.07947431557911265, "learning_rate": 8.975572101884981e-06, "loss": 3.0, "step": 142 }, { "epoch": 0.01, "grad_norm": 0.042870498622682544, "learning_rate": 9.0009027927706e-06, "loss": 3.3047, "step": 144 }, { "epoch": 0.01, "grad_norm": 0.04492287531029236, "learning_rate": 9.025884083659961e-06, "loss": 2.5508, "step": 146 }, { "epoch": 0.01, "grad_norm": 0.055349818184018997, "learning_rate": 9.05052548248208e-06, "loss": 2.0547, "step": 148 }, { "epoch": 0.01, "grad_norm": 0.06308661407367727, "learning_rate": 9.074836114272873e-06, "loss": 3.4922, "step": 150 }, { "epoch": 0.01, "grad_norm": 0.055429411592394985, "learning_rate": 9.098824741462414e-06, "loss": 3.8359, "step": 152 }, { "epoch": 0.01, "grad_norm": 0.04545788666314569, "learning_rate": 9.12249978283609e-06, "loss": 3.5547, "step": 154 }, { "epoch": 0.01, "grad_norm": 0.05213374911481201, "learning_rate": 9.145869331272382e-06, "loss": 1.8984, "step": 156 }, { "epoch": 0.01, "grad_norm": 0.0929787972975564, "learning_rate": 9.16894117035073e-06, "loss": 3.4375, "step": 158 }, { "epoch": 0.01, "grad_norm": 0.06586975877837985, "learning_rate": 9.191722789914796e-06, "loss": 3.2578, "step": 160 }, { "epoch": 0.01, "grad_norm": 0.07128694282933898, "learning_rate": 9.214221400669006e-06, "loss": 3.2266, "step": 162 }, { "epoch": 0.01, "grad_norm": 0.05663837642152596, "learning_rate": 9.23644394787951e-06, "loss": 2.9766, "step": 164 }, { "epoch": 0.01, "grad_norm": 0.04490447484833529, "learning_rate": 9.258397124244722e-06, "loss": 2.75, "step": 166 }, { "epoch": 0.01, "grad_norm": 0.05914922510919387, "learning_rate": 9.280087381995114e-06, "loss": 3.8125, "step": 168 }, { "epoch": 0.01, "grad_norm": 0.055595307384766375, "learning_rate": 9.301520944277006e-06, "loss": 2.3398, "step": 170 }, { "epoch": 0.01, "grad_norm": 0.04965775451157443, "learning_rate": 9.32270381587056e-06, "loss": 2.4844, "step": 172 }, { "epoch": 0.01, "grad_norm": 0.05284752010553776, "learning_rate": 9.343641793288234e-06, "loss": 2.6719, "step": 174 }, { "epoch": 0.01, "grad_norm": 0.19756340929876257, "learning_rate": 9.3643404742961e-06, "loss": 1.041, "step": 176 }, { "epoch": 0.01, "grad_norm": 0.045348920860207864, "learning_rate": 9.384805266897236e-06, "loss": 2.5625, "step": 178 }, { "epoch": 0.01, "grad_norm": 0.06276055132520378, "learning_rate": 9.405041397813202e-06, "loss": 2.3047, "step": 180 }, { "epoch": 0.01, "grad_norm": 0.058813238321584256, "learning_rate": 9.425053920496896e-06, "loss": 2.5117, "step": 182 }, { "epoch": 0.01, "grad_norm": 0.0661215147360088, "learning_rate": 9.44484772270753e-06, "loss": 3.5703, "step": 184 }, { "epoch": 0.01, "grad_norm": 0.05292887713455962, "learning_rate": 9.464427533676113e-06, "loss": 4.0938, "step": 186 }, { "epoch": 0.01, "grad_norm": 0.05263024221290531, "learning_rate": 9.483797930887741e-06, "loss": 2.457, "step": 188 }, { "epoch": 0.01, "grad_norm": 0.04471022826113325, "learning_rate": 9.502963346505015e-06, "loss": 2.7891, "step": 190 }, { "epoch": 0.01, "grad_norm": 0.08212123204504364, "learning_rate": 9.521928073455125e-06, "loss": 2.1836, "step": 192 }, { "epoch": 0.01, "grad_norm": 0.05038826441595468, "learning_rate": 9.540696271201526e-06, "loss": 1.582, "step": 194 }, { "epoch": 0.01, "grad_norm": 0.05778434920226073, "learning_rate": 9.559271971219628e-06, "loss": 3.6328, "step": 196 }, { "epoch": 0.01, "grad_norm": 0.04973184410313608, "learning_rate": 9.577659082194506e-06, "loss": 1.6289, "step": 198 }, { "epoch": 0.01, "grad_norm": 0.09038426203739623, "learning_rate": 9.595861394957398e-06, "loss": 3.7031, "step": 200 }, { "epoch": 0.01, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 44.53125, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 91.796875, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.8046875, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 68.5916748046875, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.5049, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.563, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.087, "step": 200 }, { "epoch": 0.01, "eval_specter_top15HN_validation.jsonl.gz_acc1": 9.375, "eval_specter_top15HN_validation.jsonl.gz_acc3": 23.2421875, "eval_specter_top15HN_validation.jsonl.gz_loss": 1.46875, "eval_specter_top15HN_validation.jsonl.gz_mrr": 21.6689510345459, "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.7618, "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 23.174, "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.362, "step": 200 }, { "epoch": 0.01, "eval_nq_top15HN_validation.jsonl.gz_acc1": 44.921875, "eval_nq_top15HN_validation.jsonl.gz_acc3": 92.7734375, "eval_nq_top15HN_validation.jsonl.gz_loss": 0.8671875, "eval_nq_top15HN_validation.jsonl.gz_mrr": 68.84564971923828, "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.8596, "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.893, "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.092, "step": 200 }, { "epoch": 0.01, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 40.8203125, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 84.5703125, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.984375, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 63.992000579833984, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.9761, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.273, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.067, "step": 200 }, { "epoch": 0.01, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 41.796875, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 85.3515625, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.03125, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 65.20314025878906, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.8663, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.89, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.092, "step": 200 }, { "epoch": 0.01, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 47.0703125, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 95.1171875, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.77734375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.29474639892578, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 11.0218, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.807, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.091, "step": 200 }, { "epoch": 0.01, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 39.84375, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 83.7890625, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.73828125, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 62.934173583984375, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.8548, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.896, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.092, "step": 200 }, { "epoch": 0.01, "eval_fever_top15HN_validation.jsonl.gz_acc1": 42.3828125, "eval_fever_top15HN_validation.jsonl.gz_acc3": 89.2578125, "eval_fever_top15HN_validation.jsonl.gz_loss": 2.890625, "eval_fever_top15HN_validation.jsonl.gz_mrr": 66.17569732666016, "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.3741, "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.909, "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.061, "step": 200 }, { "epoch": 0.01, "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 35.546875, "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 76.171875, "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.83984375, "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 58.256744384765625, "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.4017, "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 11.848, "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.185, "step": 200 }, { "epoch": 0.01, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 46.2890625, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 94.140625, "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.0625, "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 70.64799499511719, "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 5.7737, "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 11.085, "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.173, "step": 200 }, { "epoch": 0.01, "eval_arguana_synthetic_validation.jsonl.gz_acc1": 42.96875, "eval_arguana_synthetic_validation.jsonl.gz_acc3": 89.84375, "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.7109375, "eval_arguana_synthetic_validation.jsonl.gz_mrr": 67.71356964111328, "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.1763, "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.364, "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.193, "step": 200 }, { "epoch": 0.01, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 27.734375, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 64.84375, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.86328125, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 50.73663330078125, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.1216, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.532, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.071, "step": 200 }, { "epoch": 0.01, "grad_norm": 0.05187297999516563, "learning_rate": 9.613882587176614e-06, "loss": 1.6211, "step": 202 }, { "epoch": 0.01, "grad_norm": 0.06054422157379995, "learning_rate": 9.631726227817333e-06, "loss": 2.4805, "step": 204 }, { "epoch": 0.01, "grad_norm": 0.05470661272122422, "learning_rate": 9.64939578138386e-06, "loss": 2.8594, "step": 206 }, { "epoch": 0.01, "grad_norm": 0.04252591798544973, "learning_rate": 9.666894611956906e-06, "loss": 2.2148, "step": 208 }, { "epoch": 0.01, "grad_norm": 0.055328731041222816, "learning_rate": 9.684225987037717e-06, "loss": 2.0742, "step": 210 }, { "epoch": 0.01, "grad_norm": 0.06429040482274337, "learning_rate": 9.701393081209986e-06, "loss": 1.2773, "step": 212 }, { "epoch": 0.01, "grad_norm": 0.04865171991818917, "learning_rate": 9.718398979629844e-06, "loss": 2.6875, "step": 214 }, { "epoch": 0.01, "grad_norm": 0.062086833306091785, "learning_rate": 9.735246681353532e-06, "loss": 3.6016, "step": 216 }, { "epoch": 0.01, "grad_norm": 0.055348962148643696, "learning_rate": 9.751939102511684e-06, "loss": 4.3203, "step": 218 }, { "epoch": 0.01, "grad_norm": 0.0654632532081427, "learning_rate": 9.768479079338704e-06, "loss": 2.6484, "step": 220 }, { "epoch": 0.01, "grad_norm": 0.056276097965491816, "learning_rate": 9.78486937106501e-06, "loss": 2.1543, "step": 222 }, { "epoch": 0.01, "grad_norm": 0.04384275761621124, "learning_rate": 9.801112662679638e-06, "loss": 2.9062, "step": 224 }, { "epoch": 0.01, "grad_norm": 0.10285480569491967, "learning_rate": 9.817211567569991e-06, "loss": 2.1309, "step": 226 }, { "epoch": 0.01, "grad_norm": 0.057563429339058884, "learning_rate": 9.833168630045344e-06, "loss": 1.3789, "step": 228 }, { "epoch": 0.01, "grad_norm": 0.04845892803303715, "learning_rate": 9.848986327750132e-06, "loss": 3.0078, "step": 230 }, { "epoch": 0.01, "grad_norm": 0.052079127807501846, "learning_rate": 9.864667073972758e-06, "loss": 1.8516, "step": 232 }, { "epoch": 0.01, "grad_norm": 0.0580523372207002, "learning_rate": 9.880213219855314e-06, "loss": 3.5938, "step": 234 }, { "epoch": 0.01, "grad_norm": 0.05520583701375494, "learning_rate": 9.895627056509262e-06, "loss": 1.8652, "step": 236 }, { "epoch": 0.01, "grad_norm": 0.04670165215553551, "learning_rate": 9.91091081704185e-06, "loss": 2.5469, "step": 238 }, { "epoch": 0.01, "grad_norm": 0.05111165944588646, "learning_rate": 9.926066678497726e-06, "loss": 2.5312, "step": 240 }, { "epoch": 0.01, "grad_norm": 0.045520169806954836, "learning_rate": 9.941096763720006e-06, "loss": 1.9609, "step": 242 }, { "epoch": 0.01, "grad_norm": 0.04443447929589807, "learning_rate": 9.956003143134718e-06, "loss": 1.7402, "step": 244 }, { "epoch": 0.01, "grad_norm": 0.0457533842247698, "learning_rate": 9.97078783646244e-06, "loss": 3.0859, "step": 246 }, { "epoch": 0.01, "grad_norm": 0.07183809693572032, "learning_rate": 9.985452814360637e-06, "loss": 2.1992, "step": 248 }, { "epoch": 0.01, "grad_norm": 0.04695659727754891, "learning_rate": 1e-05, "loss": 1.9609, "step": 250 }, { "epoch": 0.01, "grad_norm": 0.045207151805051436, "learning_rate": 9.999493670886077e-06, "loss": 2.9766, "step": 252 }, { "epoch": 0.01, "grad_norm": 0.05494287375989865, "learning_rate": 9.998481012658229e-06, "loss": 2.0117, "step": 254 }, { "epoch": 0.01, "grad_norm": 0.05104609803909529, "learning_rate": 9.99746835443038e-06, "loss": 4.0469, "step": 256 }, { "epoch": 0.01, "grad_norm": 0.10716340875144627, "learning_rate": 9.996455696202532e-06, "loss": 3.3438, "step": 258 }, { "epoch": 0.01, "grad_norm": 0.049844088116010224, "learning_rate": 9.995443037974684e-06, "loss": 2.3359, "step": 260 }, { "epoch": 0.01, "grad_norm": 0.06172126420851459, "learning_rate": 9.994430379746836e-06, "loss": 2.9883, "step": 262 }, { "epoch": 0.01, "grad_norm": 0.05462014145721095, "learning_rate": 9.993417721518988e-06, "loss": 3.3047, "step": 264 }, { "epoch": 0.01, "grad_norm": 0.1061552628130712, "learning_rate": 9.99240506329114e-06, "loss": 2.6836, "step": 266 }, { "epoch": 0.01, "grad_norm": 0.07377903646023005, "learning_rate": 9.991392405063292e-06, "loss": 2.293, "step": 268 }, { "epoch": 0.01, "grad_norm": 0.07509279074117713, "learning_rate": 9.990379746835444e-06, "loss": 2.9492, "step": 270 }, { "epoch": 0.01, "grad_norm": 0.05146566974838422, "learning_rate": 9.989367088607596e-06, "loss": 3.0703, "step": 272 }, { "epoch": 0.01, "grad_norm": 0.04868427544965678, "learning_rate": 9.988354430379748e-06, "loss": 3.1016, "step": 274 }, { "epoch": 0.01, "grad_norm": 0.05261970344084726, "learning_rate": 9.9873417721519e-06, "loss": 2.0859, "step": 276 }, { "epoch": 0.01, "grad_norm": 0.054466522746173085, "learning_rate": 9.986329113924052e-06, "loss": 2.5625, "step": 278 }, { "epoch": 0.01, "grad_norm": 0.06804701762945423, "learning_rate": 9.985316455696203e-06, "loss": 2.8203, "step": 280 }, { "epoch": 0.01, "grad_norm": 0.05772343231173062, "learning_rate": 9.984303797468355e-06, "loss": 2.3496, "step": 282 }, { "epoch": 0.01, "grad_norm": 0.053545347629839375, "learning_rate": 9.983291139240507e-06, "loss": 3.1797, "step": 284 }, { "epoch": 0.01, "grad_norm": 0.06325664571765138, "learning_rate": 9.98227848101266e-06, "loss": 3.1719, "step": 286 }, { "epoch": 0.01, "grad_norm": 0.06480821776360049, "learning_rate": 9.981265822784811e-06, "loss": 2.0859, "step": 288 }, { "epoch": 0.01, "grad_norm": 0.044109436690912, "learning_rate": 9.980253164556963e-06, "loss": 2.8516, "step": 290 }, { "epoch": 0.01, "grad_norm": 0.05720379192483985, "learning_rate": 9.979240506329115e-06, "loss": 1.6055, "step": 292 }, { "epoch": 0.01, "grad_norm": 0.05018627784925598, "learning_rate": 9.978227848101267e-06, "loss": 1.375, "step": 294 }, { "epoch": 0.01, "grad_norm": 0.04823007306027534, "learning_rate": 9.977215189873419e-06, "loss": 2.3477, "step": 296 }, { "epoch": 0.01, "grad_norm": 0.05029189238276064, "learning_rate": 9.97620253164557e-06, "loss": 1.9141, "step": 298 }, { "epoch": 0.01, "grad_norm": 0.053517034957577904, "learning_rate": 9.975189873417723e-06, "loss": 2.166, "step": 300 }, { "epoch": 0.01, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 45.5078125, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 92.3828125, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.8046875, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 69.2406005859375, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.3388, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.644, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.088, "step": 300 }, { "epoch": 0.01, "eval_specter_top15HN_validation.jsonl.gz_acc1": 9.1796875, "eval_specter_top15HN_validation.jsonl.gz_acc3": 22.4609375, "eval_specter_top15HN_validation.jsonl.gz_loss": 1.484375, "eval_specter_top15HN_validation.jsonl.gz_mrr": 21.357505798339844, "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.6656, "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 24.01, "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.375, "step": 300 }, { "epoch": 0.01, "eval_nq_top15HN_validation.jsonl.gz_acc1": 47.4609375, "eval_nq_top15HN_validation.jsonl.gz_acc3": 96.2890625, "eval_nq_top15HN_validation.jsonl.gz_loss": 0.86328125, "eval_nq_top15HN_validation.jsonl.gz_mrr": 71.95645904541016, "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.8291, "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.91, "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.092, "step": 300 }, { "epoch": 0.01, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 38.0859375, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 79.8828125, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.98828125, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 61.024009704589844, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.4815, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.419, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, "step": 300 }, { "epoch": 0.01, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 41.9921875, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 86.71875, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.03125, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 65.74642944335938, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.995, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.821, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.091, "step": 300 }, { "epoch": 0.01, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.2890625, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 93.75, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 70.20555114746094, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.7704, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.942, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.093, "step": 300 }, { "epoch": 0.01, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 40.625, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 84.1796875, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.7421875, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 64.0312271118164, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.3256, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 6.198, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.097, "step": 300 }, { "epoch": 0.01, "eval_fever_top15HN_validation.jsonl.gz_acc1": 41.796875, "eval_fever_top15HN_validation.jsonl.gz_acc3": 88.28125, "eval_fever_top15HN_validation.jsonl.gz_loss": 2.640625, "eval_fever_top15HN_validation.jsonl.gz_mrr": 66.14747619628906, "eval_fever_top15HN_validation.jsonl.gz_runtime": 15.7985, "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 4.051, "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.063, "step": 300 }, { "epoch": 0.01, "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 41.9921875, "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 86.328125, "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.81640625, "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 65.693603515625, "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.4009, "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 11.85, "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.185, "step": 300 }, { "epoch": 0.01, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 45.8984375, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 93.5546875, "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.078125, "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 70.1675033569336, "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 10.2559, "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 6.24, "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.098, "step": 300 }, { "epoch": 0.01, "eval_arguana_synthetic_validation.jsonl.gz_acc1": 44.53125, "eval_arguana_synthetic_validation.jsonl.gz_acc3": 91.9921875, "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.6953125, "eval_arguana_synthetic_validation.jsonl.gz_mrr": 68.58917999267578, "eval_arguana_synthetic_validation.jsonl.gz_runtime": 4.8726, "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 13.135, "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.205, "step": 300 }, { "epoch": 0.01, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 32.6171875, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 71.6796875, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.84765625, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 55.05112075805664, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.0688, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.549, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.071, "step": 300 }, { "epoch": 0.02, "grad_norm": 0.05220067643814575, "learning_rate": 9.974177215189874e-06, "loss": 1.0488, "step": 302 }, { "epoch": 0.02, "grad_norm": 0.04761786468609522, "learning_rate": 9.973164556962026e-06, "loss": 2.7422, "step": 304 }, { "epoch": 0.02, "grad_norm": 0.06625101584092043, "learning_rate": 9.972151898734178e-06, "loss": 3.7188, "step": 306 }, { "epoch": 0.02, "grad_norm": 0.030759557769197126, "learning_rate": 9.97113924050633e-06, "loss": 3.332, "step": 308 }, { "epoch": 0.02, "grad_norm": 0.09398290889068939, "learning_rate": 9.970126582278482e-06, "loss": 1.9922, "step": 310 }, { "epoch": 0.02, "grad_norm": 0.053608723396326066, "learning_rate": 9.969113924050634e-06, "loss": 2.9375, "step": 312 }, { "epoch": 0.02, "grad_norm": 0.061954101693703315, "learning_rate": 9.968101265822786e-06, "loss": 3.1172, "step": 314 }, { "epoch": 0.02, "grad_norm": 0.09075162197271411, "learning_rate": 9.967088607594938e-06, "loss": 3.0742, "step": 316 }, { "epoch": 0.02, "grad_norm": 0.05736468842676422, "learning_rate": 9.966075949367088e-06, "loss": 3.2969, "step": 318 }, { "epoch": 0.02, "grad_norm": 0.05043427744527494, "learning_rate": 9.965063291139242e-06, "loss": 2.1602, "step": 320 }, { "epoch": 0.02, "grad_norm": 0.05239520008604763, "learning_rate": 9.964050632911394e-06, "loss": 1.9922, "step": 322 }, { "epoch": 0.02, "grad_norm": 0.04720087325304379, "learning_rate": 9.963037974683545e-06, "loss": 2.668, "step": 324 }, { "epoch": 0.02, "grad_norm": 0.04631212913230197, "learning_rate": 9.962025316455697e-06, "loss": 3.5312, "step": 326 }, { "epoch": 0.02, "grad_norm": 0.0651010365528417, "learning_rate": 9.96101265822785e-06, "loss": 3.5859, "step": 328 }, { "epoch": 0.02, "grad_norm": 0.05901109953694509, "learning_rate": 9.960000000000001e-06, "loss": 2.3125, "step": 330 }, { "epoch": 0.02, "grad_norm": 0.05452453224180663, "learning_rate": 9.958987341772153e-06, "loss": 2.1113, "step": 332 }, { "epoch": 0.02, "grad_norm": 0.040191372325169936, "learning_rate": 9.957974683544305e-06, "loss": 3.5703, "step": 334 }, { "epoch": 0.02, "grad_norm": 0.0780827914897579, "learning_rate": 9.956962025316457e-06, "loss": 2.4844, "step": 336 }, { "epoch": 0.02, "grad_norm": 0.04856945184709343, "learning_rate": 9.955949367088609e-06, "loss": 4.2422, "step": 338 }, { "epoch": 0.02, "grad_norm": 0.057463653524251264, "learning_rate": 9.95493670886076e-06, "loss": 3.5547, "step": 340 }, { "epoch": 0.02, "grad_norm": 0.05588622681586604, "learning_rate": 9.953924050632913e-06, "loss": 3.4297, "step": 342 }, { "epoch": 0.02, "grad_norm": 0.05218677183317097, "learning_rate": 9.952911392405065e-06, "loss": 3.1797, "step": 344 }, { "epoch": 0.02, "grad_norm": 0.06249090031601202, "learning_rate": 9.951898734177215e-06, "loss": 3.2344, "step": 346 }, { "epoch": 0.02, "grad_norm": 0.05314294944842914, "learning_rate": 9.950886075949367e-06, "loss": 2.5938, "step": 348 }, { "epoch": 0.02, "grad_norm": 0.096944124609397, "learning_rate": 9.94987341772152e-06, "loss": 2.5469, "step": 350 }, { "epoch": 0.02, "grad_norm": 0.06123597867106346, "learning_rate": 9.948860759493672e-06, "loss": 2.375, "step": 352 }, { "epoch": 0.02, "grad_norm": 0.058049734216535656, "learning_rate": 9.947848101265824e-06, "loss": 3.5234, "step": 354 }, { "epoch": 0.02, "grad_norm": 0.04927350529384349, "learning_rate": 9.946835443037976e-06, "loss": 3.1016, "step": 356 }, { "epoch": 0.02, "grad_norm": 0.08193739278667748, "learning_rate": 9.945822784810128e-06, "loss": 2.043, "step": 358 }, { "epoch": 0.02, "grad_norm": 0.04891879246910809, "learning_rate": 9.94481012658228e-06, "loss": 2.6641, "step": 360 }, { "epoch": 0.02, "grad_norm": 0.05658411010001912, "learning_rate": 9.943797468354432e-06, "loss": 2.0156, "step": 362 }, { "epoch": 0.02, "grad_norm": 0.06126680664799663, "learning_rate": 9.942784810126584e-06, "loss": 3.3047, "step": 364 }, { "epoch": 0.02, "grad_norm": 0.042300489786256924, "learning_rate": 9.941772151898736e-06, "loss": 3.0312, "step": 366 }, { "epoch": 0.02, "grad_norm": 0.043547415767791385, "learning_rate": 9.940759493670887e-06, "loss": 3.25, "step": 368 }, { "epoch": 0.02, "grad_norm": 0.046855612063298045, "learning_rate": 9.93974683544304e-06, "loss": 1.8672, "step": 370 }, { "epoch": 0.02, "grad_norm": 0.04977376295679974, "learning_rate": 9.93873417721519e-06, "loss": 2.6406, "step": 372 }, { "epoch": 0.02, "grad_norm": 0.04732396273321346, "learning_rate": 9.937721518987341e-06, "loss": 1.6602, "step": 374 }, { "epoch": 0.02, "grad_norm": 0.048938423986438856, "learning_rate": 9.936708860759493e-06, "loss": 2.3281, "step": 376 }, { "epoch": 0.02, "grad_norm": 0.04944323057700011, "learning_rate": 9.935696202531645e-06, "loss": 2.6289, "step": 378 }, { "epoch": 0.02, "grad_norm": 0.048255186414602136, "learning_rate": 9.934683544303799e-06, "loss": 2.625, "step": 380 }, { "epoch": 0.02, "grad_norm": 0.041219753976543594, "learning_rate": 9.93367088607595e-06, "loss": 4.0547, "step": 382 }, { "epoch": 0.02, "grad_norm": 0.047695318412624185, "learning_rate": 9.932658227848103e-06, "loss": 1.873, "step": 384 }, { "epoch": 0.02, "grad_norm": 0.04174272882598536, "learning_rate": 9.931645569620255e-06, "loss": 3.5, "step": 386 }, { "epoch": 0.02, "grad_norm": 0.05577264201105512, "learning_rate": 9.930632911392407e-06, "loss": 3.5391, "step": 388 }, { "epoch": 0.02, "grad_norm": 0.078976293555083, "learning_rate": 9.929620253164558e-06, "loss": 3.0703, "step": 390 }, { "epoch": 0.02, "grad_norm": 0.05359446077111816, "learning_rate": 9.92860759493671e-06, "loss": 2.7891, "step": 392 }, { "epoch": 0.02, "grad_norm": 0.058576859582871475, "learning_rate": 9.927594936708862e-06, "loss": 3.2422, "step": 394 }, { "epoch": 0.02, "grad_norm": 0.05182847786102723, "learning_rate": 9.926582278481014e-06, "loss": 2.1953, "step": 396 }, { "epoch": 0.02, "grad_norm": 0.05513545900717149, "learning_rate": 9.925569620253164e-06, "loss": 2.9609, "step": 398 }, { "epoch": 0.02, "grad_norm": 0.04759815076884959, "learning_rate": 9.924556962025316e-06, "loss": 3.6016, "step": 400 }, { "epoch": 0.02, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 43.5546875, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 90.8203125, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.80078125, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 68.53609466552734, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.2086, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.71, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.089, "step": 400 }, { "epoch": 0.02, "eval_specter_top15HN_validation.jsonl.gz_acc1": 9.1796875, "eval_specter_top15HN_validation.jsonl.gz_acc3": 21.484375, "eval_specter_top15HN_validation.jsonl.gz_loss": 1.46875, "eval_specter_top15HN_validation.jsonl.gz_mrr": 21.657047271728516, "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.8575, "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 22.398, "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.35, "step": 400 }, { "epoch": 0.02, "eval_nq_top15HN_validation.jsonl.gz_acc1": 43.1640625, "eval_nq_top15HN_validation.jsonl.gz_acc3": 89.0625, "eval_nq_top15HN_validation.jsonl.gz_loss": 0.87890625, "eval_nq_top15HN_validation.jsonl.gz_mrr": 66.59227752685547, "eval_nq_top15HN_validation.jsonl.gz_runtime": 11.1426, "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.744, "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.09, "step": 400 }, { "epoch": 0.02, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 41.2109375, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 84.5703125, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.98046875, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 64.26415252685547, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.633, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.374, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.068, "step": 400 }, { "epoch": 0.02, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 41.40625, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 84.9609375, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.03125, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 65.21744537353516, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 11.1982, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.715, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.089, "step": 400 }, { "epoch": 0.02, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.875, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 94.3359375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.77734375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.03494262695312, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.914, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.864, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.092, "step": 400 }, { "epoch": 0.02, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 41.40625, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 86.328125, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.7421875, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 64.41943359375, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.6481, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 6.01, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.094, "step": 400 }, { "epoch": 0.02, "eval_fever_top15HN_validation.jsonl.gz_acc1": 42.578125, "eval_fever_top15HN_validation.jsonl.gz_acc3": 90.234375, "eval_fever_top15HN_validation.jsonl.gz_loss": 2.765625, "eval_fever_top15HN_validation.jsonl.gz_mrr": 66.61365509033203, "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.386, "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.906, "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.061, "step": 400 }, { "epoch": 0.02, "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 37.890625, "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 80.078125, "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.828125, "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 60.95426940917969, "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.2239, "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 12.251, "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.191, "step": 400 }, { "epoch": 0.02, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 45.8984375, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 93.359375, "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.078125, "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 70.48290252685547, "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 9.3648, "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 6.834, "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.107, "step": 400 }, { "epoch": 0.02, "eval_arguana_synthetic_validation.jsonl.gz_acc1": 43.1640625, "eval_arguana_synthetic_validation.jsonl.gz_acc3": 91.2109375, "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, "eval_arguana_synthetic_validation.jsonl.gz_mrr": 69.15265655517578, "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.2019, "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.303, "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.192, "step": 400 }, { "epoch": 0.02, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 33.7890625, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 75.0, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.85546875, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 57.39921569824219, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.3277, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.467, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, "step": 400 }, { "epoch": 0.02, "grad_norm": 0.07222205536882423, "learning_rate": 9.923544303797468e-06, "loss": 1.3027, "step": 402 }, { "epoch": 0.02, "grad_norm": 0.15917535202581484, "learning_rate": 9.92253164556962e-06, "loss": 2.6562, "step": 404 }, { "epoch": 0.02, "grad_norm": 0.07928809201876366, "learning_rate": 9.921518987341772e-06, "loss": 1.5039, "step": 406 }, { "epoch": 0.02, "grad_norm": 0.0471266705915067, "learning_rate": 9.920506329113924e-06, "loss": 2.5078, "step": 408 }, { "epoch": 0.02, "grad_norm": 0.04866293621489947, "learning_rate": 9.919493670886078e-06, "loss": 2.875, "step": 410 }, { "epoch": 0.02, "grad_norm": 0.045016903500605984, "learning_rate": 9.91848101265823e-06, "loss": 1.7422, "step": 412 }, { "epoch": 0.02, "grad_norm": 0.0550550579066975, "learning_rate": 9.917468354430381e-06, "loss": 2.0195, "step": 414 }, { "epoch": 0.02, "grad_norm": 0.039185639839193286, "learning_rate": 9.916455696202533e-06, "loss": 2.2305, "step": 416 }, { "epoch": 0.02, "grad_norm": 0.06433655210455046, "learning_rate": 9.915443037974685e-06, "loss": 2.1875, "step": 418 }, { "epoch": 0.02, "grad_norm": 0.04953175680987281, "learning_rate": 9.914430379746837e-06, "loss": 2.082, "step": 420 }, { "epoch": 0.02, "grad_norm": 0.050612062350781264, "learning_rate": 9.913417721518989e-06, "loss": 1.3594, "step": 422 }, { "epoch": 0.02, "grad_norm": 0.04995271945096085, "learning_rate": 9.912405063291141e-06, "loss": 2.332, "step": 424 }, { "epoch": 0.02, "grad_norm": 0.04739567789158815, "learning_rate": 9.911392405063291e-06, "loss": 2.2227, "step": 426 }, { "epoch": 0.02, "grad_norm": 0.048251034334719445, "learning_rate": 9.910379746835443e-06, "loss": 1.8066, "step": 428 }, { "epoch": 0.02, "grad_norm": 0.062472987670653804, "learning_rate": 9.909367088607595e-06, "loss": 3.6719, "step": 430 }, { "epoch": 0.02, "grad_norm": 0.054186276016001844, "learning_rate": 9.908354430379747e-06, "loss": 2.2422, "step": 432 }, { "epoch": 0.02, "grad_norm": 0.07225226588103273, "learning_rate": 9.907341772151899e-06, "loss": 2.1523, "step": 434 }, { "epoch": 0.02, "grad_norm": 0.061083982011778226, "learning_rate": 9.90632911392405e-06, "loss": 2.7578, "step": 436 }, { "epoch": 0.02, "grad_norm": 0.059831340507868036, "learning_rate": 9.905316455696203e-06, "loss": 3.8359, "step": 438 }, { "epoch": 0.02, "grad_norm": 0.05045156333502398, "learning_rate": 9.904303797468356e-06, "loss": 1.6797, "step": 440 }, { "epoch": 0.02, "grad_norm": 0.046842201003395235, "learning_rate": 9.903291139240508e-06, "loss": 3.4922, "step": 442 }, { "epoch": 0.02, "grad_norm": 0.05029184145728825, "learning_rate": 9.90227848101266e-06, "loss": 3.0625, "step": 444 }, { "epoch": 0.02, "grad_norm": 0.07671286356421637, "learning_rate": 9.901265822784812e-06, "loss": 0.8242, "step": 446 }, { "epoch": 0.02, "grad_norm": 0.04915808295544781, "learning_rate": 9.900253164556964e-06, "loss": 3.0078, "step": 448 }, { "epoch": 0.02, "grad_norm": 0.080096966573764, "learning_rate": 9.899240506329116e-06, "loss": 3.6484, "step": 450 }, { "epoch": 0.02, "grad_norm": 0.058943602067565375, "learning_rate": 9.898227848101266e-06, "loss": 2.5898, "step": 452 }, { "epoch": 0.02, "grad_norm": 0.05135092632013379, "learning_rate": 9.897215189873418e-06, "loss": 2.9062, "step": 454 }, { "epoch": 0.02, "grad_norm": 0.06148651316849905, "learning_rate": 9.89620253164557e-06, "loss": 2.8867, "step": 456 }, { "epoch": 0.02, "grad_norm": 0.04512943900335472, "learning_rate": 9.895189873417722e-06, "loss": 2.75, "step": 458 }, { "epoch": 0.02, "grad_norm": 0.07181862208600082, "learning_rate": 9.894177215189874e-06, "loss": 3.0547, "step": 460 }, { "epoch": 0.02, "grad_norm": 0.06373905049284342, "learning_rate": 9.893164556962025e-06, "loss": 2.418, "step": 462 }, { "epoch": 0.02, "grad_norm": 0.08387698193619678, "learning_rate": 9.892151898734177e-06, "loss": 2.1758, "step": 464 }, { "epoch": 0.02, "grad_norm": 0.05084833656375217, "learning_rate": 9.89113924050633e-06, "loss": 2.5781, "step": 466 }, { "epoch": 0.02, "grad_norm": 0.05234011096545917, "learning_rate": 9.890126582278481e-06, "loss": 1.6719, "step": 468 }, { "epoch": 0.02, "grad_norm": 0.07821649022272881, "learning_rate": 9.889113924050635e-06, "loss": 1.7852, "step": 470 }, { "epoch": 0.02, "grad_norm": 0.059443530418835516, "learning_rate": 9.888101265822787e-06, "loss": 2.7422, "step": 472 }, { "epoch": 0.02, "grad_norm": 0.061142752223246014, "learning_rate": 9.887088607594939e-06, "loss": 3.3164, "step": 474 }, { "epoch": 0.02, "grad_norm": 0.07359821255430475, "learning_rate": 9.88607594936709e-06, "loss": 2.8906, "step": 476 }, { "epoch": 0.02, "grad_norm": 0.054895938291319654, "learning_rate": 9.88506329113924e-06, "loss": 0.9883, "step": 478 }, { "epoch": 0.02, "grad_norm": 0.045605934761492745, "learning_rate": 9.884050632911393e-06, "loss": 3.8203, "step": 480 }, { "epoch": 0.02, "grad_norm": 0.06526291645439951, "learning_rate": 9.883037974683545e-06, "loss": 3.6172, "step": 482 }, { "epoch": 0.02, "grad_norm": 0.06413356717781271, "learning_rate": 9.882025316455696e-06, "loss": 1.957, "step": 484 }, { "epoch": 0.02, "grad_norm": 0.053544808439294274, "learning_rate": 9.881012658227848e-06, "loss": 1.5898, "step": 486 }, { "epoch": 0.02, "grad_norm": 0.057347606537699075, "learning_rate": 9.88e-06, "loss": 2.5859, "step": 488 }, { "epoch": 0.02, "grad_norm": 0.060150115517773294, "learning_rate": 9.878987341772152e-06, "loss": 1.9141, "step": 490 }, { "epoch": 0.02, "grad_norm": 0.04980395120188664, "learning_rate": 9.877974683544304e-06, "loss": 3.1719, "step": 492 }, { "epoch": 0.02, "grad_norm": 0.05041464419822217, "learning_rate": 9.876962025316456e-06, "loss": 2.5312, "step": 494 }, { "epoch": 0.02, "grad_norm": 0.05121478836630337, "learning_rate": 9.875949367088608e-06, "loss": 3.25, "step": 496 }, { "epoch": 0.02, "grad_norm": 0.07952860073200034, "learning_rate": 9.87493670886076e-06, "loss": 2.7812, "step": 498 }, { "epoch": 0.03, "grad_norm": 0.05042393538929106, "learning_rate": 9.873924050632913e-06, "loss": 3.6094, "step": 500 }, { "epoch": 0.03, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 45.1171875, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 93.5546875, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.796875, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 70.9421615600586, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.2816, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.673, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.089, "step": 500 }, { "epoch": 0.03, "eval_specter_top15HN_validation.jsonl.gz_acc1": 8.59375, "eval_specter_top15HN_validation.jsonl.gz_acc3": 21.2890625, "eval_specter_top15HN_validation.jsonl.gz_loss": 1.4765625, "eval_specter_top15HN_validation.jsonl.gz_mrr": 20.163715362548828, "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.7785, "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 23.034, "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.36, "step": 500 }, { "epoch": 0.03, "eval_nq_top15HN_validation.jsonl.gz_acc1": 43.1640625, "eval_nq_top15HN_validation.jsonl.gz_acc3": 89.6484375, "eval_nq_top15HN_validation.jsonl.gz_loss": 0.875, "eval_nq_top15HN_validation.jsonl.gz_mrr": 67.29576110839844, "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.9868, "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.825, "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.091, "step": 500 }, { "epoch": 0.03, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 37.5, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 79.1015625, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 1.0078125, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 60.74357986450195, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.7402, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.342, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.068, "step": 500 }, { "epoch": 0.03, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 42.96875, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 87.3046875, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.0234375, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 66.07913970947266, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 11.0179, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.809, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.091, "step": 500 }, { "epoch": 0.03, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.484375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 93.9453125, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 70.80879211425781, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.8716, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.887, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.092, "step": 500 }, { "epoch": 0.03, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 41.40625, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 85.7421875, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.74609375, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 64.67704010009766, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.6862, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.989, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.094, "step": 500 }, { "epoch": 0.03, "eval_fever_top15HN_validation.jsonl.gz_acc1": 40.625, "eval_fever_top15HN_validation.jsonl.gz_acc3": 87.890625, "eval_fever_top15HN_validation.jsonl.gz_loss": 2.65625, "eval_fever_top15HN_validation.jsonl.gz_mrr": 65.1350326538086, "eval_fever_top15HN_validation.jsonl.gz_runtime": 15.7505, "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 4.063, "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.063, "step": 500 }, { "epoch": 0.03, "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 40.4296875, "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 84.375, "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.8203125, "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 64.05941009521484, "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.1597, "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 12.404, "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.194, "step": 500 }, { "epoch": 0.03, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 46.484375, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 94.140625, "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.0703125, "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 71.08375549316406, "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 9.3563, "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 6.84, "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.107, "step": 500 }, { "epoch": 0.03, "eval_arguana_synthetic_validation.jsonl.gz_acc1": 44.3359375, "eval_arguana_synthetic_validation.jsonl.gz_acc3": 93.359375, "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, "eval_arguana_synthetic_validation.jsonl.gz_mrr": 70.08747100830078, "eval_arguana_synthetic_validation.jsonl.gz_runtime": 4.9513, "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.926, "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.202, "step": 500 }, { "epoch": 0.03, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 32.03125, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 70.8984375, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.859375, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 54.63041687011719, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.2012, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.507, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, "step": 500 }, { "epoch": 0.03, "grad_norm": 0.052086814624947436, "learning_rate": 9.872911392405065e-06, "loss": 2.5898, "step": 502 }, { "epoch": 0.03, "grad_norm": 0.1399877800142533, "learning_rate": 9.871898734177216e-06, "loss": 3.0859, "step": 504 }, { "epoch": 0.03, "grad_norm": 0.0532839732473988, "learning_rate": 9.870886075949367e-06, "loss": 2.2617, "step": 506 }, { "epoch": 0.03, "grad_norm": 0.15155453931410987, "learning_rate": 9.86987341772152e-06, "loss": 1.0664, "step": 508 }, { "epoch": 0.03, "grad_norm": 0.07651479570528692, "learning_rate": 9.868860759493671e-06, "loss": 2.2188, "step": 510 }, { "epoch": 0.03, "grad_norm": 0.07446787145246864, "learning_rate": 9.867848101265823e-06, "loss": 1.6797, "step": 512 }, { "epoch": 0.03, "grad_norm": 0.048532115622284516, "learning_rate": 9.866835443037975e-06, "loss": 2.5898, "step": 514 }, { "epoch": 0.03, "grad_norm": 0.0636538158151717, "learning_rate": 9.865822784810127e-06, "loss": 3.6484, "step": 516 }, { "epoch": 0.03, "grad_norm": 0.050666875139838745, "learning_rate": 9.864810126582279e-06, "loss": 1.0391, "step": 518 }, { "epoch": 0.03, "grad_norm": 0.05095008833138064, "learning_rate": 9.86379746835443e-06, "loss": 1.6992, "step": 520 }, { "epoch": 0.03, "grad_norm": 0.057319866044410354, "learning_rate": 9.862784810126583e-06, "loss": 2.1484, "step": 522 }, { "epoch": 0.03, "grad_norm": 0.05464334493499168, "learning_rate": 9.861772151898735e-06, "loss": 2.6172, "step": 524 }, { "epoch": 0.03, "grad_norm": 0.04411594193402404, "learning_rate": 9.860759493670887e-06, "loss": 2.8984, "step": 526 }, { "epoch": 0.03, "grad_norm": 0.07936098978533161, "learning_rate": 9.859746835443038e-06, "loss": 1.7305, "step": 528 }, { "epoch": 0.03, "grad_norm": 0.09236852220817833, "learning_rate": 9.858734177215192e-06, "loss": 2.1875, "step": 530 }, { "epoch": 0.03, "grad_norm": 0.07824501713336816, "learning_rate": 9.857721518987342e-06, "loss": 3.4297, "step": 532 }, { "epoch": 0.03, "grad_norm": 0.1470976833188794, "learning_rate": 9.856708860759494e-06, "loss": 2.5215, "step": 534 }, { "epoch": 0.03, "grad_norm": 0.04917188752231493, "learning_rate": 9.855696202531646e-06, "loss": 2.6484, "step": 536 }, { "epoch": 0.03, "grad_norm": 0.0827307002464502, "learning_rate": 9.854683544303798e-06, "loss": 2.1562, "step": 538 }, { "epoch": 0.03, "grad_norm": 0.08666793122490411, "learning_rate": 9.85367088607595e-06, "loss": 1.2793, "step": 540 }, { "epoch": 0.03, "grad_norm": 0.09175616234034045, "learning_rate": 9.852658227848102e-06, "loss": 1.8516, "step": 542 }, { "epoch": 0.03, "grad_norm": 0.046371319373000215, "learning_rate": 9.851645569620254e-06, "loss": 2.5625, "step": 544 }, { "epoch": 0.03, "grad_norm": 0.06035931438672101, "learning_rate": 9.850632911392406e-06, "loss": 2.2695, "step": 546 }, { "epoch": 0.03, "grad_norm": 0.04850263350227715, "learning_rate": 9.849620253164558e-06, "loss": 2.5547, "step": 548 }, { "epoch": 0.03, "grad_norm": 0.07522083134321193, "learning_rate": 9.84860759493671e-06, "loss": 3.4922, "step": 550 }, { "epoch": 0.03, "grad_norm": 0.06045883646056416, "learning_rate": 9.847594936708861e-06, "loss": 2.7852, "step": 552 }, { "epoch": 0.03, "grad_norm": 0.05489802073459884, "learning_rate": 9.846582278481013e-06, "loss": 2.6211, "step": 554 }, { "epoch": 0.03, "grad_norm": 0.06095202697702689, "learning_rate": 9.845569620253165e-06, "loss": 2.8984, "step": 556 }, { "epoch": 0.03, "grad_norm": 0.04973758122191133, "learning_rate": 9.844556962025317e-06, "loss": 1.791, "step": 558 }, { "epoch": 0.03, "grad_norm": 0.13716884502493287, "learning_rate": 9.843544303797469e-06, "loss": 3.3125, "step": 560 }, { "epoch": 0.03, "grad_norm": 0.04949379404632409, "learning_rate": 9.842531645569621e-06, "loss": 2.2852, "step": 562 }, { "epoch": 0.03, "grad_norm": 0.051544756346996144, "learning_rate": 9.841518987341773e-06, "loss": 1.6953, "step": 564 }, { "epoch": 0.03, "grad_norm": 0.056998301240872185, "learning_rate": 9.840506329113925e-06, "loss": 3.4766, "step": 566 }, { "epoch": 0.03, "grad_norm": 0.05953859829543366, "learning_rate": 9.839493670886077e-06, "loss": 2.1992, "step": 568 }, { "epoch": 0.03, "grad_norm": 0.05423558793405452, "learning_rate": 9.838481012658229e-06, "loss": 1.0488, "step": 570 }, { "epoch": 0.03, "grad_norm": 0.05084089297876364, "learning_rate": 9.83746835443038e-06, "loss": 2.9531, "step": 572 }, { "epoch": 0.03, "grad_norm": 0.0703427693327456, "learning_rate": 9.836455696202532e-06, "loss": 2.4375, "step": 574 }, { "epoch": 0.03, "grad_norm": 0.06170662786551502, "learning_rate": 9.835443037974684e-06, "loss": 3.2891, "step": 576 }, { "epoch": 0.03, "grad_norm": 0.08852110535169934, "learning_rate": 9.834430379746836e-06, "loss": 2.625, "step": 578 }, { "epoch": 0.03, "grad_norm": 0.10241258632199284, "learning_rate": 9.833417721518988e-06, "loss": 1.6133, "step": 580 }, { "epoch": 0.03, "grad_norm": 0.056497885218087136, "learning_rate": 9.83240506329114e-06, "loss": 2.3867, "step": 582 }, { "epoch": 0.03, "grad_norm": 0.053611763514035925, "learning_rate": 9.831392405063292e-06, "loss": 2.1406, "step": 584 }, { "epoch": 0.03, "grad_norm": 0.044289570354148024, "learning_rate": 9.830379746835444e-06, "loss": 2.8125, "step": 586 }, { "epoch": 0.03, "grad_norm": 0.051724720218463194, "learning_rate": 9.829367088607596e-06, "loss": 2.3438, "step": 588 }, { "epoch": 0.03, "grad_norm": 0.043367256971020826, "learning_rate": 9.828354430379748e-06, "loss": 2.2695, "step": 590 }, { "epoch": 0.03, "grad_norm": 0.08974582061892053, "learning_rate": 9.8273417721519e-06, "loss": 2.5742, "step": 592 }, { "epoch": 0.03, "grad_norm": 0.050289857638527925, "learning_rate": 9.826329113924051e-06, "loss": 1.0898, "step": 594 }, { "epoch": 0.03, "grad_norm": 0.04624382395844746, "learning_rate": 9.825316455696203e-06, "loss": 2.1914, "step": 596 }, { "epoch": 0.03, "grad_norm": 0.0413793982291362, "learning_rate": 9.824303797468355e-06, "loss": 2.9219, "step": 598 }, { "epoch": 0.03, "grad_norm": 0.053437477503598924, "learning_rate": 9.823291139240507e-06, "loss": 2.3242, "step": 600 }, { "epoch": 0.03, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 46.09375, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 93.9453125, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.8046875, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 69.61688995361328, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.248, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.69, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.089, "step": 600 }, { "epoch": 0.03, "eval_specter_top15HN_validation.jsonl.gz_acc1": 9.9609375, "eval_specter_top15HN_validation.jsonl.gz_acc3": 24.21875, "eval_specter_top15HN_validation.jsonl.gz_loss": 1.4609375, "eval_specter_top15HN_validation.jsonl.gz_mrr": 21.876731872558594, "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.6765, "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 23.912, "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.374, "step": 600 }, { "epoch": 0.03, "eval_nq_top15HN_validation.jsonl.gz_acc1": 45.5078125, "eval_nq_top15HN_validation.jsonl.gz_acc3": 93.9453125, "eval_nq_top15HN_validation.jsonl.gz_loss": 0.86328125, "eval_nq_top15HN_validation.jsonl.gz_mrr": 69.94965362548828, "eval_nq_top15HN_validation.jsonl.gz_runtime": 11.0773, "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.778, "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.09, "step": 600 }, { "epoch": 0.03, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 39.6484375, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 83.3984375, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.984375, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 62.75840377807617, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.5581, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.396, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, "step": 600 }, { "epoch": 0.03, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 41.40625, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 84.1796875, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.03125, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 63.93449020385742, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.8983, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.872, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.092, "step": 600 }, { "epoch": 0.03, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.875, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 94.921875, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.77734375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 70.94852447509766, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 11.028, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.803, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.091, "step": 600 }, { "epoch": 0.03, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 40.625, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 85.15625, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.7421875, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 64.46388244628906, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.6996, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.982, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.093, "step": 600 }, { "epoch": 0.03, "eval_fever_top15HN_validation.jsonl.gz_acc1": 40.8203125, "eval_fever_top15HN_validation.jsonl.gz_acc3": 88.0859375, "eval_fever_top15HN_validation.jsonl.gz_loss": 2.4375, "eval_fever_top15HN_validation.jsonl.gz_mrr": 65.77207946777344, "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.2949, "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.928, "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.061, "step": 600 }, { "epoch": 0.03, "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 36.5234375, "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 78.7109375, "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.8359375, "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 59.384422302246094, "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.1332, "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 12.468, "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.195, "step": 600 }, { "epoch": 0.03, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 46.09375, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 93.5546875, "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.078125, "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 70.30752563476562, "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 10.2807, "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 6.225, "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.097, "step": 600 }, { "epoch": 0.03, "eval_arguana_synthetic_validation.jsonl.gz_acc1": 44.7265625, "eval_arguana_synthetic_validation.jsonl.gz_acc3": 92.3828125, "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.6953125, "eval_arguana_synthetic_validation.jsonl.gz_mrr": 69.560791015625, "eval_arguana_synthetic_validation.jsonl.gz_runtime": 4.8394, "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 13.225, "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.207, "step": 600 }, { "epoch": 0.03, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 29.6875, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 68.1640625, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.87109375, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 52.34931564331055, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.2839, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.481, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, "step": 600 }, { "epoch": 0.03, "grad_norm": 0.08623680100136266, "learning_rate": 9.822278481012659e-06, "loss": 2.6758, "step": 602 }, { "epoch": 0.03, "grad_norm": 0.07319114177823224, "learning_rate": 9.821265822784811e-06, "loss": 3.8359, "step": 604 }, { "epoch": 0.03, "grad_norm": 0.0482368697003861, "learning_rate": 9.820253164556963e-06, "loss": 2.4297, "step": 606 }, { "epoch": 0.03, "grad_norm": 0.07861562097515083, "learning_rate": 9.819240506329115e-06, "loss": 2.4766, "step": 608 }, { "epoch": 0.03, "grad_norm": 0.051800647140822956, "learning_rate": 9.818227848101267e-06, "loss": 3.5703, "step": 610 }, { "epoch": 0.03, "grad_norm": 0.05715139468084694, "learning_rate": 9.817215189873419e-06, "loss": 2.6406, "step": 612 }, { "epoch": 0.03, "grad_norm": 0.10738698652889787, "learning_rate": 9.81620253164557e-06, "loss": 1.7559, "step": 614 }, { "epoch": 0.03, "grad_norm": 0.0589747795295819, "learning_rate": 9.815189873417722e-06, "loss": 3.2188, "step": 616 }, { "epoch": 0.03, "grad_norm": 0.047291320031127355, "learning_rate": 9.814177215189874e-06, "loss": 2.7969, "step": 618 }, { "epoch": 0.03, "grad_norm": 0.0513134336965957, "learning_rate": 9.813164556962026e-06, "loss": 3.2422, "step": 620 }, { "epoch": 0.03, "grad_norm": 0.05058905562434262, "learning_rate": 9.812151898734178e-06, "loss": 2.2773, "step": 622 }, { "epoch": 0.03, "grad_norm": 0.07249453092368723, "learning_rate": 9.81113924050633e-06, "loss": 3.4219, "step": 624 }, { "epoch": 0.03, "grad_norm": 0.04474715275304292, "learning_rate": 9.810126582278482e-06, "loss": 2.625, "step": 626 }, { "epoch": 0.03, "grad_norm": 0.10601883505288384, "learning_rate": 9.809113924050634e-06, "loss": 2.4453, "step": 628 }, { "epoch": 0.03, "grad_norm": 0.04627960285472802, "learning_rate": 9.808101265822786e-06, "loss": 2.0195, "step": 630 }, { "epoch": 0.03, "grad_norm": 0.0651145970413386, "learning_rate": 9.807088607594938e-06, "loss": 3.3125, "step": 632 }, { "epoch": 0.03, "grad_norm": 0.048961687998195856, "learning_rate": 9.80607594936709e-06, "loss": 2.3555, "step": 634 }, { "epoch": 0.03, "grad_norm": 0.08215525597931916, "learning_rate": 9.805063291139241e-06, "loss": 2.793, "step": 636 }, { "epoch": 0.03, "grad_norm": 0.05335831196369767, "learning_rate": 9.804050632911393e-06, "loss": 2.8516, "step": 638 }, { "epoch": 0.03, "grad_norm": 0.05642278685436397, "learning_rate": 9.803037974683545e-06, "loss": 2.3594, "step": 640 }, { "epoch": 0.03, "grad_norm": 0.05549078037094988, "learning_rate": 9.802025316455697e-06, "loss": 1.5977, "step": 642 }, { "epoch": 0.03, "grad_norm": 0.05058085579920028, "learning_rate": 9.801012658227849e-06, "loss": 2.4688, "step": 644 }, { "epoch": 0.03, "grad_norm": 0.06410392898663864, "learning_rate": 9.800000000000001e-06, "loss": 4.0469, "step": 646 }, { "epoch": 0.03, "grad_norm": 0.05075679015744242, "learning_rate": 9.798987341772153e-06, "loss": 1.3359, "step": 648 }, { "epoch": 0.03, "grad_norm": 0.06223558769043007, "learning_rate": 9.797974683544305e-06, "loss": 2.6797, "step": 650 }, { "epoch": 0.03, "grad_norm": 0.04602290584883312, "learning_rate": 9.796962025316457e-06, "loss": 1.1328, "step": 652 }, { "epoch": 0.03, "grad_norm": 0.06882275137670477, "learning_rate": 9.795949367088609e-06, "loss": 3.5547, "step": 654 }, { "epoch": 0.03, "grad_norm": 0.044864511663979555, "learning_rate": 9.79493670886076e-06, "loss": 1.9414, "step": 656 }, { "epoch": 0.03, "grad_norm": 0.04974066836528442, "learning_rate": 9.793924050632912e-06, "loss": 2.1836, "step": 658 }, { "epoch": 0.03, "grad_norm": 0.048577610153630706, "learning_rate": 9.792911392405064e-06, "loss": 3.2109, "step": 660 }, { "epoch": 0.03, "grad_norm": 0.07990453093749185, "learning_rate": 9.791898734177216e-06, "loss": 3.8438, "step": 662 }, { "epoch": 0.03, "grad_norm": 0.04685956979146973, "learning_rate": 9.790886075949367e-06, "loss": 3.2188, "step": 664 }, { "epoch": 0.03, "grad_norm": 0.04572710820131021, "learning_rate": 9.78987341772152e-06, "loss": 3.2031, "step": 666 }, { "epoch": 0.03, "grad_norm": 0.04694385984204392, "learning_rate": 9.788860759493672e-06, "loss": 3.3047, "step": 668 }, { "epoch": 0.03, "grad_norm": 0.0572023022137153, "learning_rate": 9.787848101265824e-06, "loss": 3.0391, "step": 670 }, { "epoch": 0.03, "grad_norm": 0.04710781129215592, "learning_rate": 9.786835443037976e-06, "loss": 2.2109, "step": 672 }, { "epoch": 0.03, "grad_norm": 0.07245719172520465, "learning_rate": 9.785822784810128e-06, "loss": 1.6289, "step": 674 }, { "epoch": 0.03, "grad_norm": 0.061101444487292524, "learning_rate": 9.78481012658228e-06, "loss": 3.0078, "step": 676 }, { "epoch": 0.03, "grad_norm": 0.1327970089012375, "learning_rate": 9.783797468354432e-06, "loss": 3.3203, "step": 678 }, { "epoch": 0.03, "grad_norm": 0.06351862027611924, "learning_rate": 9.782784810126583e-06, "loss": 3.9688, "step": 680 }, { "epoch": 0.03, "grad_norm": 0.0551890108401501, "learning_rate": 9.781772151898735e-06, "loss": 1.5664, "step": 682 }, { "epoch": 0.03, "grad_norm": 0.049497806599666815, "learning_rate": 9.780759493670887e-06, "loss": 3.3438, "step": 684 }, { "epoch": 0.03, "grad_norm": 0.05577083436309615, "learning_rate": 9.77974683544304e-06, "loss": 3.5, "step": 686 }, { "epoch": 0.03, "grad_norm": 0.04930049337356384, "learning_rate": 9.778734177215191e-06, "loss": 3.5234, "step": 688 }, { "epoch": 0.03, "grad_norm": 0.052658271181243624, "learning_rate": 9.777721518987343e-06, "loss": 2.4375, "step": 690 }, { "epoch": 0.03, "grad_norm": 0.05890374238080107, "learning_rate": 9.776708860759493e-06, "loss": 2.4102, "step": 692 }, { "epoch": 0.03, "grad_norm": 0.05692843983127017, "learning_rate": 9.775696202531645e-06, "loss": 3.8516, "step": 694 }, { "epoch": 0.03, "grad_norm": 0.08032851209382136, "learning_rate": 9.774683544303799e-06, "loss": 3.2617, "step": 696 }, { "epoch": 0.03, "grad_norm": 0.06266368353357782, "learning_rate": 9.77367088607595e-06, "loss": 2.5938, "step": 698 }, { "epoch": 0.04, "grad_norm": 0.055326096687423366, "learning_rate": 9.772658227848103e-06, "loss": 3.5781, "step": 700 }, { "epoch": 0.04, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 44.53125, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 92.1875, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.796875, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 69.31864929199219, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.2991, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.664, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.089, "step": 700 }, { "epoch": 0.04, "eval_specter_top15HN_validation.jsonl.gz_acc1": 11.1328125, "eval_specter_top15HN_validation.jsonl.gz_acc3": 25.1953125, "eval_specter_top15HN_validation.jsonl.gz_loss": 1.4609375, "eval_specter_top15HN_validation.jsonl.gz_mrr": 23.333736419677734, "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.8764, "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 22.25, "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.348, "step": 700 }, { "epoch": 0.04, "eval_nq_top15HN_validation.jsonl.gz_acc1": 47.4609375, "eval_nq_top15HN_validation.jsonl.gz_acc3": 97.0703125, "eval_nq_top15HN_validation.jsonl.gz_loss": 0.85546875, "eval_nq_top15HN_validation.jsonl.gz_mrr": 72.60289764404297, "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.9461, "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.847, "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.091, "step": 700 }, { "epoch": 0.04, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 40.625, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 83.3984375, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.98046875, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 63.90742111206055, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.5324, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.404, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, "step": 700 }, { "epoch": 0.04, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 41.40625, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 84.5703125, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.03125, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 64.74537658691406, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 11.0849, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.774, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.09, "step": 700 }, { "epoch": 0.04, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 47.8515625, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 96.09375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 72.10258483886719, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.8533, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.897, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.092, "step": 700 }, { "epoch": 0.04, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 41.40625, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 86.1328125, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.7421875, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 64.01837158203125, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.9244, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.858, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.092, "step": 700 }, { "epoch": 0.04, "eval_fever_top15HN_validation.jsonl.gz_acc1": 40.625, "eval_fever_top15HN_validation.jsonl.gz_acc3": 87.890625, "eval_fever_top15HN_validation.jsonl.gz_loss": 2.46875, "eval_fever_top15HN_validation.jsonl.gz_mrr": 65.89733123779297, "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.0426, "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.989, "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.062, "step": 700 }, { "epoch": 0.04, "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 41.6015625, "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 85.7421875, "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.81640625, "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 65.13924407958984, "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.2631, "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 12.16, "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.19, "step": 700 }, { "epoch": 0.04, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 46.2890625, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 94.140625, "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.078125, "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 70.59991455078125, "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 5.745, "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 11.14, "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.174, "step": 700 }, { "epoch": 0.04, "eval_arguana_synthetic_validation.jsonl.gz_acc1": 43.9453125, "eval_arguana_synthetic_validation.jsonl.gz_acc3": 91.015625, "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.7109375, "eval_arguana_synthetic_validation.jsonl.gz_mrr": 67.97984313964844, "eval_arguana_synthetic_validation.jsonl.gz_runtime": 4.9407, "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.954, "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.202, "step": 700 }, { "epoch": 0.04, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 32.2265625, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 70.1171875, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.84765625, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 54.56657791137695, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.286, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.48, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, "step": 700 }, { "epoch": 0.04, "grad_norm": 0.06118596899639704, "learning_rate": 9.771645569620254e-06, "loss": 3.2109, "step": 702 }, { "epoch": 0.04, "grad_norm": 0.047725355354412885, "learning_rate": 9.770632911392406e-06, "loss": 1.8242, "step": 704 }, { "epoch": 0.04, "grad_norm": 0.052086155288461826, "learning_rate": 9.769620253164558e-06, "loss": 1.8867, "step": 706 }, { "epoch": 0.04, "grad_norm": 0.059152476425263945, "learning_rate": 9.76860759493671e-06, "loss": 2.6211, "step": 708 }, { "epoch": 0.04, "grad_norm": 0.04760669560978858, "learning_rate": 9.767594936708862e-06, "loss": 2.6523, "step": 710 }, { "epoch": 0.04, "grad_norm": 0.04015573940759585, "learning_rate": 9.766582278481014e-06, "loss": 3.4375, "step": 712 }, { "epoch": 0.04, "grad_norm": 0.04941952895660446, "learning_rate": 9.765569620253166e-06, "loss": 1.8672, "step": 714 }, { "epoch": 0.04, "grad_norm": 0.06014704393875058, "learning_rate": 9.764556962025318e-06, "loss": 3.2344, "step": 716 }, { "epoch": 0.04, "grad_norm": 0.05626127416491607, "learning_rate": 9.763544303797468e-06, "loss": 1.3164, "step": 718 }, { "epoch": 0.04, "grad_norm": 0.04878155156099776, "learning_rate": 9.76253164556962e-06, "loss": 2.1953, "step": 720 }, { "epoch": 0.04, "grad_norm": 0.05715478816178281, "learning_rate": 9.761518987341772e-06, "loss": 3.0781, "step": 722 }, { "epoch": 0.04, "grad_norm": 0.09967059703652872, "learning_rate": 9.760506329113924e-06, "loss": 3.2266, "step": 724 }, { "epoch": 0.04, "grad_norm": 0.05764160810613535, "learning_rate": 9.759493670886077e-06, "loss": 1.4375, "step": 726 }, { "epoch": 0.04, "grad_norm": 0.06727665333940962, "learning_rate": 9.75848101265823e-06, "loss": 3.0078, "step": 728 }, { "epoch": 0.04, "grad_norm": 0.05330506559159864, "learning_rate": 9.757468354430381e-06, "loss": 1.9844, "step": 730 }, { "epoch": 0.04, "grad_norm": 0.0732283674553054, "learning_rate": 9.756455696202533e-06, "loss": 2.4297, "step": 732 }, { "epoch": 0.04, "grad_norm": 0.06311516586623442, "learning_rate": 9.755443037974685e-06, "loss": 2.2441, "step": 734 }, { "epoch": 0.04, "grad_norm": 0.0711140836729622, "learning_rate": 9.754430379746837e-06, "loss": 1.2246, "step": 736 }, { "epoch": 0.04, "grad_norm": 0.06195722460862401, "learning_rate": 9.753417721518989e-06, "loss": 3.9219, "step": 738 }, { "epoch": 0.04, "grad_norm": 0.03857369362196024, "learning_rate": 9.75240506329114e-06, "loss": 2.9766, "step": 740 }, { "epoch": 0.04, "grad_norm": 0.08141326010590223, "learning_rate": 9.751392405063293e-06, "loss": 2.1875, "step": 742 }, { "epoch": 0.04, "grad_norm": 0.053241870631494596, "learning_rate": 9.750379746835443e-06, "loss": 2.3203, "step": 744 }, { "epoch": 0.04, "grad_norm": 0.052583485208944805, "learning_rate": 9.749367088607595e-06, "loss": 1.3594, "step": 746 }, { "epoch": 0.04, "grad_norm": 0.06262550939445087, "learning_rate": 9.748354430379747e-06, "loss": 3.8125, "step": 748 }, { "epoch": 0.04, "grad_norm": 0.05713627433538821, "learning_rate": 9.747341772151899e-06, "loss": 2.9922, "step": 750 }, { "epoch": 0.04, "grad_norm": 0.08006391748138383, "learning_rate": 9.74632911392405e-06, "loss": 2.6172, "step": 752 }, { "epoch": 0.04, "grad_norm": 0.04952527656245675, "learning_rate": 9.745316455696202e-06, "loss": 3.6562, "step": 754 }, { "epoch": 0.04, "grad_norm": 0.09081931780198628, "learning_rate": 9.744303797468356e-06, "loss": 1.4219, "step": 756 }, { "epoch": 0.04, "grad_norm": 0.0949847693923792, "learning_rate": 9.743291139240508e-06, "loss": 3.4688, "step": 758 }, { "epoch": 0.04, "grad_norm": 0.050663076963228845, "learning_rate": 9.74227848101266e-06, "loss": 1.0195, "step": 760 }, { "epoch": 0.04, "grad_norm": 0.0497400270790924, "learning_rate": 9.741265822784812e-06, "loss": 1.3047, "step": 762 }, { "epoch": 0.04, "grad_norm": 0.1255160423475118, "learning_rate": 9.740253164556964e-06, "loss": 0.7227, "step": 764 }, { "epoch": 0.04, "grad_norm": 0.05129981962807472, "learning_rate": 9.739240506329116e-06, "loss": 1.4102, "step": 766 }, { "epoch": 0.04, "grad_norm": 0.07927376346910217, "learning_rate": 9.738227848101267e-06, "loss": 2.9648, "step": 768 }, { "epoch": 0.04, "grad_norm": 0.04935552207195148, "learning_rate": 9.73721518987342e-06, "loss": 1.3008, "step": 770 }, { "epoch": 0.04, "grad_norm": 0.049573584190090544, "learning_rate": 9.73620253164557e-06, "loss": 2.9609, "step": 772 }, { "epoch": 0.04, "grad_norm": 0.056698471570980416, "learning_rate": 9.735189873417721e-06, "loss": 2.2383, "step": 774 }, { "epoch": 0.04, "grad_norm": 0.04851942949305104, "learning_rate": 9.734177215189873e-06, "loss": 3.0234, "step": 776 }, { "epoch": 0.04, "grad_norm": 0.05230940565676036, "learning_rate": 9.733164556962025e-06, "loss": 2.4453, "step": 778 }, { "epoch": 0.04, "grad_norm": 0.05314584973307411, "learning_rate": 9.732151898734177e-06, "loss": 2.6914, "step": 780 }, { "epoch": 0.04, "grad_norm": 0.0411558012354048, "learning_rate": 9.731139240506329e-06, "loss": 2.9922, "step": 782 }, { "epoch": 0.04, "grad_norm": 0.08371388601948121, "learning_rate": 9.730126582278481e-06, "loss": 1.3887, "step": 784 }, { "epoch": 0.04, "grad_norm": 0.051910963285288374, "learning_rate": 9.729113924050635e-06, "loss": 2.2695, "step": 786 }, { "epoch": 0.04, "grad_norm": 0.0828403888156875, "learning_rate": 9.728101265822787e-06, "loss": 1.623, "step": 788 }, { "epoch": 0.04, "grad_norm": 0.0981728100247187, "learning_rate": 9.727088607594938e-06, "loss": 1.5234, "step": 790 }, { "epoch": 0.04, "grad_norm": 0.04084709431955858, "learning_rate": 9.72607594936709e-06, "loss": 1.8711, "step": 792 }, { "epoch": 0.04, "grad_norm": 0.07636058795930367, "learning_rate": 9.725063291139242e-06, "loss": 4.625, "step": 794 }, { "epoch": 0.04, "grad_norm": 0.04264657114245911, "learning_rate": 9.724050632911394e-06, "loss": 2.9941, "step": 796 }, { "epoch": 0.04, "grad_norm": 0.05140111703740243, "learning_rate": 9.723037974683544e-06, "loss": 1.9727, "step": 798 }, { "epoch": 0.04, "grad_norm": 0.04100141392608911, "learning_rate": 9.722025316455696e-06, "loss": 3.3281, "step": 800 }, { "epoch": 0.04, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 44.7265625, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 91.6015625, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.80078125, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 69.17064666748047, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.4337, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.597, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.087, "step": 800 }, { "epoch": 0.04, "eval_specter_top15HN_validation.jsonl.gz_acc1": 8.3984375, "eval_specter_top15HN_validation.jsonl.gz_acc3": 20.1171875, "eval_specter_top15HN_validation.jsonl.gz_loss": 1.484375, "eval_specter_top15HN_validation.jsonl.gz_mrr": 19.849475860595703, "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.5955, "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 24.658, "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.385, "step": 800 }, { "epoch": 0.04, "eval_nq_top15HN_validation.jsonl.gz_acc1": 47.0703125, "eval_nq_top15HN_validation.jsonl.gz_acc3": 95.8984375, "eval_nq_top15HN_validation.jsonl.gz_loss": 0.86328125, "eval_nq_top15HN_validation.jsonl.gz_mrr": 71.50338745117188, "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.9137, "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.864, "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.092, "step": 800 }, { "epoch": 0.04, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 38.28125, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 81.25, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.9921875, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 61.09928512573242, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.5899, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.387, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, "step": 800 }, { "epoch": 0.04, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 42.578125, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 86.71875, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.0234375, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 65.71888732910156, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.9099, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.866, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.092, "step": 800 }, { "epoch": 0.04, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.875, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 94.921875, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.77734375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.62092590332031, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 11.0013, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.818, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.091, "step": 800 }, { "epoch": 0.04, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 41.6015625, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 86.1328125, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.7421875, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 65.20923614501953, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.7597, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.948, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.093, "step": 800 }, { "epoch": 0.04, "eval_fever_top15HN_validation.jsonl.gz_acc1": 42.1875, "eval_fever_top15HN_validation.jsonl.gz_acc3": 88.8671875, "eval_fever_top15HN_validation.jsonl.gz_loss": 2.34375, "eval_fever_top15HN_validation.jsonl.gz_mrr": 66.94634246826172, "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.3804, "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.907, "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.061, "step": 800 }, { "epoch": 0.04, "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 27.9296875, "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 63.28125, "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.87890625, "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 49.52406692504883, "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.0823, "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 12.593, "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.197, "step": 800 }, { "epoch": 0.04, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 46.875, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 95.1171875, "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.0625, "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 71.49986267089844, "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 9.3374, "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 6.854, "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.107, "step": 800 }, { "epoch": 0.04, "eval_arguana_synthetic_validation.jsonl.gz_acc1": 43.9453125, "eval_arguana_synthetic_validation.jsonl.gz_acc3": 91.9921875, "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, "eval_arguana_synthetic_validation.jsonl.gz_mrr": 68.51725769042969, "eval_arguana_synthetic_validation.jsonl.gz_runtime": 4.8596, "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 13.17, "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.206, "step": 800 }, { "epoch": 0.04, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 29.296875, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 67.578125, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.859375, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 52.38294219970703, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.2007, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.507, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, "step": 800 }, { "epoch": 0.04, "grad_norm": 0.09124114355291447, "learning_rate": 9.721012658227848e-06, "loss": 1.7773, "step": 802 }, { "epoch": 0.04, "grad_norm": 0.054038518364640103, "learning_rate": 9.72e-06, "loss": 1.9258, "step": 804 }, { "epoch": 0.04, "grad_norm": 0.039128087533217365, "learning_rate": 9.718987341772152e-06, "loss": 3.5234, "step": 806 }, { "epoch": 0.04, "grad_norm": 0.08953747686898128, "learning_rate": 9.717974683544304e-06, "loss": 1.7305, "step": 808 }, { "epoch": 0.04, "grad_norm": 0.08799844856665831, "learning_rate": 9.716962025316456e-06, "loss": 3.25, "step": 810 }, { "epoch": 0.04, "grad_norm": 0.08959604406929138, "learning_rate": 9.715949367088608e-06, "loss": 1.707, "step": 812 }, { "epoch": 0.04, "grad_norm": 0.07830294965984795, "learning_rate": 9.71493670886076e-06, "loss": 2.4766, "step": 814 }, { "epoch": 0.04, "grad_norm": 0.08465842673927493, "learning_rate": 9.713924050632913e-06, "loss": 2.5508, "step": 816 }, { "epoch": 0.04, "grad_norm": 0.04974901837385099, "learning_rate": 9.712911392405065e-06, "loss": 1.6953, "step": 818 }, { "epoch": 0.04, "grad_norm": 0.0779470742221457, "learning_rate": 9.711898734177217e-06, "loss": 1.666, "step": 820 }, { "epoch": 0.04, "grad_norm": 0.0599924747024499, "learning_rate": 9.710886075949369e-06, "loss": 4.0312, "step": 822 }, { "epoch": 0.04, "grad_norm": 0.04762067121368817, "learning_rate": 9.70987341772152e-06, "loss": 2.1953, "step": 824 }, { "epoch": 0.04, "grad_norm": 0.058344615339498314, "learning_rate": 9.708860759493671e-06, "loss": 2.9648, "step": 826 }, { "epoch": 0.04, "grad_norm": 0.08011354558563408, "learning_rate": 9.707848101265823e-06, "loss": 1.1094, "step": 828 }, { "epoch": 0.04, "grad_norm": 0.04945642122097788, "learning_rate": 9.706835443037975e-06, "loss": 2.6172, "step": 830 }, { "epoch": 0.04, "grad_norm": 0.050330923165945715, "learning_rate": 9.705822784810127e-06, "loss": 1.3594, "step": 832 }, { "epoch": 0.04, "grad_norm": 0.055200305484761326, "learning_rate": 9.704810126582279e-06, "loss": 2.8359, "step": 834 }, { "epoch": 0.04, "grad_norm": 0.07118322079438398, "learning_rate": 9.70379746835443e-06, "loss": 2.7109, "step": 836 }, { "epoch": 0.04, "grad_norm": 0.08242670176144666, "learning_rate": 9.702784810126583e-06, "loss": 3.1641, "step": 838 }, { "epoch": 0.04, "grad_norm": 0.13921593334632498, "learning_rate": 9.701772151898734e-06, "loss": 2.293, "step": 840 }, { "epoch": 0.04, "grad_norm": 0.04956587870814121, "learning_rate": 9.700759493670886e-06, "loss": 4.1875, "step": 842 }, { "epoch": 0.04, "grad_norm": 0.06392285373789786, "learning_rate": 9.699746835443038e-06, "loss": 2.4727, "step": 844 }, { "epoch": 0.04, "grad_norm": 0.05315458905982316, "learning_rate": 9.698734177215192e-06, "loss": 2.6484, "step": 846 }, { "epoch": 0.04, "grad_norm": 0.0666317861392757, "learning_rate": 9.697721518987344e-06, "loss": 2.334, "step": 848 }, { "epoch": 0.04, "grad_norm": 0.05001469588005142, "learning_rate": 9.696708860759494e-06, "loss": 2.2031, "step": 850 }, { "epoch": 0.04, "grad_norm": 0.05907508517646925, "learning_rate": 9.695696202531646e-06, "loss": 2.3516, "step": 852 }, { "epoch": 0.04, "grad_norm": 0.0905842584665758, "learning_rate": 9.694683544303798e-06, "loss": 3.1172, "step": 854 }, { "epoch": 0.04, "grad_norm": 0.09929452015435283, "learning_rate": 9.69367088607595e-06, "loss": 2.6172, "step": 856 }, { "epoch": 0.04, "grad_norm": 0.06623645068675771, "learning_rate": 9.692658227848102e-06, "loss": 3.875, "step": 858 }, { "epoch": 0.04, "grad_norm": 0.048775790301383154, "learning_rate": 9.691645569620254e-06, "loss": 2.2676, "step": 860 }, { "epoch": 0.04, "grad_norm": 0.06093985556060049, "learning_rate": 9.690632911392405e-06, "loss": 3.625, "step": 862 }, { "epoch": 0.04, "grad_norm": 0.04870386047009894, "learning_rate": 9.689620253164557e-06, "loss": 2.2148, "step": 864 }, { "epoch": 0.04, "grad_norm": 0.0576400145878143, "learning_rate": 9.68860759493671e-06, "loss": 3.4375, "step": 866 }, { "epoch": 0.04, "grad_norm": 0.07046074934432127, "learning_rate": 9.687594936708861e-06, "loss": 2.8281, "step": 868 }, { "epoch": 0.04, "grad_norm": 0.028188252640599154, "learning_rate": 9.686582278481013e-06, "loss": 3.2188, "step": 870 }, { "epoch": 0.04, "grad_norm": 0.041067254121758504, "learning_rate": 9.685569620253165e-06, "loss": 2.1641, "step": 872 }, { "epoch": 0.04, "grad_norm": 0.0605214184651647, "learning_rate": 9.684556962025317e-06, "loss": 1.8828, "step": 874 }, { "epoch": 0.04, "grad_norm": 0.11912955951896191, "learning_rate": 9.68354430379747e-06, "loss": 1.7461, "step": 876 }, { "epoch": 0.04, "grad_norm": 0.061897867924752575, "learning_rate": 9.68253164556962e-06, "loss": 3.6094, "step": 878 }, { "epoch": 0.04, "grad_norm": 0.047340252282043194, "learning_rate": 9.681518987341773e-06, "loss": 1.1426, "step": 880 }, { "epoch": 0.04, "grad_norm": 0.053958879742531454, "learning_rate": 9.680506329113925e-06, "loss": 1.3281, "step": 882 }, { "epoch": 0.04, "grad_norm": 0.05998403293897633, "learning_rate": 9.679493670886076e-06, "loss": 3.2031, "step": 884 }, { "epoch": 0.04, "grad_norm": 0.0527429419199256, "learning_rate": 9.678481012658228e-06, "loss": 3.4297, "step": 886 }, { "epoch": 0.04, "grad_norm": 0.05507073065314479, "learning_rate": 9.67746835443038e-06, "loss": 4.0234, "step": 888 }, { "epoch": 0.04, "grad_norm": 0.04697289030607602, "learning_rate": 9.676455696202532e-06, "loss": 2.3047, "step": 890 }, { "epoch": 0.04, "grad_norm": 0.06050305742169189, "learning_rate": 9.675443037974684e-06, "loss": 3.7109, "step": 892 }, { "epoch": 0.04, "grad_norm": 0.08237426148645782, "learning_rate": 9.674430379746836e-06, "loss": 2.5312, "step": 894 }, { "epoch": 0.04, "grad_norm": 0.05394054450123622, "learning_rate": 9.673417721518988e-06, "loss": 2.6484, "step": 896 }, { "epoch": 0.04, "grad_norm": 0.047424890816645304, "learning_rate": 9.67240506329114e-06, "loss": 3.4141, "step": 898 }, { "epoch": 0.04, "grad_norm": 0.045427998247335184, "learning_rate": 9.671392405063292e-06, "loss": 2.3906, "step": 900 }, { "epoch": 0.04, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 43.9453125, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 90.8203125, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.796875, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 68.61907958984375, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.1916, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.719, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.089, "step": 900 }, { "epoch": 0.04, "eval_specter_top15HN_validation.jsonl.gz_acc1": 9.1796875, "eval_specter_top15HN_validation.jsonl.gz_acc3": 22.65625, "eval_specter_top15HN_validation.jsonl.gz_loss": 1.46875, "eval_specter_top15HN_validation.jsonl.gz_mrr": 21.067502975463867, "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.9599, "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 21.622, "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.338, "step": 900 }, { "epoch": 0.04, "eval_nq_top15HN_validation.jsonl.gz_acc1": 47.4609375, "eval_nq_top15HN_validation.jsonl.gz_acc3": 96.484375, "eval_nq_top15HN_validation.jsonl.gz_loss": 0.859375, "eval_nq_top15HN_validation.jsonl.gz_mrr": 72.28506469726562, "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.7191, "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.971, "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.093, "step": 900 }, { "epoch": 0.04, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 37.890625, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 79.8828125, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.99609375, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 60.593994140625, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.5927, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.386, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, "step": 900 }, { "epoch": 0.04, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 41.6015625, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 85.9375, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.0234375, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 65.39681243896484, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 11.0105, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.813, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.091, "step": 900 }, { "epoch": 0.04, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.2890625, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 94.3359375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 70.64515686035156, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.6221, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 6.025, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.094, "step": 900 }, { "epoch": 0.04, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 41.796875, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 86.1328125, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.7421875, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 64.52625274658203, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 11.2361, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.696, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.089, "step": 900 }, { "epoch": 0.04, "eval_fever_top15HN_validation.jsonl.gz_acc1": 41.40625, "eval_fever_top15HN_validation.jsonl.gz_acc3": 87.890625, "eval_fever_top15HN_validation.jsonl.gz_loss": 2.265625, "eval_fever_top15HN_validation.jsonl.gz_mrr": 65.78397369384766, "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.0766, "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.981, "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.062, "step": 900 }, { "epoch": 0.04, "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 37.6953125, "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 79.8828125, "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.8359375, "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 60.25852966308594, "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.6182, "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 11.392, "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.178, "step": 900 }, { "epoch": 0.04, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 47.0703125, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 95.5078125, "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.0703125, "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 71.48451232910156, "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 7.4162, "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 8.63, "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.135, "step": 900 }, { "epoch": 0.04, "eval_arguana_synthetic_validation.jsonl.gz_acc1": 43.359375, "eval_arguana_synthetic_validation.jsonl.gz_acc3": 90.8203125, "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, "eval_arguana_synthetic_validation.jsonl.gz_mrr": 67.97981262207031, "eval_arguana_synthetic_validation.jsonl.gz_runtime": 4.6943, "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 13.634, "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.213, "step": 900 }, { "epoch": 0.04, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 31.25, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 71.2890625, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.8671875, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 54.27470016479492, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.2779, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.482, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, "step": 900 }, { "epoch": 0.05, "grad_norm": 0.050036580052467146, "learning_rate": 9.670379746835444e-06, "loss": 1.7422, "step": 902 }, { "epoch": 0.05, "grad_norm": 0.08750306884033199, "learning_rate": 9.669367088607596e-06, "loss": 3.0234, "step": 904 }, { "epoch": 0.05, "grad_norm": 0.054223431084887544, "learning_rate": 9.668354430379747e-06, "loss": 1.2734, "step": 906 }, { "epoch": 0.05, "grad_norm": 0.04860806227931339, "learning_rate": 9.6673417721519e-06, "loss": 2.5, "step": 908 }, { "epoch": 0.05, "grad_norm": 0.046013887268149076, "learning_rate": 9.666329113924051e-06, "loss": 2.9844, "step": 910 }, { "epoch": 0.05, "grad_norm": 0.050160615800086725, "learning_rate": 9.665316455696203e-06, "loss": 2.5547, "step": 912 }, { "epoch": 0.05, "grad_norm": 0.04468121541121031, "learning_rate": 9.664303797468355e-06, "loss": 1.9375, "step": 914 }, { "epoch": 0.05, "grad_norm": 0.04783497737037472, "learning_rate": 9.663291139240507e-06, "loss": 2.0352, "step": 916 }, { "epoch": 0.05, "grad_norm": 0.09345820833778327, "learning_rate": 9.662278481012659e-06, "loss": 2.168, "step": 918 }, { "epoch": 0.05, "grad_norm": 0.06951969940170355, "learning_rate": 9.66126582278481e-06, "loss": 2.3984, "step": 920 }, { "epoch": 0.05, "grad_norm": 0.06585958155557771, "learning_rate": 9.660253164556963e-06, "loss": 2.8086, "step": 922 }, { "epoch": 0.05, "grad_norm": 0.049140557647652856, "learning_rate": 9.659240506329115e-06, "loss": 3.2344, "step": 924 }, { "epoch": 0.05, "grad_norm": 0.059633313539989644, "learning_rate": 9.658227848101267e-06, "loss": 2.4727, "step": 926 }, { "epoch": 0.05, "grad_norm": 0.05415301028520223, "learning_rate": 9.657215189873418e-06, "loss": 1.9766, "step": 928 }, { "epoch": 0.05, "grad_norm": 0.14115480122341062, "learning_rate": 9.65620253164557e-06, "loss": 1.0664, "step": 930 }, { "epoch": 0.05, "grad_norm": 0.07533532273665657, "learning_rate": 9.655189873417722e-06, "loss": 1.7285, "step": 932 }, { "epoch": 0.05, "grad_norm": 0.07803117363115943, "learning_rate": 9.654177215189874e-06, "loss": 2.8867, "step": 934 }, { "epoch": 0.05, "grad_norm": 0.05760761157164807, "learning_rate": 9.653164556962026e-06, "loss": 3.5547, "step": 936 }, { "epoch": 0.05, "grad_norm": 0.047013965888968307, "learning_rate": 9.652151898734178e-06, "loss": 2.2969, "step": 938 }, { "epoch": 0.05, "grad_norm": 0.056829253521407774, "learning_rate": 9.65113924050633e-06, "loss": 2.7656, "step": 940 }, { "epoch": 0.05, "grad_norm": 0.05157490792319996, "learning_rate": 9.650126582278482e-06, "loss": 3.3789, "step": 942 }, { "epoch": 0.05, "grad_norm": 0.046949382218649915, "learning_rate": 9.649113924050634e-06, "loss": 3.2266, "step": 944 }, { "epoch": 0.05, "grad_norm": 0.04753575432612994, "learning_rate": 9.648101265822786e-06, "loss": 3.7656, "step": 946 }, { "epoch": 0.05, "grad_norm": 0.10061038747859499, "learning_rate": 9.647088607594938e-06, "loss": 2.6094, "step": 948 }, { "epoch": 0.05, "grad_norm": 0.06412995845649233, "learning_rate": 9.64607594936709e-06, "loss": 3.4062, "step": 950 }, { "epoch": 0.05, "grad_norm": 0.08623045600554047, "learning_rate": 9.645063291139241e-06, "loss": 2.4766, "step": 952 }, { "epoch": 0.05, "grad_norm": 0.04602576410444357, "learning_rate": 9.644050632911393e-06, "loss": 1.9336, "step": 954 }, { "epoch": 0.05, "grad_norm": 0.04350935406033656, "learning_rate": 9.643037974683545e-06, "loss": 3.2734, "step": 956 }, { "epoch": 0.05, "grad_norm": 0.09300895711041486, "learning_rate": 9.642025316455697e-06, "loss": 1.8359, "step": 958 }, { "epoch": 0.05, "grad_norm": 0.07751284619736806, "learning_rate": 9.641012658227849e-06, "loss": 2.8477, "step": 960 }, { "epoch": 0.05, "grad_norm": 0.07144651596010991, "learning_rate": 9.640000000000001e-06, "loss": 2.1484, "step": 962 }, { "epoch": 0.05, "grad_norm": 0.0453484485109134, "learning_rate": 9.638987341772153e-06, "loss": 2.4805, "step": 964 }, { "epoch": 0.05, "grad_norm": 0.06045196577803476, "learning_rate": 9.637974683544305e-06, "loss": 3.1406, "step": 966 }, { "epoch": 0.05, "grad_norm": 0.0781837361792116, "learning_rate": 9.636962025316457e-06, "loss": 2.0078, "step": 968 }, { "epoch": 0.05, "grad_norm": 0.05631230003710945, "learning_rate": 9.635949367088609e-06, "loss": 4.2266, "step": 970 }, { "epoch": 0.05, "grad_norm": 0.048879455765286225, "learning_rate": 9.63493670886076e-06, "loss": 2.4883, "step": 972 }, { "epoch": 0.05, "grad_norm": 0.056321168124699894, "learning_rate": 9.633924050632912e-06, "loss": 2.7969, "step": 974 }, { "epoch": 0.05, "grad_norm": 0.06945744800844852, "learning_rate": 9.632911392405064e-06, "loss": 1.6406, "step": 976 }, { "epoch": 0.05, "grad_norm": 0.05138501149962276, "learning_rate": 9.631898734177216e-06, "loss": 2.0156, "step": 978 }, { "epoch": 0.05, "grad_norm": 0.04910087937385888, "learning_rate": 9.630886075949368e-06, "loss": 1.9805, "step": 980 }, { "epoch": 0.05, "grad_norm": 0.040761615221141585, "learning_rate": 9.62987341772152e-06, "loss": 2.2109, "step": 982 }, { "epoch": 0.05, "grad_norm": 0.06756451544128708, "learning_rate": 9.628860759493672e-06, "loss": 3.4922, "step": 984 }, { "epoch": 0.05, "grad_norm": 0.06468878276562036, "learning_rate": 9.627848101265824e-06, "loss": 1.6211, "step": 986 }, { "epoch": 0.05, "grad_norm": 0.058701650293584054, "learning_rate": 9.626835443037976e-06, "loss": 3.3359, "step": 988 }, { "epoch": 0.05, "grad_norm": 0.05275824015630596, "learning_rate": 9.625822784810128e-06, "loss": 2.8359, "step": 990 }, { "epoch": 0.05, "grad_norm": 0.08322458852258427, "learning_rate": 9.62481012658228e-06, "loss": 2.8203, "step": 992 }, { "epoch": 0.05, "grad_norm": 0.058744221438640755, "learning_rate": 9.623797468354431e-06, "loss": 3.2188, "step": 994 }, { "epoch": 0.05, "grad_norm": 0.04628008079307602, "learning_rate": 9.622784810126583e-06, "loss": 3.4453, "step": 996 }, { "epoch": 0.05, "grad_norm": 0.0431775552063716, "learning_rate": 9.621772151898735e-06, "loss": 2.9844, "step": 998 }, { "epoch": 0.05, "grad_norm": 0.04889821743736598, "learning_rate": 9.620759493670887e-06, "loss": 1.9961, "step": 1000 }, { "epoch": 0.05, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 43.9453125, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 91.015625, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.80078125, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 68.9426040649414, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.5594, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.537, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.087, "step": 1000 }, { "epoch": 0.05, "eval_specter_top15HN_validation.jsonl.gz_acc1": 10.9375, "eval_specter_top15HN_validation.jsonl.gz_acc3": 25.5859375, "eval_specter_top15HN_validation.jsonl.gz_loss": 1.46875, "eval_specter_top15HN_validation.jsonl.gz_mrr": 22.294132232666016, "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.6865, "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 23.823, "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.372, "step": 1000 }, { "epoch": 0.05, "eval_nq_top15HN_validation.jsonl.gz_acc1": 46.484375, "eval_nq_top15HN_validation.jsonl.gz_acc3": 95.5078125, "eval_nq_top15HN_validation.jsonl.gz_loss": 0.859375, "eval_nq_top15HN_validation.jsonl.gz_mrr": 70.62860107421875, "eval_nq_top15HN_validation.jsonl.gz_runtime": 11.0182, "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.809, "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.091, "step": 1000 }, { "epoch": 0.05, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 39.453125, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 82.03125, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 1.0, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 62.479881286621094, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.3734, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.453, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.07, "step": 1000 }, { "epoch": 0.05, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 41.6015625, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 84.765625, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.03125, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 64.69393920898438, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.7185, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.971, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.093, "step": 1000 }, { "epoch": 0.05, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 47.0703125, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 95.1171875, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.77542114257812, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.7697, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.943, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.093, "step": 1000 }, { "epoch": 0.05, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 42.1875, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 86.71875, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.74609375, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 66.02581787109375, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.9732, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.832, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.091, "step": 1000 }, { "epoch": 0.05, "eval_fever_top15HN_validation.jsonl.gz_acc1": 40.8203125, "eval_fever_top15HN_validation.jsonl.gz_acc3": 87.109375, "eval_fever_top15HN_validation.jsonl.gz_loss": 2.328125, "eval_fever_top15HN_validation.jsonl.gz_mrr": 65.22736358642578, "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.149, "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.963, "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.062, "step": 1000 }, { "epoch": 0.05, "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 39.453125, "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 82.8125, "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.8203125, "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 62.93122863769531, "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.3156, "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 12.04, "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.188, "step": 1000 }, { "epoch": 0.05, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 45.3125, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 92.1875, "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.078125, "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 69.69808197021484, "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 7.1307, "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 8.975, "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.14, "step": 1000 }, { "epoch": 0.05, "eval_arguana_synthetic_validation.jsonl.gz_acc1": 44.140625, "eval_arguana_synthetic_validation.jsonl.gz_acc3": 91.6015625, "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, "eval_arguana_synthetic_validation.jsonl.gz_mrr": 68.21117401123047, "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.0989, "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.552, "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.196, "step": 1000 }, { "epoch": 0.05, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 31.0546875, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 71.09375, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.84375, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 53.50191116333008, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.1417, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.526, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.071, "step": 1000 }, { "epoch": 0.05, "grad_norm": 0.1051790754037906, "learning_rate": 9.619746835443039e-06, "loss": 3.2422, "step": 1002 }, { "epoch": 0.05, "grad_norm": 0.048465183349567016, "learning_rate": 9.618734177215191e-06, "loss": 2.8438, "step": 1004 }, { "epoch": 0.05, "grad_norm": 0.09111892155303337, "learning_rate": 9.617721518987343e-06, "loss": 2.2812, "step": 1006 }, { "epoch": 0.05, "grad_norm": 0.04420698460934349, "learning_rate": 9.616708860759495e-06, "loss": 1.623, "step": 1008 }, { "epoch": 0.05, "grad_norm": 0.06009480694948726, "learning_rate": 9.615696202531645e-06, "loss": 3.6016, "step": 1010 }, { "epoch": 0.05, "grad_norm": 0.05852060321964003, "learning_rate": 9.614683544303799e-06, "loss": 1.9648, "step": 1012 }, { "epoch": 0.05, "grad_norm": 0.05124931163805301, "learning_rate": 9.61367088607595e-06, "loss": 2.1523, "step": 1014 }, { "epoch": 0.05, "grad_norm": 0.04208769859193327, "learning_rate": 9.612658227848102e-06, "loss": 3.1094, "step": 1016 }, { "epoch": 0.05, "grad_norm": 0.04342283285576621, "learning_rate": 9.611645569620254e-06, "loss": 2.0156, "step": 1018 }, { "epoch": 0.05, "grad_norm": 0.06310583574382549, "learning_rate": 9.610632911392406e-06, "loss": 2.4336, "step": 1020 }, { "epoch": 0.05, "grad_norm": 0.0555905224403037, "learning_rate": 9.609620253164558e-06, "loss": 1.9141, "step": 1022 }, { "epoch": 0.05, "grad_norm": 0.09598113085999012, "learning_rate": 9.60860759493671e-06, "loss": 2.4375, "step": 1024 }, { "epoch": 0.05, "grad_norm": 0.04483096367031134, "learning_rate": 9.607594936708862e-06, "loss": 2.6016, "step": 1026 }, { "epoch": 0.05, "grad_norm": 0.06407673346174275, "learning_rate": 9.606582278481014e-06, "loss": 3.0781, "step": 1028 }, { "epoch": 0.05, "grad_norm": 0.04087651737640146, "learning_rate": 9.605569620253166e-06, "loss": 3.0391, "step": 1030 }, { "epoch": 0.05, "grad_norm": 0.03836897528630472, "learning_rate": 9.604556962025318e-06, "loss": 2.1992, "step": 1032 }, { "epoch": 0.05, "grad_norm": 0.05442708888502564, "learning_rate": 9.60354430379747e-06, "loss": 2.332, "step": 1034 }, { "epoch": 0.05, "grad_norm": 0.06122074521504932, "learning_rate": 9.602531645569621e-06, "loss": 3.2031, "step": 1036 }, { "epoch": 0.05, "grad_norm": 0.06290658191568707, "learning_rate": 9.601518987341772e-06, "loss": 2.4922, "step": 1038 }, { "epoch": 0.05, "grad_norm": 0.05438893210599195, "learning_rate": 9.600506329113924e-06, "loss": 4.0859, "step": 1040 }, { "epoch": 0.05, "grad_norm": 0.049195805473244494, "learning_rate": 9.599493670886077e-06, "loss": 2.9766, "step": 1042 }, { "epoch": 0.05, "grad_norm": 0.037989359450545684, "learning_rate": 9.598481012658229e-06, "loss": 3.1172, "step": 1044 }, { "epoch": 0.05, "grad_norm": 0.04042347846520677, "learning_rate": 9.597468354430381e-06, "loss": 3.3828, "step": 1046 }, { "epoch": 0.05, "grad_norm": 0.056616150978125716, "learning_rate": 9.596455696202533e-06, "loss": 3.5156, "step": 1048 }, { "epoch": 0.05, "grad_norm": 0.06247782872755643, "learning_rate": 9.595443037974685e-06, "loss": 2.5625, "step": 1050 }, { "epoch": 0.05, "grad_norm": 0.04270352660849398, "learning_rate": 9.594430379746837e-06, "loss": 3.0547, "step": 1052 }, { "epoch": 0.05, "grad_norm": 0.06908827189697059, "learning_rate": 9.593417721518989e-06, "loss": 2.0293, "step": 1054 }, { "epoch": 0.05, "grad_norm": 0.043921742476219626, "learning_rate": 9.59240506329114e-06, "loss": 2.2656, "step": 1056 }, { "epoch": 0.05, "grad_norm": 0.04369788983343829, "learning_rate": 9.591392405063292e-06, "loss": 2.6836, "step": 1058 }, { "epoch": 0.05, "grad_norm": 0.08091988614439645, "learning_rate": 9.590379746835444e-06, "loss": 3.2656, "step": 1060 }, { "epoch": 0.05, "grad_norm": 0.04862948304771229, "learning_rate": 9.589367088607596e-06, "loss": 2.8906, "step": 1062 }, { "epoch": 0.05, "grad_norm": 0.05063830715511126, "learning_rate": 9.588354430379747e-06, "loss": 3.5781, "step": 1064 }, { "epoch": 0.05, "grad_norm": 0.06719689705471395, "learning_rate": 9.587341772151898e-06, "loss": 2.625, "step": 1066 }, { "epoch": 0.05, "grad_norm": 0.042284904505660866, "learning_rate": 9.58632911392405e-06, "loss": 3.0859, "step": 1068 }, { "epoch": 0.05, "grad_norm": 0.07499442266051606, "learning_rate": 9.585316455696202e-06, "loss": 2.9922, "step": 1070 }, { "epoch": 0.05, "grad_norm": 0.04871753327913073, "learning_rate": 9.584303797468356e-06, "loss": 2.8047, "step": 1072 }, { "epoch": 0.05, "grad_norm": 0.05076539268863179, "learning_rate": 9.583291139240508e-06, "loss": 1.0117, "step": 1074 }, { "epoch": 0.05, "grad_norm": 0.07091122924332464, "learning_rate": 9.58227848101266e-06, "loss": 4.0938, "step": 1076 }, { "epoch": 0.05, "grad_norm": 0.0532042729713229, "learning_rate": 9.581265822784812e-06, "loss": 1.3438, "step": 1078 }, { "epoch": 0.05, "grad_norm": 0.04624432240575401, "learning_rate": 9.580253164556963e-06, "loss": 2.2188, "step": 1080 }, { "epoch": 0.05, "grad_norm": 0.07944419616558628, "learning_rate": 9.579240506329115e-06, "loss": 3.0234, "step": 1082 }, { "epoch": 0.05, "grad_norm": 0.09200908602869431, "learning_rate": 9.578227848101267e-06, "loss": 3.0078, "step": 1084 }, { "epoch": 0.05, "grad_norm": 0.07169391038822008, "learning_rate": 9.57721518987342e-06, "loss": 3.0977, "step": 1086 }, { "epoch": 0.05, "grad_norm": 0.05875376864911037, "learning_rate": 9.576202531645571e-06, "loss": 2.8516, "step": 1088 }, { "epoch": 0.05, "grad_norm": 0.05735444721093439, "learning_rate": 9.575189873417721e-06, "loss": 3.8359, "step": 1090 }, { "epoch": 0.05, "grad_norm": 0.07550008738233234, "learning_rate": 9.574177215189873e-06, "loss": 3.168, "step": 1092 }, { "epoch": 0.05, "grad_norm": 0.0585409741347276, "learning_rate": 9.573164556962025e-06, "loss": 3.25, "step": 1094 }, { "epoch": 0.05, "grad_norm": 0.052561465436180746, "learning_rate": 9.572151898734177e-06, "loss": 1.668, "step": 1096 }, { "epoch": 0.05, "grad_norm": 0.06026978886216898, "learning_rate": 9.571139240506329e-06, "loss": 3.4219, "step": 1098 }, { "epoch": 0.06, "grad_norm": 0.04588854459918244, "learning_rate": 9.570126582278481e-06, "loss": 3.2266, "step": 1100 }, { "epoch": 0.06, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 44.140625, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 91.6015625, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.80078125, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 68.69398498535156, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.1096, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.761, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.09, "step": 1100 }, { "epoch": 0.06, "eval_specter_top15HN_validation.jsonl.gz_acc1": 9.375, "eval_specter_top15HN_validation.jsonl.gz_acc3": 21.875, "eval_specter_top15HN_validation.jsonl.gz_loss": 1.4765625, "eval_specter_top15HN_validation.jsonl.gz_mrr": 21.127887725830078, "eval_specter_top15HN_validation.jsonl.gz_runtime": 3.101, "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 20.639, "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.322, "step": 1100 }, { "epoch": 0.06, "eval_nq_top15HN_validation.jsonl.gz_acc1": 46.484375, "eval_nq_top15HN_validation.jsonl.gz_acc3": 94.921875, "eval_nq_top15HN_validation.jsonl.gz_loss": 0.8671875, "eval_nq_top15HN_validation.jsonl.gz_mrr": 71.43641662597656, "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.8465, "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.901, "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.092, "step": 1100 }, { "epoch": 0.06, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 40.0390625, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 83.203125, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.9921875, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 62.347381591796875, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.4238, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.437, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, "step": 1100 }, { "epoch": 0.06, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 42.1875, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 86.9140625, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.015625, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 65.74319458007812, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 11.0489, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.792, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.091, "step": 1100 }, { "epoch": 0.06, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 47.265625, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 95.1171875, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.18213653564453, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.6349, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 6.018, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.094, "step": 1100 }, { "epoch": 0.06, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 39.6484375, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 83.3984375, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.73828125, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 63.26298141479492, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 11.1767, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.726, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.089, "step": 1100 }, { "epoch": 0.06, "eval_fever_top15HN_validation.jsonl.gz_acc1": 40.625, "eval_fever_top15HN_validation.jsonl.gz_acc3": 87.6953125, "eval_fever_top15HN_validation.jsonl.gz_loss": 2.21875, "eval_fever_top15HN_validation.jsonl.gz_mrr": 64.98330688476562, "eval_fever_top15HN_validation.jsonl.gz_runtime": 15.9442, "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 4.014, "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.063, "step": 1100 }, { "epoch": 0.06, "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 36.328125, "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 77.9296875, "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.83984375, "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 59.63578414916992, "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.6196, "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 11.389, "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.178, "step": 1100 }, { "epoch": 0.06, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 44.3359375, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 91.015625, "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.0859375, "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 68.1395263671875, "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 10.0366, "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 6.377, "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.1, "step": 1100 }, { "epoch": 0.06, "eval_arguana_synthetic_validation.jsonl.gz_acc1": 44.3359375, "eval_arguana_synthetic_validation.jsonl.gz_acc3": 91.40625, "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, "eval_arguana_synthetic_validation.jsonl.gz_mrr": 68.68175506591797, "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.1925, "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.326, "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.193, "step": 1100 }, { "epoch": 0.06, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 28.515625, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 65.234375, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.875, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 51.298561096191406, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.3039, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.474, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, "step": 1100 }, { "epoch": 0.06, "grad_norm": 0.05279039568598626, "learning_rate": 9.569113924050634e-06, "loss": 1.3438, "step": 1102 }, { "epoch": 0.06, "grad_norm": 0.07920092652592756, "learning_rate": 9.568101265822786e-06, "loss": 2.4219, "step": 1104 }, { "epoch": 0.06, "grad_norm": 0.05941126846638345, "learning_rate": 9.567088607594938e-06, "loss": 2.6836, "step": 1106 }, { "epoch": 0.06, "grad_norm": 0.04922287399559275, "learning_rate": 9.56607594936709e-06, "loss": 3.1406, "step": 1108 }, { "epoch": 0.06, "grad_norm": 0.04368593973418131, "learning_rate": 9.565063291139242e-06, "loss": 1.9141, "step": 1110 }, { "epoch": 0.06, "grad_norm": 0.05961420052796132, "learning_rate": 9.564050632911394e-06, "loss": 3.6953, "step": 1112 }, { "epoch": 0.06, "grad_norm": 0.057590175212679104, "learning_rate": 9.563037974683546e-06, "loss": 3.9531, "step": 1114 }, { "epoch": 0.06, "grad_norm": 0.045713870301452, "learning_rate": 9.562025316455698e-06, "loss": 2.2109, "step": 1116 }, { "epoch": 0.06, "grad_norm": 0.04966124044261182, "learning_rate": 9.561012658227848e-06, "loss": 1.5859, "step": 1118 }, { "epoch": 0.06, "grad_norm": 0.04931794060312549, "learning_rate": 9.56e-06, "loss": 2.6719, "step": 1120 }, { "epoch": 0.06, "grad_norm": 0.056823119950011, "learning_rate": 9.558987341772152e-06, "loss": 2.375, "step": 1122 }, { "epoch": 0.06, "grad_norm": 0.03956124379267595, "learning_rate": 9.557974683544304e-06, "loss": 2.7578, "step": 1124 }, { "epoch": 0.06, "grad_norm": 0.05956590745616876, "learning_rate": 9.556962025316456e-06, "loss": 1.1328, "step": 1126 }, { "epoch": 0.06, "grad_norm": 0.05229656063756952, "learning_rate": 9.555949367088608e-06, "loss": 2.6602, "step": 1128 }, { "epoch": 0.06, "grad_norm": 0.05015559554787169, "learning_rate": 9.55493670886076e-06, "loss": 3.3516, "step": 1130 }, { "epoch": 0.06, "grad_norm": 0.053045505149946784, "learning_rate": 9.553924050632913e-06, "loss": 1.3164, "step": 1132 }, { "epoch": 0.06, "grad_norm": 0.049300460314765836, "learning_rate": 9.552911392405065e-06, "loss": 3.3359, "step": 1134 }, { "epoch": 0.06, "grad_norm": 0.0509355475045662, "learning_rate": 9.551898734177217e-06, "loss": 3.2266, "step": 1136 }, { "epoch": 0.06, "grad_norm": 0.05684153506318937, "learning_rate": 9.550886075949369e-06, "loss": 3.5078, "step": 1138 }, { "epoch": 0.06, "grad_norm": 0.06443433054086191, "learning_rate": 9.54987341772152e-06, "loss": 1.709, "step": 1140 }, { "epoch": 0.06, "grad_norm": 0.08719597593676645, "learning_rate": 9.548860759493673e-06, "loss": 1.9453, "step": 1142 }, { "epoch": 0.06, "grad_norm": 0.04787175813071486, "learning_rate": 9.547848101265823e-06, "loss": 2.6562, "step": 1144 }, { "epoch": 0.06, "grad_norm": 0.08201162069341617, "learning_rate": 9.546835443037975e-06, "loss": 2.9336, "step": 1146 }, { "epoch": 0.06, "grad_norm": 0.1180019803959435, "learning_rate": 9.545822784810127e-06, "loss": 2.2656, "step": 1148 }, { "epoch": 0.06, "grad_norm": 0.04238823903190658, "learning_rate": 9.544810126582279e-06, "loss": 3.2266, "step": 1150 }, { "epoch": 0.06, "grad_norm": 0.06004974253490697, "learning_rate": 9.54379746835443e-06, "loss": 3.6797, "step": 1152 }, { "epoch": 0.06, "grad_norm": 0.058263064089826816, "learning_rate": 9.542784810126582e-06, "loss": 3.4688, "step": 1154 }, { "epoch": 0.06, "grad_norm": 0.05411841624054384, "learning_rate": 9.541772151898734e-06, "loss": 1.75, "step": 1156 }, { "epoch": 0.06, "grad_norm": 0.041361774375433126, "learning_rate": 9.540759493670886e-06, "loss": 2.9844, "step": 1158 }, { "epoch": 0.06, "grad_norm": 0.06539487268354072, "learning_rate": 9.539746835443038e-06, "loss": 2.8438, "step": 1160 }, { "epoch": 0.06, "grad_norm": 0.0581568382694382, "learning_rate": 9.538734177215192e-06, "loss": 2.7734, "step": 1162 }, { "epoch": 0.06, "grad_norm": 0.06767073223000843, "learning_rate": 9.537721518987344e-06, "loss": 2.0039, "step": 1164 }, { "epoch": 0.06, "grad_norm": 0.052823466084387614, "learning_rate": 9.536708860759496e-06, "loss": 3.1719, "step": 1166 }, { "epoch": 0.06, "grad_norm": 0.09770782062698304, "learning_rate": 9.535696202531647e-06, "loss": 2.9375, "step": 1168 }, { "epoch": 0.06, "grad_norm": 0.054946196005232324, "learning_rate": 9.534683544303798e-06, "loss": 3.1562, "step": 1170 }, { "epoch": 0.06, "grad_norm": 0.058723423439784234, "learning_rate": 9.53367088607595e-06, "loss": 2.4082, "step": 1172 }, { "epoch": 0.06, "grad_norm": 0.05838846764255246, "learning_rate": 9.532658227848101e-06, "loss": 3.2734, "step": 1174 }, { "epoch": 0.06, "grad_norm": 0.08719324695026287, "learning_rate": 9.531645569620253e-06, "loss": 2.9141, "step": 1176 }, { "epoch": 0.06, "grad_norm": 0.047306934247421356, "learning_rate": 9.530632911392405e-06, "loss": 2.4297, "step": 1178 }, { "epoch": 0.06, "grad_norm": 0.050959286466225565, "learning_rate": 9.529620253164557e-06, "loss": 1.9648, "step": 1180 }, { "epoch": 0.06, "grad_norm": 0.0431167454308949, "learning_rate": 9.528607594936709e-06, "loss": 2.5859, "step": 1182 }, { "epoch": 0.06, "grad_norm": 0.058744259091542635, "learning_rate": 9.527594936708861e-06, "loss": 4.5781, "step": 1184 }, { "epoch": 0.06, "grad_norm": 0.04515997872087969, "learning_rate": 9.526582278481013e-06, "loss": 2.7422, "step": 1186 }, { "epoch": 0.06, "grad_norm": 0.052408032035697646, "learning_rate": 9.525569620253165e-06, "loss": 2.4062, "step": 1188 }, { "epoch": 0.06, "grad_norm": 0.04951277906648105, "learning_rate": 9.524556962025317e-06, "loss": 1.3359, "step": 1190 }, { "epoch": 0.06, "grad_norm": 0.0534640838422442, "learning_rate": 9.52354430379747e-06, "loss": 1.8633, "step": 1192 }, { "epoch": 0.06, "grad_norm": 0.1104300439314054, "learning_rate": 9.522531645569622e-06, "loss": 3.7266, "step": 1194 }, { "epoch": 0.06, "grad_norm": 0.06492888179270434, "learning_rate": 9.521518987341772e-06, "loss": 4.1016, "step": 1196 }, { "epoch": 0.06, "grad_norm": 0.05672739428401729, "learning_rate": 9.520506329113924e-06, "loss": 1.7734, "step": 1198 }, { "epoch": 0.06, "grad_norm": 0.11597608027192513, "learning_rate": 9.519493670886076e-06, "loss": 3.2734, "step": 1200 }, { "epoch": 0.06, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 43.75, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 90.625, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.80078125, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 68.00003814697266, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.2038, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.712, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.089, "step": 1200 }, { "epoch": 0.06, "eval_specter_top15HN_validation.jsonl.gz_acc1": 6.8359375, "eval_specter_top15HN_validation.jsonl.gz_acc3": 16.9921875, "eval_specter_top15HN_validation.jsonl.gz_loss": 1.5, "eval_specter_top15HN_validation.jsonl.gz_mrr": 17.340431213378906, "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.7204, "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 23.526, "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.368, "step": 1200 }, { "epoch": 0.06, "eval_nq_top15HN_validation.jsonl.gz_acc1": 46.6796875, "eval_nq_top15HN_validation.jsonl.gz_acc3": 94.53125, "eval_nq_top15HN_validation.jsonl.gz_loss": 0.859375, "eval_nq_top15HN_validation.jsonl.gz_mrr": 70.92469787597656, "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.8603, "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.893, "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.092, "step": 1200 }, { "epoch": 0.06, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 39.0625, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 81.8359375, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.99609375, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 61.614593505859375, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.3938, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.446, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, "step": 1200 }, { "epoch": 0.06, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 42.96875, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 86.9140625, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.015625, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 66.26858520507812, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.6973, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.983, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.093, "step": 1200 }, { "epoch": 0.06, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 47.0703125, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 95.1171875, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.19760131835938, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 11.1286, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.751, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.09, "step": 1200 }, { "epoch": 0.06, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 41.6015625, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 85.546875, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.7421875, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 64.35404205322266, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.7198, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.97, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.093, "step": 1200 }, { "epoch": 0.06, "eval_fever_top15HN_validation.jsonl.gz_acc1": 41.2109375, "eval_fever_top15HN_validation.jsonl.gz_acc3": 88.4765625, "eval_fever_top15HN_validation.jsonl.gz_loss": 2.25, "eval_fever_top15HN_validation.jsonl.gz_mrr": 66.49232482910156, "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.4377, "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.893, "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.061, "step": 1200 }, { "epoch": 0.06, "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 31.640625, "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 70.1171875, "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.859375, "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 53.657962799072266, "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 6.3677, "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 10.051, "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.157, "step": 1200 }, { "epoch": 0.06, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 46.484375, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 94.140625, "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.078125, "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 70.94774627685547, "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 10.3442, "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 6.187, "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.097, "step": 1200 }, { "epoch": 0.06, "eval_arguana_synthetic_validation.jsonl.gz_acc1": 44.53125, "eval_arguana_synthetic_validation.jsonl.gz_acc3": 92.3828125, "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, "eval_arguana_synthetic_validation.jsonl.gz_mrr": 68.87773132324219, "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.1139, "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.515, "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.196, "step": 1200 }, { "epoch": 0.06, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 32.03125, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 72.0703125, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.859375, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 54.3194694519043, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.2679, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.486, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, "step": 1200 }, { "epoch": 0.06, "grad_norm": 0.056524486518832275, "learning_rate": 9.518481012658228e-06, "loss": 2.8906, "step": 1202 }, { "epoch": 0.06, "grad_norm": 0.06884576916644804, "learning_rate": 9.51746835443038e-06, "loss": 2.3203, "step": 1204 }, { "epoch": 0.06, "grad_norm": 0.05492846585742184, "learning_rate": 9.516455696202532e-06, "loss": 2.4922, "step": 1206 }, { "epoch": 0.06, "grad_norm": 0.05842822257833723, "learning_rate": 9.515443037974684e-06, "loss": 2.0664, "step": 1208 }, { "epoch": 0.06, "grad_norm": 0.05828818675606454, "learning_rate": 9.514430379746836e-06, "loss": 3.4531, "step": 1210 }, { "epoch": 0.06, "grad_norm": 0.04523097341723584, "learning_rate": 9.513417721518988e-06, "loss": 1.3281, "step": 1212 }, { "epoch": 0.06, "grad_norm": 0.06246682120852486, "learning_rate": 9.51240506329114e-06, "loss": 2.8594, "step": 1214 }, { "epoch": 0.06, "grad_norm": 0.059914705380358084, "learning_rate": 9.511392405063292e-06, "loss": 2.7891, "step": 1216 }, { "epoch": 0.06, "grad_norm": 0.07123881358873205, "learning_rate": 9.510379746835443e-06, "loss": 2.9766, "step": 1218 }, { "epoch": 0.06, "grad_norm": 0.05839221388528459, "learning_rate": 9.509367088607595e-06, "loss": 2.3359, "step": 1220 }, { "epoch": 0.06, "grad_norm": 0.06452918622039662, "learning_rate": 9.508354430379749e-06, "loss": 2.6523, "step": 1222 }, { "epoch": 0.06, "grad_norm": 0.1029877383143472, "learning_rate": 9.5073417721519e-06, "loss": 3.0391, "step": 1224 }, { "epoch": 0.06, "grad_norm": 0.057089100933038525, "learning_rate": 9.506329113924051e-06, "loss": 1.8223, "step": 1226 }, { "epoch": 0.06, "grad_norm": 0.05166973185716093, "learning_rate": 9.505316455696203e-06, "loss": 1.8984, "step": 1228 }, { "epoch": 0.06, "grad_norm": 0.0513550181895363, "learning_rate": 9.504303797468355e-06, "loss": 2.2773, "step": 1230 }, { "epoch": 0.06, "grad_norm": 0.04847340483605805, "learning_rate": 9.503291139240507e-06, "loss": 2.3633, "step": 1232 }, { "epoch": 0.06, "grad_norm": 0.04858819186140292, "learning_rate": 9.502278481012659e-06, "loss": 2.3047, "step": 1234 }, { "epoch": 0.06, "grad_norm": 0.07721735194112753, "learning_rate": 9.50126582278481e-06, "loss": 1.8027, "step": 1236 }, { "epoch": 0.06, "grad_norm": 0.059295450113036254, "learning_rate": 9.500253164556963e-06, "loss": 3.7891, "step": 1238 }, { "epoch": 0.06, "grad_norm": 0.05862288743975563, "learning_rate": 9.499240506329114e-06, "loss": 2.3359, "step": 1240 }, { "epoch": 0.06, "grad_norm": 0.049113536200244313, "learning_rate": 9.498227848101266e-06, "loss": 4.2344, "step": 1242 }, { "epoch": 0.06, "grad_norm": 0.05863364570109533, "learning_rate": 9.497215189873418e-06, "loss": 4.2812, "step": 1244 }, { "epoch": 0.06, "grad_norm": 0.058465190758347246, "learning_rate": 9.49620253164557e-06, "loss": 3.8906, "step": 1246 }, { "epoch": 0.06, "grad_norm": 0.05745728983604673, "learning_rate": 9.495189873417722e-06, "loss": 2.4609, "step": 1248 }, { "epoch": 0.06, "grad_norm": 0.048940790847378936, "learning_rate": 9.494177215189874e-06, "loss": 3.0859, "step": 1250 }, { "epoch": 0.06, "grad_norm": 0.04515179075015809, "learning_rate": 9.493164556962026e-06, "loss": 3.5078, "step": 1252 }, { "epoch": 0.06, "grad_norm": 0.05148185633372578, "learning_rate": 9.492151898734178e-06, "loss": 3.8828, "step": 1254 }, { "epoch": 0.06, "grad_norm": 0.044922590250043165, "learning_rate": 9.49113924050633e-06, "loss": 2.5742, "step": 1256 }, { "epoch": 0.06, "grad_norm": 0.049305376386839274, "learning_rate": 9.490126582278482e-06, "loss": 4.1719, "step": 1258 }, { "epoch": 0.06, "grad_norm": 0.0482360612014124, "learning_rate": 9.489113924050634e-06, "loss": 3.2812, "step": 1260 }, { "epoch": 0.06, "grad_norm": 0.07422881936481786, "learning_rate": 9.488101265822785e-06, "loss": 2.4844, "step": 1262 }, { "epoch": 0.06, "grad_norm": 0.04932619932404644, "learning_rate": 9.487088607594937e-06, "loss": 1.7988, "step": 1264 }, { "epoch": 0.06, "grad_norm": 0.0466248073944634, "learning_rate": 9.48607594936709e-06, "loss": 3.2188, "step": 1266 }, { "epoch": 0.06, "grad_norm": 0.05597919637602129, "learning_rate": 9.485063291139241e-06, "loss": 2.4297, "step": 1268 }, { "epoch": 0.06, "grad_norm": 0.05073715551356432, "learning_rate": 9.484050632911393e-06, "loss": 1.3438, "step": 1270 }, { "epoch": 0.06, "grad_norm": 0.08772414122295938, "learning_rate": 9.483037974683545e-06, "loss": 1.6367, "step": 1272 }, { "epoch": 0.06, "grad_norm": 0.05087985039711778, "learning_rate": 9.482025316455697e-06, "loss": 2.2109, "step": 1274 }, { "epoch": 0.06, "grad_norm": 0.08264556266724159, "learning_rate": 9.481012658227849e-06, "loss": 3.4297, "step": 1276 }, { "epoch": 0.06, "grad_norm": 0.04929517062153266, "learning_rate": 9.48e-06, "loss": 1.6758, "step": 1278 }, { "epoch": 0.06, "grad_norm": 0.06794824948761272, "learning_rate": 9.478987341772153e-06, "loss": 3.2344, "step": 1280 }, { "epoch": 0.06, "grad_norm": 0.04781934324935959, "learning_rate": 9.477974683544305e-06, "loss": 1.875, "step": 1282 }, { "epoch": 0.06, "grad_norm": 0.12420030807854848, "learning_rate": 9.476962025316456e-06, "loss": 1.8809, "step": 1284 }, { "epoch": 0.06, "grad_norm": 0.10396640173006078, "learning_rate": 9.475949367088608e-06, "loss": 3.1992, "step": 1286 }, { "epoch": 0.06, "grad_norm": 0.05578843452162679, "learning_rate": 9.47493670886076e-06, "loss": 2.5586, "step": 1288 }, { "epoch": 0.06, "grad_norm": 0.053276918030993055, "learning_rate": 9.473924050632912e-06, "loss": 2.2227, "step": 1290 }, { "epoch": 0.06, "grad_norm": 0.04548986934593862, "learning_rate": 9.472911392405064e-06, "loss": 2.3203, "step": 1292 }, { "epoch": 0.06, "grad_norm": 0.05489945634450488, "learning_rate": 9.471898734177216e-06, "loss": 3.2969, "step": 1294 }, { "epoch": 0.06, "grad_norm": 0.07382813673170693, "learning_rate": 9.470886075949368e-06, "loss": 3.2656, "step": 1296 }, { "epoch": 0.06, "grad_norm": 0.059025889512360236, "learning_rate": 9.46987341772152e-06, "loss": 2.1094, "step": 1298 }, { "epoch": 0.07, "grad_norm": 0.04774363910554431, "learning_rate": 9.468860759493672e-06, "loss": 3.3281, "step": 1300 }, { "epoch": 0.07, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 44.921875, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 92.1875, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.80078125, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 68.9459457397461, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.4723, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.579, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.087, "step": 1300 }, { "epoch": 0.07, "eval_specter_top15HN_validation.jsonl.gz_acc1": 8.3984375, "eval_specter_top15HN_validation.jsonl.gz_acc3": 21.09375, "eval_specter_top15HN_validation.jsonl.gz_loss": 1.4765625, "eval_specter_top15HN_validation.jsonl.gz_mrr": 20.150863647460938, "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.5115, "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 25.483, "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.398, "step": 1300 }, { "epoch": 0.07, "eval_nq_top15HN_validation.jsonl.gz_acc1": 42.578125, "eval_nq_top15HN_validation.jsonl.gz_acc3": 88.8671875, "eval_nq_top15HN_validation.jsonl.gz_loss": 0.875, "eval_nq_top15HN_validation.jsonl.gz_mrr": 67.16609954833984, "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.8744, "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.885, "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.092, "step": 1300 }, { "epoch": 0.07, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 39.84375, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 82.2265625, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.98828125, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 62.61825942993164, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.3521, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.459, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.07, "step": 1300 }, { "epoch": 0.07, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 42.96875, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 88.28125, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.015625, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 66.84114837646484, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.9535, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.843, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.091, "step": 1300 }, { "epoch": 0.07, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.875, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 94.3359375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.77734375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 70.85847473144531, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.6944, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.984, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.094, "step": 1300 }, { "epoch": 0.07, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 42.578125, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 87.890625, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.73828125, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 66.45761108398438, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 9.7009, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 6.597, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.103, "step": 1300 }, { "epoch": 0.07, "eval_fever_top15HN_validation.jsonl.gz_acc1": 42.578125, "eval_fever_top15HN_validation.jsonl.gz_acc3": 89.2578125, "eval_fever_top15HN_validation.jsonl.gz_loss": 2.203125, "eval_fever_top15HN_validation.jsonl.gz_mrr": 66.21710205078125, "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.2036, "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.95, "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.062, "step": 1300 }, { "epoch": 0.07, "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 36.328125, "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 77.5390625, "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.8359375, "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 59.69415283203125, "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.7199, "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 11.189, "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.175, "step": 1300 }, { "epoch": 0.07, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 45.8984375, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 92.96875, "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.0703125, "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 70.08352661132812, "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 8.2862, "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 7.724, "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.121, "step": 1300 }, { "epoch": 0.07, "eval_arguana_synthetic_validation.jsonl.gz_acc1": 44.140625, "eval_arguana_synthetic_validation.jsonl.gz_acc3": 92.578125, "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.6953125, "eval_arguana_synthetic_validation.jsonl.gz_mrr": 69.10311889648438, "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.4982, "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 11.64, "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.182, "step": 1300 }, { "epoch": 0.07, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 28.90625, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 67.7734375, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.859375, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 52.01654052734375, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.261, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.488, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, "step": 1300 }, { "epoch": 0.07, "grad_norm": 0.04688377347936095, "learning_rate": 9.467848101265824e-06, "loss": 2.5625, "step": 1302 }, { "epoch": 0.07, "grad_norm": 0.0535641115572392, "learning_rate": 9.466835443037976e-06, "loss": 2.2617, "step": 1304 }, { "epoch": 0.07, "grad_norm": 0.04625202248233324, "learning_rate": 9.465822784810127e-06, "loss": 2.1016, "step": 1306 }, { "epoch": 0.07, "grad_norm": 0.043697642071917904, "learning_rate": 9.46481012658228e-06, "loss": 1.168, "step": 1308 }, { "epoch": 0.07, "grad_norm": 0.05362168829827214, "learning_rate": 9.463797468354431e-06, "loss": 2.5625, "step": 1310 }, { "epoch": 0.07, "grad_norm": 0.06975665191217256, "learning_rate": 9.462784810126583e-06, "loss": 3.9844, "step": 1312 }, { "epoch": 0.07, "grad_norm": 0.046593111547768955, "learning_rate": 9.461772151898735e-06, "loss": 2.8438, "step": 1314 }, { "epoch": 0.07, "grad_norm": 0.05971039459331579, "learning_rate": 9.460759493670887e-06, "loss": 3.0469, "step": 1316 }, { "epoch": 0.07, "grad_norm": 0.05051453173596236, "learning_rate": 9.459746835443039e-06, "loss": 2.0156, "step": 1318 }, { "epoch": 0.07, "grad_norm": 0.07619243735540052, "learning_rate": 9.45873417721519e-06, "loss": 3.5625, "step": 1320 }, { "epoch": 0.07, "grad_norm": 0.046237781745614805, "learning_rate": 9.457721518987343e-06, "loss": 2.3125, "step": 1322 }, { "epoch": 0.07, "grad_norm": 0.04485813314355889, "learning_rate": 9.456708860759495e-06, "loss": 2.2305, "step": 1324 }, { "epoch": 0.07, "grad_norm": 0.06336058879470456, "learning_rate": 9.455696202531647e-06, "loss": 1.9297, "step": 1326 }, { "epoch": 0.07, "grad_norm": 0.049017667050821896, "learning_rate": 9.454683544303798e-06, "loss": 1.8242, "step": 1328 }, { "epoch": 0.07, "grad_norm": 0.074566984117343, "learning_rate": 9.45367088607595e-06, "loss": 1.793, "step": 1330 }, { "epoch": 0.07, "grad_norm": 0.08532472260482739, "learning_rate": 9.452658227848102e-06, "loss": 1.4082, "step": 1332 }, { "epoch": 0.07, "grad_norm": 0.04695226837525632, "learning_rate": 9.451645569620254e-06, "loss": 2.3438, "step": 1334 }, { "epoch": 0.07, "grad_norm": 0.04484711521428046, "learning_rate": 9.450632911392406e-06, "loss": 3.1562, "step": 1336 }, { "epoch": 0.07, "grad_norm": 0.04490462262118974, "learning_rate": 9.449620253164558e-06, "loss": 3.8438, "step": 1338 }, { "epoch": 0.07, "grad_norm": 0.054645353919445395, "learning_rate": 9.44860759493671e-06, "loss": 4.4766, "step": 1340 }, { "epoch": 0.07, "grad_norm": 0.09472661165845717, "learning_rate": 9.447594936708862e-06, "loss": 2.1719, "step": 1342 }, { "epoch": 0.07, "grad_norm": 0.05608091164616737, "learning_rate": 9.446582278481014e-06, "loss": 3.0391, "step": 1344 }, { "epoch": 0.07, "grad_norm": 0.05956456868043248, "learning_rate": 9.445569620253166e-06, "loss": 3.5469, "step": 1346 }, { "epoch": 0.07, "grad_norm": 0.0620189985372643, "learning_rate": 9.444556962025318e-06, "loss": 3.125, "step": 1348 }, { "epoch": 0.07, "grad_norm": 0.04915043549864365, "learning_rate": 9.44354430379747e-06, "loss": 3.1875, "step": 1350 }, { "epoch": 0.07, "grad_norm": 0.04903168447437554, "learning_rate": 9.442531645569621e-06, "loss": 3.5391, "step": 1352 }, { "epoch": 0.07, "grad_norm": 0.07603047463550469, "learning_rate": 9.441518987341773e-06, "loss": 1.748, "step": 1354 }, { "epoch": 0.07, "grad_norm": 0.08795396323964288, "learning_rate": 9.440506329113923e-06, "loss": 2.2148, "step": 1356 }, { "epoch": 0.07, "grad_norm": 0.06394860010428646, "learning_rate": 9.439493670886077e-06, "loss": 2.1758, "step": 1358 }, { "epoch": 0.07, "grad_norm": 0.048523865697111014, "learning_rate": 9.438481012658229e-06, "loss": 2.6094, "step": 1360 }, { "epoch": 0.07, "grad_norm": 0.044744379334593756, "learning_rate": 9.437468354430381e-06, "loss": 1.2461, "step": 1362 }, { "epoch": 0.07, "grad_norm": 0.06115671450767947, "learning_rate": 9.436455696202533e-06, "loss": 1.0859, "step": 1364 }, { "epoch": 0.07, "grad_norm": 0.05822151080901583, "learning_rate": 9.435443037974685e-06, "loss": 3.4766, "step": 1366 }, { "epoch": 0.07, "grad_norm": 0.07973667069949618, "learning_rate": 9.434430379746837e-06, "loss": 1.6836, "step": 1368 }, { "epoch": 0.07, "grad_norm": 0.05697131019979492, "learning_rate": 9.433417721518989e-06, "loss": 3.3516, "step": 1370 }, { "epoch": 0.07, "grad_norm": 0.047056398472087486, "learning_rate": 9.43240506329114e-06, "loss": 1.9688, "step": 1372 }, { "epoch": 0.07, "grad_norm": 0.07977068722005347, "learning_rate": 9.431392405063292e-06, "loss": 2.6289, "step": 1374 }, { "epoch": 0.07, "grad_norm": 0.05825913569638537, "learning_rate": 9.430379746835444e-06, "loss": 3.3594, "step": 1376 }, { "epoch": 0.07, "grad_norm": 0.043133510537226256, "learning_rate": 9.429367088607596e-06, "loss": 2.8984, "step": 1378 }, { "epoch": 0.07, "grad_norm": 0.07160798353070594, "learning_rate": 9.428354430379748e-06, "loss": 1.9961, "step": 1380 }, { "epoch": 0.07, "grad_norm": 0.043526473443776595, "learning_rate": 9.4273417721519e-06, "loss": 2.6641, "step": 1382 }, { "epoch": 0.07, "grad_norm": 0.09154773952846151, "learning_rate": 9.42632911392405e-06, "loss": 2.6328, "step": 1384 }, { "epoch": 0.07, "grad_norm": 0.04607802371298212, "learning_rate": 9.425316455696202e-06, "loss": 2.043, "step": 1386 }, { "epoch": 0.07, "grad_norm": 0.04583141069326187, "learning_rate": 9.424303797468356e-06, "loss": 2.5625, "step": 1388 }, { "epoch": 0.07, "grad_norm": 0.047856476589815795, "learning_rate": 9.423291139240508e-06, "loss": 2.1719, "step": 1390 }, { "epoch": 0.07, "grad_norm": 0.045235948292313685, "learning_rate": 9.42227848101266e-06, "loss": 1.7793, "step": 1392 }, { "epoch": 0.07, "grad_norm": 0.0728387370082503, "learning_rate": 9.421265822784811e-06, "loss": 3.2422, "step": 1394 }, { "epoch": 0.07, "grad_norm": 0.038953904358725366, "learning_rate": 9.420253164556963e-06, "loss": 2.8125, "step": 1396 }, { "epoch": 0.07, "grad_norm": 0.04573349280022919, "learning_rate": 9.419240506329115e-06, "loss": 2.6016, "step": 1398 }, { "epoch": 0.07, "grad_norm": 0.11480601349227437, "learning_rate": 9.418227848101267e-06, "loss": 2.9766, "step": 1400 }, { "epoch": 0.07, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 44.7265625, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 91.6015625, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.80078125, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 69.1888427734375, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.2467, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.691, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.089, "step": 1400 }, { "epoch": 0.07, "eval_specter_top15HN_validation.jsonl.gz_acc1": 8.984375, "eval_specter_top15HN_validation.jsonl.gz_acc3": 22.4609375, "eval_specter_top15HN_validation.jsonl.gz_loss": 1.46875, "eval_specter_top15HN_validation.jsonl.gz_mrr": 21.395097732543945, "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.5738, "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 24.866, "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.389, "step": 1400 }, { "epoch": 0.07, "eval_nq_top15HN_validation.jsonl.gz_acc1": 47.4609375, "eval_nq_top15HN_validation.jsonl.gz_acc3": 96.484375, "eval_nq_top15HN_validation.jsonl.gz_loss": 0.859375, "eval_nq_top15HN_validation.jsonl.gz_mrr": 72.35719299316406, "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.9538, "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.843, "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.091, "step": 1400 }, { "epoch": 0.07, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 39.2578125, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 81.25, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.9921875, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 61.98875045776367, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.7454, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.34, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.068, "step": 1400 }, { "epoch": 0.07, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 43.1640625, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 86.71875, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.0234375, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 65.83100128173828, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.9759, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.831, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.091, "step": 1400 }, { "epoch": 0.07, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.09375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 93.359375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 70.10130310058594, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.758, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.949, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.093, "step": 1400 }, { "epoch": 0.07, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 41.6015625, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 85.9375, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.7421875, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 64.8533935546875, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.6245, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 6.024, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.094, "step": 1400 }, { "epoch": 0.07, "eval_fever_top15HN_validation.jsonl.gz_acc1": 43.359375, "eval_fever_top15HN_validation.jsonl.gz_acc3": 90.625, "eval_fever_top15HN_validation.jsonl.gz_loss": 2.265625, "eval_fever_top15HN_validation.jsonl.gz_mrr": 67.08191680908203, "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.273, "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.933, "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.061, "step": 1400 }, { "epoch": 0.07, "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 42.1875, "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 87.5, "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.8203125, "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 66.17656707763672, "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.7472, "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 11.136, "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.174, "step": 1400 }, { "epoch": 0.07, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 46.6796875, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 94.3359375, "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.0703125, "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 70.69469451904297, "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 6.5911, "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 9.71, "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.152, "step": 1400 }, { "epoch": 0.07, "eval_arguana_synthetic_validation.jsonl.gz_acc1": 43.75, "eval_arguana_synthetic_validation.jsonl.gz_acc3": 91.40625, "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.7109375, "eval_arguana_synthetic_validation.jsonl.gz_mrr": 69.02898406982422, "eval_arguana_synthetic_validation.jsonl.gz_runtime": 4.9021, "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 13.056, "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.204, "step": 1400 }, { "epoch": 0.07, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 29.296875, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 66.6015625, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.86328125, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 51.64873504638672, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.5054, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.412, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.069, "step": 1400 }, { "epoch": 0.07, "grad_norm": 0.055843672095774365, "learning_rate": 9.417215189873419e-06, "loss": 2.7344, "step": 1402 }, { "epoch": 0.07, "grad_norm": 0.05515526710896152, "learning_rate": 9.416202531645571e-06, "loss": 2.4395, "step": 1404 }, { "epoch": 0.07, "grad_norm": 0.08982747905012575, "learning_rate": 9.415189873417723e-06, "loss": 3.6719, "step": 1406 }, { "epoch": 0.07, "grad_norm": 0.05196739669400878, "learning_rate": 9.414177215189875e-06, "loss": 1.8359, "step": 1408 }, { "epoch": 0.07, "grad_norm": 0.0627278444078641, "learning_rate": 9.413164556962025e-06, "loss": 2.2461, "step": 1410 }, { "epoch": 0.07, "grad_norm": 0.05884919908680651, "learning_rate": 9.412151898734177e-06, "loss": 2.5234, "step": 1412 }, { "epoch": 0.07, "grad_norm": 0.05744260264645288, "learning_rate": 9.411139240506329e-06, "loss": 2.8867, "step": 1414 }, { "epoch": 0.07, "grad_norm": 0.051897095461469187, "learning_rate": 9.41012658227848e-06, "loss": 2.0156, "step": 1416 }, { "epoch": 0.07, "grad_norm": 0.05879349636390874, "learning_rate": 9.409113924050634e-06, "loss": 2.4766, "step": 1418 }, { "epoch": 0.07, "grad_norm": 0.04729306777999818, "learning_rate": 9.408101265822786e-06, "loss": 1.8867, "step": 1420 }, { "epoch": 0.07, "grad_norm": 0.0421728790409431, "learning_rate": 9.407088607594938e-06, "loss": 2.5, "step": 1422 }, { "epoch": 0.07, "grad_norm": 0.12666988320747452, "learning_rate": 9.40607594936709e-06, "loss": 2.2227, "step": 1424 }, { "epoch": 0.07, "grad_norm": 0.06732281487807565, "learning_rate": 9.405063291139242e-06, "loss": 2.8477, "step": 1426 }, { "epoch": 0.07, "grad_norm": 0.03916516818938533, "learning_rate": 9.404050632911394e-06, "loss": 2.7266, "step": 1428 }, { "epoch": 0.07, "grad_norm": 0.04706147969736972, "learning_rate": 9.403037974683546e-06, "loss": 2.6992, "step": 1430 }, { "epoch": 0.07, "grad_norm": 0.04955077897200769, "learning_rate": 9.402025316455698e-06, "loss": 2.9688, "step": 1432 }, { "epoch": 0.07, "grad_norm": 0.04341814358564929, "learning_rate": 9.40101265822785e-06, "loss": 2.4688, "step": 1434 }, { "epoch": 0.07, "grad_norm": 0.05735382001404066, "learning_rate": 9.4e-06, "loss": 2.6055, "step": 1436 }, { "epoch": 0.07, "grad_norm": 0.07465431435276457, "learning_rate": 9.398987341772152e-06, "loss": 4.1406, "step": 1438 }, { "epoch": 0.07, "grad_norm": 0.0420444573512173, "learning_rate": 9.397974683544304e-06, "loss": 3.2422, "step": 1440 }, { "epoch": 0.07, "grad_norm": 0.05085789187930648, "learning_rate": 9.396962025316456e-06, "loss": 1.6133, "step": 1442 }, { "epoch": 0.07, "grad_norm": 0.08454571029108997, "learning_rate": 9.395949367088607e-06, "loss": 2.0781, "step": 1444 }, { "epoch": 0.07, "grad_norm": 0.07139657679173401, "learning_rate": 9.39493670886076e-06, "loss": 2.1387, "step": 1446 }, { "epoch": 0.07, "grad_norm": 0.04756116820932081, "learning_rate": 9.393924050632913e-06, "loss": 2.168, "step": 1448 }, { "epoch": 0.07, "grad_norm": 0.04401770959594053, "learning_rate": 9.392911392405065e-06, "loss": 3.5078, "step": 1450 }, { "epoch": 0.07, "grad_norm": 0.045790448883903524, "learning_rate": 9.391898734177217e-06, "loss": 1.9648, "step": 1452 }, { "epoch": 0.07, "grad_norm": 0.05377943050007024, "learning_rate": 9.390886075949369e-06, "loss": 3.875, "step": 1454 }, { "epoch": 0.07, "grad_norm": 0.04606808093150142, "learning_rate": 9.38987341772152e-06, "loss": 2.8438, "step": 1456 }, { "epoch": 0.07, "grad_norm": 0.04546882059970107, "learning_rate": 9.388860759493672e-06, "loss": 1.6523, "step": 1458 }, { "epoch": 0.07, "grad_norm": 0.07704364625121993, "learning_rate": 9.387848101265824e-06, "loss": 1.1895, "step": 1460 }, { "epoch": 0.07, "grad_norm": 0.07143045147801531, "learning_rate": 9.386835443037976e-06, "loss": 3.6172, "step": 1462 }, { "epoch": 0.07, "grad_norm": 0.06199252590022181, "learning_rate": 9.385822784810127e-06, "loss": 2.3066, "step": 1464 }, { "epoch": 0.07, "grad_norm": 0.05668568463084169, "learning_rate": 9.384810126582278e-06, "loss": 1.084, "step": 1466 }, { "epoch": 0.07, "grad_norm": 0.12850516952142793, "learning_rate": 9.38379746835443e-06, "loss": 1.5801, "step": 1468 }, { "epoch": 0.07, "grad_norm": 0.04329007335741428, "learning_rate": 9.382784810126582e-06, "loss": 2.5781, "step": 1470 }, { "epoch": 0.07, "grad_norm": 0.09555574181806, "learning_rate": 9.381772151898734e-06, "loss": 3.2812, "step": 1472 }, { "epoch": 0.07, "grad_norm": 0.05880979405295948, "learning_rate": 9.380759493670886e-06, "loss": 2.8203, "step": 1474 }, { "epoch": 0.07, "grad_norm": 0.049091451604602945, "learning_rate": 9.379746835443038e-06, "loss": 2.9141, "step": 1476 }, { "epoch": 0.07, "grad_norm": 0.07680641181557653, "learning_rate": 9.378734177215192e-06, "loss": 1.9922, "step": 1478 }, { "epoch": 0.07, "grad_norm": 0.057593120381473364, "learning_rate": 9.377721518987343e-06, "loss": 2.6523, "step": 1480 }, { "epoch": 0.07, "grad_norm": 0.09099344445781292, "learning_rate": 9.376708860759495e-06, "loss": 3.0547, "step": 1482 }, { "epoch": 0.07, "grad_norm": 0.07127214583778112, "learning_rate": 9.375696202531647e-06, "loss": 1.7109, "step": 1484 }, { "epoch": 0.07, "grad_norm": 0.04901323516280243, "learning_rate": 9.3746835443038e-06, "loss": 1.9648, "step": 1486 }, { "epoch": 0.07, "grad_norm": 0.06218888476222176, "learning_rate": 9.373670886075951e-06, "loss": 1.1504, "step": 1488 }, { "epoch": 0.07, "grad_norm": 0.043001537655645475, "learning_rate": 9.372658227848101e-06, "loss": 2.1172, "step": 1490 }, { "epoch": 0.07, "grad_norm": 0.058695091585682646, "learning_rate": 9.371645569620253e-06, "loss": 3.3984, "step": 1492 }, { "epoch": 0.07, "grad_norm": 0.048734630242664174, "learning_rate": 9.370632911392405e-06, "loss": 2.9062, "step": 1494 }, { "epoch": 0.07, "grad_norm": 0.05142460712334133, "learning_rate": 9.369620253164557e-06, "loss": 2.9219, "step": 1496 }, { "epoch": 0.07, "grad_norm": 0.048336168592599454, "learning_rate": 9.368607594936709e-06, "loss": 1.3008, "step": 1498 }, { "epoch": 0.07, "grad_norm": 0.07034287194094196, "learning_rate": 9.367594936708861e-06, "loss": 1.1387, "step": 1500 }, { "epoch": 0.07, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 44.3359375, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 91.6015625, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.80078125, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 69.29191589355469, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.3089, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.659, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.088, "step": 1500 }, { "epoch": 0.07, "eval_specter_top15HN_validation.jsonl.gz_acc1": 8.984375, "eval_specter_top15HN_validation.jsonl.gz_acc3": 21.484375, "eval_specter_top15HN_validation.jsonl.gz_loss": 1.484375, "eval_specter_top15HN_validation.jsonl.gz_mrr": 20.72687530517578, "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.65, "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 24.151, "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.377, "step": 1500 }, { "epoch": 0.07, "eval_nq_top15HN_validation.jsonl.gz_acc1": 45.5078125, "eval_nq_top15HN_validation.jsonl.gz_acc3": 94.140625, "eval_nq_top15HN_validation.jsonl.gz_loss": 0.8671875, "eval_nq_top15HN_validation.jsonl.gz_mrr": 70.34400939941406, "eval_nq_top15HN_validation.jsonl.gz_runtime": 11.0356, "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.799, "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.091, "step": 1500 }, { "epoch": 0.07, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 41.40625, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 85.7421875, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.98046875, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 64.21460723876953, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.4385, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.433, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, "step": 1500 }, { "epoch": 0.07, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 42.96875, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 87.3046875, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.0234375, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 66.57454681396484, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.7972, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.927, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.093, "step": 1500 }, { "epoch": 0.07, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 47.0703125, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 94.7265625, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.77734375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.02752685546875, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.861, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.893, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.092, "step": 1500 }, { "epoch": 0.07, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 41.015625, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 85.7421875, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.73828125, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 64.59078979492188, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.5358, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 6.074, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.095, "step": 1500 }, { "epoch": 0.07, "eval_fever_top15HN_validation.jsonl.gz_acc1": 39.6484375, "eval_fever_top15HN_validation.jsonl.gz_acc3": 86.9140625, "eval_fever_top15HN_validation.jsonl.gz_loss": 2.234375, "eval_fever_top15HN_validation.jsonl.gz_mrr": 65.64114379882812, "eval_fever_top15HN_validation.jsonl.gz_runtime": 15.9459, "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 4.014, "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.063, "step": 1500 }, { "epoch": 0.07, "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 35.15625, "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 75.78125, "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.83984375, "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 57.90193557739258, "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.3191, "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 12.032, "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.188, "step": 1500 }, { "epoch": 0.07, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 44.53125, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 91.2109375, "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.0859375, "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 68.68386840820312, "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 10.3583, "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 6.179, "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.097, "step": 1500 }, { "epoch": 0.07, "eval_arguana_synthetic_validation.jsonl.gz_acc1": 44.53125, "eval_arguana_synthetic_validation.jsonl.gz_acc3": 92.1875, "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, "eval_arguana_synthetic_validation.jsonl.gz_mrr": 69.2659912109375, "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.0954, "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.56, "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.196, "step": 1500 }, { "epoch": 0.07, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 30.6640625, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 70.1171875, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.86328125, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 52.90107727050781, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.4006, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.444, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.069, "step": 1500 }, { "epoch": 0.08, "grad_norm": 0.058479193123082245, "learning_rate": 9.366582278481013e-06, "loss": 3.1406, "step": 1502 }, { "epoch": 0.08, "grad_norm": 0.05144200831789539, "learning_rate": 9.365569620253165e-06, "loss": 1.9023, "step": 1504 }, { "epoch": 0.08, "grad_norm": 0.0415613001994433, "learning_rate": 9.364556962025317e-06, "loss": 2.9141, "step": 1506 }, { "epoch": 0.08, "grad_norm": 0.05266671122440496, "learning_rate": 9.36354430379747e-06, "loss": 2.2188, "step": 1508 }, { "epoch": 0.08, "grad_norm": 0.06444994239463296, "learning_rate": 9.362531645569622e-06, "loss": 2.6758, "step": 1510 }, { "epoch": 0.08, "grad_norm": 0.06834124597210364, "learning_rate": 9.361518987341774e-06, "loss": 1.9805, "step": 1512 }, { "epoch": 0.08, "grad_norm": 0.06300774724452027, "learning_rate": 9.360506329113926e-06, "loss": 2.9531, "step": 1514 }, { "epoch": 0.08, "grad_norm": 0.04840310128919646, "learning_rate": 9.359493670886076e-06, "loss": 3.5703, "step": 1516 }, { "epoch": 0.08, "grad_norm": 0.054293535407710335, "learning_rate": 9.358481012658228e-06, "loss": 2.5938, "step": 1518 }, { "epoch": 0.08, "grad_norm": 0.04978796968184069, "learning_rate": 9.35746835443038e-06, "loss": 3.375, "step": 1520 }, { "epoch": 0.08, "grad_norm": 0.06343862426870277, "learning_rate": 9.356455696202532e-06, "loss": 2.6953, "step": 1522 }, { "epoch": 0.08, "grad_norm": 0.04267087281847612, "learning_rate": 9.355443037974684e-06, "loss": 2.5312, "step": 1524 }, { "epoch": 0.08, "grad_norm": 0.05536594787449037, "learning_rate": 9.354430379746836e-06, "loss": 3.0469, "step": 1526 }, { "epoch": 0.08, "grad_norm": 0.048765163359707465, "learning_rate": 9.353417721518988e-06, "loss": 1.375, "step": 1528 }, { "epoch": 0.08, "grad_norm": 0.03965024597525289, "learning_rate": 9.35240506329114e-06, "loss": 2.2383, "step": 1530 }, { "epoch": 0.08, "grad_norm": 0.05573923071709513, "learning_rate": 9.351392405063291e-06, "loss": 3.3203, "step": 1532 }, { "epoch": 0.08, "grad_norm": 0.0479853933542111, "learning_rate": 9.350379746835443e-06, "loss": 2.2305, "step": 1534 }, { "epoch": 0.08, "grad_norm": 0.040700681106451066, "learning_rate": 9.349367088607595e-06, "loss": 3.1953, "step": 1536 }, { "epoch": 0.08, "grad_norm": 0.04475551099431461, "learning_rate": 9.348354430379749e-06, "loss": 2.1875, "step": 1538 }, { "epoch": 0.08, "grad_norm": 0.049553725050291646, "learning_rate": 9.3473417721519e-06, "loss": 3.2344, "step": 1540 }, { "epoch": 0.08, "grad_norm": 0.04670156490937008, "learning_rate": 9.346329113924051e-06, "loss": 1.9141, "step": 1542 }, { "epoch": 0.08, "grad_norm": 0.05663231468073976, "learning_rate": 9.345316455696203e-06, "loss": 2.2871, "step": 1544 }, { "epoch": 0.08, "grad_norm": 0.06318704899297736, "learning_rate": 9.344303797468355e-06, "loss": 2.9453, "step": 1546 }, { "epoch": 0.08, "grad_norm": 0.059460718933132796, "learning_rate": 9.343291139240507e-06, "loss": 3.1484, "step": 1548 }, { "epoch": 0.08, "grad_norm": 0.0711919992524241, "learning_rate": 9.342278481012659e-06, "loss": 1.1777, "step": 1550 }, { "epoch": 0.08, "grad_norm": 0.07330078641859443, "learning_rate": 9.34126582278481e-06, "loss": 2.1367, "step": 1552 }, { "epoch": 0.08, "grad_norm": 0.08338509817001034, "learning_rate": 9.340253164556962e-06, "loss": 2.0977, "step": 1554 }, { "epoch": 0.08, "grad_norm": 0.08714453539283397, "learning_rate": 9.339240506329114e-06, "loss": 1.6953, "step": 1556 }, { "epoch": 0.08, "grad_norm": 0.09924684731192908, "learning_rate": 9.338227848101266e-06, "loss": 1.9922, "step": 1558 }, { "epoch": 0.08, "grad_norm": 0.038239102309762, "learning_rate": 9.337215189873418e-06, "loss": 2.7891, "step": 1560 }, { "epoch": 0.08, "grad_norm": 0.05314827892824287, "learning_rate": 9.33620253164557e-06, "loss": 2.7109, "step": 1562 }, { "epoch": 0.08, "grad_norm": 0.05981892354958667, "learning_rate": 9.335189873417722e-06, "loss": 1.1152, "step": 1564 }, { "epoch": 0.08, "grad_norm": 0.10543378939286623, "learning_rate": 9.334177215189874e-06, "loss": 2.334, "step": 1566 }, { "epoch": 0.08, "grad_norm": 0.12095726689494823, "learning_rate": 9.333164556962027e-06, "loss": 2.5273, "step": 1568 }, { "epoch": 0.08, "grad_norm": 0.04771441150677178, "learning_rate": 9.332151898734178e-06, "loss": 3.293, "step": 1570 }, { "epoch": 0.08, "grad_norm": 0.05849347076840012, "learning_rate": 9.33113924050633e-06, "loss": 3.2422, "step": 1572 }, { "epoch": 0.08, "grad_norm": 0.05734448026106365, "learning_rate": 9.330126582278481e-06, "loss": 3.1797, "step": 1574 }, { "epoch": 0.08, "grad_norm": 0.051223518523306436, "learning_rate": 9.329113924050633e-06, "loss": 2.5, "step": 1576 }, { "epoch": 0.08, "grad_norm": 0.09701413768488983, "learning_rate": 9.328101265822785e-06, "loss": 1.7031, "step": 1578 }, { "epoch": 0.08, "grad_norm": 0.048150416298882136, "learning_rate": 9.327088607594937e-06, "loss": 2.2734, "step": 1580 }, { "epoch": 0.08, "grad_norm": 0.09993600585851185, "learning_rate": 9.326075949367089e-06, "loss": 3.0391, "step": 1582 }, { "epoch": 0.08, "grad_norm": 0.05125470855427429, "learning_rate": 9.325063291139241e-06, "loss": 3.7031, "step": 1584 }, { "epoch": 0.08, "grad_norm": 0.09079844714057336, "learning_rate": 9.324050632911393e-06, "loss": 1.7188, "step": 1586 }, { "epoch": 0.08, "grad_norm": 0.041271469047698434, "learning_rate": 9.323037974683545e-06, "loss": 2.4805, "step": 1588 }, { "epoch": 0.08, "grad_norm": 0.046549116257717855, "learning_rate": 9.322025316455697e-06, "loss": 2.8203, "step": 1590 }, { "epoch": 0.08, "grad_norm": 0.04959674743954773, "learning_rate": 9.321012658227849e-06, "loss": 2.7188, "step": 1592 }, { "epoch": 0.08, "grad_norm": 0.061474583923480014, "learning_rate": 9.32e-06, "loss": 3.3984, "step": 1594 }, { "epoch": 0.08, "grad_norm": 0.09163191359628134, "learning_rate": 9.318987341772152e-06, "loss": 1.6719, "step": 1596 }, { "epoch": 0.08, "grad_norm": 0.045633306114650114, "learning_rate": 9.317974683544304e-06, "loss": 2.9531, "step": 1598 }, { "epoch": 0.08, "grad_norm": 0.044980513613666485, "learning_rate": 9.316962025316456e-06, "loss": 2.6484, "step": 1600 }, { "epoch": 0.08, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 44.7265625, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 92.578125, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.8046875, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 69.30206298828125, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.4303, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.599, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.087, "step": 1600 }, { "epoch": 0.08, "eval_specter_top15HN_validation.jsonl.gz_acc1": 8.203125, "eval_specter_top15HN_validation.jsonl.gz_acc3": 20.3125, "eval_specter_top15HN_validation.jsonl.gz_loss": 1.484375, "eval_specter_top15HN_validation.jsonl.gz_mrr": 20.398147583007812, "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.6925, "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 23.77, "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.371, "step": 1600 }, { "epoch": 0.08, "eval_nq_top15HN_validation.jsonl.gz_acc1": 44.140625, "eval_nq_top15HN_validation.jsonl.gz_acc3": 91.6015625, "eval_nq_top15HN_validation.jsonl.gz_loss": 0.87109375, "eval_nq_top15HN_validation.jsonl.gz_mrr": 68.9178695678711, "eval_nq_top15HN_validation.jsonl.gz_runtime": 11.066, "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.783, "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.09, "step": 1600 }, { "epoch": 0.08, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 40.625, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 84.1796875, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.98828125, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 63.6385498046875, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.4703, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.423, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, "step": 1600 }, { "epoch": 0.08, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 41.9921875, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 85.3515625, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.03125, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 65.1825942993164, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.8864, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.879, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.092, "step": 1600 }, { "epoch": 0.08, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.2890625, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 93.75, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 70.52351379394531, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.4642, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 6.116, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.096, "step": 1600 }, { "epoch": 0.08, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 39.84375, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 83.7890625, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.73828125, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 63.52267837524414, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.7581, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.949, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.093, "step": 1600 }, { "epoch": 0.08, "eval_fever_top15HN_validation.jsonl.gz_acc1": 39.453125, "eval_fever_top15HN_validation.jsonl.gz_acc3": 86.71875, "eval_fever_top15HN_validation.jsonl.gz_loss": 2.1875, "eval_fever_top15HN_validation.jsonl.gz_mrr": 65.20020294189453, "eval_fever_top15HN_validation.jsonl.gz_runtime": 15.9427, "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 4.014, "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.063, "step": 1600 }, { "epoch": 0.08, "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 35.3515625, "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 76.5625, "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.83984375, "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 58.69310760498047, "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.267, "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 12.151, "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.19, "step": 1600 }, { "epoch": 0.08, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 44.140625, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 90.4296875, "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.078125, "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 68.63660430908203, "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 8.4904, "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 7.538, "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.118, "step": 1600 }, { "epoch": 0.08, "eval_arguana_synthetic_validation.jsonl.gz_acc1": 45.8984375, "eval_arguana_synthetic_validation.jsonl.gz_acc3": 93.359375, "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, "eval_arguana_synthetic_validation.jsonl.gz_mrr": 69.04869079589844, "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.0113, "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.771, "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.2, "step": 1600 }, { "epoch": 0.08, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 31.8359375, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 72.65625, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.859375, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 54.984291076660156, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.3615, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.456, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, "step": 1600 }, { "epoch": 0.08, "grad_norm": 0.06245480333735374, "learning_rate": 9.315949367088608e-06, "loss": 3.1953, "step": 1602 }, { "epoch": 0.08, "grad_norm": 0.055605246607420805, "learning_rate": 9.31493670886076e-06, "loss": 2.3262, "step": 1604 }, { "epoch": 0.08, "grad_norm": 0.0501153666439784, "learning_rate": 9.313924050632912e-06, "loss": 2.6094, "step": 1606 }, { "epoch": 0.08, "grad_norm": 0.07462734447902677, "learning_rate": 9.312911392405064e-06, "loss": 2.5078, "step": 1608 }, { "epoch": 0.08, "grad_norm": 0.05460964561976558, "learning_rate": 9.311898734177216e-06, "loss": 2.1641, "step": 1610 }, { "epoch": 0.08, "grad_norm": 0.04699422654304627, "learning_rate": 9.310886075949368e-06, "loss": 2.8047, "step": 1612 }, { "epoch": 0.08, "grad_norm": 0.05171422202408438, "learning_rate": 9.30987341772152e-06, "loss": 3.1562, "step": 1614 }, { "epoch": 0.08, "grad_norm": 0.053386026278170844, "learning_rate": 9.308860759493672e-06, "loss": 1.0215, "step": 1616 }, { "epoch": 0.08, "grad_norm": 0.051189341730886406, "learning_rate": 9.307848101265823e-06, "loss": 1.9531, "step": 1618 }, { "epoch": 0.08, "grad_norm": 0.05831005838918187, "learning_rate": 9.306835443037975e-06, "loss": 3.2656, "step": 1620 }, { "epoch": 0.08, "grad_norm": 0.04834690668545663, "learning_rate": 9.305822784810127e-06, "loss": 2.4219, "step": 1622 }, { "epoch": 0.08, "grad_norm": 0.04807847581015744, "learning_rate": 9.30481012658228e-06, "loss": 1.6738, "step": 1624 }, { "epoch": 0.08, "grad_norm": 0.05549176218970007, "learning_rate": 9.303797468354431e-06, "loss": 3.2188, "step": 1626 }, { "epoch": 0.08, "grad_norm": 0.07532291608421006, "learning_rate": 9.302784810126583e-06, "loss": 3.3516, "step": 1628 }, { "epoch": 0.08, "grad_norm": 0.05981979151667419, "learning_rate": 9.301772151898735e-06, "loss": 3.7891, "step": 1630 }, { "epoch": 0.08, "grad_norm": 0.049378118050634336, "learning_rate": 9.300759493670887e-06, "loss": 2.3281, "step": 1632 }, { "epoch": 0.08, "grad_norm": 0.05510804145055033, "learning_rate": 9.299746835443039e-06, "loss": 2.9375, "step": 1634 }, { "epoch": 0.08, "grad_norm": 0.05964761549202754, "learning_rate": 9.29873417721519e-06, "loss": 2.6172, "step": 1636 }, { "epoch": 0.08, "grad_norm": 0.05041859270183951, "learning_rate": 9.297721518987343e-06, "loss": 3.1641, "step": 1638 }, { "epoch": 0.08, "grad_norm": 0.04798209622883979, "learning_rate": 9.296708860759494e-06, "loss": 1.8809, "step": 1640 }, { "epoch": 0.08, "grad_norm": 0.05573792952238686, "learning_rate": 9.295696202531646e-06, "loss": 3.1094, "step": 1642 }, { "epoch": 0.08, "grad_norm": 0.04795052794632855, "learning_rate": 9.294683544303798e-06, "loss": 2.7969, "step": 1644 }, { "epoch": 0.08, "grad_norm": 0.12025901515926583, "learning_rate": 9.29367088607595e-06, "loss": 2.6758, "step": 1646 }, { "epoch": 0.08, "grad_norm": 0.06672087285753964, "learning_rate": 9.292658227848102e-06, "loss": 3.9844, "step": 1648 }, { "epoch": 0.08, "grad_norm": 0.06282660709028401, "learning_rate": 9.291645569620254e-06, "loss": 3.1328, "step": 1650 }, { "epoch": 0.08, "grad_norm": 0.058677668875888905, "learning_rate": 9.290632911392406e-06, "loss": 3.2188, "step": 1652 }, { "epoch": 0.08, "grad_norm": 0.05333625411921265, "learning_rate": 9.289620253164558e-06, "loss": 3.5547, "step": 1654 }, { "epoch": 0.08, "grad_norm": 0.04452332145815023, "learning_rate": 9.28860759493671e-06, "loss": 1.6543, "step": 1656 }, { "epoch": 0.08, "grad_norm": 0.12435004639940082, "learning_rate": 9.287594936708862e-06, "loss": 2.0605, "step": 1658 }, { "epoch": 0.08, "grad_norm": 0.059831108966171234, "learning_rate": 9.286582278481014e-06, "loss": 2.6836, "step": 1660 }, { "epoch": 0.08, "grad_norm": 0.053703210014712156, "learning_rate": 9.285569620253165e-06, "loss": 2.0977, "step": 1662 }, { "epoch": 0.08, "grad_norm": 0.0901318471300885, "learning_rate": 9.284556962025317e-06, "loss": 2.2734, "step": 1664 }, { "epoch": 0.08, "grad_norm": 0.05244016033163033, "learning_rate": 9.28354430379747e-06, "loss": 2.9375, "step": 1666 }, { "epoch": 0.08, "grad_norm": 0.08174274152119362, "learning_rate": 9.282531645569621e-06, "loss": 1.625, "step": 1668 }, { "epoch": 0.08, "grad_norm": 0.058284333958605467, "learning_rate": 9.281518987341773e-06, "loss": 3.8438, "step": 1670 }, { "epoch": 0.08, "grad_norm": 0.04967242799542885, "learning_rate": 9.280506329113925e-06, "loss": 1.9492, "step": 1672 }, { "epoch": 0.08, "grad_norm": 0.04943855662398272, "learning_rate": 9.279493670886077e-06, "loss": 2.582, "step": 1674 }, { "epoch": 0.08, "grad_norm": 0.05505936079786661, "learning_rate": 9.278481012658229e-06, "loss": 2.6211, "step": 1676 }, { "epoch": 0.08, "grad_norm": 0.0461512612947597, "learning_rate": 9.27746835443038e-06, "loss": 2.6562, "step": 1678 }, { "epoch": 0.08, "grad_norm": 0.04974146645078623, "learning_rate": 9.276455696202533e-06, "loss": 3.0703, "step": 1680 }, { "epoch": 0.08, "grad_norm": 0.056930263889751004, "learning_rate": 9.275443037974685e-06, "loss": 3.5, "step": 1682 }, { "epoch": 0.08, "grad_norm": 0.12321897233652232, "learning_rate": 9.274430379746836e-06, "loss": 0.9648, "step": 1684 }, { "epoch": 0.08, "grad_norm": 0.06047479315003769, "learning_rate": 9.273417721518988e-06, "loss": 2.8594, "step": 1686 }, { "epoch": 0.08, "grad_norm": 0.048279307732973814, "learning_rate": 9.27240506329114e-06, "loss": 2.0195, "step": 1688 }, { "epoch": 0.08, "grad_norm": 0.05167384804499145, "learning_rate": 9.271392405063292e-06, "loss": 1.3398, "step": 1690 }, { "epoch": 0.08, "grad_norm": 0.06352858612327222, "learning_rate": 9.270379746835444e-06, "loss": 3.7422, "step": 1692 }, { "epoch": 0.08, "grad_norm": 0.06069008364933182, "learning_rate": 9.269367088607596e-06, "loss": 3.2969, "step": 1694 }, { "epoch": 0.08, "grad_norm": 0.04565140006110923, "learning_rate": 9.268354430379748e-06, "loss": 2.3047, "step": 1696 }, { "epoch": 0.08, "grad_norm": 0.06061226444027013, "learning_rate": 9.2673417721519e-06, "loss": 2.6641, "step": 1698 }, { "epoch": 0.09, "grad_norm": 0.06055021661335569, "learning_rate": 9.266329113924052e-06, "loss": 2.6523, "step": 1700 }, { "epoch": 0.09, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 44.53125, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 91.796875, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.796875, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 69.34986877441406, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.4029, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.613, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.088, "step": 1700 }, { "epoch": 0.09, "eval_specter_top15HN_validation.jsonl.gz_acc1": 8.203125, "eval_specter_top15HN_validation.jsonl.gz_acc3": 21.2890625, "eval_specter_top15HN_validation.jsonl.gz_loss": 1.4765625, "eval_specter_top15HN_validation.jsonl.gz_mrr": 19.810283660888672, "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.7101, "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 23.615, "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.369, "step": 1700 }, { "epoch": 0.09, "eval_nq_top15HN_validation.jsonl.gz_acc1": 45.3125, "eval_nq_top15HN_validation.jsonl.gz_acc3": 92.96875, "eval_nq_top15HN_validation.jsonl.gz_loss": 0.86328125, "eval_nq_top15HN_validation.jsonl.gz_mrr": 69.57865142822266, "eval_nq_top15HN_validation.jsonl.gz_runtime": 11.1636, "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.733, "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.09, "step": 1700 }, { "epoch": 0.09, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 40.8203125, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 84.5703125, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.98828125, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 64.43778228759766, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.4692, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.423, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, "step": 1700 }, { "epoch": 0.09, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 42.1875, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 85.9375, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.03125, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 65.38245391845703, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.761, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.947, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.093, "step": 1700 }, { "epoch": 0.09, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 47.265625, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 95.3125, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.38700866699219, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.8454, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.901, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.092, "step": 1700 }, { "epoch": 0.09, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 40.625, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 84.765625, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.7421875, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 63.84552764892578, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.8499, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.899, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.092, "step": 1700 }, { "epoch": 0.09, "eval_fever_top15HN_validation.jsonl.gz_acc1": 42.1875, "eval_fever_top15HN_validation.jsonl.gz_acc3": 89.6484375, "eval_fever_top15HN_validation.jsonl.gz_loss": 2.03125, "eval_fever_top15HN_validation.jsonl.gz_mrr": 67.33321380615234, "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.0186, "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.995, "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.062, "step": 1700 }, { "epoch": 0.09, "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 41.9921875, "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 87.109375, "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.81640625, "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 65.82962799072266, "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.8881, "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 10.869, "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.17, "step": 1700 }, { "epoch": 0.09, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 46.09375, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 93.1640625, "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.078125, "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 70.27595520019531, "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 8.3487, "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 7.666, "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.12, "step": 1700 }, { "epoch": 0.09, "eval_arguana_synthetic_validation.jsonl.gz_acc1": 44.3359375, "eval_arguana_synthetic_validation.jsonl.gz_acc3": 92.3828125, "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, "eval_arguana_synthetic_validation.jsonl.gz_mrr": 68.87727355957031, "eval_arguana_synthetic_validation.jsonl.gz_runtime": 4.8468, "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 13.205, "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.206, "step": 1700 }, { "epoch": 0.09, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 31.8359375, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 70.8984375, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.85546875, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 54.73493194580078, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.1071, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.537, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.071, "step": 1700 }, { "epoch": 0.09, "grad_norm": 0.04881214834140313, "learning_rate": 9.265316455696202e-06, "loss": 1.5977, "step": 1702 }, { "epoch": 0.09, "grad_norm": 0.05453646827872593, "learning_rate": 9.264303797468356e-06, "loss": 2.5781, "step": 1704 }, { "epoch": 0.09, "grad_norm": 0.06013464957874684, "learning_rate": 9.263291139240507e-06, "loss": 0.8926, "step": 1706 }, { "epoch": 0.09, "grad_norm": 0.06752355707010797, "learning_rate": 9.26227848101266e-06, "loss": 2.2012, "step": 1708 }, { "epoch": 0.09, "grad_norm": 0.04740198019318174, "learning_rate": 9.261265822784811e-06, "loss": 2.1914, "step": 1710 }, { "epoch": 0.09, "grad_norm": 0.06168270499607577, "learning_rate": 9.260253164556963e-06, "loss": 3.4688, "step": 1712 }, { "epoch": 0.09, "grad_norm": 0.05425417322129795, "learning_rate": 9.259240506329115e-06, "loss": 3.1719, "step": 1714 }, { "epoch": 0.09, "grad_norm": 0.06609753416942989, "learning_rate": 9.258227848101267e-06, "loss": 2.1836, "step": 1716 }, { "epoch": 0.09, "grad_norm": 0.04879519305718599, "learning_rate": 9.257215189873419e-06, "loss": 1.3203, "step": 1718 }, { "epoch": 0.09, "grad_norm": 0.06707616844340519, "learning_rate": 9.25620253164557e-06, "loss": 1.1934, "step": 1720 }, { "epoch": 0.09, "grad_norm": 0.04621434055706063, "learning_rate": 9.255189873417723e-06, "loss": 2.0195, "step": 1722 }, { "epoch": 0.09, "grad_norm": 0.07293254248042395, "learning_rate": 9.254177215189875e-06, "loss": 2.5, "step": 1724 }, { "epoch": 0.09, "grad_norm": 0.048994009049322364, "learning_rate": 9.253164556962027e-06, "loss": 2.5898, "step": 1726 }, { "epoch": 0.09, "grad_norm": 0.04525483393881031, "learning_rate": 9.252151898734178e-06, "loss": 1.2891, "step": 1728 }, { "epoch": 0.09, "grad_norm": 0.07582602480871231, "learning_rate": 9.251139240506329e-06, "loss": 3.5938, "step": 1730 }, { "epoch": 0.09, "grad_norm": 0.050116497906216896, "learning_rate": 9.25012658227848e-06, "loss": 1.2734, "step": 1732 }, { "epoch": 0.09, "grad_norm": 0.06075701854481855, "learning_rate": 9.249113924050634e-06, "loss": 3.4375, "step": 1734 }, { "epoch": 0.09, "grad_norm": 0.07519665939809185, "learning_rate": 9.248101265822786e-06, "loss": 2.8008, "step": 1736 }, { "epoch": 0.09, "grad_norm": 0.056641325864073055, "learning_rate": 9.247088607594938e-06, "loss": 2.918, "step": 1738 }, { "epoch": 0.09, "grad_norm": 0.042566341445080966, "learning_rate": 9.24607594936709e-06, "loss": 2.5625, "step": 1740 }, { "epoch": 0.09, "grad_norm": 0.0574531442464776, "learning_rate": 9.245063291139242e-06, "loss": 3.5391, "step": 1742 }, { "epoch": 0.09, "grad_norm": 0.07581751572601363, "learning_rate": 9.244050632911394e-06, "loss": 2.2578, "step": 1744 }, { "epoch": 0.09, "grad_norm": 0.04726256151754022, "learning_rate": 9.243037974683546e-06, "loss": 4.1797, "step": 1746 }, { "epoch": 0.09, "grad_norm": 0.059749398568774544, "learning_rate": 9.242025316455698e-06, "loss": 2.9922, "step": 1748 }, { "epoch": 0.09, "grad_norm": 0.04809433552690452, "learning_rate": 9.24101265822785e-06, "loss": 2.2031, "step": 1750 }, { "epoch": 0.09, "grad_norm": 0.04826640079361988, "learning_rate": 9.240000000000001e-06, "loss": 1.9219, "step": 1752 }, { "epoch": 0.09, "grad_norm": 0.046456755919968916, "learning_rate": 9.238987341772153e-06, "loss": 2.7031, "step": 1754 }, { "epoch": 0.09, "grad_norm": 0.044265683952215536, "learning_rate": 9.237974683544303e-06, "loss": 2.5625, "step": 1756 }, { "epoch": 0.09, "grad_norm": 0.073608300381369, "learning_rate": 9.236962025316455e-06, "loss": 1.9961, "step": 1758 }, { "epoch": 0.09, "grad_norm": 0.048301831144158945, "learning_rate": 9.235949367088607e-06, "loss": 3.0078, "step": 1760 }, { "epoch": 0.09, "grad_norm": 0.0501519559569957, "learning_rate": 9.23493670886076e-06, "loss": 2.0586, "step": 1762 }, { "epoch": 0.09, "grad_norm": 0.04577847285196145, "learning_rate": 9.233924050632913e-06, "loss": 3.6562, "step": 1764 }, { "epoch": 0.09, "grad_norm": 0.04563139274633882, "learning_rate": 9.232911392405065e-06, "loss": 2.5703, "step": 1766 }, { "epoch": 0.09, "grad_norm": 0.04125628654212963, "learning_rate": 9.231898734177217e-06, "loss": 2.8047, "step": 1768 }, { "epoch": 0.09, "grad_norm": 0.04008124908648469, "learning_rate": 9.230886075949368e-06, "loss": 1.6816, "step": 1770 }, { "epoch": 0.09, "grad_norm": 0.05475037868292045, "learning_rate": 9.22987341772152e-06, "loss": 4.0625, "step": 1772 }, { "epoch": 0.09, "grad_norm": 0.09761848673236384, "learning_rate": 9.228860759493672e-06, "loss": 1.5488, "step": 1774 }, { "epoch": 0.09, "grad_norm": 0.03963164610530929, "learning_rate": 9.227848101265824e-06, "loss": 2.7656, "step": 1776 }, { "epoch": 0.09, "grad_norm": 0.05981526276955224, "learning_rate": 9.226835443037976e-06, "loss": 2.9609, "step": 1778 }, { "epoch": 0.09, "grad_norm": 0.05052165933225442, "learning_rate": 9.225822784810128e-06, "loss": 1.3398, "step": 1780 }, { "epoch": 0.09, "grad_norm": 0.029083882392062527, "learning_rate": 9.224810126582278e-06, "loss": 3.5781, "step": 1782 }, { "epoch": 0.09, "grad_norm": 0.05857585196525194, "learning_rate": 9.22379746835443e-06, "loss": 3.6719, "step": 1784 }, { "epoch": 0.09, "grad_norm": 0.04577420801792604, "learning_rate": 9.222784810126582e-06, "loss": 1.7109, "step": 1786 }, { "epoch": 0.09, "grad_norm": 0.09168536433981106, "learning_rate": 9.221772151898734e-06, "loss": 1.6562, "step": 1788 }, { "epoch": 0.09, "grad_norm": 0.0643609862212717, "learning_rate": 9.220759493670886e-06, "loss": 3.1484, "step": 1790 }, { "epoch": 0.09, "grad_norm": 0.04811850844606396, "learning_rate": 9.219746835443038e-06, "loss": 1.1309, "step": 1792 }, { "epoch": 0.09, "grad_norm": 0.09587399643250918, "learning_rate": 9.218734177215191e-06, "loss": 3.6406, "step": 1794 }, { "epoch": 0.09, "grad_norm": 0.05002566010695197, "learning_rate": 9.217721518987343e-06, "loss": 2.8125, "step": 1796 }, { "epoch": 0.09, "grad_norm": 0.06295928731342211, "learning_rate": 9.216708860759495e-06, "loss": 0.9199, "step": 1798 }, { "epoch": 0.09, "grad_norm": 0.04770305027941822, "learning_rate": 9.215696202531647e-06, "loss": 2.3984, "step": 1800 }, { "epoch": 0.09, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 45.3125, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 93.5546875, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.80078125, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 69.60789489746094, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.3753, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.626, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.088, "step": 1800 }, { "epoch": 0.09, "eval_specter_top15HN_validation.jsonl.gz_acc1": 9.375, "eval_specter_top15HN_validation.jsonl.gz_acc3": 22.65625, "eval_specter_top15HN_validation.jsonl.gz_loss": 1.4609375, "eval_specter_top15HN_validation.jsonl.gz_mrr": 21.590526580810547, "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.557, "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 25.03, "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.391, "step": 1800 }, { "epoch": 0.09, "eval_nq_top15HN_validation.jsonl.gz_acc1": 44.921875, "eval_nq_top15HN_validation.jsonl.gz_acc3": 92.578125, "eval_nq_top15HN_validation.jsonl.gz_loss": 0.8671875, "eval_nq_top15HN_validation.jsonl.gz_mrr": 69.26657104492188, "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.6898, "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.987, "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.094, "step": 1800 }, { "epoch": 0.09, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 39.453125, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 82.8125, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.984375, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 63.29865264892578, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.5334, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.404, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, "step": 1800 }, { "epoch": 0.09, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 41.796875, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 85.9375, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.03125, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 65.24208068847656, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 11.2523, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.688, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.089, "step": 1800 }, { "epoch": 0.09, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.875, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 95.1171875, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.77734375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.32222747802734, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.9238, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.859, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.092, "step": 1800 }, { "epoch": 0.09, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 41.2109375, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 85.3515625, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.7421875, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 64.73181915283203, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.7931, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.93, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.093, "step": 1800 }, { "epoch": 0.09, "eval_fever_top15HN_validation.jsonl.gz_acc1": 42.1875, "eval_fever_top15HN_validation.jsonl.gz_acc3": 88.28125, "eval_fever_top15HN_validation.jsonl.gz_loss": 2.03125, "eval_fever_top15HN_validation.jsonl.gz_mrr": 66.28300476074219, "eval_fever_top15HN_validation.jsonl.gz_runtime": 15.9696, "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 4.008, "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.063, "step": 1800 }, { "epoch": 0.09, "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 36.328125, "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 76.7578125, "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.83984375, "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 59.691566467285156, "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.1729, "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 12.372, "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.193, "step": 1800 }, { "epoch": 0.09, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 45.5078125, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 93.1640625, "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.0703125, "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 70.11920928955078, "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 8.6727, "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 7.38, "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.115, "step": 1800 }, { "epoch": 0.09, "eval_arguana_synthetic_validation.jsonl.gz_acc1": 43.75, "eval_arguana_synthetic_validation.jsonl.gz_acc3": 91.9921875, "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, "eval_arguana_synthetic_validation.jsonl.gz_mrr": 69.05628204345703, "eval_arguana_synthetic_validation.jsonl.gz_runtime": 4.7868, "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 13.37, "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.209, "step": 1800 }, { "epoch": 0.09, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 30.2734375, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 68.5546875, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.859375, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 53.30010223388672, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 13.8704, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.614, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.072, "step": 1800 }, { "epoch": 0.09, "grad_norm": 0.04950437980448833, "learning_rate": 9.214683544303799e-06, "loss": 2.0, "step": 1802 }, { "epoch": 0.09, "grad_norm": 0.04629660184842734, "learning_rate": 9.213670886075951e-06, "loss": 1.6211, "step": 1804 }, { "epoch": 0.09, "grad_norm": 0.04969426845613648, "learning_rate": 9.212658227848103e-06, "loss": 1.3438, "step": 1806 }, { "epoch": 0.09, "grad_norm": 0.06037668180690101, "learning_rate": 9.211645569620255e-06, "loss": 2.8711, "step": 1808 }, { "epoch": 0.09, "grad_norm": 0.04550840404027804, "learning_rate": 9.210632911392405e-06, "loss": 0.9297, "step": 1810 }, { "epoch": 0.09, "grad_norm": 0.05465350201216224, "learning_rate": 9.209620253164557e-06, "loss": 2.1621, "step": 1812 }, { "epoch": 0.09, "grad_norm": 0.0486391200337659, "learning_rate": 9.208607594936709e-06, "loss": 2.7812, "step": 1814 }, { "epoch": 0.09, "grad_norm": 0.04918824198346791, "learning_rate": 9.20759493670886e-06, "loss": 3.1328, "step": 1816 }, { "epoch": 0.09, "grad_norm": 0.05974561661788206, "learning_rate": 9.206582278481013e-06, "loss": 2.4414, "step": 1818 }, { "epoch": 0.09, "grad_norm": 0.0600907115810377, "learning_rate": 9.205569620253165e-06, "loss": 2.7109, "step": 1820 }, { "epoch": 0.09, "grad_norm": 0.10297627561908523, "learning_rate": 9.204556962025316e-06, "loss": 3.5859, "step": 1822 }, { "epoch": 0.09, "grad_norm": 0.048003279768414474, "learning_rate": 9.20354430379747e-06, "loss": 1.9453, "step": 1824 }, { "epoch": 0.09, "grad_norm": 0.05041769450216464, "learning_rate": 9.202531645569622e-06, "loss": 2.4531, "step": 1826 }, { "epoch": 0.09, "grad_norm": 0.0550317635952239, "learning_rate": 9.201518987341774e-06, "loss": 3.0312, "step": 1828 }, { "epoch": 0.09, "grad_norm": 0.053536765593840685, "learning_rate": 9.200506329113926e-06, "loss": 1.9258, "step": 1830 }, { "epoch": 0.09, "grad_norm": 0.06965423447002039, "learning_rate": 9.199493670886078e-06, "loss": 3.9922, "step": 1832 }, { "epoch": 0.09, "grad_norm": 0.049126054691502434, "learning_rate": 9.19848101265823e-06, "loss": 2.2617, "step": 1834 }, { "epoch": 0.09, "grad_norm": 0.06392229281055582, "learning_rate": 9.19746835443038e-06, "loss": 3.7344, "step": 1836 }, { "epoch": 0.09, "grad_norm": 0.056924425469819684, "learning_rate": 9.196455696202532e-06, "loss": 3.4531, "step": 1838 }, { "epoch": 0.09, "grad_norm": 0.05449698462956753, "learning_rate": 9.195443037974684e-06, "loss": 3.8984, "step": 1840 }, { "epoch": 0.09, "grad_norm": 0.04942440256487059, "learning_rate": 9.194430379746836e-06, "loss": 2.8359, "step": 1842 }, { "epoch": 0.09, "grad_norm": 0.038117458370006276, "learning_rate": 9.193417721518987e-06, "loss": 3.0078, "step": 1844 }, { "epoch": 0.09, "grad_norm": 0.08574255865012372, "learning_rate": 9.19240506329114e-06, "loss": 1.7461, "step": 1846 }, { "epoch": 0.09, "grad_norm": 0.06234107636512481, "learning_rate": 9.191392405063291e-06, "loss": 2.5898, "step": 1848 }, { "epoch": 0.09, "grad_norm": 0.037910434556300145, "learning_rate": 9.190379746835443e-06, "loss": 2.8594, "step": 1850 }, { "epoch": 0.09, "grad_norm": 0.07145740607425113, "learning_rate": 9.189367088607595e-06, "loss": 3.1094, "step": 1852 }, { "epoch": 0.09, "grad_norm": 0.04669389907423529, "learning_rate": 9.188354430379749e-06, "loss": 2.1016, "step": 1854 }, { "epoch": 0.09, "grad_norm": 0.04517261096123241, "learning_rate": 9.1873417721519e-06, "loss": 1.2969, "step": 1856 }, { "epoch": 0.09, "grad_norm": 0.04764797244335215, "learning_rate": 9.186329113924052e-06, "loss": 1.3242, "step": 1858 }, { "epoch": 0.09, "grad_norm": 0.05860796398372702, "learning_rate": 9.185316455696204e-06, "loss": 2.5586, "step": 1860 }, { "epoch": 0.09, "grad_norm": 0.06007110457642946, "learning_rate": 9.184303797468355e-06, "loss": 3.7266, "step": 1862 }, { "epoch": 0.09, "grad_norm": 0.044862363101762844, "learning_rate": 9.183291139240506e-06, "loss": 3.3906, "step": 1864 }, { "epoch": 0.09, "grad_norm": 0.06256240098904445, "learning_rate": 9.182278481012658e-06, "loss": 4.0781, "step": 1866 }, { "epoch": 0.09, "grad_norm": 0.049413462774685786, "learning_rate": 9.18126582278481e-06, "loss": 2.5625, "step": 1868 }, { "epoch": 0.09, "grad_norm": 0.07872266149646259, "learning_rate": 9.180253164556962e-06, "loss": 3.0625, "step": 1870 }, { "epoch": 0.09, "grad_norm": 0.051236237205907774, "learning_rate": 9.179240506329114e-06, "loss": 2.0938, "step": 1872 }, { "epoch": 0.09, "grad_norm": 0.06067358494293436, "learning_rate": 9.178227848101266e-06, "loss": 2.6484, "step": 1874 }, { "epoch": 0.09, "grad_norm": 0.051347557367408814, "learning_rate": 9.177215189873418e-06, "loss": 2.25, "step": 1876 }, { "epoch": 0.09, "grad_norm": 0.04541107915125041, "learning_rate": 9.17620253164557e-06, "loss": 1.5117, "step": 1878 }, { "epoch": 0.09, "grad_norm": 0.061742002673103556, "learning_rate": 9.175189873417722e-06, "loss": 3.3477, "step": 1880 }, { "epoch": 0.09, "grad_norm": 0.04388859983545845, "learning_rate": 9.174177215189874e-06, "loss": 2.7969, "step": 1882 }, { "epoch": 0.09, "grad_norm": 0.05028624860068555, "learning_rate": 9.173164556962027e-06, "loss": 1.3867, "step": 1884 }, { "epoch": 0.09, "grad_norm": 0.037567079793479514, "learning_rate": 9.17215189873418e-06, "loss": 2.1758, "step": 1886 }, { "epoch": 0.09, "grad_norm": 0.07804356741746, "learning_rate": 9.17113924050633e-06, "loss": 2.3438, "step": 1888 }, { "epoch": 0.09, "grad_norm": 0.060313828023189554, "learning_rate": 9.170126582278481e-06, "loss": 3.2109, "step": 1890 }, { "epoch": 0.09, "grad_norm": 0.04613124112724252, "learning_rate": 9.169113924050633e-06, "loss": 2.7109, "step": 1892 }, { "epoch": 0.09, "grad_norm": 0.06529932058523641, "learning_rate": 9.168101265822785e-06, "loss": 4.5625, "step": 1894 }, { "epoch": 0.09, "grad_norm": 0.027167055894885384, "learning_rate": 9.167088607594937e-06, "loss": 3.2383, "step": 1896 }, { "epoch": 0.09, "grad_norm": 0.08827428262780188, "learning_rate": 9.166075949367089e-06, "loss": 2.5859, "step": 1898 }, { "epoch": 0.1, "grad_norm": 0.05137435098021163, "learning_rate": 9.165063291139241e-06, "loss": 2.7031, "step": 1900 }, { "epoch": 0.1, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 44.140625, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 91.2109375, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.80078125, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 68.92784118652344, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.1295, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.75, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.09, "step": 1900 }, { "epoch": 0.1, "eval_specter_top15HN_validation.jsonl.gz_acc1": 9.375, "eval_specter_top15HN_validation.jsonl.gz_acc3": 23.2421875, "eval_specter_top15HN_validation.jsonl.gz_loss": 1.4765625, "eval_specter_top15HN_validation.jsonl.gz_mrr": 21.729602813720703, "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.7467, "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 23.301, "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.364, "step": 1900 }, { "epoch": 0.1, "eval_nq_top15HN_validation.jsonl.gz_acc1": 42.96875, "eval_nq_top15HN_validation.jsonl.gz_acc3": 88.4765625, "eval_nq_top15HN_validation.jsonl.gz_loss": 0.875, "eval_nq_top15HN_validation.jsonl.gz_mrr": 66.89068603515625, "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.8056, "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.923, "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.093, "step": 1900 }, { "epoch": 0.1, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 36.1328125, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 77.1484375, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 1.0078125, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 59.08802795410156, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.583, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.389, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, "step": 1900 }, { "epoch": 0.1, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 40.4296875, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 83.7890625, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.03125, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 63.918670654296875, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.7193, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.971, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.093, "step": 1900 }, { "epoch": 0.1, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.875, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 94.3359375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.77734375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.03221130371094, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.6832, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.991, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.094, "step": 1900 }, { "epoch": 0.1, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 41.40625, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 86.1328125, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.7421875, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 65.09347534179688, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.8683, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.889, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.092, "step": 1900 }, { "epoch": 0.1, "eval_fever_top15HN_validation.jsonl.gz_acc1": 40.8203125, "eval_fever_top15HN_validation.jsonl.gz_acc3": 88.0859375, "eval_fever_top15HN_validation.jsonl.gz_loss": 2.046875, "eval_fever_top15HN_validation.jsonl.gz_mrr": 65.58374786376953, "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.0313, "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.992, "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.062, "step": 1900 }, { "epoch": 0.1, "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 43.359375, "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 88.671875, "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.81640625, "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 67.02096557617188, "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.1522, "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 12.422, "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.194, "step": 1900 }, { "epoch": 0.1, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 44.7265625, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 90.8203125, "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.078125, "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 68.63248443603516, "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 8.4707, "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 7.555, "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.118, "step": 1900 }, { "epoch": 0.1, "eval_arguana_synthetic_validation.jsonl.gz_acc1": 43.9453125, "eval_arguana_synthetic_validation.jsonl.gz_acc3": 91.9921875, "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, "eval_arguana_synthetic_validation.jsonl.gz_mrr": 69.39515686035156, "eval_arguana_synthetic_validation.jsonl.gz_runtime": 4.9344, "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.97, "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.203, "step": 1900 }, { "epoch": 0.1, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 30.6640625, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 70.5078125, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.84765625, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 53.77684020996094, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.5273, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.405, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.069, "step": 1900 }, { "epoch": 0.1, "grad_norm": 0.053785170928229424, "learning_rate": 9.164050632911393e-06, "loss": 3.8828, "step": 1902 }, { "epoch": 0.1, "grad_norm": 0.0461868598426467, "learning_rate": 9.163037974683545e-06, "loss": 1.375, "step": 1904 }, { "epoch": 0.1, "grad_norm": 0.05002143622006523, "learning_rate": 9.162025316455697e-06, "loss": 1.875, "step": 1906 }, { "epoch": 0.1, "grad_norm": 0.04870023429722329, "learning_rate": 9.161012658227848e-06, "loss": 3.2969, "step": 1908 }, { "epoch": 0.1, "grad_norm": 0.05839344995308881, "learning_rate": 9.16e-06, "loss": 3.9062, "step": 1910 }, { "epoch": 0.1, "grad_norm": 0.08302669871278688, "learning_rate": 9.158987341772152e-06, "loss": 1.6152, "step": 1912 }, { "epoch": 0.1, "grad_norm": 0.046646267906599995, "learning_rate": 9.157974683544306e-06, "loss": 3.2656, "step": 1914 }, { "epoch": 0.1, "grad_norm": 0.07758324358919483, "learning_rate": 9.156962025316456e-06, "loss": 2.6406, "step": 1916 }, { "epoch": 0.1, "grad_norm": 0.07478548742257349, "learning_rate": 9.155949367088608e-06, "loss": 2.5898, "step": 1918 }, { "epoch": 0.1, "grad_norm": 0.04206241097580769, "learning_rate": 9.15493670886076e-06, "loss": 2.6328, "step": 1920 }, { "epoch": 0.1, "grad_norm": 0.04248502997322808, "learning_rate": 9.153924050632912e-06, "loss": 2.7734, "step": 1922 }, { "epoch": 0.1, "grad_norm": 0.0966406030354209, "learning_rate": 9.152911392405064e-06, "loss": 3.0234, "step": 1924 }, { "epoch": 0.1, "grad_norm": 0.04103611961996913, "learning_rate": 9.151898734177216e-06, "loss": 2.2148, "step": 1926 }, { "epoch": 0.1, "grad_norm": 0.037762788617241944, "learning_rate": 9.150886075949368e-06, "loss": 4.0156, "step": 1928 }, { "epoch": 0.1, "grad_norm": 0.05577557670502644, "learning_rate": 9.14987341772152e-06, "loss": 3.9766, "step": 1930 }, { "epoch": 0.1, "grad_norm": 0.047125922095544774, "learning_rate": 9.148860759493671e-06, "loss": 1.9453, "step": 1932 }, { "epoch": 0.1, "grad_norm": 0.06720927600831134, "learning_rate": 9.147848101265823e-06, "loss": 2.1562, "step": 1934 }, { "epoch": 0.1, "grad_norm": 0.026518614274956994, "learning_rate": 9.146835443037975e-06, "loss": 3.2344, "step": 1936 }, { "epoch": 0.1, "grad_norm": 0.04774969312496838, "learning_rate": 9.145822784810127e-06, "loss": 2.8594, "step": 1938 }, { "epoch": 0.1, "grad_norm": 0.05511600074734071, "learning_rate": 9.144810126582279e-06, "loss": 4.2578, "step": 1940 }, { "epoch": 0.1, "grad_norm": 0.048985406763597, "learning_rate": 9.143797468354431e-06, "loss": 2.6406, "step": 1942 }, { "epoch": 0.1, "grad_norm": 0.05636077840457455, "learning_rate": 9.142784810126583e-06, "loss": 3.6094, "step": 1944 }, { "epoch": 0.1, "grad_norm": 0.05418663050549037, "learning_rate": 9.141772151898735e-06, "loss": 1.3633, "step": 1946 }, { "epoch": 0.1, "grad_norm": 0.04458754979305412, "learning_rate": 9.140759493670887e-06, "loss": 1.9141, "step": 1948 }, { "epoch": 0.1, "grad_norm": 0.12327256395105521, "learning_rate": 9.139746835443039e-06, "loss": 3.2422, "step": 1950 }, { "epoch": 0.1, "grad_norm": 0.050617511112204895, "learning_rate": 9.13873417721519e-06, "loss": 1.793, "step": 1952 }, { "epoch": 0.1, "grad_norm": 0.043921037433058696, "learning_rate": 9.137721518987342e-06, "loss": 2.6172, "step": 1954 }, { "epoch": 0.1, "grad_norm": 0.044706298993781236, "learning_rate": 9.136708860759494e-06, "loss": 1.5742, "step": 1956 }, { "epoch": 0.1, "grad_norm": 0.09443373497653566, "learning_rate": 9.135696202531646e-06, "loss": 2.7266, "step": 1958 }, { "epoch": 0.1, "grad_norm": 0.04091474069685536, "learning_rate": 9.134683544303798e-06, "loss": 1.6895, "step": 1960 }, { "epoch": 0.1, "grad_norm": 0.07368260844955699, "learning_rate": 9.13367088607595e-06, "loss": 2.3438, "step": 1962 }, { "epoch": 0.1, "grad_norm": 0.04875191940753315, "learning_rate": 9.132658227848102e-06, "loss": 2.0039, "step": 1964 }, { "epoch": 0.1, "grad_norm": 0.04480736068493538, "learning_rate": 9.131645569620254e-06, "loss": 1.9961, "step": 1966 }, { "epoch": 0.1, "grad_norm": 0.03828623414436692, "learning_rate": 9.130632911392406e-06, "loss": 1.9297, "step": 1968 }, { "epoch": 0.1, "grad_norm": 0.05066921180757211, "learning_rate": 9.129620253164558e-06, "loss": 1.3828, "step": 1970 }, { "epoch": 0.1, "grad_norm": 0.051897470073736116, "learning_rate": 9.12860759493671e-06, "loss": 2.8516, "step": 1972 }, { "epoch": 0.1, "grad_norm": 0.08454973915446604, "learning_rate": 9.127594936708861e-06, "loss": 2.3281, "step": 1974 }, { "epoch": 0.1, "grad_norm": 0.051129258223901825, "learning_rate": 9.126582278481013e-06, "loss": 3.4531, "step": 1976 }, { "epoch": 0.1, "grad_norm": 0.055346604302165245, "learning_rate": 9.125569620253165e-06, "loss": 2.3535, "step": 1978 }, { "epoch": 0.1, "grad_norm": 0.05440316191711523, "learning_rate": 9.124556962025317e-06, "loss": 3.2344, "step": 1980 }, { "epoch": 0.1, "grad_norm": 0.04880116433640819, "learning_rate": 9.123544303797469e-06, "loss": 1.9531, "step": 1982 }, { "epoch": 0.1, "grad_norm": 0.13712174422285836, "learning_rate": 9.122531645569621e-06, "loss": 2.0703, "step": 1984 }, { "epoch": 0.1, "grad_norm": 0.05736294109541064, "learning_rate": 9.121518987341773e-06, "loss": 2.5859, "step": 1986 }, { "epoch": 0.1, "grad_norm": 0.05249456121664606, "learning_rate": 9.120506329113925e-06, "loss": 2.582, "step": 1988 }, { "epoch": 0.1, "grad_norm": 0.05620227956798809, "learning_rate": 9.119493670886077e-06, "loss": 3.5391, "step": 1990 }, { "epoch": 0.1, "grad_norm": 0.02457150950267937, "learning_rate": 9.118481012658229e-06, "loss": 3.2578, "step": 1992 }, { "epoch": 0.1, "grad_norm": 0.04719030609534645, "learning_rate": 9.11746835443038e-06, "loss": 1.9531, "step": 1994 }, { "epoch": 0.1, "grad_norm": 0.1192003241257573, "learning_rate": 9.116455696202532e-06, "loss": 3.5938, "step": 1996 }, { "epoch": 0.1, "grad_norm": 0.11820927850372065, "learning_rate": 9.115443037974684e-06, "loss": 3.5, "step": 1998 }, { "epoch": 0.1, "grad_norm": 0.04342049766791241, "learning_rate": 9.114430379746836e-06, "loss": 1.9102, "step": 2000 }, { "epoch": 0.1, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 44.3359375, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 91.796875, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.80078125, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 69.31722259521484, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.1798, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.725, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.089, "step": 2000 }, { "epoch": 0.1, "eval_specter_top15HN_validation.jsonl.gz_acc1": 8.0078125, "eval_specter_top15HN_validation.jsonl.gz_acc3": 19.140625, "eval_specter_top15HN_validation.jsonl.gz_loss": 1.484375, "eval_specter_top15HN_validation.jsonl.gz_mrr": 20.005353927612305, "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.56, "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 25.0, "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.391, "step": 2000 }, { "epoch": 0.1, "eval_nq_top15HN_validation.jsonl.gz_acc1": 47.8515625, "eval_nq_top15HN_validation.jsonl.gz_acc3": 96.484375, "eval_nq_top15HN_validation.jsonl.gz_loss": 0.85546875, "eval_nq_top15HN_validation.jsonl.gz_mrr": 72.37664794921875, "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.9675, "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.835, "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.091, "step": 2000 }, { "epoch": 0.1, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 38.671875, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 80.859375, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.99609375, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 62.15817642211914, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.2599, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.488, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.07, "step": 2000 }, { "epoch": 0.1, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 41.6015625, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 85.7421875, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.03125, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 64.71186828613281, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.928, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.856, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.092, "step": 2000 }, { "epoch": 0.1, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 47.4609375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 95.5078125, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.77734375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.7751693725586, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.902, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.87, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.092, "step": 2000 }, { "epoch": 0.1, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 42.96875, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 88.0859375, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.73828125, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 66.7734146118164, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.75, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.953, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.093, "step": 2000 }, { "epoch": 0.1, "eval_fever_top15HN_validation.jsonl.gz_acc1": 40.4296875, "eval_fever_top15HN_validation.jsonl.gz_acc3": 86.9140625, "eval_fever_top15HN_validation.jsonl.gz_loss": 1.9296875, "eval_fever_top15HN_validation.jsonl.gz_mrr": 65.1298828125, "eval_fever_top15HN_validation.jsonl.gz_runtime": 15.9037, "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 4.024, "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.063, "step": 2000 }, { "epoch": 0.1, "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 37.890625, "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 80.859375, "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.8359375, "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 60.76201629638672, "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.1301, "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 12.475, "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.195, "step": 2000 }, { "epoch": 0.1, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 44.921875, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 91.6015625, "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.0859375, "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 69.22639465332031, "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 9.6422, "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 6.638, "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.104, "step": 2000 }, { "epoch": 0.1, "eval_arguana_synthetic_validation.jsonl.gz_acc1": 43.359375, "eval_arguana_synthetic_validation.jsonl.gz_acc3": 91.015625, "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, "eval_arguana_synthetic_validation.jsonl.gz_mrr": 68.24501037597656, "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.1615, "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.4, "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.194, "step": 2000 }, { "epoch": 0.1, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 33.0078125, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 74.0234375, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.8515625, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 56.28733444213867, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.3188, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.47, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, "step": 2000 }, { "epoch": 0.1, "grad_norm": 0.05222602039717748, "learning_rate": 9.113417721518988e-06, "loss": 3.9219, "step": 2002 }, { "epoch": 0.1, "grad_norm": 0.08077667713595828, "learning_rate": 9.11240506329114e-06, "loss": 3.5, "step": 2004 }, { "epoch": 0.1, "grad_norm": 0.0742884923114933, "learning_rate": 9.111392405063292e-06, "loss": 3.1172, "step": 2006 }, { "epoch": 0.1, "grad_norm": 0.11188361384486585, "learning_rate": 9.110379746835444e-06, "loss": 1.6562, "step": 2008 }, { "epoch": 0.1, "grad_norm": 0.06437902091949863, "learning_rate": 9.109367088607596e-06, "loss": 1.2559, "step": 2010 }, { "epoch": 0.1, "grad_norm": 0.06530103204629609, "learning_rate": 9.108354430379748e-06, "loss": 3.7578, "step": 2012 }, { "epoch": 0.1, "grad_norm": 0.060746099738566636, "learning_rate": 9.1073417721519e-06, "loss": 2.1758, "step": 2014 }, { "epoch": 0.1, "grad_norm": 0.08138424450084206, "learning_rate": 9.106329113924052e-06, "loss": 2.5703, "step": 2016 }, { "epoch": 0.1, "grad_norm": 0.056468531591402575, "learning_rate": 9.105316455696203e-06, "loss": 2.5781, "step": 2018 }, { "epoch": 0.1, "grad_norm": 0.042063520801115216, "learning_rate": 9.104303797468355e-06, "loss": 3.3672, "step": 2020 }, { "epoch": 0.1, "grad_norm": 0.043034170481913506, "learning_rate": 9.103291139240507e-06, "loss": 3.8203, "step": 2022 }, { "epoch": 0.1, "grad_norm": 0.05769275098821008, "learning_rate": 9.10227848101266e-06, "loss": 3.4609, "step": 2024 }, { "epoch": 0.1, "grad_norm": 0.04417989427229762, "learning_rate": 9.101265822784811e-06, "loss": 2.8594, "step": 2026 }, { "epoch": 0.1, "grad_norm": 0.07052014737004543, "learning_rate": 9.100253164556963e-06, "loss": 2.1562, "step": 2028 }, { "epoch": 0.1, "grad_norm": 0.09688645225943193, "learning_rate": 9.099240506329115e-06, "loss": 4.0234, "step": 2030 }, { "epoch": 0.1, "grad_norm": 0.04689199169468624, "learning_rate": 9.098227848101267e-06, "loss": 2.6406, "step": 2032 }, { "epoch": 0.1, "grad_norm": 0.04508824741386778, "learning_rate": 9.097215189873419e-06, "loss": 3.1484, "step": 2034 }, { "epoch": 0.1, "grad_norm": 0.03670749802610796, "learning_rate": 9.09620253164557e-06, "loss": 3.5547, "step": 2036 }, { "epoch": 0.1, "grad_norm": 0.0516138977486577, "learning_rate": 9.095189873417723e-06, "loss": 1.543, "step": 2038 }, { "epoch": 0.1, "grad_norm": 0.0538854278902059, "learning_rate": 9.094177215189874e-06, "loss": 2.1797, "step": 2040 }, { "epoch": 0.1, "grad_norm": 0.08355958376302385, "learning_rate": 9.093164556962026e-06, "loss": 3.7031, "step": 2042 }, { "epoch": 0.1, "grad_norm": 0.07888511195179149, "learning_rate": 9.092151898734178e-06, "loss": 3.1562, "step": 2044 }, { "epoch": 0.1, "grad_norm": 0.044843880698050134, "learning_rate": 9.09113924050633e-06, "loss": 1.9375, "step": 2046 }, { "epoch": 0.1, "grad_norm": 0.0844975914287397, "learning_rate": 9.09012658227848e-06, "loss": 3.4961, "step": 2048 }, { "epoch": 0.1, "grad_norm": 0.0488064193164904, "learning_rate": 9.089113924050634e-06, "loss": 2.0195, "step": 2050 }, { "epoch": 0.1, "grad_norm": 0.08197222584340348, "learning_rate": 9.088101265822786e-06, "loss": 2.3711, "step": 2052 }, { "epoch": 0.1, "grad_norm": 0.04561496764349859, "learning_rate": 9.087088607594938e-06, "loss": 2.6445, "step": 2054 }, { "epoch": 0.1, "grad_norm": 0.044886212083239126, "learning_rate": 9.08607594936709e-06, "loss": 2.9141, "step": 2056 }, { "epoch": 0.1, "grad_norm": 0.050742476130656494, "learning_rate": 9.085063291139242e-06, "loss": 2.9531, "step": 2058 }, { "epoch": 0.1, "grad_norm": 0.05563293598990602, "learning_rate": 9.084050632911394e-06, "loss": 3.5703, "step": 2060 }, { "epoch": 0.1, "grad_norm": 0.051513608597992276, "learning_rate": 9.083037974683545e-06, "loss": 1.0918, "step": 2062 }, { "epoch": 0.1, "grad_norm": 0.039394328240253074, "learning_rate": 9.082025316455697e-06, "loss": 2.9141, "step": 2064 }, { "epoch": 0.1, "grad_norm": 0.04942604191134841, "learning_rate": 9.08101265822785e-06, "loss": 2.3398, "step": 2066 }, { "epoch": 0.1, "grad_norm": 0.053334874655355094, "learning_rate": 9.080000000000001e-06, "loss": 3.9453, "step": 2068 }, { "epoch": 0.1, "grad_norm": 0.067724388386229, "learning_rate": 9.078987341772153e-06, "loss": 1.1211, "step": 2070 }, { "epoch": 0.1, "grad_norm": 0.046290634426354706, "learning_rate": 9.077974683544305e-06, "loss": 1.3906, "step": 2072 }, { "epoch": 0.1, "grad_norm": 0.07936121862274997, "learning_rate": 9.076962025316457e-06, "loss": 1.373, "step": 2074 }, { "epoch": 0.1, "grad_norm": 0.049220064217566195, "learning_rate": 9.075949367088607e-06, "loss": 4.2734, "step": 2076 }, { "epoch": 0.1, "grad_norm": 0.04754414632707146, "learning_rate": 9.074936708860759e-06, "loss": 2.4922, "step": 2078 }, { "epoch": 0.1, "grad_norm": 0.04787318801734571, "learning_rate": 9.073924050632913e-06, "loss": 1.3906, "step": 2080 }, { "epoch": 0.1, "grad_norm": 0.1139301835727937, "learning_rate": 9.072911392405065e-06, "loss": 2.3945, "step": 2082 }, { "epoch": 0.1, "grad_norm": 0.04584280660291769, "learning_rate": 9.071898734177216e-06, "loss": 1.8242, "step": 2084 }, { "epoch": 0.1, "grad_norm": 0.10964906789491767, "learning_rate": 9.070886075949368e-06, "loss": 1.5605, "step": 2086 }, { "epoch": 0.1, "grad_norm": 0.04726672407716711, "learning_rate": 9.06987341772152e-06, "loss": 4.1562, "step": 2088 }, { "epoch": 0.1, "grad_norm": 0.042038330796539077, "learning_rate": 9.068860759493672e-06, "loss": 3.1953, "step": 2090 }, { "epoch": 0.1, "grad_norm": 0.05953614239851778, "learning_rate": 9.067848101265824e-06, "loss": 3.1562, "step": 2092 }, { "epoch": 0.1, "grad_norm": 0.04562048501461819, "learning_rate": 9.066835443037976e-06, "loss": 2.2109, "step": 2094 }, { "epoch": 0.1, "grad_norm": 0.07193685505579554, "learning_rate": 9.065822784810128e-06, "loss": 4.0078, "step": 2096 }, { "epoch": 0.1, "grad_norm": 0.07965518457460447, "learning_rate": 9.06481012658228e-06, "loss": 2.0742, "step": 2098 }, { "epoch": 0.1, "grad_norm": 0.048523563405114496, "learning_rate": 9.063797468354432e-06, "loss": 3.9141, "step": 2100 }, { "epoch": 0.1, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 44.53125, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 91.40625, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.8046875, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 68.81912994384766, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.3194, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.654, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.088, "step": 2100 }, { "epoch": 0.1, "eval_specter_top15HN_validation.jsonl.gz_acc1": 11.328125, "eval_specter_top15HN_validation.jsonl.gz_acc3": 26.171875, "eval_specter_top15HN_validation.jsonl.gz_loss": 1.4453125, "eval_specter_top15HN_validation.jsonl.gz_mrr": 23.504255294799805, "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.5172, "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 25.425, "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.397, "step": 2100 }, { "epoch": 0.1, "eval_nq_top15HN_validation.jsonl.gz_acc1": 44.921875, "eval_nq_top15HN_validation.jsonl.gz_acc3": 91.796875, "eval_nq_top15HN_validation.jsonl.gz_loss": 0.86328125, "eval_nq_top15HN_validation.jsonl.gz_mrr": 68.86204528808594, "eval_nq_top15HN_validation.jsonl.gz_runtime": 11.0751, "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.779, "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.09, "step": 2100 }, { "epoch": 0.1, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 37.6953125, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 78.90625, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.99609375, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 59.85957717895508, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.2226, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.5, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.07, "step": 2100 }, { "epoch": 0.1, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 40.4296875, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 83.3984375, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.0390625, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 63.35692596435547, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.8162, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.917, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.092, "step": 2100 }, { "epoch": 0.1, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 47.0703125, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 95.1171875, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.43053436279297, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.7984, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.927, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.093, "step": 2100 }, { "epoch": 0.1, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 42.7734375, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 88.671875, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.73828125, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 66.77860260009766, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 9.7879, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 6.539, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.102, "step": 2100 }, { "epoch": 0.1, "eval_fever_top15HN_validation.jsonl.gz_acc1": 42.1875, "eval_fever_top15HN_validation.jsonl.gz_acc3": 89.0625, "eval_fever_top15HN_validation.jsonl.gz_loss": 1.9140625, "eval_fever_top15HN_validation.jsonl.gz_mrr": 66.59601593017578, "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.3736, "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.909, "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.061, "step": 2100 }, { "epoch": 0.1, "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 36.71875, "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 78.125, "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.8359375, "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 59.91558074951172, "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.7232, "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 11.183, "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.175, "step": 2100 }, { "epoch": 0.1, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 44.53125, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 90.8203125, "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.0859375, "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 68.49028015136719, "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 6.6111, "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 9.681, "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.151, "step": 2100 }, { "epoch": 0.1, "eval_arguana_synthetic_validation.jsonl.gz_acc1": 43.359375, "eval_arguana_synthetic_validation.jsonl.gz_acc3": 91.015625, "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, "eval_arguana_synthetic_validation.jsonl.gz_mrr": 68.39799499511719, "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.0332, "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.716, "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.199, "step": 2100 }, { "epoch": 0.1, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 28.90625, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 67.578125, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.8515625, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 51.7280387878418, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.1246, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.531, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.071, "step": 2100 }, { "epoch": 0.11, "grad_norm": 0.07387735249473802, "learning_rate": 9.062784810126582e-06, "loss": 2.5391, "step": 2102 }, { "epoch": 0.11, "grad_norm": 0.0458105010880384, "learning_rate": 9.061772151898734e-06, "loss": 2.8047, "step": 2104 }, { "epoch": 0.11, "grad_norm": 0.12684186314779236, "learning_rate": 9.060759493670886e-06, "loss": 1.7695, "step": 2106 }, { "epoch": 0.11, "grad_norm": 0.08325164548755534, "learning_rate": 9.059746835443038e-06, "loss": 3.1602, "step": 2108 }, { "epoch": 0.11, "grad_norm": 0.04912619687489667, "learning_rate": 9.058734177215191e-06, "loss": 3.5547, "step": 2110 }, { "epoch": 0.11, "grad_norm": 0.039371876159854616, "learning_rate": 9.057721518987343e-06, "loss": 3.1875, "step": 2112 }, { "epoch": 0.11, "grad_norm": 0.055597342693624215, "learning_rate": 9.056708860759495e-06, "loss": 1.7031, "step": 2114 }, { "epoch": 0.11, "grad_norm": 0.07703184419677474, "learning_rate": 9.055696202531647e-06, "loss": 4.5859, "step": 2116 }, { "epoch": 0.11, "grad_norm": 0.05945928380987463, "learning_rate": 9.054683544303799e-06, "loss": 3.2578, "step": 2118 }, { "epoch": 0.11, "grad_norm": 0.06653723000877092, "learning_rate": 9.05367088607595e-06, "loss": 3.5391, "step": 2120 }, { "epoch": 0.11, "grad_norm": 0.0570261593728403, "learning_rate": 9.052658227848103e-06, "loss": 2.6641, "step": 2122 }, { "epoch": 0.11, "grad_norm": 0.05441555245366215, "learning_rate": 9.051645569620255e-06, "loss": 2.793, "step": 2124 }, { "epoch": 0.11, "grad_norm": 0.05697224198374822, "learning_rate": 9.050632911392407e-06, "loss": 2.6133, "step": 2126 }, { "epoch": 0.11, "grad_norm": 0.04949291905010763, "learning_rate": 9.049620253164557e-06, "loss": 1.9219, "step": 2128 }, { "epoch": 0.11, "grad_norm": 0.05592274580805365, "learning_rate": 9.048607594936709e-06, "loss": 2.6289, "step": 2130 }, { "epoch": 0.11, "grad_norm": 0.0455284168193864, "learning_rate": 9.04759493670886e-06, "loss": 3.0234, "step": 2132 }, { "epoch": 0.11, "grad_norm": 0.05619766438175372, "learning_rate": 9.046582278481012e-06, "loss": 3.0703, "step": 2134 }, { "epoch": 0.11, "grad_norm": 0.06472864649519333, "learning_rate": 9.045569620253164e-06, "loss": 1.7246, "step": 2136 }, { "epoch": 0.11, "grad_norm": 0.04752637126015769, "learning_rate": 9.044556962025316e-06, "loss": 2.3828, "step": 2138 }, { "epoch": 0.11, "grad_norm": 0.04792697472111959, "learning_rate": 9.04354430379747e-06, "loss": 1.7344, "step": 2140 }, { "epoch": 0.11, "grad_norm": 0.04742442196081877, "learning_rate": 9.042531645569622e-06, "loss": 2.875, "step": 2142 }, { "epoch": 0.11, "grad_norm": 0.05231304647118661, "learning_rate": 9.041518987341774e-06, "loss": 2.5781, "step": 2144 }, { "epoch": 0.11, "grad_norm": 0.04065182251997917, "learning_rate": 9.040506329113926e-06, "loss": 3.125, "step": 2146 }, { "epoch": 0.11, "grad_norm": 0.05189222525590392, "learning_rate": 9.039493670886077e-06, "loss": 2.4062, "step": 2148 }, { "epoch": 0.11, "grad_norm": 0.04470197614035536, "learning_rate": 9.03848101265823e-06, "loss": 1.3398, "step": 2150 }, { "epoch": 0.11, "grad_norm": 0.04976772591738967, "learning_rate": 9.037468354430381e-06, "loss": 2.5781, "step": 2152 }, { "epoch": 0.11, "grad_norm": 0.07822486038531574, "learning_rate": 9.036455696202533e-06, "loss": 2.1875, "step": 2154 }, { "epoch": 0.11, "grad_norm": 0.04786407540653388, "learning_rate": 9.035443037974683e-06, "loss": 3.125, "step": 2156 }, { "epoch": 0.11, "grad_norm": 0.043433457581407584, "learning_rate": 9.034430379746835e-06, "loss": 1.75, "step": 2158 }, { "epoch": 0.11, "grad_norm": 0.057060320757604435, "learning_rate": 9.033417721518987e-06, "loss": 2.5781, "step": 2160 }, { "epoch": 0.11, "grad_norm": 0.043933465473367316, "learning_rate": 9.03240506329114e-06, "loss": 3.4766, "step": 2162 }, { "epoch": 0.11, "grad_norm": 0.04949802297665267, "learning_rate": 9.031392405063291e-06, "loss": 2.7891, "step": 2164 }, { "epoch": 0.11, "grad_norm": 0.04954968413272497, "learning_rate": 9.030379746835443e-06, "loss": 2.1211, "step": 2166 }, { "epoch": 0.11, "grad_norm": 0.05060258024534153, "learning_rate": 9.029367088607595e-06, "loss": 3.1641, "step": 2168 }, { "epoch": 0.11, "grad_norm": 0.04724895305426068, "learning_rate": 9.028354430379748e-06, "loss": 3.4766, "step": 2170 }, { "epoch": 0.11, "grad_norm": 0.03995820056141506, "learning_rate": 9.0273417721519e-06, "loss": 2.6797, "step": 2172 }, { "epoch": 0.11, "grad_norm": 0.05166613358528966, "learning_rate": 9.026329113924052e-06, "loss": 1.9453, "step": 2174 }, { "epoch": 0.11, "grad_norm": 0.07654649362821431, "learning_rate": 9.025316455696204e-06, "loss": 0.8867, "step": 2176 }, { "epoch": 0.11, "grad_norm": 0.048252976519986994, "learning_rate": 9.024303797468356e-06, "loss": 2.6445, "step": 2178 }, { "epoch": 0.11, "grad_norm": 0.08881036283074599, "learning_rate": 9.023291139240508e-06, "loss": 3.1016, "step": 2180 }, { "epoch": 0.11, "grad_norm": 0.04284117329093428, "learning_rate": 9.022278481012658e-06, "loss": 2.3125, "step": 2182 }, { "epoch": 0.11, "grad_norm": 0.04474664022993633, "learning_rate": 9.02126582278481e-06, "loss": 2.8828, "step": 2184 }, { "epoch": 0.11, "grad_norm": 0.03174705200783846, "learning_rate": 9.020253164556962e-06, "loss": 4.5469, "step": 2186 }, { "epoch": 0.11, "grad_norm": 0.04487331242944554, "learning_rate": 9.019240506329114e-06, "loss": 3.6172, "step": 2188 }, { "epoch": 0.11, "grad_norm": 0.06609003075152096, "learning_rate": 9.018227848101266e-06, "loss": 2.5938, "step": 2190 }, { "epoch": 0.11, "grad_norm": 0.07497844137842792, "learning_rate": 9.017215189873418e-06, "loss": 3.9453, "step": 2192 }, { "epoch": 0.11, "grad_norm": 0.06233797267984074, "learning_rate": 9.01620253164557e-06, "loss": 2.3945, "step": 2194 }, { "epoch": 0.11, "grad_norm": 0.05036065260023731, "learning_rate": 9.015189873417722e-06, "loss": 2.9766, "step": 2196 }, { "epoch": 0.11, "grad_norm": 0.0546727501187819, "learning_rate": 9.014177215189874e-06, "loss": 3.8047, "step": 2198 }, { "epoch": 0.11, "grad_norm": 0.08060857074075317, "learning_rate": 9.013164556962027e-06, "loss": 1.6562, "step": 2200 }, { "epoch": 0.11, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 45.5078125, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 93.1640625, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.796875, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 69.55633544921875, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.3241, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.652, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.088, "step": 2200 }, { "epoch": 0.11, "eval_specter_top15HN_validation.jsonl.gz_acc1": 8.7890625, "eval_specter_top15HN_validation.jsonl.gz_acc3": 20.3125, "eval_specter_top15HN_validation.jsonl.gz_loss": 1.46875, "eval_specter_top15HN_validation.jsonl.gz_mrr": 20.747766494750977, "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.93, "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 21.843, "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.341, "step": 2200 }, { "epoch": 0.11, "eval_nq_top15HN_validation.jsonl.gz_acc1": 45.8984375, "eval_nq_top15HN_validation.jsonl.gz_acc3": 95.1171875, "eval_nq_top15HN_validation.jsonl.gz_loss": 0.86328125, "eval_nq_top15HN_validation.jsonl.gz_mrr": 70.41649627685547, "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.8324, "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.908, "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.092, "step": 2200 }, { "epoch": 0.11, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 40.0390625, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 83.203125, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.9921875, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 63.23518371582031, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.249, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.492, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.07, "step": 2200 }, { "epoch": 0.11, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 40.625, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 83.59375, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.0390625, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 63.68541717529297, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 11.2193, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.704, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.089, "step": 2200 }, { "epoch": 0.11, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.6796875, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 94.53125, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 70.95918273925781, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.7864, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.933, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.093, "step": 2200 }, { "epoch": 0.11, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 42.1875, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 86.71875, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.7421875, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 65.048095703125, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 9.8185, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 6.518, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.102, "step": 2200 }, { "epoch": 0.11, "eval_fever_top15HN_validation.jsonl.gz_acc1": 42.1875, "eval_fever_top15HN_validation.jsonl.gz_acc3": 89.2578125, "eval_fever_top15HN_validation.jsonl.gz_loss": 1.9765625, "eval_fever_top15HN_validation.jsonl.gz_mrr": 67.1825942993164, "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.0967, "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.976, "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.062, "step": 2200 }, { "epoch": 0.11, "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 37.109375, "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 78.7109375, "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.8359375, "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 60.018646240234375, "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.285, "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 12.11, "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.189, "step": 2200 }, { "epoch": 0.11, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 46.484375, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 93.75, "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.0703125, "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 70.67472076416016, "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 9.6155, "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 6.656, "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.104, "step": 2200 }, { "epoch": 0.11, "eval_arguana_synthetic_validation.jsonl.gz_acc1": 41.796875, "eval_arguana_synthetic_validation.jsonl.gz_acc3": 89.453125, "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, "eval_arguana_synthetic_validation.jsonl.gz_mrr": 68.64289855957031, "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.3255, "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.018, "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.188, "step": 2200 }, { "epoch": 0.11, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 29.4921875, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 67.1875, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.8671875, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 51.89695358276367, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.3591, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.457, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, "step": 2200 }, { "epoch": 0.11, "grad_norm": 0.040314831021747495, "learning_rate": 9.012151898734179e-06, "loss": 2.1406, "step": 2202 }, { "epoch": 0.11, "grad_norm": 0.047171810341216105, "learning_rate": 9.011139240506331e-06, "loss": 1.418, "step": 2204 }, { "epoch": 0.11, "grad_norm": 0.048262845473527605, "learning_rate": 9.010126582278483e-06, "loss": 1.4023, "step": 2206 }, { "epoch": 0.11, "grad_norm": 0.05154581332768385, "learning_rate": 9.009113924050633e-06, "loss": 1.3281, "step": 2208 }, { "epoch": 0.11, "grad_norm": 0.04717605493865237, "learning_rate": 9.008101265822785e-06, "loss": 3.1172, "step": 2210 }, { "epoch": 0.11, "grad_norm": 0.0539025659615322, "learning_rate": 9.007088607594937e-06, "loss": 3.0234, "step": 2212 }, { "epoch": 0.11, "grad_norm": 0.04953877670880636, "learning_rate": 9.006075949367089e-06, "loss": 3.3828, "step": 2214 }, { "epoch": 0.11, "grad_norm": 0.04434792675801702, "learning_rate": 9.00506329113924e-06, "loss": 2.4766, "step": 2216 }, { "epoch": 0.11, "grad_norm": 0.04454442496488195, "learning_rate": 9.004050632911393e-06, "loss": 1.3398, "step": 2218 }, { "epoch": 0.11, "grad_norm": 0.06255015953249068, "learning_rate": 9.003037974683545e-06, "loss": 2.0781, "step": 2220 }, { "epoch": 0.11, "grad_norm": 0.05884765805099793, "learning_rate": 9.002025316455696e-06, "loss": 2.5977, "step": 2222 }, { "epoch": 0.11, "grad_norm": 0.04479049825022292, "learning_rate": 9.001012658227848e-06, "loss": 2.8984, "step": 2224 }, { "epoch": 0.11, "grad_norm": 0.07160542458432002, "learning_rate": 9e-06, "loss": 3.0781, "step": 2226 }, { "epoch": 0.11, "grad_norm": 0.06552493223138955, "learning_rate": 8.998987341772152e-06, "loss": 2.9062, "step": 2228 }, { "epoch": 0.11, "grad_norm": 0.05722711956677415, "learning_rate": 8.997974683544306e-06, "loss": 3.8516, "step": 2230 }, { "epoch": 0.11, "grad_norm": 0.06535531913348491, "learning_rate": 8.996962025316458e-06, "loss": 2.4883, "step": 2232 }, { "epoch": 0.11, "grad_norm": 0.07461287917151682, "learning_rate": 8.995949367088608e-06, "loss": 2.1797, "step": 2234 }, { "epoch": 0.11, "grad_norm": 0.1164959969597713, "learning_rate": 8.99493670886076e-06, "loss": 2.2891, "step": 2236 }, { "epoch": 0.11, "grad_norm": 0.045805685191550656, "learning_rate": 8.993924050632912e-06, "loss": 1.2891, "step": 2238 }, { "epoch": 0.11, "grad_norm": 0.12476488442819236, "learning_rate": 8.992911392405064e-06, "loss": 2.5742, "step": 2240 }, { "epoch": 0.11, "grad_norm": 0.055790341758965224, "learning_rate": 8.991898734177215e-06, "loss": 3.75, "step": 2242 }, { "epoch": 0.11, "grad_norm": 0.10690788499980759, "learning_rate": 8.990886075949367e-06, "loss": 2.041, "step": 2244 }, { "epoch": 0.11, "grad_norm": 0.0468540914912707, "learning_rate": 8.98987341772152e-06, "loss": 2.3203, "step": 2246 }, { "epoch": 0.11, "grad_norm": 0.0431115651648458, "learning_rate": 8.988860759493671e-06, "loss": 3.4219, "step": 2248 }, { "epoch": 0.11, "grad_norm": 0.045633132639230405, "learning_rate": 8.987848101265823e-06, "loss": 2.5391, "step": 2250 }, { "epoch": 0.11, "grad_norm": 0.04884449922669781, "learning_rate": 8.986835443037975e-06, "loss": 1.8633, "step": 2252 }, { "epoch": 0.11, "grad_norm": 0.05455381581326918, "learning_rate": 8.985822784810127e-06, "loss": 2.1719, "step": 2254 }, { "epoch": 0.11, "grad_norm": 0.05550927886059573, "learning_rate": 8.984810126582279e-06, "loss": 3.0625, "step": 2256 }, { "epoch": 0.11, "grad_norm": 0.07083931236183479, "learning_rate": 8.98379746835443e-06, "loss": 2.4297, "step": 2258 }, { "epoch": 0.11, "grad_norm": 0.0446496468728146, "learning_rate": 8.982784810126584e-06, "loss": 3.6328, "step": 2260 }, { "epoch": 0.11, "grad_norm": 0.06287031438083145, "learning_rate": 8.981772151898735e-06, "loss": 2.8906, "step": 2262 }, { "epoch": 0.11, "grad_norm": 0.052572239536472096, "learning_rate": 8.980759493670886e-06, "loss": 4.25, "step": 2264 }, { "epoch": 0.11, "grad_norm": 0.053437322827513935, "learning_rate": 8.979746835443038e-06, "loss": 2.832, "step": 2266 }, { "epoch": 0.11, "grad_norm": 0.060520529783522306, "learning_rate": 8.97873417721519e-06, "loss": 3.6172, "step": 2268 }, { "epoch": 0.11, "grad_norm": 0.04523560344019551, "learning_rate": 8.977721518987342e-06, "loss": 1.3125, "step": 2270 }, { "epoch": 0.11, "grad_norm": 0.05590894648628289, "learning_rate": 8.976708860759494e-06, "loss": 2.6523, "step": 2272 }, { "epoch": 0.11, "grad_norm": 0.08261328512133413, "learning_rate": 8.975696202531646e-06, "loss": 3.0156, "step": 2274 }, { "epoch": 0.11, "grad_norm": 0.07630333499348366, "learning_rate": 8.974683544303798e-06, "loss": 2.4922, "step": 2276 }, { "epoch": 0.11, "grad_norm": 0.07586013753293634, "learning_rate": 8.97367088607595e-06, "loss": 1.3711, "step": 2278 }, { "epoch": 0.11, "grad_norm": 0.041416353553989003, "learning_rate": 8.972658227848102e-06, "loss": 2.6406, "step": 2280 }, { "epoch": 0.11, "grad_norm": 0.05881140734182516, "learning_rate": 8.971645569620254e-06, "loss": 2.625, "step": 2282 }, { "epoch": 0.11, "grad_norm": 0.05417846806548981, "learning_rate": 8.970632911392406e-06, "loss": 3.5859, "step": 2284 }, { "epoch": 0.11, "grad_norm": 0.05058786589212933, "learning_rate": 8.969620253164557e-06, "loss": 2.3828, "step": 2286 }, { "epoch": 0.11, "grad_norm": 0.04866282377765518, "learning_rate": 8.96860759493671e-06, "loss": 2.6211, "step": 2288 }, { "epoch": 0.11, "grad_norm": 0.044517872080821724, "learning_rate": 8.967594936708861e-06, "loss": 2.8359, "step": 2290 }, { "epoch": 0.11, "grad_norm": 0.04184764002943263, "learning_rate": 8.966582278481013e-06, "loss": 2.4922, "step": 2292 }, { "epoch": 0.11, "grad_norm": 0.04532587269240339, "learning_rate": 8.965569620253165e-06, "loss": 2.2891, "step": 2294 }, { "epoch": 0.11, "grad_norm": 0.04478519315755515, "learning_rate": 8.964556962025317e-06, "loss": 1.207, "step": 2296 }, { "epoch": 0.11, "grad_norm": 0.05864167836092175, "learning_rate": 8.963544303797469e-06, "loss": 3.8438, "step": 2298 }, { "epoch": 0.12, "grad_norm": 0.04066455404468236, "learning_rate": 8.96253164556962e-06, "loss": 3.1484, "step": 2300 }, { "epoch": 0.12, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 44.921875, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 92.7734375, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.796875, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 69.36105346679688, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.4734, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.578, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.087, "step": 2300 }, { "epoch": 0.12, "eval_specter_top15HN_validation.jsonl.gz_acc1": 9.1796875, "eval_specter_top15HN_validation.jsonl.gz_acc3": 22.65625, "eval_specter_top15HN_validation.jsonl.gz_loss": 1.4765625, "eval_specter_top15HN_validation.jsonl.gz_mrr": 21.005577087402344, "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.6634, "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 24.029, "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.375, "step": 2300 }, { "epoch": 0.12, "eval_nq_top15HN_validation.jsonl.gz_acc1": 45.5078125, "eval_nq_top15HN_validation.jsonl.gz_acc3": 92.578125, "eval_nq_top15HN_validation.jsonl.gz_loss": 0.87109375, "eval_nq_top15HN_validation.jsonl.gz_mrr": 69.31172943115234, "eval_nq_top15HN_validation.jsonl.gz_runtime": 11.0054, "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.815, "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.091, "step": 2300 }, { "epoch": 0.12, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 41.2109375, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 84.5703125, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.9765625, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 64.35186767578125, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.3948, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.446, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, "step": 2300 }, { "epoch": 0.12, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 41.9921875, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 85.9375, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.0234375, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 65.2877197265625, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.626, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 6.023, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.094, "step": 2300 }, { "epoch": 0.12, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.6796875, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 94.53125, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.0522689819336, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.8329, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.908, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.092, "step": 2300 }, { "epoch": 0.12, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 41.9921875, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 86.5234375, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.7421875, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 65.30658721923828, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.9321, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.854, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.091, "step": 2300 }, { "epoch": 0.12, "eval_fever_top15HN_validation.jsonl.gz_acc1": 40.8203125, "eval_fever_top15HN_validation.jsonl.gz_acc3": 88.0859375, "eval_fever_top15HN_validation.jsonl.gz_loss": 1.984375, "eval_fever_top15HN_validation.jsonl.gz_mrr": 66.79490661621094, "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.2273, "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.944, "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.062, "step": 2300 }, { "epoch": 0.12, "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 37.3046875, "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 79.4921875, "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.83984375, "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 59.55287170410156, "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.0029, "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 12.793, "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.2, "step": 2300 }, { "epoch": 0.12, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 45.5078125, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 92.578125, "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.078125, "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 69.79608154296875, "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 9.4517, "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 6.771, "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.106, "step": 2300 }, { "epoch": 0.12, "eval_arguana_synthetic_validation.jsonl.gz_acc1": 43.1640625, "eval_arguana_synthetic_validation.jsonl.gz_acc3": 90.234375, "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.7109375, "eval_arguana_synthetic_validation.jsonl.gz_mrr": 68.19181060791016, "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.4139, "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 11.821, "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.185, "step": 2300 }, { "epoch": 0.12, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 31.8359375, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 71.09375, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.85546875, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 54.697837829589844, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.3425, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.462, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, "step": 2300 }, { "epoch": 0.12, "grad_norm": 0.043993231026225224, "learning_rate": 8.961518987341773e-06, "loss": 2.5391, "step": 2302 }, { "epoch": 0.12, "grad_norm": 0.07591932256289878, "learning_rate": 8.960506329113925e-06, "loss": 2.6289, "step": 2304 }, { "epoch": 0.12, "grad_norm": 0.04323148596719756, "learning_rate": 8.959493670886077e-06, "loss": 2.6133, "step": 2306 }, { "epoch": 0.12, "grad_norm": 0.05536397134631646, "learning_rate": 8.958481012658228e-06, "loss": 2.9922, "step": 2308 }, { "epoch": 0.12, "grad_norm": 0.062026491433053485, "learning_rate": 8.95746835443038e-06, "loss": 4.6875, "step": 2310 }, { "epoch": 0.12, "grad_norm": 0.040402043492967124, "learning_rate": 8.956455696202532e-06, "loss": 3.0938, "step": 2312 }, { "epoch": 0.12, "grad_norm": 0.06112096476692111, "learning_rate": 8.955443037974684e-06, "loss": 2.9453, "step": 2314 }, { "epoch": 0.12, "grad_norm": 0.0541853801140486, "learning_rate": 8.954430379746836e-06, "loss": 3.5781, "step": 2316 }, { "epoch": 0.12, "grad_norm": 0.03930248511010282, "learning_rate": 8.953417721518988e-06, "loss": 3.4375, "step": 2318 }, { "epoch": 0.12, "grad_norm": 0.0992215450899402, "learning_rate": 8.95240506329114e-06, "loss": 3.2734, "step": 2320 }, { "epoch": 0.12, "grad_norm": 0.04215743712338384, "learning_rate": 8.951392405063292e-06, "loss": 2.1719, "step": 2322 }, { "epoch": 0.12, "grad_norm": 0.07320172463749858, "learning_rate": 8.950379746835444e-06, "loss": 2.3633, "step": 2324 }, { "epoch": 0.12, "grad_norm": 0.04884678722418488, "learning_rate": 8.949367088607596e-06, "loss": 2.2852, "step": 2326 }, { "epoch": 0.12, "grad_norm": 0.05442499270066741, "learning_rate": 8.948354430379748e-06, "loss": 1.0449, "step": 2328 }, { "epoch": 0.12, "grad_norm": 0.04890507356916708, "learning_rate": 8.9473417721519e-06, "loss": 1.9414, "step": 2330 }, { "epoch": 0.12, "grad_norm": 0.05921324433195504, "learning_rate": 8.946329113924051e-06, "loss": 3.4922, "step": 2332 }, { "epoch": 0.12, "grad_norm": 0.05700700134972175, "learning_rate": 8.945316455696203e-06, "loss": 4.2266, "step": 2334 }, { "epoch": 0.12, "grad_norm": 0.04541638034291379, "learning_rate": 8.944303797468355e-06, "loss": 1.3086, "step": 2336 }, { "epoch": 0.12, "grad_norm": 0.07271819550368797, "learning_rate": 8.943291139240507e-06, "loss": 1.9688, "step": 2338 }, { "epoch": 0.12, "grad_norm": 0.043434576598254726, "learning_rate": 8.942278481012659e-06, "loss": 2.5234, "step": 2340 }, { "epoch": 0.12, "grad_norm": 0.08763628291954113, "learning_rate": 8.941265822784811e-06, "loss": 3.1094, "step": 2342 }, { "epoch": 0.12, "grad_norm": 0.04318677253013843, "learning_rate": 8.940253164556963e-06, "loss": 1.8672, "step": 2344 }, { "epoch": 0.12, "grad_norm": 0.04196947059950058, "learning_rate": 8.939240506329115e-06, "loss": 2.5781, "step": 2346 }, { "epoch": 0.12, "grad_norm": 0.09224427543881804, "learning_rate": 8.938227848101267e-06, "loss": 2.3379, "step": 2348 }, { "epoch": 0.12, "grad_norm": 0.04235448599344302, "learning_rate": 8.937215189873419e-06, "loss": 3.4609, "step": 2350 }, { "epoch": 0.12, "grad_norm": 0.046992432997166135, "learning_rate": 8.93620253164557e-06, "loss": 1.8672, "step": 2352 }, { "epoch": 0.12, "grad_norm": 0.08273873473919732, "learning_rate": 8.935189873417722e-06, "loss": 2.4688, "step": 2354 }, { "epoch": 0.12, "grad_norm": 0.025447840598404273, "learning_rate": 8.934177215189874e-06, "loss": 4.2188, "step": 2356 }, { "epoch": 0.12, "grad_norm": 0.08535506101635452, "learning_rate": 8.933164556962026e-06, "loss": 1.6816, "step": 2358 }, { "epoch": 0.12, "grad_norm": 0.08728930553305825, "learning_rate": 8.932151898734178e-06, "loss": 1.1211, "step": 2360 }, { "epoch": 0.12, "grad_norm": 0.04555766185257788, "learning_rate": 8.93113924050633e-06, "loss": 3.1328, "step": 2362 }, { "epoch": 0.12, "grad_norm": 0.04353537890607082, "learning_rate": 8.930126582278482e-06, "loss": 2.0352, "step": 2364 }, { "epoch": 0.12, "grad_norm": 0.0506482491708173, "learning_rate": 8.929113924050634e-06, "loss": 0.7012, "step": 2366 }, { "epoch": 0.12, "grad_norm": 0.0651253804771874, "learning_rate": 8.928101265822786e-06, "loss": 1.4453, "step": 2368 }, { "epoch": 0.12, "grad_norm": 0.03876162527654146, "learning_rate": 8.927088607594938e-06, "loss": 2.9375, "step": 2370 }, { "epoch": 0.12, "grad_norm": 0.05776458612139321, "learning_rate": 8.92607594936709e-06, "loss": 3.5938, "step": 2372 }, { "epoch": 0.12, "grad_norm": 0.04748463323682337, "learning_rate": 8.925063291139241e-06, "loss": 1.9531, "step": 2374 }, { "epoch": 0.12, "grad_norm": 0.07599693468952615, "learning_rate": 8.924050632911393e-06, "loss": 2.9648, "step": 2376 }, { "epoch": 0.12, "grad_norm": 0.043300969936869965, "learning_rate": 8.923037974683545e-06, "loss": 3.2656, "step": 2378 }, { "epoch": 0.12, "grad_norm": 0.044206076071945946, "learning_rate": 8.922025316455697e-06, "loss": 2.8438, "step": 2380 }, { "epoch": 0.12, "grad_norm": 0.1016382796671544, "learning_rate": 8.921012658227849e-06, "loss": 2.8828, "step": 2382 }, { "epoch": 0.12, "grad_norm": 0.055427975006479106, "learning_rate": 8.920000000000001e-06, "loss": 2.5938, "step": 2384 }, { "epoch": 0.12, "grad_norm": 0.05624485894723525, "learning_rate": 8.918987341772153e-06, "loss": 3.1797, "step": 2386 }, { "epoch": 0.12, "grad_norm": 0.045076970655017604, "learning_rate": 8.917974683544305e-06, "loss": 1.8633, "step": 2388 }, { "epoch": 0.12, "grad_norm": 0.046547060461273486, "learning_rate": 8.916962025316457e-06, "loss": 2.2148, "step": 2390 }, { "epoch": 0.12, "grad_norm": 0.05549173701507732, "learning_rate": 8.915949367088609e-06, "loss": 4.1797, "step": 2392 }, { "epoch": 0.12, "grad_norm": 0.05839806902651177, "learning_rate": 8.914936708860759e-06, "loss": 3.2109, "step": 2394 }, { "epoch": 0.12, "grad_norm": 0.05540163940146562, "learning_rate": 8.913924050632912e-06, "loss": 3.8516, "step": 2396 }, { "epoch": 0.12, "grad_norm": 0.054197388695415565, "learning_rate": 8.912911392405064e-06, "loss": 3.3438, "step": 2398 }, { "epoch": 0.12, "grad_norm": 0.1269306456397681, "learning_rate": 8.911898734177216e-06, "loss": 1.6523, "step": 2400 }, { "epoch": 0.12, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 43.75, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 90.625, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.796875, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 68.58004760742188, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 12.2259, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.235, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.082, "step": 2400 }, { "epoch": 0.12, "eval_specter_top15HN_validation.jsonl.gz_acc1": 6.8359375, "eval_specter_top15HN_validation.jsonl.gz_acc3": 15.8203125, "eval_specter_top15HN_validation.jsonl.gz_loss": 1.5078125, "eval_specter_top15HN_validation.jsonl.gz_mrr": 17.98919105529785, "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.6358, "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 24.281, "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.379, "step": 2400 }, { "epoch": 0.12, "eval_nq_top15HN_validation.jsonl.gz_acc1": 46.484375, "eval_nq_top15HN_validation.jsonl.gz_acc3": 95.703125, "eval_nq_top15HN_validation.jsonl.gz_loss": 0.859375, "eval_nq_top15HN_validation.jsonl.gz_mrr": 70.98693084716797, "eval_nq_top15HN_validation.jsonl.gz_runtime": 11.2417, "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.693, "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.089, "step": 2400 }, { "epoch": 0.12, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 41.015625, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 84.9609375, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.97265625, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 64.15122985839844, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.4484, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.43, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, "step": 2400 }, { "epoch": 0.12, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 42.96875, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 87.5, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.0234375, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 66.5779800415039, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 11.1732, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.728, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.089, "step": 2400 }, { "epoch": 0.12, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.484375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 94.921875, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.76953125, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.19829559326172, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 11.8532, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.399, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.084, "step": 2400 }, { "epoch": 0.12, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 41.796875, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 87.109375, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.7421875, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 64.88831329345703, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 11.5908, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.522, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.086, "step": 2400 }, { "epoch": 0.12, "eval_fever_top15HN_validation.jsonl.gz_acc1": 42.1875, "eval_fever_top15HN_validation.jsonl.gz_acc3": 88.8671875, "eval_fever_top15HN_validation.jsonl.gz_loss": 2.0, "eval_fever_top15HN_validation.jsonl.gz_mrr": 66.49065399169922, "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.2019, "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.95, "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.062, "step": 2400 }, { "epoch": 0.12, "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 39.84375, "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 82.8125, "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.828125, "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 62.91026306152344, "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.7262, "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 11.177, "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.175, "step": 2400 }, { "epoch": 0.12, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 45.1171875, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 91.796875, "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.078125, "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 69.19859313964844, "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 7.9681, "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 8.032, "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.125, "step": 2400 }, { "epoch": 0.12, "eval_arguana_synthetic_validation.jsonl.gz_acc1": 42.96875, "eval_arguana_synthetic_validation.jsonl.gz_acc3": 91.015625, "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, "eval_arguana_synthetic_validation.jsonl.gz_mrr": 68.14521789550781, "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.5466, "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 11.539, "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.18, "step": 2400 }, { "epoch": 0.12, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 33.0078125, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 74.0234375, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.859375, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 56.22562789916992, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.3377, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.464, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, "step": 2400 }, { "epoch": 0.12, "grad_norm": 0.054360585518445484, "learning_rate": 8.910886075949368e-06, "loss": 3.8828, "step": 2402 }, { "epoch": 0.12, "grad_norm": 0.06887794058527433, "learning_rate": 8.90987341772152e-06, "loss": 3.0547, "step": 2404 }, { "epoch": 0.12, "grad_norm": 0.060583557276451795, "learning_rate": 8.908860759493672e-06, "loss": 1.6855, "step": 2406 }, { "epoch": 0.12, "grad_norm": 0.12613856817860564, "learning_rate": 8.907848101265824e-06, "loss": 2.3145, "step": 2408 }, { "epoch": 0.12, "grad_norm": 0.05571721701243465, "learning_rate": 8.906835443037976e-06, "loss": 3.8281, "step": 2410 }, { "epoch": 0.12, "grad_norm": 0.07231961952418126, "learning_rate": 8.905822784810128e-06, "loss": 2.9375, "step": 2412 }, { "epoch": 0.12, "grad_norm": 0.050725319211885146, "learning_rate": 8.90481012658228e-06, "loss": 2.6875, "step": 2414 }, { "epoch": 0.12, "grad_norm": 0.11104139412810117, "learning_rate": 8.903797468354432e-06, "loss": 1.5645, "step": 2416 }, { "epoch": 0.12, "grad_norm": 0.046148625234935944, "learning_rate": 8.902784810126583e-06, "loss": 2.0156, "step": 2418 }, { "epoch": 0.12, "grad_norm": 0.044795181597166084, "learning_rate": 8.901772151898735e-06, "loss": 1.3516, "step": 2420 }, { "epoch": 0.12, "grad_norm": 0.04766981242618178, "learning_rate": 8.900759493670886e-06, "loss": 2.6406, "step": 2422 }, { "epoch": 0.12, "grad_norm": 0.08883505544240511, "learning_rate": 8.899746835443037e-06, "loss": 2.5078, "step": 2424 }, { "epoch": 0.12, "grad_norm": 0.04613727963014351, "learning_rate": 8.898734177215191e-06, "loss": 1.7188, "step": 2426 }, { "epoch": 0.12, "grad_norm": 0.04830866347513251, "learning_rate": 8.897721518987343e-06, "loss": 2.25, "step": 2428 }, { "epoch": 0.12, "grad_norm": 0.058420708565597675, "learning_rate": 8.896708860759495e-06, "loss": 2.4062, "step": 2430 }, { "epoch": 0.12, "grad_norm": 0.06958523112172264, "learning_rate": 8.895696202531647e-06, "loss": 2.9648, "step": 2432 }, { "epoch": 0.12, "grad_norm": 0.045121764078908125, "learning_rate": 8.894683544303799e-06, "loss": 1.6602, "step": 2434 }, { "epoch": 0.12, "grad_norm": 0.04712074158473754, "learning_rate": 8.89367088607595e-06, "loss": 1.1914, "step": 2436 }, { "epoch": 0.12, "grad_norm": 0.05144407669486078, "learning_rate": 8.892658227848103e-06, "loss": 3.2656, "step": 2438 }, { "epoch": 0.12, "grad_norm": 0.050265401858983816, "learning_rate": 8.891645569620254e-06, "loss": 2.293, "step": 2440 }, { "epoch": 0.12, "grad_norm": 0.054604631461465866, "learning_rate": 8.890632911392406e-06, "loss": 2.5312, "step": 2442 }, { "epoch": 0.12, "grad_norm": 0.04667980062399209, "learning_rate": 8.889620253164558e-06, "loss": 1.6797, "step": 2444 }, { "epoch": 0.12, "grad_norm": 0.05304347727193509, "learning_rate": 8.88860759493671e-06, "loss": 2.6992, "step": 2446 }, { "epoch": 0.12, "grad_norm": 0.0840372069101572, "learning_rate": 8.88759493670886e-06, "loss": 0.8516, "step": 2448 }, { "epoch": 0.12, "grad_norm": 0.04842070838375918, "learning_rate": 8.886582278481012e-06, "loss": 1.7148, "step": 2450 }, { "epoch": 0.12, "grad_norm": 0.07344970867961509, "learning_rate": 8.885569620253164e-06, "loss": 2.8164, "step": 2452 }, { "epoch": 0.12, "grad_norm": 0.08197126579605098, "learning_rate": 8.884556962025316e-06, "loss": 2.0508, "step": 2454 }, { "epoch": 0.12, "grad_norm": 0.08906346433519001, "learning_rate": 8.88354430379747e-06, "loss": 1.9297, "step": 2456 }, { "epoch": 0.12, "grad_norm": 0.07026800098562865, "learning_rate": 8.882531645569622e-06, "loss": 2.9141, "step": 2458 }, { "epoch": 0.12, "grad_norm": 0.057852559290353545, "learning_rate": 8.881518987341774e-06, "loss": 1.2891, "step": 2460 }, { "epoch": 0.12, "grad_norm": 0.07488084656189853, "learning_rate": 8.880506329113925e-06, "loss": 3.7734, "step": 2462 }, { "epoch": 0.12, "grad_norm": 0.059345150098033324, "learning_rate": 8.879493670886077e-06, "loss": 2.3672, "step": 2464 }, { "epoch": 0.12, "grad_norm": 0.046010342595922626, "learning_rate": 8.87848101265823e-06, "loss": 2.8281, "step": 2466 }, { "epoch": 0.12, "grad_norm": 0.03510200766462188, "learning_rate": 8.877468354430381e-06, "loss": 4.4453, "step": 2468 }, { "epoch": 0.12, "grad_norm": 0.0464438162298748, "learning_rate": 8.876455696202533e-06, "loss": 1.3281, "step": 2470 }, { "epoch": 0.12, "grad_norm": 0.11354799763355634, "learning_rate": 8.875443037974685e-06, "loss": 2.2383, "step": 2472 }, { "epoch": 0.12, "grad_norm": 0.05458230519319839, "learning_rate": 8.874430379746835e-06, "loss": 2.9102, "step": 2474 }, { "epoch": 0.12, "grad_norm": 0.03803021377034928, "learning_rate": 8.873417721518987e-06, "loss": 2.75, "step": 2476 }, { "epoch": 0.12, "grad_norm": 0.07906754079965607, "learning_rate": 8.872405063291139e-06, "loss": 3.4531, "step": 2478 }, { "epoch": 0.12, "grad_norm": 0.07767897781324883, "learning_rate": 8.871392405063291e-06, "loss": 2.5703, "step": 2480 }, { "epoch": 0.12, "grad_norm": 0.0978122466589843, "learning_rate": 8.870379746835443e-06, "loss": 4.2266, "step": 2482 }, { "epoch": 0.12, "grad_norm": 0.04908720662041111, "learning_rate": 8.869367088607595e-06, "loss": 1.9023, "step": 2484 }, { "epoch": 0.12, "grad_norm": 0.07665655710797971, "learning_rate": 8.868354430379748e-06, "loss": 2.3887, "step": 2486 }, { "epoch": 0.12, "grad_norm": 0.08932946561581721, "learning_rate": 8.8673417721519e-06, "loss": 2.7031, "step": 2488 }, { "epoch": 0.12, "grad_norm": 0.043585521695245454, "learning_rate": 8.866329113924052e-06, "loss": 1.9336, "step": 2490 }, { "epoch": 0.12, "grad_norm": 0.0616416080110006, "learning_rate": 8.865316455696204e-06, "loss": 2.5312, "step": 2492 }, { "epoch": 0.12, "grad_norm": 0.06618915511822913, "learning_rate": 8.864303797468356e-06, "loss": 3.6094, "step": 2494 }, { "epoch": 0.12, "grad_norm": 0.04493140036724536, "learning_rate": 8.863291139240508e-06, "loss": 2.4531, "step": 2496 }, { "epoch": 0.12, "grad_norm": 0.0589899002891497, "learning_rate": 8.86227848101266e-06, "loss": 3.6953, "step": 2498 }, { "epoch": 0.12, "grad_norm": 0.07937925656330862, "learning_rate": 8.861265822784812e-06, "loss": 1.3164, "step": 2500 }, { "epoch": 0.12, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 45.1171875, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 93.1640625, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.80078125, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 69.6258316040039, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.5075, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.562, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.087, "step": 2500 }, { "epoch": 0.12, "eval_specter_top15HN_validation.jsonl.gz_acc1": 9.9609375, "eval_specter_top15HN_validation.jsonl.gz_acc3": 23.6328125, "eval_specter_top15HN_validation.jsonl.gz_loss": 1.4609375, "eval_specter_top15HN_validation.jsonl.gz_mrr": 21.807819366455078, "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.7181, "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 23.546, "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.368, "step": 2500 }, { "epoch": 0.12, "eval_nq_top15HN_validation.jsonl.gz_acc1": 47.0703125, "eval_nq_top15HN_validation.jsonl.gz_acc3": 94.7265625, "eval_nq_top15HN_validation.jsonl.gz_loss": 0.85546875, "eval_nq_top15HN_validation.jsonl.gz_mrr": 71.43701171875, "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.7722, "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.941, "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.093, "step": 2500 }, { "epoch": 0.12, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 38.4765625, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 80.2734375, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 1.0, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 61.60811996459961, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.5135, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.41, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, "step": 2500 }, { "epoch": 0.12, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 40.625, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 84.1796875, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.0390625, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 63.69384002685547, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.7142, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.973, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.093, "step": 2500 }, { "epoch": 0.12, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.875, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 94.7265625, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.17011260986328, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 11.0668, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.783, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.09, "step": 2500 }, { "epoch": 0.12, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 40.0390625, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 83.984375, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.7421875, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 63.44486999511719, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.5564, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 6.063, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.095, "step": 2500 }, { "epoch": 0.12, "eval_fever_top15HN_validation.jsonl.gz_acc1": 41.796875, "eval_fever_top15HN_validation.jsonl.gz_acc3": 88.28125, "eval_fever_top15HN_validation.jsonl.gz_loss": 1.921875, "eval_fever_top15HN_validation.jsonl.gz_mrr": 66.27226257324219, "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.3154, "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.923, "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.061, "step": 2500 }, { "epoch": 0.12, "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 27.5390625, "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 63.28125, "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.875, "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 48.667640686035156, "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 4.9663, "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 12.887, "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.201, "step": 2500 }, { "epoch": 0.12, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 43.75, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 89.84375, "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.09375, "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 67.84911346435547, "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 7.5591, "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 8.467, "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.132, "step": 2500 }, { "epoch": 0.12, "eval_arguana_synthetic_validation.jsonl.gz_acc1": 44.7265625, "eval_arguana_synthetic_validation.jsonl.gz_acc3": 92.578125, "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, "eval_arguana_synthetic_validation.jsonl.gz_mrr": 68.54449462890625, "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.0478, "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.679, "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.198, "step": 2500 }, { "epoch": 0.12, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 29.8828125, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 70.3125, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.859375, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 53.18346405029297, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.2854, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.48, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, "step": 2500 }, { "epoch": 0.13, "grad_norm": 0.04633153091940758, "learning_rate": 8.860253164556962e-06, "loss": 2.1875, "step": 2502 }, { "epoch": 0.13, "grad_norm": 0.05905298038997286, "learning_rate": 8.859240506329114e-06, "loss": 3.4844, "step": 2504 }, { "epoch": 0.13, "grad_norm": 0.048094727656352754, "learning_rate": 8.858227848101266e-06, "loss": 2.1211, "step": 2506 }, { "epoch": 0.13, "grad_norm": 0.035994165452193784, "learning_rate": 8.857215189873418e-06, "loss": 3.0547, "step": 2508 }, { "epoch": 0.13, "grad_norm": 0.04289409263851401, "learning_rate": 8.85620253164557e-06, "loss": 3.1797, "step": 2510 }, { "epoch": 0.13, "grad_norm": 0.04667679037496861, "learning_rate": 8.855189873417721e-06, "loss": 3.8359, "step": 2512 }, { "epoch": 0.13, "grad_norm": 0.12455027862059778, "learning_rate": 8.854177215189873e-06, "loss": 3.7812, "step": 2514 }, { "epoch": 0.13, "grad_norm": 0.04730664140523245, "learning_rate": 8.853164556962027e-06, "loss": 2.4297, "step": 2516 }, { "epoch": 0.13, "grad_norm": 0.042202110155894375, "learning_rate": 8.852151898734179e-06, "loss": 1.2891, "step": 2518 }, { "epoch": 0.13, "grad_norm": 0.04754586764286116, "learning_rate": 8.85113924050633e-06, "loss": 3.2109, "step": 2520 }, { "epoch": 0.13, "grad_norm": 0.07343919643005098, "learning_rate": 8.850126582278483e-06, "loss": 2.25, "step": 2522 }, { "epoch": 0.13, "grad_norm": 0.08709059692488305, "learning_rate": 8.849113924050635e-06, "loss": 2.3438, "step": 2524 }, { "epoch": 0.13, "grad_norm": 0.04340495246498866, "learning_rate": 8.848101265822786e-06, "loss": 2.3633, "step": 2526 }, { "epoch": 0.13, "grad_norm": 0.04469305828346309, "learning_rate": 8.847088607594937e-06, "loss": 1.6836, "step": 2528 }, { "epoch": 0.13, "grad_norm": 0.088414459032459, "learning_rate": 8.846075949367089e-06, "loss": 2.4844, "step": 2530 }, { "epoch": 0.13, "grad_norm": 0.043113690269789035, "learning_rate": 8.84506329113924e-06, "loss": 1.0469, "step": 2532 }, { "epoch": 0.13, "grad_norm": 0.06503235750891347, "learning_rate": 8.844050632911392e-06, "loss": 2.8906, "step": 2534 }, { "epoch": 0.13, "grad_norm": 0.05571664033619658, "learning_rate": 8.843037974683544e-06, "loss": 4.1797, "step": 2536 }, { "epoch": 0.13, "grad_norm": 0.04358647922501268, "learning_rate": 8.842025316455696e-06, "loss": 1.0645, "step": 2538 }, { "epoch": 0.13, "grad_norm": 0.09110864380118001, "learning_rate": 8.841012658227848e-06, "loss": 2.9844, "step": 2540 }, { "epoch": 0.13, "grad_norm": 0.04784759923520609, "learning_rate": 8.84e-06, "loss": 1.8594, "step": 2542 }, { "epoch": 0.13, "grad_norm": 0.04563810959116694, "learning_rate": 8.838987341772152e-06, "loss": 3.2188, "step": 2544 }, { "epoch": 0.13, "grad_norm": 0.0461173796094796, "learning_rate": 8.837974683544306e-06, "loss": 2.5391, "step": 2546 }, { "epoch": 0.13, "grad_norm": 0.07851545988606415, "learning_rate": 8.836962025316457e-06, "loss": 2.0742, "step": 2548 }, { "epoch": 0.13, "grad_norm": 0.047102253116170514, "learning_rate": 8.83594936708861e-06, "loss": 2.3281, "step": 2550 }, { "epoch": 0.13, "grad_norm": 0.04421632697742336, "learning_rate": 8.834936708860761e-06, "loss": 1.0996, "step": 2552 }, { "epoch": 0.13, "grad_norm": 0.04662218312739471, "learning_rate": 8.833924050632912e-06, "loss": 2.8984, "step": 2554 }, { "epoch": 0.13, "grad_norm": 0.041431262450878824, "learning_rate": 8.832911392405063e-06, "loss": 1.6582, "step": 2556 }, { "epoch": 0.13, "grad_norm": 0.05535268065333397, "learning_rate": 8.831898734177215e-06, "loss": 3.7656, "step": 2558 }, { "epoch": 0.13, "grad_norm": 0.04371699654044821, "learning_rate": 8.830886075949367e-06, "loss": 1.6758, "step": 2560 }, { "epoch": 0.13, "grad_norm": 0.13086511115412275, "learning_rate": 8.829873417721519e-06, "loss": 2.0664, "step": 2562 }, { "epoch": 0.13, "grad_norm": 0.06839763728577254, "learning_rate": 8.828860759493671e-06, "loss": 1.0723, "step": 2564 }, { "epoch": 0.13, "grad_norm": 0.04184277562079915, "learning_rate": 8.827848101265823e-06, "loss": 1.6562, "step": 2566 }, { "epoch": 0.13, "grad_norm": 0.058952200716529374, "learning_rate": 8.826835443037975e-06, "loss": 3.7734, "step": 2568 }, { "epoch": 0.13, "grad_norm": 0.044434838225155884, "learning_rate": 8.825822784810127e-06, "loss": 3.1094, "step": 2570 }, { "epoch": 0.13, "grad_norm": 0.05595388170602543, "learning_rate": 8.824810126582279e-06, "loss": 3.8359, "step": 2572 }, { "epoch": 0.13, "grad_norm": 0.08113111202164194, "learning_rate": 8.82379746835443e-06, "loss": 2.1719, "step": 2574 }, { "epoch": 0.13, "grad_norm": 0.05373612376884004, "learning_rate": 8.822784810126584e-06, "loss": 3.3281, "step": 2576 }, { "epoch": 0.13, "grad_norm": 0.04954179399609412, "learning_rate": 8.821772151898736e-06, "loss": 2.5938, "step": 2578 }, { "epoch": 0.13, "grad_norm": 0.04943208064397753, "learning_rate": 8.820759493670888e-06, "loss": 2.9141, "step": 2580 }, { "epoch": 0.13, "grad_norm": 0.044735737967205216, "learning_rate": 8.819746835443038e-06, "loss": 3.1484, "step": 2582 }, { "epoch": 0.13, "grad_norm": 0.10650528959169556, "learning_rate": 8.81873417721519e-06, "loss": 2.2461, "step": 2584 }, { "epoch": 0.13, "grad_norm": 0.05377515075505955, "learning_rate": 8.817721518987342e-06, "loss": 3.6484, "step": 2586 }, { "epoch": 0.13, "grad_norm": 0.044445382115991326, "learning_rate": 8.816708860759494e-06, "loss": 3.2188, "step": 2588 }, { "epoch": 0.13, "grad_norm": 0.07149806803480696, "learning_rate": 8.815696202531646e-06, "loss": 3.3203, "step": 2590 }, { "epoch": 0.13, "grad_norm": 0.07765096857543947, "learning_rate": 8.814683544303798e-06, "loss": 1.1152, "step": 2592 }, { "epoch": 0.13, "grad_norm": 0.32396538727778484, "learning_rate": 8.81367088607595e-06, "loss": 2.1191, "step": 2594 }, { "epoch": 0.13, "grad_norm": 0.0543477861736957, "learning_rate": 8.812658227848102e-06, "loss": 3.7969, "step": 2596 }, { "epoch": 0.13, "grad_norm": 0.06229143061454939, "learning_rate": 8.811645569620254e-06, "loss": 3.0312, "step": 2598 }, { "epoch": 0.13, "grad_norm": 0.0537317517438358, "learning_rate": 8.810632911392405e-06, "loss": 3.4531, "step": 2600 }, { "epoch": 0.13, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 44.3359375, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 91.2109375, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.80078125, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 68.84588623046875, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.3589, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.634, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.088, "step": 2600 }, { "epoch": 0.13, "eval_specter_top15HN_validation.jsonl.gz_acc1": 10.7421875, "eval_specter_top15HN_validation.jsonl.gz_acc3": 25.1953125, "eval_specter_top15HN_validation.jsonl.gz_loss": 1.4453125, "eval_specter_top15HN_validation.jsonl.gz_mrr": 22.87902069091797, "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.514, "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 25.458, "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.398, "step": 2600 }, { "epoch": 0.13, "eval_nq_top15HN_validation.jsonl.gz_acc1": 47.4609375, "eval_nq_top15HN_validation.jsonl.gz_acc3": 96.484375, "eval_nq_top15HN_validation.jsonl.gz_loss": 0.85546875, "eval_nq_top15HN_validation.jsonl.gz_mrr": 72.10218048095703, "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.8518, "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.898, "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.092, "step": 2600 }, { "epoch": 0.13, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 39.453125, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 82.03125, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.9921875, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 62.346885681152344, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 15.2287, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.203, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.066, "step": 2600 }, { "epoch": 0.13, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 42.1875, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 86.71875, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.0234375, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 65.28152465820312, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 11.8528, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.4, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.084, "step": 2600 }, { "epoch": 0.13, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.6796875, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 94.53125, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.67933654785156, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 11.6791, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.48, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.086, "step": 2600 }, { "epoch": 0.13, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 43.1640625, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 88.671875, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.73828125, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 66.86489868164062, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 11.7831, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.431, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.085, "step": 2600 }, { "epoch": 0.13, "eval_fever_top15HN_validation.jsonl.gz_acc1": 41.9921875, "eval_fever_top15HN_validation.jsonl.gz_acc3": 88.28125, "eval_fever_top15HN_validation.jsonl.gz_loss": 2.015625, "eval_fever_top15HN_validation.jsonl.gz_mrr": 66.0167007446289, "eval_fever_top15HN_validation.jsonl.gz_runtime": 17.3656, "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.685, "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.058, "step": 2600 }, { "epoch": 0.13, "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 36.1328125, "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 78.515625, "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.8359375, "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 59.20676040649414, "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 6.2888, "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 10.177, "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.159, "step": 2600 }, { "epoch": 0.13, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 43.1640625, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 89.2578125, "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.09375, "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 67.05455017089844, "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 7.1267, "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 8.98, "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.14, "step": 2600 }, { "epoch": 0.13, "eval_arguana_synthetic_validation.jsonl.gz_acc1": 43.75, "eval_arguana_synthetic_validation.jsonl.gz_acc3": 91.40625, "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.7109375, "eval_arguana_synthetic_validation.jsonl.gz_mrr": 68.508056640625, "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.2486, "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.194, "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.191, "step": 2600 }, { "epoch": 0.13, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 28.7109375, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 67.578125, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.85546875, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 51.83809280395508, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.3416, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.463, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, "step": 2600 }, { "epoch": 0.13, "grad_norm": 0.04951655968018335, "learning_rate": 8.809620253164557e-06, "loss": 1.7109, "step": 2602 }, { "epoch": 0.13, "grad_norm": 0.045592901695699066, "learning_rate": 8.80860759493671e-06, "loss": 2.0273, "step": 2604 }, { "epoch": 0.13, "grad_norm": 0.05395490767162895, "learning_rate": 8.807594936708863e-06, "loss": 4.0, "step": 2606 }, { "epoch": 0.13, "grad_norm": 0.05350486279955657, "learning_rate": 8.806582278481013e-06, "loss": 3.9141, "step": 2608 }, { "epoch": 0.13, "grad_norm": 0.04664756565512289, "learning_rate": 8.805569620253165e-06, "loss": 1.3555, "step": 2610 }, { "epoch": 0.13, "grad_norm": 0.05610965889346614, "learning_rate": 8.804556962025317e-06, "loss": 3.7188, "step": 2612 }, { "epoch": 0.13, "grad_norm": 0.099457180400491, "learning_rate": 8.803544303797469e-06, "loss": 2.6484, "step": 2614 }, { "epoch": 0.13, "grad_norm": 0.05464773989198765, "learning_rate": 8.80253164556962e-06, "loss": 3.0469, "step": 2616 }, { "epoch": 0.13, "grad_norm": 0.04492590979410205, "learning_rate": 8.801518987341773e-06, "loss": 2.5938, "step": 2618 }, { "epoch": 0.13, "grad_norm": 0.06176717102157868, "learning_rate": 8.800506329113924e-06, "loss": 2.9453, "step": 2620 }, { "epoch": 0.13, "grad_norm": 0.06297430356503986, "learning_rate": 8.799493670886076e-06, "loss": 1.5312, "step": 2622 }, { "epoch": 0.13, "grad_norm": 0.07557707951512468, "learning_rate": 8.798481012658228e-06, "loss": 1.3008, "step": 2624 }, { "epoch": 0.13, "grad_norm": 0.0828691249589272, "learning_rate": 8.79746835443038e-06, "loss": 3.4062, "step": 2626 }, { "epoch": 0.13, "grad_norm": 0.08076606632694368, "learning_rate": 8.796455696202532e-06, "loss": 1.9844, "step": 2628 }, { "epoch": 0.13, "grad_norm": 0.08741726827798928, "learning_rate": 8.795443037974684e-06, "loss": 1.6875, "step": 2630 }, { "epoch": 0.13, "grad_norm": 0.0532597578727803, "learning_rate": 8.794430379746836e-06, "loss": 2.7031, "step": 2632 }, { "epoch": 0.13, "grad_norm": 0.09681222962433436, "learning_rate": 8.793417721518988e-06, "loss": 2.0508, "step": 2634 }, { "epoch": 0.13, "grad_norm": 0.05998016873778101, "learning_rate": 8.79240506329114e-06, "loss": 3.2266, "step": 2636 }, { "epoch": 0.13, "grad_norm": 0.06809758398603295, "learning_rate": 8.791392405063292e-06, "loss": 1.3945, "step": 2638 }, { "epoch": 0.13, "grad_norm": 0.07995097602847276, "learning_rate": 8.790379746835444e-06, "loss": 2.1094, "step": 2640 }, { "epoch": 0.13, "grad_norm": 0.03780879612143424, "learning_rate": 8.789367088607595e-06, "loss": 3.1875, "step": 2642 }, { "epoch": 0.13, "grad_norm": 0.05723412312437367, "learning_rate": 8.788354430379747e-06, "loss": 3.9297, "step": 2644 }, { "epoch": 0.13, "grad_norm": 0.056302760632846574, "learning_rate": 8.7873417721519e-06, "loss": 2.4062, "step": 2646 }, { "epoch": 0.13, "grad_norm": 0.047170378937362194, "learning_rate": 8.786329113924051e-06, "loss": 1.7383, "step": 2648 }, { "epoch": 0.13, "grad_norm": 0.07832131159233159, "learning_rate": 8.785316455696203e-06, "loss": 1.0918, "step": 2650 }, { "epoch": 0.13, "grad_norm": 0.10096020187857348, "learning_rate": 8.784303797468355e-06, "loss": 1.9297, "step": 2652 }, { "epoch": 0.13, "grad_norm": 0.04348167647482904, "learning_rate": 8.783291139240507e-06, "loss": 2.168, "step": 2654 }, { "epoch": 0.13, "grad_norm": 0.08173612172454091, "learning_rate": 8.782278481012659e-06, "loss": 2.9141, "step": 2656 }, { "epoch": 0.13, "grad_norm": 0.04625064567488957, "learning_rate": 8.78126582278481e-06, "loss": 2.6484, "step": 2658 }, { "epoch": 0.13, "grad_norm": 0.07516927701880775, "learning_rate": 8.780253164556963e-06, "loss": 4.1562, "step": 2660 }, { "epoch": 0.13, "grad_norm": 0.046221992737433985, "learning_rate": 8.779240506329115e-06, "loss": 2.5312, "step": 2662 }, { "epoch": 0.13, "grad_norm": 0.060891742062961385, "learning_rate": 8.778227848101266e-06, "loss": 2.832, "step": 2664 }, { "epoch": 0.13, "grad_norm": 0.04855708235268953, "learning_rate": 8.777215189873418e-06, "loss": 3.1953, "step": 2666 }, { "epoch": 0.13, "grad_norm": 0.05375008240335823, "learning_rate": 8.77620253164557e-06, "loss": 3.5312, "step": 2668 }, { "epoch": 0.13, "grad_norm": 0.07130262494084699, "learning_rate": 8.775189873417722e-06, "loss": 2.5234, "step": 2670 }, { "epoch": 0.13, "grad_norm": 0.05924101789563237, "learning_rate": 8.774177215189874e-06, "loss": 2.7031, "step": 2672 }, { "epoch": 0.13, "grad_norm": 0.05020368159321286, "learning_rate": 8.773164556962026e-06, "loss": 2.4766, "step": 2674 }, { "epoch": 0.13, "grad_norm": 0.0456321096310645, "learning_rate": 8.772151898734178e-06, "loss": 2.7578, "step": 2676 }, { "epoch": 0.13, "grad_norm": 0.13466335645582303, "learning_rate": 8.77113924050633e-06, "loss": 1.0703, "step": 2678 }, { "epoch": 0.13, "grad_norm": 0.04404695993934905, "learning_rate": 8.770126582278482e-06, "loss": 1.9277, "step": 2680 }, { "epoch": 0.13, "grad_norm": 0.08263470157459507, "learning_rate": 8.769113924050634e-06, "loss": 2.3633, "step": 2682 }, { "epoch": 0.13, "grad_norm": 0.05717499821710438, "learning_rate": 8.768101265822786e-06, "loss": 3.9766, "step": 2684 }, { "epoch": 0.13, "grad_norm": 0.06888364635761927, "learning_rate": 8.767088607594937e-06, "loss": 1.6094, "step": 2686 }, { "epoch": 0.13, "grad_norm": 0.09166897324708952, "learning_rate": 8.76607594936709e-06, "loss": 2.373, "step": 2688 }, { "epoch": 0.13, "grad_norm": 0.04524041569108601, "learning_rate": 8.765063291139241e-06, "loss": 1.9453, "step": 2690 }, { "epoch": 0.13, "grad_norm": 0.08062394717179651, "learning_rate": 8.764050632911393e-06, "loss": 2.2773, "step": 2692 }, { "epoch": 0.13, "grad_norm": 0.12650775549602336, "learning_rate": 8.763037974683545e-06, "loss": 3.3828, "step": 2694 }, { "epoch": 0.13, "grad_norm": 0.042981691741514276, "learning_rate": 8.762025316455697e-06, "loss": 3.0938, "step": 2696 }, { "epoch": 0.13, "grad_norm": 0.05665027601956785, "learning_rate": 8.761012658227849e-06, "loss": 2.6094, "step": 2698 }, { "epoch": 0.14, "grad_norm": 0.04791952921300498, "learning_rate": 8.76e-06, "loss": 2.1797, "step": 2700 }, { "epoch": 0.14, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 45.5078125, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 93.359375, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.796875, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 69.97989654541016, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.7028, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.469, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.085, "step": 2700 }, { "epoch": 0.14, "eval_specter_top15HN_validation.jsonl.gz_acc1": 10.15625, "eval_specter_top15HN_validation.jsonl.gz_acc3": 24.21875, "eval_specter_top15HN_validation.jsonl.gz_loss": 1.46875, "eval_specter_top15HN_validation.jsonl.gz_mrr": 22.30203628540039, "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.6086, "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 24.534, "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.383, "step": 2700 }, { "epoch": 0.14, "eval_nq_top15HN_validation.jsonl.gz_acc1": 48.046875, "eval_nq_top15HN_validation.jsonl.gz_acc3": 96.484375, "eval_nq_top15HN_validation.jsonl.gz_loss": 0.85546875, "eval_nq_top15HN_validation.jsonl.gz_mrr": 72.34609985351562, "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.7236, "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.968, "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.093, "step": 2700 }, { "epoch": 0.14, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 40.8203125, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 84.765625, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.984375, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 63.08637237548828, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.5234, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.407, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, "step": 2700 }, { "epoch": 0.14, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 43.1640625, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 88.4765625, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.015625, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 66.65151977539062, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 11.0194, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.808, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.091, "step": 2700 }, { "epoch": 0.14, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.875, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 95.1171875, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 70.90335083007812, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.9697, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.834, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.091, "step": 2700 }, { "epoch": 0.14, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 40.8203125, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 84.5703125, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.73828125, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 64.20475006103516, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.9022, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.87, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.092, "step": 2700 }, { "epoch": 0.14, "eval_fever_top15HN_validation.jsonl.gz_acc1": 41.6015625, "eval_fever_top15HN_validation.jsonl.gz_acc3": 87.6953125, "eval_fever_top15HN_validation.jsonl.gz_loss": 1.96875, "eval_fever_top15HN_validation.jsonl.gz_mrr": 65.48861694335938, "eval_fever_top15HN_validation.jsonl.gz_runtime": 15.8639, "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 4.034, "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.063, "step": 2700 }, { "epoch": 0.14, "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 37.6953125, "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 79.6875, "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.8359375, "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 60.636932373046875, "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 4.8688, "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 13.145, "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.205, "step": 2700 }, { "epoch": 0.14, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 43.9453125, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 90.234375, "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.09375, "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 68.0864028930664, "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 7.6733, "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 8.341, "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.13, "step": 2700 }, { "epoch": 0.14, "eval_arguana_synthetic_validation.jsonl.gz_acc1": 43.5546875, "eval_arguana_synthetic_validation.jsonl.gz_acc3": 91.015625, "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.7109375, "eval_arguana_synthetic_validation.jsonl.gz_mrr": 68.655029296875, "eval_arguana_synthetic_validation.jsonl.gz_runtime": 4.9869, "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.834, "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.201, "step": 2700 }, { "epoch": 0.14, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 29.296875, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 66.9921875, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.859375, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 51.773258209228516, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.4547, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.428, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.069, "step": 2700 }, { "epoch": 0.14, "grad_norm": 0.07249195188316931, "learning_rate": 8.758987341772153e-06, "loss": 1.6055, "step": 2702 }, { "epoch": 0.14, "grad_norm": 0.057633979419438035, "learning_rate": 8.757974683544305e-06, "loss": 3.25, "step": 2704 }, { "epoch": 0.14, "grad_norm": 0.051040752051553895, "learning_rate": 8.756962025316457e-06, "loss": 1.9375, "step": 2706 }, { "epoch": 0.14, "grad_norm": 0.07146589021940866, "learning_rate": 8.755949367088608e-06, "loss": 2.332, "step": 2708 }, { "epoch": 0.14, "grad_norm": 0.04312966165838956, "learning_rate": 8.75493670886076e-06, "loss": 3.1016, "step": 2710 }, { "epoch": 0.14, "grad_norm": 0.040291770833117316, "learning_rate": 8.753924050632912e-06, "loss": 1.8672, "step": 2712 }, { "epoch": 0.14, "grad_norm": 0.04306660828359189, "learning_rate": 8.752911392405064e-06, "loss": 1.3164, "step": 2714 }, { "epoch": 0.14, "grad_norm": 0.03797749676395882, "learning_rate": 8.751898734177216e-06, "loss": 2.5312, "step": 2716 }, { "epoch": 0.14, "grad_norm": 0.08444159548591303, "learning_rate": 8.750886075949368e-06, "loss": 2.2188, "step": 2718 }, { "epoch": 0.14, "grad_norm": 0.0380185965488091, "learning_rate": 8.74987341772152e-06, "loss": 3.2578, "step": 2720 }, { "epoch": 0.14, "grad_norm": 0.06045313084504471, "learning_rate": 8.748860759493672e-06, "loss": 4.0703, "step": 2722 }, { "epoch": 0.14, "grad_norm": 0.03966127813695506, "learning_rate": 8.747848101265824e-06, "loss": 2.2891, "step": 2724 }, { "epoch": 0.14, "grad_norm": 0.055140091316243436, "learning_rate": 8.746835443037976e-06, "loss": 2.625, "step": 2726 }, { "epoch": 0.14, "grad_norm": 0.05381506666307845, "learning_rate": 8.745822784810128e-06, "loss": 3.1719, "step": 2728 }, { "epoch": 0.14, "grad_norm": 0.05021926655278173, "learning_rate": 8.74481012658228e-06, "loss": 2.6758, "step": 2730 }, { "epoch": 0.14, "grad_norm": 0.04702400079967468, "learning_rate": 8.743797468354431e-06, "loss": 2.0078, "step": 2732 }, { "epoch": 0.14, "grad_norm": 0.049133886013928964, "learning_rate": 8.742784810126583e-06, "loss": 1.3242, "step": 2734 }, { "epoch": 0.14, "grad_norm": 0.04799777192883991, "learning_rate": 8.741772151898735e-06, "loss": 2.4375, "step": 2736 }, { "epoch": 0.14, "grad_norm": 0.047888279471591924, "learning_rate": 8.740759493670887e-06, "loss": 2.4766, "step": 2738 }, { "epoch": 0.14, "grad_norm": 0.05984335804292988, "learning_rate": 8.739746835443037e-06, "loss": 3.2969, "step": 2740 }, { "epoch": 0.14, "grad_norm": 0.044926342534701, "learning_rate": 8.738734177215191e-06, "loss": 1.3516, "step": 2742 }, { "epoch": 0.14, "grad_norm": 0.043959224671034046, "learning_rate": 8.737721518987343e-06, "loss": 1.3281, "step": 2744 }, { "epoch": 0.14, "grad_norm": 0.0584937255169823, "learning_rate": 8.736708860759495e-06, "loss": 3.5547, "step": 2746 }, { "epoch": 0.14, "grad_norm": 0.04613424912217633, "learning_rate": 8.735696202531647e-06, "loss": 2.4609, "step": 2748 }, { "epoch": 0.14, "grad_norm": 0.0855576998250252, "learning_rate": 8.734683544303799e-06, "loss": 1.7539, "step": 2750 }, { "epoch": 0.14, "grad_norm": 0.048632087568065925, "learning_rate": 8.73367088607595e-06, "loss": 1.5352, "step": 2752 }, { "epoch": 0.14, "grad_norm": 0.04691725802992881, "learning_rate": 8.732658227848102e-06, "loss": 1.9258, "step": 2754 }, { "epoch": 0.14, "grad_norm": 0.06921217510974044, "learning_rate": 8.731645569620254e-06, "loss": 0.8477, "step": 2756 }, { "epoch": 0.14, "grad_norm": 0.047910364669956074, "learning_rate": 8.730632911392406e-06, "loss": 3.0391, "step": 2758 }, { "epoch": 0.14, "grad_norm": 0.047306808743848086, "learning_rate": 8.729620253164558e-06, "loss": 3.2266, "step": 2760 }, { "epoch": 0.14, "grad_norm": 0.05737040696259312, "learning_rate": 8.72860759493671e-06, "loss": 3.4531, "step": 2762 }, { "epoch": 0.14, "grad_norm": 0.04767645208347041, "learning_rate": 8.727594936708862e-06, "loss": 1.3906, "step": 2764 }, { "epoch": 0.14, "grad_norm": 0.05579559780102283, "learning_rate": 8.726582278481014e-06, "loss": 1.8477, "step": 2766 }, { "epoch": 0.14, "grad_norm": 0.05636799699713594, "learning_rate": 8.725569620253164e-06, "loss": 3.5234, "step": 2768 }, { "epoch": 0.14, "grad_norm": 0.046882581594394686, "learning_rate": 8.724556962025316e-06, "loss": 1.0723, "step": 2770 }, { "epoch": 0.14, "grad_norm": 0.0750346728236882, "learning_rate": 8.72354430379747e-06, "loss": 1.7559, "step": 2772 }, { "epoch": 0.14, "grad_norm": 0.06177951297877184, "learning_rate": 8.722531645569621e-06, "loss": 2.9297, "step": 2774 }, { "epoch": 0.14, "grad_norm": 0.054191608152368814, "learning_rate": 8.721518987341773e-06, "loss": 3.3828, "step": 2776 }, { "epoch": 0.14, "grad_norm": 0.04471650291574515, "learning_rate": 8.720506329113925e-06, "loss": 2.9141, "step": 2778 }, { "epoch": 0.14, "grad_norm": 0.04413088584531061, "learning_rate": 8.719493670886077e-06, "loss": 3.2578, "step": 2780 }, { "epoch": 0.14, "grad_norm": 0.05601101380153402, "learning_rate": 8.718481012658229e-06, "loss": 1.9375, "step": 2782 }, { "epoch": 0.14, "grad_norm": 0.05927547007243715, "learning_rate": 8.717468354430381e-06, "loss": 3.6328, "step": 2784 }, { "epoch": 0.14, "grad_norm": 0.05938647976436854, "learning_rate": 8.716455696202533e-06, "loss": 2.6367, "step": 2786 }, { "epoch": 0.14, "grad_norm": 0.05064732975993634, "learning_rate": 8.715443037974685e-06, "loss": 2.5547, "step": 2788 }, { "epoch": 0.14, "grad_norm": 0.0514953544556108, "learning_rate": 8.714430379746837e-06, "loss": 1.3711, "step": 2790 }, { "epoch": 0.14, "grad_norm": 0.04363196503218346, "learning_rate": 8.713417721518989e-06, "loss": 2.668, "step": 2792 }, { "epoch": 0.14, "grad_norm": 0.05115391091986226, "learning_rate": 8.712405063291139e-06, "loss": 2.5703, "step": 2794 }, { "epoch": 0.14, "grad_norm": 0.06085458556991963, "learning_rate": 8.71139240506329e-06, "loss": 2.3867, "step": 2796 }, { "epoch": 0.14, "grad_norm": 0.05528478699682591, "learning_rate": 8.710379746835443e-06, "loss": 3.1406, "step": 2798 }, { "epoch": 0.14, "grad_norm": 0.05841235254993294, "learning_rate": 8.709367088607595e-06, "loss": 3.3203, "step": 2800 }, { "epoch": 0.14, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 44.53125, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 92.3828125, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.80078125, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 69.36053466796875, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.2348, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.697, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.089, "step": 2800 }, { "epoch": 0.14, "eval_specter_top15HN_validation.jsonl.gz_acc1": 9.375, "eval_specter_top15HN_validation.jsonl.gz_acc3": 23.2421875, "eval_specter_top15HN_validation.jsonl.gz_loss": 1.4765625, "eval_specter_top15HN_validation.jsonl.gz_mrr": 21.60693359375, "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.5021, "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 25.579, "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.4, "step": 2800 }, { "epoch": 0.14, "eval_nq_top15HN_validation.jsonl.gz_acc1": 41.9921875, "eval_nq_top15HN_validation.jsonl.gz_acc3": 88.0859375, "eval_nq_top15HN_validation.jsonl.gz_loss": 0.875, "eval_nq_top15HN_validation.jsonl.gz_mrr": 66.19718170166016, "eval_nq_top15HN_validation.jsonl.gz_runtime": 11.0144, "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.811, "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.091, "step": 2800 }, { "epoch": 0.14, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 40.0390625, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 83.984375, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.984375, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 63.32621765136719, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.5473, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.399, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, "step": 2800 }, { "epoch": 0.14, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 41.796875, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 85.546875, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.03125, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 65.07398223876953, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.8712, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.887, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.092, "step": 2800 }, { "epoch": 0.14, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.484375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 94.140625, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.77734375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 70.9749755859375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.7689, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.943, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.093, "step": 2800 }, { "epoch": 0.14, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 40.234375, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 84.5703125, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.73828125, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 63.956912994384766, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.7182, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.971, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.093, "step": 2800 }, { "epoch": 0.14, "eval_fever_top15HN_validation.jsonl.gz_acc1": 42.1875, "eval_fever_top15HN_validation.jsonl.gz_acc3": 89.2578125, "eval_fever_top15HN_validation.jsonl.gz_loss": 2.015625, "eval_fever_top15HN_validation.jsonl.gz_mrr": 65.83201599121094, "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.0975, "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.976, "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.062, "step": 2800 }, { "epoch": 0.14, "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 38.8671875, "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 82.03125, "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.83203125, "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 62.253936767578125, "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.1807, "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 12.353, "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.193, "step": 2800 }, { "epoch": 0.14, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 43.359375, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 89.453125, "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.0859375, "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 67.39067077636719, "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 7.625, "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 8.393, "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.131, "step": 2800 }, { "epoch": 0.14, "eval_arguana_synthetic_validation.jsonl.gz_acc1": 44.7265625, "eval_arguana_synthetic_validation.jsonl.gz_acc3": 92.3828125, "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, "eval_arguana_synthetic_validation.jsonl.gz_mrr": 68.87017059326172, "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.0609, "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.646, "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.198, "step": 2800 }, { "epoch": 0.14, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 31.0546875, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 68.75, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.85546875, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 53.45774459838867, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.6129, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.38, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.068, "step": 2800 }, { "epoch": 0.14, "grad_norm": 0.04261711767454922, "learning_rate": 8.708354430379748e-06, "loss": 2.4062, "step": 2802 }, { "epoch": 0.14, "grad_norm": 0.04645713681303877, "learning_rate": 8.7073417721519e-06, "loss": 1.9062, "step": 2804 }, { "epoch": 0.14, "grad_norm": 0.05664159305337188, "learning_rate": 8.706329113924052e-06, "loss": 3.8672, "step": 2806 }, { "epoch": 0.14, "grad_norm": 0.043983600414410934, "learning_rate": 8.705316455696204e-06, "loss": 1.3203, "step": 2808 }, { "epoch": 0.14, "grad_norm": 0.04588362018358865, "learning_rate": 8.704303797468356e-06, "loss": 3.2852, "step": 2810 }, { "epoch": 0.14, "grad_norm": 0.07635983178014015, "learning_rate": 8.703291139240508e-06, "loss": 2.041, "step": 2812 }, { "epoch": 0.14, "grad_norm": 0.0916584015456185, "learning_rate": 8.70227848101266e-06, "loss": 2.4844, "step": 2814 }, { "epoch": 0.14, "grad_norm": 0.05892169492522577, "learning_rate": 8.701265822784812e-06, "loss": 2.0312, "step": 2816 }, { "epoch": 0.14, "grad_norm": 0.051820325886520344, "learning_rate": 8.700253164556963e-06, "loss": 1.3828, "step": 2818 }, { "epoch": 0.14, "grad_norm": 0.055710079179014056, "learning_rate": 8.699240506329114e-06, "loss": 2.9375, "step": 2820 }, { "epoch": 0.14, "grad_norm": 0.056917768326981706, "learning_rate": 8.698227848101266e-06, "loss": 3.5938, "step": 2822 }, { "epoch": 0.14, "grad_norm": 0.08770435781494222, "learning_rate": 8.697215189873417e-06, "loss": 2.3047, "step": 2824 }, { "epoch": 0.14, "grad_norm": 0.04423307931390974, "learning_rate": 8.69620253164557e-06, "loss": 1.9961, "step": 2826 }, { "epoch": 0.14, "grad_norm": 0.0748767172333099, "learning_rate": 8.695189873417721e-06, "loss": 2.9141, "step": 2828 }, { "epoch": 0.14, "grad_norm": 0.056072194492391036, "learning_rate": 8.694177215189873e-06, "loss": 2.7031, "step": 2830 }, { "epoch": 0.14, "grad_norm": 0.04389768509713057, "learning_rate": 8.693164556962027e-06, "loss": 2.75, "step": 2832 }, { "epoch": 0.14, "grad_norm": 0.0476806419907698, "learning_rate": 8.692151898734179e-06, "loss": 2.6094, "step": 2834 }, { "epoch": 0.14, "grad_norm": 0.04650770271213978, "learning_rate": 8.69113924050633e-06, "loss": 1.918, "step": 2836 }, { "epoch": 0.14, "grad_norm": 0.05239431577433559, "learning_rate": 8.690126582278483e-06, "loss": 1.0547, "step": 2838 }, { "epoch": 0.14, "grad_norm": 0.11572726759595672, "learning_rate": 8.689113924050634e-06, "loss": 2.2656, "step": 2840 }, { "epoch": 0.14, "grad_norm": 0.04643675966012806, "learning_rate": 8.688101265822786e-06, "loss": 1.9629, "step": 2842 }, { "epoch": 0.14, "grad_norm": 0.04629993854181596, "learning_rate": 8.687088607594938e-06, "loss": 3.2656, "step": 2844 }, { "epoch": 0.14, "grad_norm": 0.12961856233480304, "learning_rate": 8.68607594936709e-06, "loss": 2.3281, "step": 2846 }, { "epoch": 0.14, "grad_norm": 0.047928021614982726, "learning_rate": 8.68506329113924e-06, "loss": 3.6875, "step": 2848 }, { "epoch": 0.14, "grad_norm": 0.04179233438347491, "learning_rate": 8.684050632911392e-06, "loss": 2.4961, "step": 2850 }, { "epoch": 0.14, "grad_norm": 0.039675581911473946, "learning_rate": 8.683037974683544e-06, "loss": 3.1875, "step": 2852 }, { "epoch": 0.14, "grad_norm": 0.05333055705211874, "learning_rate": 8.682025316455696e-06, "loss": 1.9219, "step": 2854 }, { "epoch": 0.14, "grad_norm": 0.05709328519032197, "learning_rate": 8.681012658227848e-06, "loss": 3.9922, "step": 2856 }, { "epoch": 0.14, "grad_norm": 0.061995079781907214, "learning_rate": 8.68e-06, "loss": 3.625, "step": 2858 }, { "epoch": 0.14, "grad_norm": 0.04162847533794511, "learning_rate": 8.678987341772152e-06, "loss": 2.4844, "step": 2860 }, { "epoch": 0.14, "grad_norm": 0.0519238633375782, "learning_rate": 8.677974683544305e-06, "loss": 3.3828, "step": 2862 }, { "epoch": 0.14, "grad_norm": 0.05243794475973222, "learning_rate": 8.676962025316457e-06, "loss": 1.3984, "step": 2864 }, { "epoch": 0.14, "grad_norm": 0.032479401549115876, "learning_rate": 8.67594936708861e-06, "loss": 4.0625, "step": 2866 }, { "epoch": 0.14, "grad_norm": 0.05057630078935886, "learning_rate": 8.674936708860761e-06, "loss": 1.9375, "step": 2868 }, { "epoch": 0.14, "grad_norm": 0.0478188685230481, "learning_rate": 8.673924050632913e-06, "loss": 3.1719, "step": 2870 }, { "epoch": 0.14, "grad_norm": 0.05455466085242383, "learning_rate": 8.672911392405065e-06, "loss": 2.1309, "step": 2872 }, { "epoch": 0.14, "grad_norm": 0.04743098061180723, "learning_rate": 8.671898734177215e-06, "loss": 3.2578, "step": 2874 }, { "epoch": 0.14, "grad_norm": 0.06311851906083067, "learning_rate": 8.670886075949367e-06, "loss": 2.168, "step": 2876 }, { "epoch": 0.14, "grad_norm": 0.04812334931394406, "learning_rate": 8.669873417721519e-06, "loss": 2.5859, "step": 2878 }, { "epoch": 0.14, "grad_norm": 0.04399156255926814, "learning_rate": 8.668860759493671e-06, "loss": 1.9688, "step": 2880 }, { "epoch": 0.14, "grad_norm": 0.047780502359174505, "learning_rate": 8.667848101265823e-06, "loss": 2.6445, "step": 2882 }, { "epoch": 0.14, "grad_norm": 0.05398302519583386, "learning_rate": 8.666835443037975e-06, "loss": 2.373, "step": 2884 }, { "epoch": 0.14, "grad_norm": 0.04777209339294052, "learning_rate": 8.665822784810127e-06, "loss": 1.8711, "step": 2886 }, { "epoch": 0.14, "grad_norm": 0.05130103596515375, "learning_rate": 8.664810126582279e-06, "loss": 3.0156, "step": 2888 }, { "epoch": 0.14, "grad_norm": 0.08383940227125532, "learning_rate": 8.66379746835443e-06, "loss": 2.1016, "step": 2890 }, { "epoch": 0.14, "grad_norm": 0.07182262899450922, "learning_rate": 8.662784810126584e-06, "loss": 2.0508, "step": 2892 }, { "epoch": 0.14, "grad_norm": 0.04364754140496318, "learning_rate": 8.661772151898736e-06, "loss": 2.3672, "step": 2894 }, { "epoch": 0.14, "grad_norm": 0.04535774826607822, "learning_rate": 8.660759493670888e-06, "loss": 1.6797, "step": 2896 }, { "epoch": 0.14, "grad_norm": 0.08776352222005823, "learning_rate": 8.65974683544304e-06, "loss": 3.0078, "step": 2898 }, { "epoch": 0.14, "grad_norm": 0.08231854933995264, "learning_rate": 8.65873417721519e-06, "loss": 2.375, "step": 2900 }, { "epoch": 0.14, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 46.09375, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 93.359375, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.796875, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 70.46411895751953, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.2188, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.705, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.089, "step": 2900 }, { "epoch": 0.14, "eval_specter_top15HN_validation.jsonl.gz_acc1": 6.640625, "eval_specter_top15HN_validation.jsonl.gz_acc3": 16.6015625, "eval_specter_top15HN_validation.jsonl.gz_loss": 1.515625, "eval_specter_top15HN_validation.jsonl.gz_mrr": 17.565414428710938, "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.5627, "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 24.973, "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.39, "step": 2900 }, { "epoch": 0.14, "eval_nq_top15HN_validation.jsonl.gz_acc1": 46.875, "eval_nq_top15HN_validation.jsonl.gz_acc3": 95.3125, "eval_nq_top15HN_validation.jsonl.gz_loss": 0.85546875, "eval_nq_top15HN_validation.jsonl.gz_mrr": 71.68214416503906, "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.8629, "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.892, "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.092, "step": 2900 }, { "epoch": 0.14, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 38.671875, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 81.640625, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.9921875, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 61.0409049987793, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.4689, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.423, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, "step": 2900 }, { "epoch": 0.14, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 41.9921875, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 86.5234375, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.0234375, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 65.95207214355469, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.8349, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.907, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.092, "step": 2900 }, { "epoch": 0.14, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.6796875, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 94.7265625, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.13858032226562, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.8375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.905, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.092, "step": 2900 }, { "epoch": 0.14, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 40.4296875, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 84.765625, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.7421875, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 63.133331298828125, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.6538, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 6.007, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.094, "step": 2900 }, { "epoch": 0.14, "eval_fever_top15HN_validation.jsonl.gz_acc1": 39.6484375, "eval_fever_top15HN_validation.jsonl.gz_acc3": 86.5234375, "eval_fever_top15HN_validation.jsonl.gz_loss": 1.84375, "eval_fever_top15HN_validation.jsonl.gz_mrr": 65.24149322509766, "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.2411, "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.941, "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.062, "step": 2900 }, { "epoch": 0.14, "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 35.15625, "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 75.1953125, "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.84765625, "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 58.0799446105957, "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.2428, "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 12.207, "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.191, "step": 2900 }, { "epoch": 0.14, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 42.96875, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 88.0859375, "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.09375, "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 66.8700180053711, "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 7.0597, "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 9.066, "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.142, "step": 2900 }, { "epoch": 0.14, "eval_arguana_synthetic_validation.jsonl.gz_acc1": 42.3828125, "eval_arguana_synthetic_validation.jsonl.gz_acc3": 89.453125, "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, "eval_arguana_synthetic_validation.jsonl.gz_mrr": 66.93702697753906, "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.0233, "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.741, "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.199, "step": 2900 }, { "epoch": 0.14, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 31.0546875, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 68.9453125, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.85546875, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 53.83131790161133, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.2282, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.498, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, "step": 2900 }, { "epoch": 0.15, "grad_norm": 0.07477114924089268, "learning_rate": 8.657721518987342e-06, "loss": 3.375, "step": 2902 }, { "epoch": 0.15, "grad_norm": 0.0432321120471421, "learning_rate": 8.656708860759494e-06, "loss": 2.7656, "step": 2904 }, { "epoch": 0.15, "grad_norm": 0.044349297010555074, "learning_rate": 8.655696202531646e-06, "loss": 2.4688, "step": 2906 }, { "epoch": 0.15, "grad_norm": 0.03877685385237317, "learning_rate": 8.654683544303798e-06, "loss": 3.6953, "step": 2908 }, { "epoch": 0.15, "grad_norm": 0.058803880921406156, "learning_rate": 8.65367088607595e-06, "loss": 2.5781, "step": 2910 }, { "epoch": 0.15, "grad_norm": 0.0411508309987447, "learning_rate": 8.652658227848101e-06, "loss": 2.1797, "step": 2912 }, { "epoch": 0.15, "grad_norm": 0.04504128580073143, "learning_rate": 8.651645569620253e-06, "loss": 2.5, "step": 2914 }, { "epoch": 0.15, "grad_norm": 0.060956641241486406, "learning_rate": 8.650632911392405e-06, "loss": 3.6406, "step": 2916 }, { "epoch": 0.15, "grad_norm": 0.04308382117943785, "learning_rate": 8.649620253164557e-06, "loss": 2.9375, "step": 2918 }, { "epoch": 0.15, "grad_norm": 0.07348782990907644, "learning_rate": 8.648607594936709e-06, "loss": 1.7793, "step": 2920 }, { "epoch": 0.15, "grad_norm": 0.061247028730661686, "learning_rate": 8.647594936708863e-06, "loss": 1.832, "step": 2922 }, { "epoch": 0.15, "grad_norm": 0.04503804713557941, "learning_rate": 8.646582278481015e-06, "loss": 3.0234, "step": 2924 }, { "epoch": 0.15, "grad_norm": 0.1000457954971076, "learning_rate": 8.645569620253166e-06, "loss": 1.8047, "step": 2926 }, { "epoch": 0.15, "grad_norm": 0.09482330121461713, "learning_rate": 8.644556962025317e-06, "loss": 2.293, "step": 2928 }, { "epoch": 0.15, "grad_norm": 0.06873864478381334, "learning_rate": 8.643544303797469e-06, "loss": 2.1562, "step": 2930 }, { "epoch": 0.15, "grad_norm": 0.029219417698263857, "learning_rate": 8.64253164556962e-06, "loss": 4.1484, "step": 2932 }, { "epoch": 0.15, "grad_norm": 0.11640324428697353, "learning_rate": 8.641518987341772e-06, "loss": 0.9961, "step": 2934 }, { "epoch": 0.15, "grad_norm": 0.04955319176202268, "learning_rate": 8.640506329113924e-06, "loss": 3.4688, "step": 2936 }, { "epoch": 0.15, "grad_norm": 0.057930255428599024, "learning_rate": 8.639493670886076e-06, "loss": 4.4922, "step": 2938 }, { "epoch": 0.15, "grad_norm": 0.04593005258256435, "learning_rate": 8.638481012658228e-06, "loss": 2.9219, "step": 2940 }, { "epoch": 0.15, "grad_norm": 0.060304294210792975, "learning_rate": 8.63746835443038e-06, "loss": 3.0, "step": 2942 }, { "epoch": 0.15, "grad_norm": 0.0218452468401511, "learning_rate": 8.636455696202532e-06, "loss": 3.5078, "step": 2944 }, { "epoch": 0.15, "grad_norm": 0.0799725289881099, "learning_rate": 8.635443037974684e-06, "loss": 1.6738, "step": 2946 }, { "epoch": 0.15, "grad_norm": 0.06799658145226696, "learning_rate": 8.634430379746836e-06, "loss": 2.9453, "step": 2948 }, { "epoch": 0.15, "grad_norm": 0.04533609893838451, "learning_rate": 8.633417721518988e-06, "loss": 1.8516, "step": 2950 }, { "epoch": 0.15, "grad_norm": 0.13776389909632097, "learning_rate": 8.632405063291141e-06, "loss": 2.3867, "step": 2952 }, { "epoch": 0.15, "grad_norm": 0.09322318933903165, "learning_rate": 8.631392405063292e-06, "loss": 1.7012, "step": 2954 }, { "epoch": 0.15, "grad_norm": 0.051294343484733905, "learning_rate": 8.630379746835443e-06, "loss": 3.4375, "step": 2956 }, { "epoch": 0.15, "grad_norm": 0.05283734855406228, "learning_rate": 8.629367088607595e-06, "loss": 1.9297, "step": 2958 }, { "epoch": 0.15, "grad_norm": 0.0446215051653574, "learning_rate": 8.628354430379747e-06, "loss": 1.8984, "step": 2960 }, { "epoch": 0.15, "grad_norm": 0.028306240252279194, "learning_rate": 8.627341772151899e-06, "loss": 3.5508, "step": 2962 }, { "epoch": 0.15, "grad_norm": 0.10700466459291955, "learning_rate": 8.626329113924051e-06, "loss": 3.2422, "step": 2964 }, { "epoch": 0.15, "grad_norm": 0.04298498107222189, "learning_rate": 8.625316455696203e-06, "loss": 2.5859, "step": 2966 }, { "epoch": 0.15, "grad_norm": 0.10153439939997064, "learning_rate": 8.624303797468355e-06, "loss": 2.2461, "step": 2968 }, { "epoch": 0.15, "grad_norm": 0.08713454505120027, "learning_rate": 8.623291139240507e-06, "loss": 2.9453, "step": 2970 }, { "epoch": 0.15, "grad_norm": 0.07463167478931523, "learning_rate": 8.622278481012659e-06, "loss": 0.9746, "step": 2972 }, { "epoch": 0.15, "grad_norm": 0.045513612046108515, "learning_rate": 8.62126582278481e-06, "loss": 2.7578, "step": 2974 }, { "epoch": 0.15, "grad_norm": 0.04592394120233233, "learning_rate": 8.620253164556963e-06, "loss": 3.0391, "step": 2976 }, { "epoch": 0.15, "grad_norm": 0.041987217841938596, "learning_rate": 8.619240506329114e-06, "loss": 3.25, "step": 2978 }, { "epoch": 0.15, "grad_norm": 0.04678244692335755, "learning_rate": 8.618227848101266e-06, "loss": 2.0391, "step": 2980 }, { "epoch": 0.15, "grad_norm": 0.11298075232977826, "learning_rate": 8.617215189873418e-06, "loss": 3.1875, "step": 2982 }, { "epoch": 0.15, "grad_norm": 0.05431368485278824, "learning_rate": 8.61620253164557e-06, "loss": 3.2578, "step": 2984 }, { "epoch": 0.15, "grad_norm": 0.053505713526510804, "learning_rate": 8.615189873417722e-06, "loss": 2.8438, "step": 2986 }, { "epoch": 0.15, "grad_norm": 0.04393738834145115, "learning_rate": 8.614177215189874e-06, "loss": 3.2891, "step": 2988 }, { "epoch": 0.15, "grad_norm": 0.05246996599152096, "learning_rate": 8.613164556962026e-06, "loss": 2.5352, "step": 2990 }, { "epoch": 0.15, "grad_norm": 0.10998424228866684, "learning_rate": 8.612151898734178e-06, "loss": 1.0117, "step": 2992 }, { "epoch": 0.15, "grad_norm": 0.05296540867273297, "learning_rate": 8.61113924050633e-06, "loss": 4.0, "step": 2994 }, { "epoch": 0.15, "grad_norm": 0.05825320064235262, "learning_rate": 8.610126582278482e-06, "loss": 1.9844, "step": 2996 }, { "epoch": 0.15, "grad_norm": 0.04920198130300496, "learning_rate": 8.609113924050633e-06, "loss": 2.5703, "step": 2998 }, { "epoch": 0.15, "grad_norm": 0.04230717274145532, "learning_rate": 8.608101265822785e-06, "loss": 1.9707, "step": 3000 }, { "epoch": 0.15, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc1": 43.359375, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_acc3": 90.0390625, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_loss": 0.80078125, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_mrr": 68.26922607421875, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_runtime": 11.3394, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_samples_per_second": 5.644, "eval_squad_uniq_passages_top15HN_validation.jsonl.gz_steps_per_second": 0.088, "step": 3000 }, { "epoch": 0.15, "eval_specter_top15HN_validation.jsonl.gz_acc1": 9.9609375, "eval_specter_top15HN_validation.jsonl.gz_acc3": 23.828125, "eval_specter_top15HN_validation.jsonl.gz_loss": 1.453125, "eval_specter_top15HN_validation.jsonl.gz_mrr": 22.220108032226562, "eval_specter_top15HN_validation.jsonl.gz_runtime": 2.6716, "eval_specter_top15HN_validation.jsonl.gz_samples_per_second": 23.956, "eval_specter_top15HN_validation.jsonl.gz_steps_per_second": 0.374, "step": 3000 }, { "epoch": 0.15, "eval_nq_top15HN_validation.jsonl.gz_acc1": 45.1171875, "eval_nq_top15HN_validation.jsonl.gz_acc3": 93.1640625, "eval_nq_top15HN_validation.jsonl.gz_loss": 0.86328125, "eval_nq_top15HN_validation.jsonl.gz_mrr": 69.32588195800781, "eval_nq_top15HN_validation.jsonl.gz_runtime": 10.9789, "eval_nq_top15HN_validation.jsonl.gz_samples_per_second": 5.829, "eval_nq_top15HN_validation.jsonl.gz_steps_per_second": 0.091, "step": 3000 }, { "epoch": 0.15, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc1": 37.890625, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_acc3": 80.078125, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_loss": 0.99609375, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_mrr": 60.68324279785156, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_runtime": 14.535, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_samples_per_second": 4.403, "eval_stackexchange_title_body_top15HN_validation.jsonl.gz_steps_per_second": 0.069, "step": 3000 }, { "epoch": 0.15, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc1": 42.96875, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_acc3": 86.9140625, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_loss": 1.015625, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_mrr": 65.91932678222656, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_runtime": 10.8405, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_samples_per_second": 5.904, "eval_S2ORC_title_abstract-10M_top15HN_validation.jsonl.gz_steps_per_second": 0.092, "step": 3000 }, { "epoch": 0.15, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc1": 46.875, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_acc3": 94.7265625, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_loss": 0.7734375, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_mrr": 71.57400512695312, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_runtime": 10.7451, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_samples_per_second": 5.956, "eval_hotpotqa_part1_top15HN_validation.jsonl.gz_steps_per_second": 0.093, "step": 3000 }, { "epoch": 0.15, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc1": 40.8203125, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_acc3": 85.3515625, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_loss": 0.73828125, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_mrr": 64.20136260986328, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_runtime": 10.8702, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_samples_per_second": 5.888, "eval_hotpotqa_part2_top15HN_validation.jsonl.gz_steps_per_second": 0.092, "step": 3000 }, { "epoch": 0.15, "eval_fever_top15HN_validation.jsonl.gz_acc1": 42.578125, "eval_fever_top15HN_validation.jsonl.gz_acc3": 89.0625, "eval_fever_top15HN_validation.jsonl.gz_loss": 1.8359375, "eval_fever_top15HN_validation.jsonl.gz_mrr": 67.19670867919922, "eval_fever_top15HN_validation.jsonl.gz_runtime": 16.3653, "eval_fever_top15HN_validation.jsonl.gz_samples_per_second": 3.911, "eval_fever_top15HN_validation.jsonl.gz_steps_per_second": 0.061, "step": 3000 }, { "epoch": 0.15, "eval_searchQA_top15HN_validation.jsonl.gz_acc1": 39.453125, "eval_searchQA_top15HN_validation.jsonl.gz_acc3": 83.3984375, "eval_searchQA_top15HN_validation.jsonl.gz_loss": 0.82421875, "eval_searchQA_top15HN_validation.jsonl.gz_mrr": 62.63498306274414, "eval_searchQA_top15HN_validation.jsonl.gz_runtime": 5.5675, "eval_searchQA_top15HN_validation.jsonl.gz_samples_per_second": 11.495, "eval_searchQA_top15HN_validation.jsonl.gz_steps_per_second": 0.18, "step": 3000 }, { "epoch": 0.15, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc1": 45.5078125, "eval_pubmedqa_top15HN_validation.jsonl.gz_acc3": 92.3828125, "eval_pubmedqa_top15HN_validation.jsonl.gz_loss": 1.078125, "eval_pubmedqa_top15HN_validation.jsonl.gz_mrr": 69.95784759521484, "eval_pubmedqa_top15HN_validation.jsonl.gz_runtime": 8.1936, "eval_pubmedqa_top15HN_validation.jsonl.gz_samples_per_second": 7.811, "eval_pubmedqa_top15HN_validation.jsonl.gz_steps_per_second": 0.122, "step": 3000 }, { "epoch": 0.15, "eval_arguana_synthetic_validation.jsonl.gz_acc1": 43.359375, "eval_arguana_synthetic_validation.jsonl.gz_acc3": 91.40625, "eval_arguana_synthetic_validation.jsonl.gz_loss": 1.703125, "eval_arguana_synthetic_validation.jsonl.gz_mrr": 68.28853607177734, "eval_arguana_synthetic_validation.jsonl.gz_runtime": 5.1568, "eval_arguana_synthetic_validation.jsonl.gz_samples_per_second": 12.411, "eval_arguana_synthetic_validation.jsonl.gz_steps_per_second": 0.194, "step": 3000 }, { "epoch": 0.15, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc1": 29.1015625, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_acc3": 68.359375, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_loss": 0.8515625, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_mrr": 52.09654998779297, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_runtime": 14.1945, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_samples_per_second": 4.509, "eval_en_miracal_train_split_1pos-HN-2_validation.jsonl.gz_steps_per_second": 0.07, "step": 3000 } ], "logging_steps": 2, "max_steps": 20000, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }