|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 100, |
|
"global_step": 3112, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.0706638115631692e-10, |
|
"logits/chosen": 1.2566330432891846, |
|
"logits/rejected": 0.7730951309204102, |
|
"logps/chosen": -300.374267578125, |
|
"logps/rejected": -324.00494384765625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.070663811563169e-09, |
|
"logits/chosen": 0.9792649745941162, |
|
"logits/rejected": 1.7012548446655273, |
|
"logps/chosen": -464.2229309082031, |
|
"logps/rejected": -332.3782653808594, |
|
"loss": 0.6952, |
|
"rewards/accuracies": 0.4861111044883728, |
|
"rewards/chosen": -0.006301212124526501, |
|
"rewards/margins": -0.0025307913310825825, |
|
"rewards/rejected": -0.0037704205606132746, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.141327623126338e-09, |
|
"logits/chosen": 0.5618988871574402, |
|
"logits/rejected": 1.6265491247177124, |
|
"logps/chosen": -438.208984375, |
|
"logps/rejected": -328.3803405761719, |
|
"loss": 0.6981, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.012895757332444191, |
|
"rewards/margins": -0.00526293832808733, |
|
"rewards/rejected": -0.007632819004356861, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.2119914346895075e-09, |
|
"logits/chosen": 0.8482489585876465, |
|
"logits/rejected": 1.8450462818145752, |
|
"logps/chosen": -437.23870849609375, |
|
"logps/rejected": -367.84637451171875, |
|
"loss": 0.6967, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": 0.005116526037454605, |
|
"rewards/margins": -0.005225582513958216, |
|
"rewards/rejected": 0.010342106223106384, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.282655246252676e-09, |
|
"logits/chosen": 0.9240902662277222, |
|
"logits/rejected": 2.074276924133301, |
|
"logps/chosen": -408.9275207519531, |
|
"logps/rejected": -335.3138122558594, |
|
"loss": 0.6953, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.02186817303299904, |
|
"rewards/margins": 0.013794437050819397, |
|
"rewards/rejected": 0.008073735050857067, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.353319057815846e-09, |
|
"logits/chosen": 0.9429410696029663, |
|
"logits/rejected": 1.3247915506362915, |
|
"logps/chosen": -487.283203125, |
|
"logps/rejected": -337.8562927246094, |
|
"loss": 0.6867, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.01094397995620966, |
|
"rewards/margins": 0.016293564811348915, |
|
"rewards/rejected": -0.005349582992494106, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.423982869379015e-09, |
|
"logits/chosen": 0.9860417246818542, |
|
"logits/rejected": 1.649106740951538, |
|
"logps/chosen": -456.7554626464844, |
|
"logps/rejected": -330.7721252441406, |
|
"loss": 0.6692, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.0688941478729248, |
|
"rewards/margins": 0.04043982923030853, |
|
"rewards/rejected": 0.02845432423055172, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.494646680942184e-09, |
|
"logits/chosen": 0.9182626008987427, |
|
"logits/rejected": 1.827265977859497, |
|
"logps/chosen": -396.9792785644531, |
|
"logps/rejected": -330.5106506347656, |
|
"loss": 0.6531, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.1199527382850647, |
|
"rewards/margins": 0.07451293617486954, |
|
"rewards/rejected": 0.04543980211019516, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.565310492505352e-09, |
|
"logits/chosen": 0.5657048225402832, |
|
"logits/rejected": 2.0772719383239746, |
|
"logps/chosen": -467.70233154296875, |
|
"logps/rejected": -340.50457763671875, |
|
"loss": 0.6347, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.18721021711826324, |
|
"rewards/margins": 0.12396695464849472, |
|
"rewards/rejected": 0.06324325501918793, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.635974304068522e-09, |
|
"logits/chosen": 0.9224559664726257, |
|
"logits/rejected": 1.6432113647460938, |
|
"logps/chosen": -410.8121032714844, |
|
"logps/rejected": -300.57012939453125, |
|
"loss": 0.608, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.28949785232543945, |
|
"rewards/margins": 0.21210959553718567, |
|
"rewards/rejected": 0.07738825678825378, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.0706638115631692e-08, |
|
"logits/chosen": 0.8869959115982056, |
|
"logits/rejected": 1.8451831340789795, |
|
"logps/chosen": -403.8915710449219, |
|
"logps/rejected": -300.415771484375, |
|
"loss": 0.5911, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.3641214966773987, |
|
"rewards/margins": 0.22090363502502441, |
|
"rewards/rejected": 0.14321786165237427, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_logits/chosen": 0.30295610427856445, |
|
"eval_logits/rejected": 0.8016409873962402, |
|
"eval_logps/chosen": -395.38714599609375, |
|
"eval_logps/rejected": -304.46209716796875, |
|
"eval_loss": 0.5531623363494873, |
|
"eval_rewards/accuracies": 0.84375, |
|
"eval_rewards/chosen": 0.39543235301971436, |
|
"eval_rewards/margins": 0.28056541085243225, |
|
"eval_rewards/rejected": 0.1148669496178627, |
|
"eval_runtime": 77.7884, |
|
"eval_samples_per_second": 12.855, |
|
"eval_steps_per_second": 0.411, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.177730192719486e-08, |
|
"logits/chosen": 1.0650697946548462, |
|
"logits/rejected": 1.743814468383789, |
|
"logps/chosen": -376.599853515625, |
|
"logps/rejected": -334.667724609375, |
|
"loss": 0.5616, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.4621427655220032, |
|
"rewards/margins": 0.31689101457595825, |
|
"rewards/rejected": 0.14525175094604492, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.284796573875803e-08, |
|
"logits/chosen": 0.7667558789253235, |
|
"logits/rejected": 1.5485883951187134, |
|
"logps/chosen": -444.92987060546875, |
|
"logps/rejected": -348.63372802734375, |
|
"loss": 0.532, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.5855865478515625, |
|
"rewards/margins": 0.441326379776001, |
|
"rewards/rejected": 0.14426018297672272, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.3918629550321198e-08, |
|
"logits/chosen": 0.6718708276748657, |
|
"logits/rejected": 1.5146424770355225, |
|
"logps/chosen": -443.32568359375, |
|
"logps/rejected": -351.67010498046875, |
|
"loss": 0.5069, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.6444682478904724, |
|
"rewards/margins": 0.5162140130996704, |
|
"rewards/rejected": 0.128254234790802, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.4989293361884368e-08, |
|
"logits/chosen": 0.7301766872406006, |
|
"logits/rejected": 1.4742127656936646, |
|
"logps/chosen": -460.937255859375, |
|
"logps/rejected": -351.74871826171875, |
|
"loss": 0.4694, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.7888091802597046, |
|
"rewards/margins": 0.612645149230957, |
|
"rewards/rejected": 0.17616406083106995, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.6059957173447535e-08, |
|
"logits/chosen": 1.1185513734817505, |
|
"logits/rejected": 1.3702727556228638, |
|
"logps/chosen": -388.2076721191406, |
|
"logps/rejected": -313.1037902832031, |
|
"loss": 0.4438, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.8620562553405762, |
|
"rewards/margins": 0.5798792839050293, |
|
"rewards/rejected": 0.2821769118309021, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.7130620985010704e-08, |
|
"logits/chosen": 1.1629040241241455, |
|
"logits/rejected": 1.1827285289764404, |
|
"logps/chosen": -448.11517333984375, |
|
"logps/rejected": -330.4544372558594, |
|
"loss": 0.4282, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 1.1283848285675049, |
|
"rewards/margins": 0.9130982160568237, |
|
"rewards/rejected": 0.2152867317199707, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.8201284796573874e-08, |
|
"logits/chosen": 0.9314751625061035, |
|
"logits/rejected": 1.7524402141571045, |
|
"logps/chosen": -433.95159912109375, |
|
"logps/rejected": -306.7549743652344, |
|
"loss": 0.3991, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 1.0592777729034424, |
|
"rewards/margins": 0.8145540952682495, |
|
"rewards/rejected": 0.24472376704216003, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9271948608137044e-08, |
|
"logits/chosen": 0.9510858654975891, |
|
"logits/rejected": 1.7319552898406982, |
|
"logps/chosen": -425.33148193359375, |
|
"logps/rejected": -348.006103515625, |
|
"loss": 0.3595, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.3629436492919922, |
|
"rewards/margins": 1.0799864530563354, |
|
"rewards/rejected": 0.2829572558403015, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.0342612419700214e-08, |
|
"logits/chosen": 0.8850847482681274, |
|
"logits/rejected": 1.6263946294784546, |
|
"logps/chosen": -368.7896423339844, |
|
"logps/rejected": -318.7091064453125, |
|
"loss": 0.3786, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.226855993270874, |
|
"rewards/margins": 0.9453747868537903, |
|
"rewards/rejected": 0.2814810574054718, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.1413276231263384e-08, |
|
"logits/chosen": 0.9113885760307312, |
|
"logits/rejected": 1.9366668462753296, |
|
"logps/chosen": -392.1955871582031, |
|
"logps/rejected": -291.93536376953125, |
|
"loss": 0.3425, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.2265583276748657, |
|
"rewards/margins": 1.0303113460540771, |
|
"rewards/rejected": 0.19624683260917664, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_logits/chosen": 0.33036381006240845, |
|
"eval_logits/rejected": 0.8503063321113586, |
|
"eval_logps/chosen": -385.884765625, |
|
"eval_logps/rejected": -303.293212890625, |
|
"eval_loss": 0.31037652492523193, |
|
"eval_rewards/accuracies": 0.9453125, |
|
"eval_rewards/chosen": 1.345674753189087, |
|
"eval_rewards/margins": 1.1139166355133057, |
|
"eval_rewards/rejected": 0.23175781965255737, |
|
"eval_runtime": 77.5737, |
|
"eval_samples_per_second": 12.891, |
|
"eval_steps_per_second": 0.413, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.248394004282655e-08, |
|
"logits/chosen": 1.403585433959961, |
|
"logits/rejected": 1.621122121810913, |
|
"logps/chosen": -385.9792175292969, |
|
"logps/rejected": -314.9217529296875, |
|
"loss": 0.3509, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.412534475326538, |
|
"rewards/margins": 1.2651290893554688, |
|
"rewards/rejected": 0.1474055051803589, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.355460385438972e-08, |
|
"logits/chosen": 0.8744575381278992, |
|
"logits/rejected": 2.048430919647217, |
|
"logps/chosen": -420.71820068359375, |
|
"logps/rejected": -308.5834045410156, |
|
"loss": 0.3222, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.4642362594604492, |
|
"rewards/margins": 1.3160054683685303, |
|
"rewards/rejected": 0.14823095500469208, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.462526766595289e-08, |
|
"logits/chosen": 1.1990312337875366, |
|
"logits/rejected": 1.7153043746948242, |
|
"logps/chosen": -431.5537109375, |
|
"logps/rejected": -328.9321594238281, |
|
"loss": 0.3009, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.6640676259994507, |
|
"rewards/margins": 1.5493268966674805, |
|
"rewards/rejected": 0.11474086344242096, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.569593147751606e-08, |
|
"logits/chosen": 1.2709139585494995, |
|
"logits/rejected": 2.0714378356933594, |
|
"logps/chosen": -390.81512451171875, |
|
"logps/rejected": -353.8885803222656, |
|
"loss": 0.2846, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.6071593761444092, |
|
"rewards/margins": 1.5323899984359741, |
|
"rewards/rejected": 0.07476941496133804, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.676659528907923e-08, |
|
"logits/chosen": 1.2884495258331299, |
|
"logits/rejected": 1.4190622568130493, |
|
"logps/chosen": -402.27105712890625, |
|
"logps/rejected": -316.29681396484375, |
|
"loss": 0.2703, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.6437135934829712, |
|
"rewards/margins": 1.6835206747055054, |
|
"rewards/rejected": -0.039806898683309555, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.7837259100642396e-08, |
|
"logits/chosen": 1.0041536092758179, |
|
"logits/rejected": 1.827345848083496, |
|
"logps/chosen": -352.20782470703125, |
|
"logps/rejected": -281.7880859375, |
|
"loss": 0.2568, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.6974023580551147, |
|
"rewards/margins": 1.677443265914917, |
|
"rewards/rejected": 0.019959043711423874, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.890792291220557e-08, |
|
"logits/chosen": 1.280470848083496, |
|
"logits/rejected": 2.0490353107452393, |
|
"logps/chosen": -396.66546630859375, |
|
"logps/rejected": -352.89923095703125, |
|
"loss": 0.2423, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.8935625553131104, |
|
"rewards/margins": 1.9584296941757202, |
|
"rewards/rejected": -0.06486758589744568, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.9978586723768736e-08, |
|
"logits/chosen": 1.1651164293289185, |
|
"logits/rejected": 1.8686307668685913, |
|
"logps/chosen": -369.2472229003906, |
|
"logps/rejected": -350.49395751953125, |
|
"loss": 0.2369, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.8536313772201538, |
|
"rewards/margins": 2.0920298099517822, |
|
"rewards/rejected": -0.23839814960956573, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.1049250535331906e-08, |
|
"logits/chosen": 0.9764202833175659, |
|
"logits/rejected": 1.5498555898666382, |
|
"logps/chosen": -401.94708251953125, |
|
"logps/rejected": -309.38751220703125, |
|
"loss": 0.2172, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.9584842920303345, |
|
"rewards/margins": 2.0484821796417236, |
|
"rewards/rejected": -0.08999788761138916, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.211991434689507e-08, |
|
"logits/chosen": 1.1941782236099243, |
|
"logits/rejected": 2.1323885917663574, |
|
"logps/chosen": -424.00299072265625, |
|
"logps/rejected": -329.6502380371094, |
|
"loss": 0.2046, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 2.1664984226226807, |
|
"rewards/margins": 2.4187095165252686, |
|
"rewards/rejected": -0.2522108256816864, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_logits/chosen": 0.5110462307929993, |
|
"eval_logits/rejected": 1.0409064292907715, |
|
"eval_logps/chosen": -381.6942138671875, |
|
"eval_logps/rejected": -308.5774230957031, |
|
"eval_loss": 0.1841452568769455, |
|
"eval_rewards/accuracies": 0.9453125, |
|
"eval_rewards/chosen": 1.764728307723999, |
|
"eval_rewards/margins": 2.061392307281494, |
|
"eval_rewards/rejected": -0.2966638207435608, |
|
"eval_runtime": 77.5919, |
|
"eval_samples_per_second": 12.888, |
|
"eval_steps_per_second": 0.412, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.3190578158458246e-08, |
|
"logits/chosen": 0.9687066078186035, |
|
"logits/rejected": 1.7230606079101562, |
|
"logps/chosen": -409.93109130859375, |
|
"logps/rejected": -367.07464599609375, |
|
"loss": 0.183, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.9146747589111328, |
|
"rewards/margins": 2.3619022369384766, |
|
"rewards/rejected": -0.4472277760505676, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.426124197002141e-08, |
|
"logits/chosen": 0.9972401857376099, |
|
"logits/rejected": 2.0866451263427734, |
|
"logps/chosen": -459.5816345214844, |
|
"logps/rejected": -333.5121154785156, |
|
"loss": 0.1805, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 2.2129783630371094, |
|
"rewards/margins": 2.6930830478668213, |
|
"rewards/rejected": -0.4801049828529358, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.533190578158458e-08, |
|
"logits/chosen": 1.417145013809204, |
|
"logits/rejected": 1.8801225423812866, |
|
"logps/chosen": -405.06280517578125, |
|
"logps/rejected": -345.85809326171875, |
|
"loss": 0.1807, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 2.1431164741516113, |
|
"rewards/margins": 2.6557106971740723, |
|
"rewards/rejected": -0.5125941038131714, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.640256959314775e-08, |
|
"logits/chosen": 1.1074305772781372, |
|
"logits/rejected": 2.019841194152832, |
|
"logps/chosen": -463.04534912109375, |
|
"logps/rejected": -334.6724853515625, |
|
"loss": 0.1691, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.148191452026367, |
|
"rewards/margins": 2.7028677463531494, |
|
"rewards/rejected": -0.5546759366989136, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.747323340471092e-08, |
|
"logits/chosen": 1.4934161901474, |
|
"logits/rejected": 2.0615015029907227, |
|
"logps/chosen": -408.3346862792969, |
|
"logps/rejected": -348.22747802734375, |
|
"loss": 0.2014, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 2.1981351375579834, |
|
"rewards/margins": 2.8584563732147217, |
|
"rewards/rejected": -0.6603211164474487, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.854389721627409e-08, |
|
"logits/chosen": 1.234470009803772, |
|
"logits/rejected": 1.3749644756317139, |
|
"logps/chosen": -429.24169921875, |
|
"logps/rejected": -359.1957092285156, |
|
"loss": 0.1651, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 2.1755428314208984, |
|
"rewards/margins": 3.0690488815307617, |
|
"rewards/rejected": -0.8935060501098633, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.961456102783726e-08, |
|
"logits/chosen": 1.5004570484161377, |
|
"logits/rejected": 2.204867124557495, |
|
"logps/chosen": -432.32135009765625, |
|
"logps/rejected": -330.5525207519531, |
|
"loss": 0.1527, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 2.2549405097961426, |
|
"rewards/margins": 2.9733498096466064, |
|
"rewards/rejected": -0.7184091806411743, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.068522483940043e-08, |
|
"logits/chosen": 1.4254530668258667, |
|
"logits/rejected": 1.9733701944351196, |
|
"logps/chosen": -372.3633117675781, |
|
"logps/rejected": -325.6457824707031, |
|
"loss": 0.1565, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.8286545276641846, |
|
"rewards/margins": 2.7208027839660645, |
|
"rewards/rejected": -0.8921481966972351, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.175588865096359e-08, |
|
"logits/chosen": 1.5646907091140747, |
|
"logits/rejected": 1.819138526916504, |
|
"logps/chosen": -377.5379943847656, |
|
"logps/rejected": -343.8385925292969, |
|
"loss": 0.1354, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.9773420095443726, |
|
"rewards/margins": 2.9212684631347656, |
|
"rewards/rejected": -0.9439260363578796, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.282655246252677e-08, |
|
"logits/chosen": 1.4840887784957886, |
|
"logits/rejected": 1.9725840091705322, |
|
"logps/chosen": -434.97784423828125, |
|
"logps/rejected": -331.9248352050781, |
|
"loss": 0.1596, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 2.0752744674682617, |
|
"rewards/margins": 3.0398762226104736, |
|
"rewards/rejected": -0.9646021127700806, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": 0.6985446214675903, |
|
"eval_logits/rejected": 1.232299566268921, |
|
"eval_logps/chosen": -381.2061767578125, |
|
"eval_logps/rejected": -316.1226806640625, |
|
"eval_loss": 0.13778163492679596, |
|
"eval_rewards/accuracies": 0.953125, |
|
"eval_rewards/chosen": 1.813530683517456, |
|
"eval_rewards/margins": 2.8647243976593018, |
|
"eval_rewards/rejected": -1.0511937141418457, |
|
"eval_runtime": 77.7071, |
|
"eval_samples_per_second": 12.869, |
|
"eval_steps_per_second": 0.412, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.389721627408993e-08, |
|
"logits/chosen": 1.466382622718811, |
|
"logits/rejected": 2.043748617172241, |
|
"logps/chosen": -327.84527587890625, |
|
"logps/rejected": -320.1709899902344, |
|
"loss": 0.1566, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 1.6711170673370361, |
|
"rewards/margins": 2.701075315475464, |
|
"rewards/rejected": -1.0299583673477173, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.49678800856531e-08, |
|
"logits/chosen": 0.8124781847000122, |
|
"logits/rejected": 2.2421953678131104, |
|
"logps/chosen": -400.47650146484375, |
|
"logps/rejected": -366.773681640625, |
|
"loss": 0.1388, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 2.0225610733032227, |
|
"rewards/margins": 3.3162319660186768, |
|
"rewards/rejected": -1.293670654296875, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.603854389721627e-08, |
|
"logits/chosen": 1.411001443862915, |
|
"logits/rejected": 2.1695659160614014, |
|
"logps/chosen": -381.0799255371094, |
|
"logps/rejected": -321.12640380859375, |
|
"loss": 0.1331, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 2.094374656677246, |
|
"rewards/margins": 3.4128735065460205, |
|
"rewards/rejected": -1.3184987306594849, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.710920770877944e-08, |
|
"logits/chosen": 1.3558982610702515, |
|
"logits/rejected": 2.4410338401794434, |
|
"logps/chosen": -366.334716796875, |
|
"logps/rejected": -347.8705139160156, |
|
"loss": 0.1456, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 2.165606737136841, |
|
"rewards/margins": 3.6063430309295654, |
|
"rewards/rejected": -1.4407367706298828, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.817987152034261e-08, |
|
"logits/chosen": 1.2681734561920166, |
|
"logits/rejected": 2.0421833992004395, |
|
"logps/chosen": -482.25091552734375, |
|
"logps/rejected": -355.57904052734375, |
|
"loss": 0.1114, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.3174893856048584, |
|
"rewards/margins": 3.938343048095703, |
|
"rewards/rejected": -1.6208534240722656, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.925053533190578e-08, |
|
"logits/chosen": 1.3757874965667725, |
|
"logits/rejected": 2.598388195037842, |
|
"logps/chosen": -371.0218200683594, |
|
"logps/rejected": -349.3445129394531, |
|
"loss": 0.1312, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.7905902862548828, |
|
"rewards/margins": 3.369292736053467, |
|
"rewards/rejected": -1.5787023305892944, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 5.032119914346895e-08, |
|
"logits/chosen": 1.4324853420257568, |
|
"logits/rejected": 2.03037428855896, |
|
"logps/chosen": -403.80120849609375, |
|
"logps/rejected": -359.4237976074219, |
|
"loss": 0.0932, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 2.2499053478240967, |
|
"rewards/margins": 3.795927047729492, |
|
"rewards/rejected": -1.5460216999053955, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 5.139186295503212e-08, |
|
"logits/chosen": 1.1828899383544922, |
|
"logits/rejected": 2.248396873474121, |
|
"logps/chosen": -442.19970703125, |
|
"logps/rejected": -360.25006103515625, |
|
"loss": 0.123, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 2.486384868621826, |
|
"rewards/margins": 4.412066459655762, |
|
"rewards/rejected": -1.9256811141967773, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 5.246252676659528e-08, |
|
"logits/chosen": 0.9969785809516907, |
|
"logits/rejected": 2.016841173171997, |
|
"logps/chosen": -364.4849853515625, |
|
"logps/rejected": -346.4527893066406, |
|
"loss": 0.1157, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.7432949542999268, |
|
"rewards/margins": 3.481992721557617, |
|
"rewards/rejected": -1.7386982440948486, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 5.353319057815846e-08, |
|
"logits/chosen": 1.628588080406189, |
|
"logits/rejected": 2.0982065200805664, |
|
"logps/chosen": -381.443115234375, |
|
"logps/rejected": -362.29486083984375, |
|
"loss": 0.1153, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 2.0162081718444824, |
|
"rewards/margins": 4.036345958709717, |
|
"rewards/rejected": -2.0201380252838135, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_logits/chosen": 0.9163604378700256, |
|
"eval_logits/rejected": 1.455054521560669, |
|
"eval_logps/chosen": -380.87884521484375, |
|
"eval_logps/rejected": -324.88177490234375, |
|
"eval_loss": 0.10337930172681808, |
|
"eval_rewards/accuracies": 0.9375, |
|
"eval_rewards/chosen": 1.8462636470794678, |
|
"eval_rewards/margins": 3.773362159729004, |
|
"eval_rewards/rejected": -1.9270987510681152, |
|
"eval_runtime": 77.7201, |
|
"eval_samples_per_second": 12.867, |
|
"eval_steps_per_second": 0.412, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 5.460385438972163e-08, |
|
"logits/chosen": 1.4893255233764648, |
|
"logits/rejected": 1.7933692932128906, |
|
"logps/chosen": -441.28399658203125, |
|
"logps/rejected": -357.0665588378906, |
|
"loss": 0.0929, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 2.2136569023132324, |
|
"rewards/margins": 4.3935346603393555, |
|
"rewards/rejected": -2.179877758026123, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 5.567451820128479e-08, |
|
"logits/chosen": 1.6450592279434204, |
|
"logits/rejected": 2.41060209274292, |
|
"logps/chosen": -457.15667724609375, |
|
"logps/rejected": -374.75115966796875, |
|
"loss": 0.0995, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 2.662111759185791, |
|
"rewards/margins": 5.2003607749938965, |
|
"rewards/rejected": -2.5382492542266846, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 5.6745182012847956e-08, |
|
"logits/chosen": 1.7802613973617554, |
|
"logits/rejected": 2.5348358154296875, |
|
"logps/chosen": -378.1894836425781, |
|
"logps/rejected": -335.26129150390625, |
|
"loss": 0.1062, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 2.415276050567627, |
|
"rewards/margins": 4.646345138549805, |
|
"rewards/rejected": -2.231069564819336, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 5.781584582441114e-08, |
|
"logits/chosen": 1.8911815881729126, |
|
"logits/rejected": 2.2930028438568115, |
|
"logps/chosen": -445.2462463378906, |
|
"logps/rejected": -343.48150634765625, |
|
"loss": 0.102, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.925233244895935, |
|
"rewards/margins": 4.143408298492432, |
|
"rewards/rejected": -2.2181754112243652, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 5.88865096359743e-08, |
|
"logits/chosen": 1.3622030019760132, |
|
"logits/rejected": 2.7591183185577393, |
|
"logps/chosen": -417.8857421875, |
|
"logps/rejected": -337.2507629394531, |
|
"loss": 0.1003, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 2.4883484840393066, |
|
"rewards/margins": 4.856560707092285, |
|
"rewards/rejected": -2.3682124614715576, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 5.995717344753747e-08, |
|
"logits/chosen": 1.885152816772461, |
|
"logits/rejected": 2.445096492767334, |
|
"logps/chosen": -413.9400939941406, |
|
"logps/rejected": -347.16864013671875, |
|
"loss": 0.1078, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.7049505710601807, |
|
"rewards/margins": 4.317243576049805, |
|
"rewards/rejected": -2.612293243408203, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 6.102783725910064e-08, |
|
"logits/chosen": 1.8548791408538818, |
|
"logits/rejected": 2.9488348960876465, |
|
"logps/chosen": -399.66888427734375, |
|
"logps/rejected": -361.6997375488281, |
|
"loss": 0.0833, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 2.391327381134033, |
|
"rewards/margins": 5.426538467407227, |
|
"rewards/rejected": -3.0352110862731934, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 6.209850107066381e-08, |
|
"logits/chosen": 1.9805002212524414, |
|
"logits/rejected": 2.253380537033081, |
|
"logps/chosen": -363.88214111328125, |
|
"logps/rejected": -356.90264892578125, |
|
"loss": 0.0771, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.9852304458618164, |
|
"rewards/margins": 4.773615837097168, |
|
"rewards/rejected": -2.7883856296539307, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.316916488222698e-08, |
|
"logits/chosen": 1.8189414739608765, |
|
"logits/rejected": 2.1621651649475098, |
|
"logps/chosen": -376.37664794921875, |
|
"logps/rejected": -344.7612609863281, |
|
"loss": 0.1041, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.4361340999603271, |
|
"rewards/margins": 4.317945957183838, |
|
"rewards/rejected": -2.8818118572235107, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.423982869379014e-08, |
|
"logits/chosen": 1.9913393259048462, |
|
"logits/rejected": 2.2392477989196777, |
|
"logps/chosen": -409.7967834472656, |
|
"logps/rejected": -371.5293884277344, |
|
"loss": 0.0904, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 2.0530600547790527, |
|
"rewards/margins": 5.140936851501465, |
|
"rewards/rejected": -3.087876081466675, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_logits/chosen": 1.1242132186889648, |
|
"eval_logits/rejected": 1.6480361223220825, |
|
"eval_logps/chosen": -383.8947448730469, |
|
"eval_logps/rejected": -335.6293029785156, |
|
"eval_loss": 0.08573687076568604, |
|
"eval_rewards/accuracies": 0.9453125, |
|
"eval_rewards/chosen": 1.5446751117706299, |
|
"eval_rewards/margins": 4.5465288162231445, |
|
"eval_rewards/rejected": -3.0018532276153564, |
|
"eval_runtime": 77.586, |
|
"eval_samples_per_second": 12.889, |
|
"eval_steps_per_second": 0.412, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.531049250535332e-08, |
|
"logits/chosen": 1.5741275548934937, |
|
"logits/rejected": 2.2887330055236816, |
|
"logps/chosen": -405.8610534667969, |
|
"logps/rejected": -368.2715759277344, |
|
"loss": 0.0841, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.9225307703018188, |
|
"rewards/margins": 5.187340259552002, |
|
"rewards/rejected": -3.2648093700408936, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.638115631691649e-08, |
|
"logits/chosen": 1.3936151266098022, |
|
"logits/rejected": 2.8601880073547363, |
|
"logps/chosen": -414.2528381347656, |
|
"logps/rejected": -394.7596435546875, |
|
"loss": 0.079, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.7199567556381226, |
|
"rewards/margins": 5.62530517578125, |
|
"rewards/rejected": -3.905348300933838, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.745182012847965e-08, |
|
"logits/chosen": 1.7785946130752563, |
|
"logits/rejected": 2.8865761756896973, |
|
"logps/chosen": -429.94580078125, |
|
"logps/rejected": -385.0089416503906, |
|
"loss": 0.085, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.8962761163711548, |
|
"rewards/margins": 5.370936393737793, |
|
"rewards/rejected": -3.474660873413086, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 6.852248394004282e-08, |
|
"logits/chosen": 1.801674246788025, |
|
"logits/rejected": 2.2879269123077393, |
|
"logps/chosen": -424.087158203125, |
|
"logps/rejected": -360.2523498535156, |
|
"loss": 0.0998, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.9135773181915283, |
|
"rewards/margins": 5.04227352142334, |
|
"rewards/rejected": -3.1286959648132324, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 6.9593147751606e-08, |
|
"logits/chosen": 1.8188073635101318, |
|
"logits/rejected": 2.1497268676757812, |
|
"logps/chosen": -423.1458435058594, |
|
"logps/rejected": -369.9307861328125, |
|
"loss": 0.082, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.321187138557434, |
|
"rewards/margins": 4.85211706161499, |
|
"rewards/rejected": -3.5309300422668457, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 7.066381156316916e-08, |
|
"logits/chosen": 1.7404190301895142, |
|
"logits/rejected": 2.677952289581299, |
|
"logps/chosen": -402.8115234375, |
|
"logps/rejected": -340.89337158203125, |
|
"loss": 0.0725, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 2.031486749649048, |
|
"rewards/margins": 5.562923908233643, |
|
"rewards/rejected": -3.531437397003174, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 7.173447537473233e-08, |
|
"logits/chosen": 1.8731329441070557, |
|
"logits/rejected": 2.752986431121826, |
|
"logps/chosen": -387.5521240234375, |
|
"logps/rejected": -344.8697204589844, |
|
"loss": 0.0797, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.3953731060028076, |
|
"rewards/margins": 5.006608963012695, |
|
"rewards/rejected": -3.611236095428467, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 7.28051391862955e-08, |
|
"logits/chosen": 1.765631914138794, |
|
"logits/rejected": 2.9511940479278564, |
|
"logps/chosen": -412.4698791503906, |
|
"logps/rejected": -378.50537109375, |
|
"loss": 0.0922, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.7086801528930664, |
|
"rewards/margins": 5.677321434020996, |
|
"rewards/rejected": -3.968641757965088, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 7.387580299785867e-08, |
|
"logits/chosen": 1.7808834314346313, |
|
"logits/rejected": 2.730713367462158, |
|
"logps/chosen": -375.7123718261719, |
|
"logps/rejected": -352.998046875, |
|
"loss": 0.0794, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.92323899269104, |
|
"rewards/margins": 5.590106010437012, |
|
"rewards/rejected": -3.6668670177459717, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 7.494646680942184e-08, |
|
"logits/chosen": 1.7647113800048828, |
|
"logits/rejected": 2.6917636394500732, |
|
"logps/chosen": -393.0909729003906, |
|
"logps/rejected": -375.1470947265625, |
|
"loss": 0.0754, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 2.228740930557251, |
|
"rewards/margins": 6.255539417266846, |
|
"rewards/rejected": -4.026798248291016, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_logits/chosen": 1.1851357221603394, |
|
"eval_logits/rejected": 1.7113410234451294, |
|
"eval_logps/chosen": -382.10595703125, |
|
"eval_logps/rejected": -341.21160888671875, |
|
"eval_loss": 0.07382317632436752, |
|
"eval_rewards/accuracies": 0.96875, |
|
"eval_rewards/chosen": 1.7235567569732666, |
|
"eval_rewards/margins": 5.283637523651123, |
|
"eval_rewards/rejected": -3.5600812435150146, |
|
"eval_runtime": 78.3559, |
|
"eval_samples_per_second": 12.762, |
|
"eval_steps_per_second": 0.408, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 7.601713062098501e-08, |
|
"logits/chosen": 1.8762671947479248, |
|
"logits/rejected": 2.7768630981445312, |
|
"logps/chosen": -403.41461181640625, |
|
"logps/rejected": -348.1267395019531, |
|
"loss": 0.0767, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 2.008270502090454, |
|
"rewards/margins": 5.45863151550293, |
|
"rewards/rejected": -3.4503607749938965, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 7.708779443254818e-08, |
|
"logits/chosen": 1.9685356616973877, |
|
"logits/rejected": 2.4259753227233887, |
|
"logps/chosen": -423.30694580078125, |
|
"logps/rejected": -355.1989440917969, |
|
"loss": 0.0738, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 2.1973319053649902, |
|
"rewards/margins": 5.911899566650391, |
|
"rewards/rejected": -3.7145678997039795, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 7.815845824411135e-08, |
|
"logits/chosen": 1.7616288661956787, |
|
"logits/rejected": 2.778398036956787, |
|
"logps/chosen": -415.79833984375, |
|
"logps/rejected": -365.4802551269531, |
|
"loss": 0.0838, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.6791164875030518, |
|
"rewards/margins": 5.412492275238037, |
|
"rewards/rejected": -3.7333762645721436, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 7.922912205567452e-08, |
|
"logits/chosen": 1.911268949508667, |
|
"logits/rejected": 2.5959761142730713, |
|
"logps/chosen": -399.43463134765625, |
|
"logps/rejected": -380.5097961425781, |
|
"loss": 0.0822, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.497511625289917, |
|
"rewards/margins": 5.510095596313477, |
|
"rewards/rejected": -4.0125837326049805, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 8.029978586723767e-08, |
|
"logits/chosen": 1.8361726999282837, |
|
"logits/rejected": 2.3821051120758057, |
|
"logps/chosen": -372.65618896484375, |
|
"logps/rejected": -381.7906799316406, |
|
"loss": 0.0662, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.844112753868103, |
|
"rewards/margins": 6.229226589202881, |
|
"rewards/rejected": -4.385113716125488, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 8.137044967880086e-08, |
|
"logits/chosen": 2.179152011871338, |
|
"logits/rejected": 2.5297486782073975, |
|
"logps/chosen": -396.2829284667969, |
|
"logps/rejected": -352.2908935546875, |
|
"loss": 0.0683, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 2.0879392623901367, |
|
"rewards/margins": 6.044869422912598, |
|
"rewards/rejected": -3.956930160522461, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 8.244111349036403e-08, |
|
"logits/chosen": 1.5910015106201172, |
|
"logits/rejected": 2.9595401287078857, |
|
"logps/chosen": -386.81573486328125, |
|
"logps/rejected": -350.80303955078125, |
|
"loss": 0.0697, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.7900558710098267, |
|
"rewards/margins": 5.987098693847656, |
|
"rewards/rejected": -4.197042942047119, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 8.351177730192718e-08, |
|
"logits/chosen": 1.8572914600372314, |
|
"logits/rejected": 2.7143654823303223, |
|
"logps/chosen": -416.63238525390625, |
|
"logps/rejected": -403.2386169433594, |
|
"loss": 0.0668, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.9369878768920898, |
|
"rewards/margins": 6.391732692718506, |
|
"rewards/rejected": -4.4547438621521, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 8.458244111349035e-08, |
|
"logits/chosen": 1.7877603769302368, |
|
"logits/rejected": 3.0778090953826904, |
|
"logps/chosen": -436.93121337890625, |
|
"logps/rejected": -375.30499267578125, |
|
"loss": 0.0639, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.7189620733261108, |
|
"rewards/margins": 6.110939979553223, |
|
"rewards/rejected": -4.391977787017822, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 8.565310492505354e-08, |
|
"logits/chosen": 1.8236795663833618, |
|
"logits/rejected": 3.053520441055298, |
|
"logps/chosen": -440.10711669921875, |
|
"logps/rejected": -371.2597351074219, |
|
"loss": 0.0748, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.6093488931655884, |
|
"rewards/margins": 5.858896732330322, |
|
"rewards/rejected": -4.249547004699707, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_logits/chosen": 1.2858448028564453, |
|
"eval_logits/rejected": 1.8164113759994507, |
|
"eval_logps/chosen": -386.32391357421875, |
|
"eval_logps/rejected": -349.60479736328125, |
|
"eval_loss": 0.0711909607052803, |
|
"eval_rewards/accuracies": 0.9765625, |
|
"eval_rewards/chosen": 1.3017570972442627, |
|
"eval_rewards/margins": 5.701159477233887, |
|
"eval_rewards/rejected": -4.399402618408203, |
|
"eval_runtime": 78.5065, |
|
"eval_samples_per_second": 12.738, |
|
"eval_steps_per_second": 0.408, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 8.672376873661669e-08, |
|
"logits/chosen": 2.046654462814331, |
|
"logits/rejected": 2.875682830810547, |
|
"logps/chosen": -367.9713134765625, |
|
"logps/rejected": -358.0971984863281, |
|
"loss": 0.0733, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.6088149547576904, |
|
"rewards/margins": 6.157548427581787, |
|
"rewards/rejected": -4.548734188079834, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 8.779443254817986e-08, |
|
"logits/chosen": 2.2108118534088135, |
|
"logits/rejected": 2.811412811279297, |
|
"logps/chosen": -388.9371032714844, |
|
"logps/rejected": -359.3924255371094, |
|
"loss": 0.0554, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.500943899154663, |
|
"rewards/margins": 6.306944370269775, |
|
"rewards/rejected": -4.806000709533691, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 8.886509635974304e-08, |
|
"logits/chosen": 1.7628936767578125, |
|
"logits/rejected": 2.9087016582489014, |
|
"logps/chosen": -436.14886474609375, |
|
"logps/rejected": -375.5445251464844, |
|
"loss": 0.0587, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.7326514720916748, |
|
"rewards/margins": 6.393059730529785, |
|
"rewards/rejected": -4.660407543182373, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 8.99357601713062e-08, |
|
"logits/chosen": 1.880814552307129, |
|
"logits/rejected": 2.7161240577697754, |
|
"logps/chosen": -395.0550231933594, |
|
"logps/rejected": -363.26849365234375, |
|
"loss": 0.0616, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.268677830696106, |
|
"rewards/margins": 5.726746559143066, |
|
"rewards/rejected": -4.45806884765625, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.100642398286937e-08, |
|
"logits/chosen": 1.7003024816513062, |
|
"logits/rejected": 2.424133777618408, |
|
"logps/chosen": -389.845703125, |
|
"logps/rejected": -358.5777282714844, |
|
"loss": 0.0776, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.434398889541626, |
|
"rewards/margins": 5.904941082000732, |
|
"rewards/rejected": -4.470543384552002, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.207708779443254e-08, |
|
"logits/chosen": 1.8526960611343384, |
|
"logits/rejected": 2.986250638961792, |
|
"logps/chosen": -414.87744140625, |
|
"logps/rejected": -394.83477783203125, |
|
"loss": 0.0555, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.5953035354614258, |
|
"rewards/margins": 6.830922603607178, |
|
"rewards/rejected": -5.23561954498291, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 9.314775160599571e-08, |
|
"logits/chosen": 2.066636800765991, |
|
"logits/rejected": 2.8474645614624023, |
|
"logps/chosen": -422.1144104003906, |
|
"logps/rejected": -373.4290466308594, |
|
"loss": 0.059, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.4623820781707764, |
|
"rewards/margins": 6.585521697998047, |
|
"rewards/rejected": -5.123138904571533, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 9.421841541755888e-08, |
|
"logits/chosen": 2.3123459815979004, |
|
"logits/rejected": 2.8036293983459473, |
|
"logps/chosen": -407.44525146484375, |
|
"logps/rejected": -358.09771728515625, |
|
"loss": 0.0736, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.7242000102996826, |
|
"rewards/margins": 6.361567974090576, |
|
"rewards/rejected": -4.637368202209473, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 9.528907922912205e-08, |
|
"logits/chosen": 1.5694644451141357, |
|
"logits/rejected": 2.9323623180389404, |
|
"logps/chosen": -440.52264404296875, |
|
"logps/rejected": -416.00543212890625, |
|
"loss": 0.0513, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.229987621307373, |
|
"rewards/margins": 7.332463264465332, |
|
"rewards/rejected": -5.102475166320801, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 9.635974304068522e-08, |
|
"logits/chosen": 1.9838998317718506, |
|
"logits/rejected": 3.293696165084839, |
|
"logps/chosen": -377.60546875, |
|
"logps/rejected": -400.6976013183594, |
|
"loss": 0.0418, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.361082911491394, |
|
"rewards/margins": 7.032995700836182, |
|
"rewards/rejected": -5.6719136238098145, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_logits/chosen": 1.431064486503601, |
|
"eval_logits/rejected": 1.926888108253479, |
|
"eval_logps/chosen": -389.20965576171875, |
|
"eval_logps/rejected": -357.00994873046875, |
|
"eval_loss": 0.06582893431186676, |
|
"eval_rewards/accuracies": 0.96875, |
|
"eval_rewards/chosen": 1.0131824016571045, |
|
"eval_rewards/margins": 6.153097629547119, |
|
"eval_rewards/rejected": -5.1399149894714355, |
|
"eval_runtime": 78.1749, |
|
"eval_samples_per_second": 12.792, |
|
"eval_steps_per_second": 0.409, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 9.743040685224839e-08, |
|
"logits/chosen": 2.3193790912628174, |
|
"logits/rejected": 2.9782004356384277, |
|
"logps/chosen": -355.58978271484375, |
|
"logps/rejected": -374.8011779785156, |
|
"loss": 0.0504, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.7792609930038452, |
|
"rewards/margins": 6.809684753417969, |
|
"rewards/rejected": -5.030424118041992, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 9.850107066381156e-08, |
|
"logits/chosen": 1.8106858730316162, |
|
"logits/rejected": 2.700303316116333, |
|
"logps/chosen": -436.91912841796875, |
|
"logps/rejected": -376.2388610839844, |
|
"loss": 0.0524, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 2.020312786102295, |
|
"rewards/margins": 7.166808128356934, |
|
"rewards/rejected": -5.146495342254639, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 9.957173447537473e-08, |
|
"logits/chosen": 1.956599235534668, |
|
"logits/rejected": 2.98834490776062, |
|
"logps/chosen": -432.9703063964844, |
|
"logps/rejected": -390.3727722167969, |
|
"loss": 0.0802, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.9984395503997803, |
|
"rewards/margins": 7.112124443054199, |
|
"rewards/rejected": -5.11368465423584, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 9.992858843132586e-08, |
|
"logits/chosen": 2.160102605819702, |
|
"logits/rejected": 2.821474313735962, |
|
"logps/chosen": -424.42962646484375, |
|
"logps/rejected": -402.60235595703125, |
|
"loss": 0.059, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.0432326793670654, |
|
"rewards/margins": 7.653304100036621, |
|
"rewards/rejected": -5.610072135925293, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 9.980956915020233e-08, |
|
"logits/chosen": 1.941819190979004, |
|
"logits/rejected": 3.0604381561279297, |
|
"logps/chosen": -397.3748474121094, |
|
"logps/rejected": -423.2496643066406, |
|
"loss": 0.0557, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 2.0661425590515137, |
|
"rewards/margins": 7.666254997253418, |
|
"rewards/rejected": -5.600112438201904, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.969054986907879e-08, |
|
"logits/chosen": 1.859452247619629, |
|
"logits/rejected": 3.1880416870117188, |
|
"logps/chosen": -423.7708435058594, |
|
"logps/rejected": -383.41259765625, |
|
"loss": 0.0419, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7157636880874634, |
|
"rewards/margins": 7.163266181945801, |
|
"rewards/rejected": -5.447502136230469, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.957153058795524e-08, |
|
"logits/chosen": 1.755894660949707, |
|
"logits/rejected": 2.667271852493286, |
|
"logps/chosen": -405.3694763183594, |
|
"logps/rejected": -380.8141784667969, |
|
"loss": 0.0777, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.459249496459961, |
|
"rewards/margins": 6.949099540710449, |
|
"rewards/rejected": -5.489850044250488, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.94525113068317e-08, |
|
"logits/chosen": 1.9811556339263916, |
|
"logits/rejected": 3.005946397781372, |
|
"logps/chosen": -364.09521484375, |
|
"logps/rejected": -364.8800048828125, |
|
"loss": 0.0533, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.8379303216934204, |
|
"rewards/margins": 6.930338382720947, |
|
"rewards/rejected": -5.092407703399658, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.933349202570817e-08, |
|
"logits/chosen": 1.7156349420547485, |
|
"logits/rejected": 2.972471237182617, |
|
"logps/chosen": -424.38519287109375, |
|
"logps/rejected": -384.4205627441406, |
|
"loss": 0.0545, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 2.169966459274292, |
|
"rewards/margins": 7.766200065612793, |
|
"rewards/rejected": -5.596234321594238, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.921447274458463e-08, |
|
"logits/chosen": 1.65463387966156, |
|
"logits/rejected": 3.0111804008483887, |
|
"logps/chosen": -466.137451171875, |
|
"logps/rejected": -387.94317626953125, |
|
"loss": 0.0597, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.5395125150680542, |
|
"rewards/margins": 7.153542518615723, |
|
"rewards/rejected": -5.614029884338379, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_logits/chosen": 1.426473617553711, |
|
"eval_logits/rejected": 1.9225867986679077, |
|
"eval_logps/chosen": -391.24505615234375, |
|
"eval_logps/rejected": -361.6107482910156, |
|
"eval_loss": 0.06333575397729874, |
|
"eval_rewards/accuracies": 0.96875, |
|
"eval_rewards/chosen": 0.8096399307250977, |
|
"eval_rewards/margins": 6.409637928009033, |
|
"eval_rewards/rejected": -5.5999979972839355, |
|
"eval_runtime": 78.1719, |
|
"eval_samples_per_second": 12.792, |
|
"eval_steps_per_second": 0.409, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 9.909545346346108e-08, |
|
"logits/chosen": 2.0751636028289795, |
|
"logits/rejected": 2.8259801864624023, |
|
"logps/chosen": -391.50396728515625, |
|
"logps/rejected": -390.41680908203125, |
|
"loss": 0.0479, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.0614975690841675, |
|
"rewards/margins": 6.492199897766113, |
|
"rewards/rejected": -5.430701732635498, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 9.897643418233753e-08, |
|
"logits/chosen": 1.8207337856292725, |
|
"logits/rejected": 2.810199737548828, |
|
"logps/chosen": -412.99658203125, |
|
"logps/rejected": -401.36126708984375, |
|
"loss": 0.0404, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.5562200546264648, |
|
"rewards/margins": 8.119148254394531, |
|
"rewards/rejected": -6.562928199768066, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 9.885741490121398e-08, |
|
"logits/chosen": 2.1346724033355713, |
|
"logits/rejected": 2.99312424659729, |
|
"logps/chosen": -427.60601806640625, |
|
"logps/rejected": -379.160400390625, |
|
"loss": 0.0493, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 2.149390697479248, |
|
"rewards/margins": 7.403092384338379, |
|
"rewards/rejected": -5.253701210021973, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.873839562009045e-08, |
|
"logits/chosen": 1.81674063205719, |
|
"logits/rejected": 3.14642071723938, |
|
"logps/chosen": -410.53436279296875, |
|
"logps/rejected": -422.78790283203125, |
|
"loss": 0.0625, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.664518117904663, |
|
"rewards/margins": 7.533532619476318, |
|
"rewards/rejected": -5.869014739990234, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.861937633896691e-08, |
|
"logits/chosen": 2.1859567165374756, |
|
"logits/rejected": 2.774512767791748, |
|
"logps/chosen": -371.1358642578125, |
|
"logps/rejected": -376.1512451171875, |
|
"loss": 0.0397, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.8732522130012512, |
|
"rewards/margins": 6.792318820953369, |
|
"rewards/rejected": -5.919065952301025, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.850035705784336e-08, |
|
"logits/chosen": 2.508104085922241, |
|
"logits/rejected": 2.7455363273620605, |
|
"logps/chosen": -438.8089294433594, |
|
"logps/rejected": -426.82464599609375, |
|
"loss": 0.0433, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.799186110496521, |
|
"rewards/margins": 8.152082443237305, |
|
"rewards/rejected": -6.352896690368652, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.838133777671982e-08, |
|
"logits/chosen": 2.162297010421753, |
|
"logits/rejected": 2.9005820751190186, |
|
"logps/chosen": -393.8660583496094, |
|
"logps/rejected": -402.96649169921875, |
|
"loss": 0.0523, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.1076323986053467, |
|
"rewards/margins": 7.283668518066406, |
|
"rewards/rejected": -6.1760358810424805, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.826231849559629e-08, |
|
"logits/chosen": 2.274879217147827, |
|
"logits/rejected": 2.933121681213379, |
|
"logps/chosen": -436.8518981933594, |
|
"logps/rejected": -405.71246337890625, |
|
"loss": 0.0622, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.486023187637329, |
|
"rewards/margins": 7.599495887756348, |
|
"rewards/rejected": -6.1134724617004395, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.814329921447275e-08, |
|
"logits/chosen": 2.3372159004211426, |
|
"logits/rejected": 2.4765231609344482, |
|
"logps/chosen": -394.52398681640625, |
|
"logps/rejected": -383.6842956542969, |
|
"loss": 0.047, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.5595790147781372, |
|
"rewards/margins": 7.81160831451416, |
|
"rewards/rejected": -6.2520294189453125, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 9.80242799333492e-08, |
|
"logits/chosen": 2.076772689819336, |
|
"logits/rejected": 3.134021282196045, |
|
"logps/chosen": -424.93914794921875, |
|
"logps/rejected": -395.6697998046875, |
|
"loss": 0.045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.405705451965332, |
|
"rewards/margins": 7.619529724121094, |
|
"rewards/rejected": -6.213824272155762, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_logits/chosen": 1.5631608963012695, |
|
"eval_logits/rejected": 2.0626633167266846, |
|
"eval_logps/chosen": -392.5325012207031, |
|
"eval_logps/rejected": -367.3223876953125, |
|
"eval_loss": 0.060555677860975266, |
|
"eval_rewards/accuracies": 0.9609375, |
|
"eval_rewards/chosen": 0.6809001564979553, |
|
"eval_rewards/margins": 6.852060317993164, |
|
"eval_rewards/rejected": -6.171159744262695, |
|
"eval_runtime": 78.2923, |
|
"eval_samples_per_second": 12.773, |
|
"eval_steps_per_second": 0.409, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 9.790526065222565e-08, |
|
"logits/chosen": 2.313490629196167, |
|
"logits/rejected": 2.873136281967163, |
|
"logps/chosen": -429.7516174316406, |
|
"logps/rejected": -405.4490051269531, |
|
"loss": 0.0419, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.5168659687042236, |
|
"rewards/margins": 7.972109794616699, |
|
"rewards/rejected": -6.455244541168213, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 9.778624137110211e-08, |
|
"logits/chosen": 2.1459672451019287, |
|
"logits/rejected": 2.9044458866119385, |
|
"logps/chosen": -420.87744140625, |
|
"logps/rejected": -377.68896484375, |
|
"loss": 0.0336, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.6729495525360107, |
|
"rewards/margins": 7.566412925720215, |
|
"rewards/rejected": -5.893463134765625, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 9.766722208997857e-08, |
|
"logits/chosen": 2.3557143211364746, |
|
"logits/rejected": 2.725691318511963, |
|
"logps/chosen": -405.68109130859375, |
|
"logps/rejected": -419.1329040527344, |
|
"loss": 0.0552, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.7717654705047607, |
|
"rewards/margins": 8.059925079345703, |
|
"rewards/rejected": -6.288159370422363, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 9.754820280885503e-08, |
|
"logits/chosen": 2.216296672821045, |
|
"logits/rejected": 2.695742130279541, |
|
"logps/chosen": -415.5310974121094, |
|
"logps/rejected": -400.45263671875, |
|
"loss": 0.0422, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.6496362686157227, |
|
"rewards/margins": 7.936199188232422, |
|
"rewards/rejected": -6.286562919616699, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.742918352773148e-08, |
|
"logits/chosen": 2.093048572540283, |
|
"logits/rejected": 2.7978243827819824, |
|
"logps/chosen": -450.3804626464844, |
|
"logps/rejected": -412.62908935546875, |
|
"loss": 0.0573, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 2.3106613159179688, |
|
"rewards/margins": 8.593067169189453, |
|
"rewards/rejected": -6.282405853271484, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.731016424660795e-08, |
|
"logits/chosen": 2.472761869430542, |
|
"logits/rejected": 3.0465588569641113, |
|
"logps/chosen": -426.41412353515625, |
|
"logps/rejected": -407.08612060546875, |
|
"loss": 0.0443, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.5518306493759155, |
|
"rewards/margins": 8.157126426696777, |
|
"rewards/rejected": -6.6052961349487305, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.719114496548441e-08, |
|
"logits/chosen": 1.941318154335022, |
|
"logits/rejected": 3.335367202758789, |
|
"logps/chosen": -418.33428955078125, |
|
"logps/rejected": -403.39337158203125, |
|
"loss": 0.0404, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 2.0809710025787354, |
|
"rewards/margins": 8.985780715942383, |
|
"rewards/rejected": -6.90480899810791, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 9.707212568436087e-08, |
|
"logits/chosen": 2.241579532623291, |
|
"logits/rejected": 2.8625073432922363, |
|
"logps/chosen": -408.77178955078125, |
|
"logps/rejected": -389.53778076171875, |
|
"loss": 0.0384, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.2259656190872192, |
|
"rewards/margins": 8.003189086914062, |
|
"rewards/rejected": -6.777223110198975, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 9.695310640323732e-08, |
|
"logits/chosen": 2.914790630340576, |
|
"logits/rejected": 2.8142731189727783, |
|
"logps/chosen": -387.0242614746094, |
|
"logps/rejected": -399.86749267578125, |
|
"loss": 0.0371, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.323935627937317, |
|
"rewards/margins": 8.005804061889648, |
|
"rewards/rejected": -6.681868553161621, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 9.683408712211378e-08, |
|
"logits/chosen": 2.1048951148986816, |
|
"logits/rejected": 2.841226816177368, |
|
"logps/chosen": -451.27459716796875, |
|
"logps/rejected": -403.28240966796875, |
|
"loss": 0.0669, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.7710305452346802, |
|
"rewards/margins": 7.861186981201172, |
|
"rewards/rejected": -7.090156555175781, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_logits/chosen": 1.5480470657348633, |
|
"eval_logits/rejected": 2.058847188949585, |
|
"eval_logps/chosen": -392.0873718261719, |
|
"eval_logps/rejected": -371.0377197265625, |
|
"eval_loss": 0.06311403959989548, |
|
"eval_rewards/accuracies": 0.9609375, |
|
"eval_rewards/chosen": 0.7254116535186768, |
|
"eval_rewards/margins": 7.268110275268555, |
|
"eval_rewards/rejected": -6.542698860168457, |
|
"eval_runtime": 78.0374, |
|
"eval_samples_per_second": 12.814, |
|
"eval_steps_per_second": 0.41, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 9.671506784099024e-08, |
|
"logits/chosen": 2.5204672813415527, |
|
"logits/rejected": 3.2195708751678467, |
|
"logps/chosen": -350.8345031738281, |
|
"logps/rejected": -381.27569580078125, |
|
"loss": 0.0321, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.312830924987793, |
|
"rewards/margins": 8.153268814086914, |
|
"rewards/rejected": -6.840437889099121, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 9.659604855986669e-08, |
|
"logits/chosen": 2.0493383407592773, |
|
"logits/rejected": 2.925226926803589, |
|
"logps/chosen": -462.79638671875, |
|
"logps/rejected": -402.0813903808594, |
|
"loss": 0.0583, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.8163875341415405, |
|
"rewards/margins": 8.08704948425293, |
|
"rewards/rejected": -6.270661354064941, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 9.647702927874315e-08, |
|
"logits/chosen": 2.3467869758605957, |
|
"logits/rejected": 2.963789463043213, |
|
"logps/chosen": -448.3075256347656, |
|
"logps/rejected": -411.7268981933594, |
|
"loss": 0.0308, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 2.1376395225524902, |
|
"rewards/margins": 8.935708999633789, |
|
"rewards/rejected": -6.798068046569824, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.63580099976196e-08, |
|
"logits/chosen": 2.0899271965026855, |
|
"logits/rejected": 3.0291128158569336, |
|
"logps/chosen": -395.5885009765625, |
|
"logps/rejected": -395.0786437988281, |
|
"loss": 0.0437, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.3425300121307373, |
|
"rewards/margins": 7.873586177825928, |
|
"rewards/rejected": -6.5310564041137695, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.623899071649607e-08, |
|
"logits/chosen": 2.6475844383239746, |
|
"logits/rejected": 3.3692593574523926, |
|
"logps/chosen": -355.3011169433594, |
|
"logps/rejected": -404.4001770019531, |
|
"loss": 0.036, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 2.1569859981536865, |
|
"rewards/margins": 8.337442398071289, |
|
"rewards/rejected": -6.18045711517334, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.611997143537253e-08, |
|
"logits/chosen": 2.364384412765503, |
|
"logits/rejected": 2.7631328105926514, |
|
"logps/chosen": -441.07391357421875, |
|
"logps/rejected": -394.32122802734375, |
|
"loss": 0.0434, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 2.301563024520874, |
|
"rewards/margins": 8.577180862426758, |
|
"rewards/rejected": -6.275616645812988, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.600095215424899e-08, |
|
"logits/chosen": 2.9918723106384277, |
|
"logits/rejected": 3.4367504119873047, |
|
"logps/chosen": -358.42230224609375, |
|
"logps/rejected": -392.7529296875, |
|
"loss": 0.0499, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.5515286922454834, |
|
"rewards/margins": 8.478203773498535, |
|
"rewards/rejected": -6.926675319671631, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.588193287312544e-08, |
|
"logits/chosen": 2.2631287574768066, |
|
"logits/rejected": 3.006317138671875, |
|
"logps/chosen": -417.509033203125, |
|
"logps/rejected": -430.56353759765625, |
|
"loss": 0.0396, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.066652536392212, |
|
"rewards/margins": 8.158655166625977, |
|
"rewards/rejected": -7.092002868652344, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.57629135920019e-08, |
|
"logits/chosen": 2.1602940559387207, |
|
"logits/rejected": 3.0440659523010254, |
|
"logps/chosen": -381.5716247558594, |
|
"logps/rejected": -442.3666076660156, |
|
"loss": 0.0371, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.255479335784912, |
|
"rewards/margins": 10.695677757263184, |
|
"rewards/rejected": -9.440199851989746, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 9.564389431087836e-08, |
|
"logits/chosen": 2.3889143466949463, |
|
"logits/rejected": 3.3642821311950684, |
|
"logps/chosen": -386.8828430175781, |
|
"logps/rejected": -414.80157470703125, |
|
"loss": 0.0349, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5850082635879517, |
|
"rewards/margins": 7.8508620262146, |
|
"rewards/rejected": -7.265854835510254, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": 1.5706590414047241, |
|
"eval_logits/rejected": 2.0521459579467773, |
|
"eval_logps/chosen": -390.2462158203125, |
|
"eval_logps/rejected": -372.8173522949219, |
|
"eval_loss": 0.05391751974821091, |
|
"eval_rewards/accuracies": 0.9609375, |
|
"eval_rewards/chosen": 0.9095280170440674, |
|
"eval_rewards/margins": 7.630187034606934, |
|
"eval_rewards/rejected": -6.720658302307129, |
|
"eval_runtime": 77.9662, |
|
"eval_samples_per_second": 12.826, |
|
"eval_steps_per_second": 0.41, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 9.552487502975481e-08, |
|
"logits/chosen": 2.109424114227295, |
|
"logits/rejected": 3.372615098953247, |
|
"logps/chosen": -425.7452087402344, |
|
"logps/rejected": -420.9171447753906, |
|
"loss": 0.0561, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.8334258794784546, |
|
"rewards/margins": 8.975770950317383, |
|
"rewards/rejected": -7.142345428466797, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 9.540585574863127e-08, |
|
"logits/chosen": 1.9445127248764038, |
|
"logits/rejected": 2.690776824951172, |
|
"logps/chosen": -418.9251403808594, |
|
"logps/rejected": -406.52813720703125, |
|
"loss": 0.0519, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2240526676177979, |
|
"rewards/margins": 8.488465309143066, |
|
"rewards/rejected": -7.264412879943848, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 9.528683646750774e-08, |
|
"logits/chosen": 2.327258586883545, |
|
"logits/rejected": 3.642822265625, |
|
"logps/chosen": -422.3324279785156, |
|
"logps/rejected": -427.5508728027344, |
|
"loss": 0.0439, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.9490644335746765, |
|
"rewards/margins": 8.496426582336426, |
|
"rewards/rejected": -7.547361850738525, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 9.51678171863842e-08, |
|
"logits/chosen": 2.0918898582458496, |
|
"logits/rejected": 3.0718982219696045, |
|
"logps/chosen": -451.8070373535156, |
|
"logps/rejected": -396.4839782714844, |
|
"loss": 0.0282, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.8088127374649048, |
|
"rewards/margins": 8.987968444824219, |
|
"rewards/rejected": -7.179154872894287, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 9.504879790526065e-08, |
|
"logits/chosen": 2.251584529876709, |
|
"logits/rejected": 2.643411636352539, |
|
"logps/chosen": -390.68804931640625, |
|
"logps/rejected": -401.1186218261719, |
|
"loss": 0.0389, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.3335015773773193, |
|
"rewards/margins": 8.997198104858398, |
|
"rewards/rejected": -7.663697242736816, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 9.49297786241371e-08, |
|
"logits/chosen": 2.6715595722198486, |
|
"logits/rejected": 2.812282085418701, |
|
"logps/chosen": -398.098388671875, |
|
"logps/rejected": -429.861083984375, |
|
"loss": 0.0371, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.4172677993774414, |
|
"rewards/margins": 9.3431396484375, |
|
"rewards/rejected": -7.925871849060059, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 9.481075934301356e-08, |
|
"logits/chosen": 2.6521875858306885, |
|
"logits/rejected": 3.3430676460266113, |
|
"logps/chosen": -384.2088928222656, |
|
"logps/rejected": -394.98565673828125, |
|
"loss": 0.0281, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.5810503959655762, |
|
"rewards/margins": 8.41893482208252, |
|
"rewards/rejected": -6.837882995605469, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 9.469174006189002e-08, |
|
"logits/chosen": 1.8557714223861694, |
|
"logits/rejected": 2.911968946456909, |
|
"logps/chosen": -493.2205505371094, |
|
"logps/rejected": -419.2433166503906, |
|
"loss": 0.038, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 2.404146432876587, |
|
"rewards/margins": 9.705537796020508, |
|
"rewards/rejected": -7.3013916015625, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 9.457272078076648e-08, |
|
"logits/chosen": 2.248764991760254, |
|
"logits/rejected": 2.723816394805908, |
|
"logps/chosen": -448.24749755859375, |
|
"logps/rejected": -399.8122253417969, |
|
"loss": 0.0282, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 2.05086612701416, |
|
"rewards/margins": 8.73229694366455, |
|
"rewards/rejected": -6.681430816650391, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 9.445370149964293e-08, |
|
"logits/chosen": 2.173478603363037, |
|
"logits/rejected": 3.2225327491760254, |
|
"logps/chosen": -396.5003356933594, |
|
"logps/rejected": -385.8442687988281, |
|
"loss": 0.0493, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8472175598144531, |
|
"rewards/margins": 8.179679870605469, |
|
"rewards/rejected": -6.332461357116699, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_logits/chosen": 1.612717866897583, |
|
"eval_logits/rejected": 2.088186264038086, |
|
"eval_logps/chosen": -388.5294189453125, |
|
"eval_logps/rejected": -372.8053894042969, |
|
"eval_loss": 0.05290338769555092, |
|
"eval_rewards/accuracies": 0.9609375, |
|
"eval_rewards/chosen": 1.0812093019485474, |
|
"eval_rewards/margins": 7.80067253112793, |
|
"eval_rewards/rejected": -6.719463348388672, |
|
"eval_runtime": 77.9512, |
|
"eval_samples_per_second": 12.829, |
|
"eval_steps_per_second": 0.411, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 9.43346822185194e-08, |
|
"logits/chosen": 1.9332910776138306, |
|
"logits/rejected": 3.120542526245117, |
|
"logps/chosen": -405.4175109863281, |
|
"logps/rejected": -406.57781982421875, |
|
"loss": 0.0388, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.5408118963241577, |
|
"rewards/margins": 8.476791381835938, |
|
"rewards/rejected": -6.935980319976807, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 9.421566293739586e-08, |
|
"logits/chosen": 2.6357414722442627, |
|
"logits/rejected": 2.7694199085235596, |
|
"logps/chosen": -403.54632568359375, |
|
"logps/rejected": -362.77557373046875, |
|
"loss": 0.05, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.2265160083770752, |
|
"rewards/margins": 7.877467155456543, |
|
"rewards/rejected": -6.650951385498047, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.409664365627231e-08, |
|
"logits/chosen": 2.213944673538208, |
|
"logits/rejected": 2.804633617401123, |
|
"logps/chosen": -411.54608154296875, |
|
"logps/rejected": -416.67498779296875, |
|
"loss": 0.0319, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 2.015861988067627, |
|
"rewards/margins": 9.614884376525879, |
|
"rewards/rejected": -7.599021911621094, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 9.397762437514877e-08, |
|
"logits/chosen": 2.344874620437622, |
|
"logits/rejected": 2.9800264835357666, |
|
"logps/chosen": -387.05206298828125, |
|
"logps/rejected": -409.482421875, |
|
"loss": 0.0328, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7274560928344727, |
|
"rewards/margins": 8.34837818145752, |
|
"rewards/rejected": -7.620922088623047, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 9.385860509402523e-08, |
|
"logits/chosen": 2.6229679584503174, |
|
"logits/rejected": 2.767507553100586, |
|
"logps/chosen": -400.82647705078125, |
|
"logps/rejected": -437.8872985839844, |
|
"loss": 0.0437, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.4043718576431274, |
|
"rewards/margins": 9.666748046875, |
|
"rewards/rejected": -8.26237678527832, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 9.373958581290168e-08, |
|
"logits/chosen": 1.9819673299789429, |
|
"logits/rejected": 3.2101433277130127, |
|
"logps/chosen": -395.5453186035156, |
|
"logps/rejected": -420.27020263671875, |
|
"loss": 0.0334, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1504669189453125, |
|
"rewards/margins": 9.256044387817383, |
|
"rewards/rejected": -8.105578422546387, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 9.362056653177814e-08, |
|
"logits/chosen": 2.1626055240631104, |
|
"logits/rejected": 2.6654152870178223, |
|
"logps/chosen": -443.01470947265625, |
|
"logps/rejected": -387.11956787109375, |
|
"loss": 0.0519, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.9894819259643555, |
|
"rewards/margins": 8.866507530212402, |
|
"rewards/rejected": -7.877026557922363, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 9.35015472506546e-08, |
|
"logits/chosen": 2.113184928894043, |
|
"logits/rejected": 2.9894702434539795, |
|
"logps/chosen": -426.97454833984375, |
|
"logps/rejected": -392.66937255859375, |
|
"loss": 0.0455, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.242422342300415, |
|
"rewards/margins": 8.285688400268555, |
|
"rewards/rejected": -7.043266296386719, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 9.338252796953105e-08, |
|
"logits/chosen": 2.179399013519287, |
|
"logits/rejected": 2.653637409210205, |
|
"logps/chosen": -417.0636291503906, |
|
"logps/rejected": -418.0546875, |
|
"loss": 0.0329, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.5582334995269775, |
|
"rewards/margins": 9.643311500549316, |
|
"rewards/rejected": -8.085078239440918, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 9.326350868840752e-08, |
|
"logits/chosen": 2.342013120651245, |
|
"logits/rejected": 3.294114589691162, |
|
"logps/chosen": -410.2916564941406, |
|
"logps/rejected": -398.54022216796875, |
|
"loss": 0.0361, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9324003458023071, |
|
"rewards/margins": 8.479290008544922, |
|
"rewards/rejected": -7.5468902587890625, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_logits/chosen": 1.6103559732437134, |
|
"eval_logits/rejected": 2.1083178520202637, |
|
"eval_logps/chosen": -395.0100402832031, |
|
"eval_logps/rejected": -380.8038330078125, |
|
"eval_loss": 0.05386331304907799, |
|
"eval_rewards/accuracies": 0.96875, |
|
"eval_rewards/chosen": 0.4331449270248413, |
|
"eval_rewards/margins": 7.952449798583984, |
|
"eval_rewards/rejected": -7.519304275512695, |
|
"eval_runtime": 77.9615, |
|
"eval_samples_per_second": 12.827, |
|
"eval_steps_per_second": 0.41, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 9.314448940728398e-08, |
|
"logits/chosen": 1.7690677642822266, |
|
"logits/rejected": 3.269226551055908, |
|
"logps/chosen": -419.20867919921875, |
|
"logps/rejected": -420.2843322753906, |
|
"loss": 0.0332, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7724257707595825, |
|
"rewards/margins": 8.932862281799316, |
|
"rewards/rejected": -8.160436630249023, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 9.302547012616043e-08, |
|
"logits/chosen": 2.311052083969116, |
|
"logits/rejected": 2.7695891857147217, |
|
"logps/chosen": -420.0890197753906, |
|
"logps/rejected": -437.40863037109375, |
|
"loss": 0.0506, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.9803401827812195, |
|
"rewards/margins": 9.851530075073242, |
|
"rewards/rejected": -8.871191024780273, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 9.290645084503689e-08, |
|
"logits/chosen": 2.257835626602173, |
|
"logits/rejected": 2.8854148387908936, |
|
"logps/chosen": -388.00152587890625, |
|
"logps/rejected": -398.3310546875, |
|
"loss": 0.0312, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.7348248362541199, |
|
"rewards/margins": 8.951128959655762, |
|
"rewards/rejected": -8.216302871704102, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 9.278743156391336e-08, |
|
"logits/chosen": 1.8644654750823975, |
|
"logits/rejected": 3.318366527557373, |
|
"logps/chosen": -398.03961181640625, |
|
"logps/rejected": -404.6298522949219, |
|
"loss": 0.0329, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.6025460958480835, |
|
"rewards/margins": 8.443681716918945, |
|
"rewards/rejected": -7.8411359786987305, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 9.26684122827898e-08, |
|
"logits/chosen": 2.1022090911865234, |
|
"logits/rejected": 3.08918833732605, |
|
"logps/chosen": -380.1417236328125, |
|
"logps/rejected": -418.6334533691406, |
|
"loss": 0.0319, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.10860241949558258, |
|
"rewards/margins": 8.798811912536621, |
|
"rewards/rejected": -8.9074125289917, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 9.254939300166626e-08, |
|
"logits/chosen": 1.9660978317260742, |
|
"logits/rejected": 3.0992188453674316, |
|
"logps/chosen": -432.70379638671875, |
|
"logps/rejected": -398.1070861816406, |
|
"loss": 0.0321, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.7886090874671936, |
|
"rewards/margins": 9.03126335144043, |
|
"rewards/rejected": -8.242653846740723, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 9.243037372054272e-08, |
|
"logits/chosen": 1.8959643840789795, |
|
"logits/rejected": 3.006300687789917, |
|
"logps/chosen": -485.46435546875, |
|
"logps/rejected": -439.8067321777344, |
|
"loss": 0.0181, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.288769006729126, |
|
"rewards/margins": 10.527647018432617, |
|
"rewards/rejected": -9.238879203796387, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 9.231135443941919e-08, |
|
"logits/chosen": 2.1207528114318848, |
|
"logits/rejected": 3.0929980278015137, |
|
"logps/chosen": -427.466796875, |
|
"logps/rejected": -432.8934631347656, |
|
"loss": 0.0109, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.8015705943107605, |
|
"rewards/margins": 10.440237998962402, |
|
"rewards/rejected": -9.638667106628418, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 9.219233515829564e-08, |
|
"logits/chosen": 2.253310441970825, |
|
"logits/rejected": 3.08237886428833, |
|
"logps/chosen": -423.25775146484375, |
|
"logps/rejected": -393.71966552734375, |
|
"loss": 0.0153, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.1011648178100586, |
|
"rewards/margins": 8.963667869567871, |
|
"rewards/rejected": -7.8625030517578125, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 9.20733158771721e-08, |
|
"logits/chosen": 1.8514906167984009, |
|
"logits/rejected": 2.837451934814453, |
|
"logps/chosen": -476.034912109375, |
|
"logps/rejected": -431.28369140625, |
|
"loss": 0.0107, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7295993566513062, |
|
"rewards/margins": 10.104246139526367, |
|
"rewards/rejected": -8.37464714050293, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_logits/chosen": 1.5608395338058472, |
|
"eval_logits/rejected": 2.154280662536621, |
|
"eval_logps/chosen": -400.8282470703125, |
|
"eval_logps/rejected": -387.81903076171875, |
|
"eval_loss": 0.05462770164012909, |
|
"eval_rewards/accuracies": 0.96875, |
|
"eval_rewards/chosen": -0.1486767828464508, |
|
"eval_rewards/margins": 8.072151184082031, |
|
"eval_rewards/rejected": -8.22082805633545, |
|
"eval_runtime": 78.122, |
|
"eval_samples_per_second": 12.8, |
|
"eval_steps_per_second": 0.41, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 9.195429659604855e-08, |
|
"logits/chosen": 2.027790069580078, |
|
"logits/rejected": 3.0657553672790527, |
|
"logps/chosen": -432.85369873046875, |
|
"logps/rejected": -412.8692321777344, |
|
"loss": 0.0163, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8429557681083679, |
|
"rewards/margins": 9.376811027526855, |
|
"rewards/rejected": -8.533855438232422, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 9.183527731492501e-08, |
|
"logits/chosen": 2.6315040588378906, |
|
"logits/rejected": 3.4493613243103027, |
|
"logps/chosen": -425.31756591796875, |
|
"logps/rejected": -431.07989501953125, |
|
"loss": 0.0162, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8139169812202454, |
|
"rewards/margins": 9.736674308776855, |
|
"rewards/rejected": -8.922757148742676, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 9.171625803380148e-08, |
|
"logits/chosen": 2.329026699066162, |
|
"logits/rejected": 3.3126883506774902, |
|
"logps/chosen": -426.11114501953125, |
|
"logps/rejected": -430.76934814453125, |
|
"loss": 0.0136, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.6953893899917603, |
|
"rewards/margins": 10.516650199890137, |
|
"rewards/rejected": -8.821261405944824, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 9.159723875267794e-08, |
|
"logits/chosen": 2.058751106262207, |
|
"logits/rejected": 2.7939858436584473, |
|
"logps/chosen": -388.2889099121094, |
|
"logps/rejected": -386.5802307128906, |
|
"loss": 0.0199, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.9611308574676514, |
|
"rewards/margins": 9.938085556030273, |
|
"rewards/rejected": -7.976954460144043, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 9.147821947155438e-08, |
|
"logits/chosen": 2.1123156547546387, |
|
"logits/rejected": 3.6038296222686768, |
|
"logps/chosen": -393.5766296386719, |
|
"logps/rejected": -402.1626892089844, |
|
"loss": 0.0107, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.8590052723884583, |
|
"rewards/margins": 9.311236381530762, |
|
"rewards/rejected": -8.452230453491211, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 9.135920019043084e-08, |
|
"logits/chosen": 2.1106104850769043, |
|
"logits/rejected": 3.1583571434020996, |
|
"logps/chosen": -374.95001220703125, |
|
"logps/rejected": -392.5960998535156, |
|
"loss": 0.0114, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.768711268901825, |
|
"rewards/margins": 8.690801620483398, |
|
"rewards/rejected": -7.922091007232666, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 9.12401809093073e-08, |
|
"logits/chosen": 2.2228665351867676, |
|
"logits/rejected": 3.330571413040161, |
|
"logps/chosen": -443.146484375, |
|
"logps/rejected": -426.9916076660156, |
|
"loss": 0.0153, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.233867883682251, |
|
"rewards/margins": 10.038155555725098, |
|
"rewards/rejected": -8.804287910461426, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 9.112116162818376e-08, |
|
"logits/chosen": 2.2720413208007812, |
|
"logits/rejected": 3.6448235511779785, |
|
"logps/chosen": -428.63763427734375, |
|
"logps/rejected": -414.93768310546875, |
|
"loss": 0.0107, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.4396426677703857, |
|
"rewards/margins": 9.752705574035645, |
|
"rewards/rejected": -8.31306266784668, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 9.100214234706022e-08, |
|
"logits/chosen": 2.2004613876342773, |
|
"logits/rejected": 3.101625919342041, |
|
"logps/chosen": -419.28143310546875, |
|
"logps/rejected": -431.95684814453125, |
|
"loss": 0.0106, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1783888339996338, |
|
"rewards/margins": 10.366841316223145, |
|
"rewards/rejected": -9.188451766967773, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 9.088312306593667e-08, |
|
"logits/chosen": 2.8064794540405273, |
|
"logits/rejected": 3.2689521312713623, |
|
"logps/chosen": -354.8829650878906, |
|
"logps/rejected": -406.4047546386719, |
|
"loss": 0.0089, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7891177535057068, |
|
"rewards/margins": 9.862220764160156, |
|
"rewards/rejected": -9.073102951049805, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_logits/chosen": 1.625260591506958, |
|
"eval_logits/rejected": 2.238248109817505, |
|
"eval_logps/chosen": -400.59375, |
|
"eval_logps/rejected": -390.6427917480469, |
|
"eval_loss": 0.057591091841459274, |
|
"eval_rewards/accuracies": 0.96875, |
|
"eval_rewards/chosen": -0.12522682547569275, |
|
"eval_rewards/margins": 8.377971649169922, |
|
"eval_rewards/rejected": -8.503198623657227, |
|
"eval_runtime": 78.0334, |
|
"eval_samples_per_second": 12.815, |
|
"eval_steps_per_second": 0.41, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 9.076410378481314e-08, |
|
"logits/chosen": 1.9525015354156494, |
|
"logits/rejected": 3.1185505390167236, |
|
"logps/chosen": -422.75225830078125, |
|
"logps/rejected": -421.87445068359375, |
|
"loss": 0.013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.9433252811431885, |
|
"rewards/margins": 10.827123641967773, |
|
"rewards/rejected": -8.883798599243164, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 9.06450845036896e-08, |
|
"logits/chosen": 2.2783799171447754, |
|
"logits/rejected": 3.1842360496520996, |
|
"logps/chosen": -418.703369140625, |
|
"logps/rejected": -418.055419921875, |
|
"loss": 0.012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2181668281555176, |
|
"rewards/margins": 9.945693016052246, |
|
"rewards/rejected": -8.727526664733887, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 9.052606522256606e-08, |
|
"logits/chosen": 2.0897390842437744, |
|
"logits/rejected": 3.332200288772583, |
|
"logps/chosen": -436.8134765625, |
|
"logps/rejected": -428.2256774902344, |
|
"loss": 0.0087, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.5979622006416321, |
|
"rewards/margins": 9.419112205505371, |
|
"rewards/rejected": -8.821150779724121, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 9.04070459414425e-08, |
|
"logits/chosen": 1.7566314935684204, |
|
"logits/rejected": 3.1516499519348145, |
|
"logps/chosen": -444.9219665527344, |
|
"logps/rejected": -409.5821228027344, |
|
"loss": 0.0086, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8818896412849426, |
|
"rewards/margins": 10.398978233337402, |
|
"rewards/rejected": -9.517088890075684, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 9.028802666031897e-08, |
|
"logits/chosen": 2.2249584197998047, |
|
"logits/rejected": 3.220370054244995, |
|
"logps/chosen": -373.8511962890625, |
|
"logps/rejected": -429.1431579589844, |
|
"loss": 0.0122, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.09718599170446396, |
|
"rewards/margins": 9.384894371032715, |
|
"rewards/rejected": -9.28770923614502, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 9.016900737919543e-08, |
|
"logits/chosen": 2.468928337097168, |
|
"logits/rejected": 3.5063624382019043, |
|
"logps/chosen": -375.80279541015625, |
|
"logps/rejected": -422.55450439453125, |
|
"loss": 0.0088, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.36610403656959534, |
|
"rewards/margins": 9.850809097290039, |
|
"rewards/rejected": -9.484704971313477, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 9.004998809807188e-08, |
|
"logits/chosen": 2.5144405364990234, |
|
"logits/rejected": 3.0172762870788574, |
|
"logps/chosen": -398.01959228515625, |
|
"logps/rejected": -412.0162048339844, |
|
"loss": 0.0116, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.71649569272995, |
|
"rewards/margins": 10.736013412475586, |
|
"rewards/rejected": -10.01951789855957, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 8.993096881694834e-08, |
|
"logits/chosen": 2.411824941635132, |
|
"logits/rejected": 3.4212913513183594, |
|
"logps/chosen": -441.16741943359375, |
|
"logps/rejected": -410.988525390625, |
|
"loss": 0.0107, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.526915967464447, |
|
"rewards/margins": 9.955665588378906, |
|
"rewards/rejected": -9.428749084472656, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.981194953582481e-08, |
|
"logits/chosen": 2.0398449897766113, |
|
"logits/rejected": 2.990245819091797, |
|
"logps/chosen": -424.50830078125, |
|
"logps/rejected": -440.29034423828125, |
|
"loss": 0.0161, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1279420852661133, |
|
"rewards/margins": 10.929909706115723, |
|
"rewards/rejected": -9.801966667175293, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.969293025470126e-08, |
|
"logits/chosen": 2.0134599208831787, |
|
"logits/rejected": 3.4384913444519043, |
|
"logps/chosen": -431.4124450683594, |
|
"logps/rejected": -422.57275390625, |
|
"loss": 0.0245, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.387521505355835, |
|
"rewards/margins": 10.490163803100586, |
|
"rewards/rejected": -9.102643013000488, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_logits/chosen": 1.578616738319397, |
|
"eval_logits/rejected": 2.1524062156677246, |
|
"eval_logps/chosen": -396.13555908203125, |
|
"eval_logps/rejected": -386.849365234375, |
|
"eval_loss": 0.054019615054130554, |
|
"eval_rewards/accuracies": 0.96875, |
|
"eval_rewards/chosen": 0.32059139013290405, |
|
"eval_rewards/margins": 8.444451332092285, |
|
"eval_rewards/rejected": -8.123859405517578, |
|
"eval_runtime": 78.1156, |
|
"eval_samples_per_second": 12.802, |
|
"eval_steps_per_second": 0.41, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.957391097357772e-08, |
|
"logits/chosen": 1.8505395650863647, |
|
"logits/rejected": 2.4895451068878174, |
|
"logps/chosen": -411.90081787109375, |
|
"logps/rejected": -446.5166931152344, |
|
"loss": 0.0093, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.4874722957611084, |
|
"rewards/margins": 10.799659729003906, |
|
"rewards/rejected": -9.312187194824219, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.945489169245418e-08, |
|
"logits/chosen": 2.4672083854675293, |
|
"logits/rejected": 3.3016533851623535, |
|
"logps/chosen": -407.24810791015625, |
|
"logps/rejected": -421.69207763671875, |
|
"loss": 0.0156, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6584242582321167, |
|
"rewards/margins": 9.77189826965332, |
|
"rewards/rejected": -9.113473892211914, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.933587241133062e-08, |
|
"logits/chosen": 2.9424712657928467, |
|
"logits/rejected": 3.1963260173797607, |
|
"logps/chosen": -393.42730712890625, |
|
"logps/rejected": -405.26776123046875, |
|
"loss": 0.0107, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.141961693763733, |
|
"rewards/margins": 9.95053768157959, |
|
"rewards/rejected": -8.808575630187988, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.921685313020709e-08, |
|
"logits/chosen": 2.1705727577209473, |
|
"logits/rejected": 3.132054090499878, |
|
"logps/chosen": -458.5648498535156, |
|
"logps/rejected": -425.5223693847656, |
|
"loss": 0.0144, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 2.0647785663604736, |
|
"rewards/margins": 10.456083297729492, |
|
"rewards/rejected": -8.391304969787598, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.909783384908355e-08, |
|
"logits/chosen": 2.2793197631835938, |
|
"logits/rejected": 3.1851863861083984, |
|
"logps/chosen": -428.22161865234375, |
|
"logps/rejected": -393.50201416015625, |
|
"loss": 0.0142, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.3557255268096924, |
|
"rewards/margins": 9.879143714904785, |
|
"rewards/rejected": -8.523417472839355, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 8.897881456796e-08, |
|
"logits/chosen": 2.092120409011841, |
|
"logits/rejected": 3.1506097316741943, |
|
"logps/chosen": -423.780517578125, |
|
"logps/rejected": -435.0636291503906, |
|
"loss": 0.0164, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.041473150253296, |
|
"rewards/margins": 10.483617782592773, |
|
"rewards/rejected": -9.442144393920898, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 8.885979528683646e-08, |
|
"logits/chosen": 2.077341079711914, |
|
"logits/rejected": 2.840765953063965, |
|
"logps/chosen": -467.50335693359375, |
|
"logps/rejected": -450.4947814941406, |
|
"loss": 0.01, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0977306365966797, |
|
"rewards/margins": 10.936107635498047, |
|
"rewards/rejected": -9.838376998901367, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 8.874077600571293e-08, |
|
"logits/chosen": 2.388977527618408, |
|
"logits/rejected": 3.17130970954895, |
|
"logps/chosen": -415.8170471191406, |
|
"logps/rejected": -431.8157653808594, |
|
"loss": 0.0172, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1165374517440796, |
|
"rewards/margins": 11.083638191223145, |
|
"rewards/rejected": -9.967100143432617, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 8.862175672458938e-08, |
|
"logits/chosen": 2.3494229316711426, |
|
"logits/rejected": 3.3899810314178467, |
|
"logps/chosen": -398.8847351074219, |
|
"logps/rejected": -420.1104431152344, |
|
"loss": 0.0119, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9555309414863586, |
|
"rewards/margins": 9.63708209991455, |
|
"rewards/rejected": -8.681550979614258, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 8.850273744346584e-08, |
|
"logits/chosen": 2.377115488052368, |
|
"logits/rejected": 3.1841914653778076, |
|
"logps/chosen": -384.47821044921875, |
|
"logps/rejected": -382.5137634277344, |
|
"loss": 0.0091, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.495150089263916, |
|
"rewards/margins": 9.905587196350098, |
|
"rewards/rejected": -8.410436630249023, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_logits/chosen": 1.5970555543899536, |
|
"eval_logits/rejected": 2.134828805923462, |
|
"eval_logps/chosen": -395.612060546875, |
|
"eval_logps/rejected": -388.520751953125, |
|
"eval_loss": 0.05577890947461128, |
|
"eval_rewards/accuracies": 0.96875, |
|
"eval_rewards/chosen": 0.3729451596736908, |
|
"eval_rewards/margins": 8.663945198059082, |
|
"eval_rewards/rejected": -8.290999412536621, |
|
"eval_runtime": 78.1703, |
|
"eval_samples_per_second": 12.793, |
|
"eval_steps_per_second": 0.409, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 8.83837181623423e-08, |
|
"logits/chosen": 2.162808895111084, |
|
"logits/rejected": 2.938401937484741, |
|
"logps/chosen": -416.3312072753906, |
|
"logps/rejected": -431.4833984375, |
|
"loss": 0.0112, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.4468110799789429, |
|
"rewards/margins": 11.13166332244873, |
|
"rewards/rejected": -9.684852600097656, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 8.826469888121875e-08, |
|
"logits/chosen": 1.8704240322113037, |
|
"logits/rejected": 3.454761028289795, |
|
"logps/chosen": -394.39312744140625, |
|
"logps/rejected": -435.0038146972656, |
|
"loss": 0.0103, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.1534335613250732, |
|
"rewards/margins": 10.062942504882812, |
|
"rewards/rejected": -8.909509658813477, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 8.814567960009521e-08, |
|
"logits/chosen": 2.3629403114318848, |
|
"logits/rejected": 3.146951675415039, |
|
"logps/chosen": -385.4207458496094, |
|
"logps/rejected": -421.41552734375, |
|
"loss": 0.0174, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.5849992036819458, |
|
"rewards/margins": 10.956562995910645, |
|
"rewards/rejected": -9.371562957763672, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 8.802666031897167e-08, |
|
"logits/chosen": 1.9549896717071533, |
|
"logits/rejected": 3.260117769241333, |
|
"logps/chosen": -438.8719177246094, |
|
"logps/rejected": -423.6161193847656, |
|
"loss": 0.0114, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.8324927091598511, |
|
"rewards/margins": 9.532448768615723, |
|
"rewards/rejected": -8.699956893920898, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 8.790764103784812e-08, |
|
"logits/chosen": 2.376079797744751, |
|
"logits/rejected": 2.626861572265625, |
|
"logps/chosen": -420.97320556640625, |
|
"logps/rejected": -430.754638671875, |
|
"loss": 0.0101, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.4511768817901611, |
|
"rewards/margins": 10.815112113952637, |
|
"rewards/rejected": -9.363935470581055, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 8.778862175672459e-08, |
|
"logits/chosen": 2.016657829284668, |
|
"logits/rejected": 3.148374557495117, |
|
"logps/chosen": -399.3986511230469, |
|
"logps/rejected": -409.0765075683594, |
|
"loss": 0.0126, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7621505260467529, |
|
"rewards/margins": 9.929964065551758, |
|
"rewards/rejected": -9.167813301086426, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 8.766960247560105e-08, |
|
"logits/chosen": 2.107110023498535, |
|
"logits/rejected": 2.8882648944854736, |
|
"logps/chosen": -467.67999267578125, |
|
"logps/rejected": -445.00054931640625, |
|
"loss": 0.0072, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.5498336553573608, |
|
"rewards/margins": 11.310626983642578, |
|
"rewards/rejected": -9.760791778564453, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 8.75505831944775e-08, |
|
"logits/chosen": 2.1148109436035156, |
|
"logits/rejected": 2.921837568283081, |
|
"logps/chosen": -419.919677734375, |
|
"logps/rejected": -438.27392578125, |
|
"loss": 0.0105, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5840842723846436, |
|
"rewards/margins": 10.156820297241211, |
|
"rewards/rejected": -9.572736740112305, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 8.743156391335396e-08, |
|
"logits/chosen": 2.2187628746032715, |
|
"logits/rejected": 3.3866772651672363, |
|
"logps/chosen": -381.0819396972656, |
|
"logps/rejected": -417.1609802246094, |
|
"loss": 0.0088, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2810596525669098, |
|
"rewards/margins": 9.59456729888916, |
|
"rewards/rejected": -9.313508033752441, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 8.731254463223042e-08, |
|
"logits/chosen": 2.337141513824463, |
|
"logits/rejected": 2.984570026397705, |
|
"logps/chosen": -428.96307373046875, |
|
"logps/rejected": -433.2093811035156, |
|
"loss": 0.0119, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.118440866470337, |
|
"rewards/margins": 10.297574043273926, |
|
"rewards/rejected": -9.179132461547852, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_logits/chosen": 1.633168339729309, |
|
"eval_logits/rejected": 2.174915313720703, |
|
"eval_logps/chosen": -397.74981689453125, |
|
"eval_logps/rejected": -392.74847412109375, |
|
"eval_loss": 0.0574236661195755, |
|
"eval_rewards/accuracies": 0.96875, |
|
"eval_rewards/chosen": 0.15916621685028076, |
|
"eval_rewards/margins": 8.872934341430664, |
|
"eval_rewards/rejected": -8.713767051696777, |
|
"eval_runtime": 77.9326, |
|
"eval_samples_per_second": 12.832, |
|
"eval_steps_per_second": 0.411, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 8.719352535110687e-08, |
|
"logits/chosen": 2.470761775970459, |
|
"logits/rejected": 3.466107130050659, |
|
"logps/chosen": -330.568115234375, |
|
"logps/rejected": -396.0406799316406, |
|
"loss": 0.0094, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5657675862312317, |
|
"rewards/margins": 9.64977741241455, |
|
"rewards/rejected": -9.084009170532227, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 8.707450606998333e-08, |
|
"logits/chosen": 2.0616042613983154, |
|
"logits/rejected": 3.1946635246276855, |
|
"logps/chosen": -374.26641845703125, |
|
"logps/rejected": -438.85565185546875, |
|
"loss": 0.0125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9465324282646179, |
|
"rewards/margins": 10.824884414672852, |
|
"rewards/rejected": -9.8783540725708, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 8.695548678885979e-08, |
|
"logits/chosen": 2.382939100265503, |
|
"logits/rejected": 2.7525863647460938, |
|
"logps/chosen": -404.4330139160156, |
|
"logps/rejected": -403.4361572265625, |
|
"loss": 0.0093, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8075125813484192, |
|
"rewards/margins": 9.784753799438477, |
|
"rewards/rejected": -8.977242469787598, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 8.683646750773624e-08, |
|
"logits/chosen": 2.528764247894287, |
|
"logits/rejected": 3.0091147422790527, |
|
"logps/chosen": -448.67852783203125, |
|
"logps/rejected": -431.8081970214844, |
|
"loss": 0.0146, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1726857423782349, |
|
"rewards/margins": 11.344512939453125, |
|
"rewards/rejected": -10.17182731628418, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 8.671744822661271e-08, |
|
"logits/chosen": 2.7697949409484863, |
|
"logits/rejected": 3.7019259929656982, |
|
"logps/chosen": -388.6200256347656, |
|
"logps/rejected": -417.25152587890625, |
|
"loss": 0.0083, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.27654901146888733, |
|
"rewards/margins": 9.896730422973633, |
|
"rewards/rejected": -9.620182037353516, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 8.659842894548917e-08, |
|
"logits/chosen": 2.3201236724853516, |
|
"logits/rejected": 3.4561610221862793, |
|
"logps/chosen": -425.83770751953125, |
|
"logps/rejected": -425.7290954589844, |
|
"loss": 0.01, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1046048402786255, |
|
"rewards/margins": 10.805675506591797, |
|
"rewards/rejected": -9.701070785522461, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 8.647940966436562e-08, |
|
"logits/chosen": 2.3291468620300293, |
|
"logits/rejected": 3.3310623168945312, |
|
"logps/chosen": -429.80987548828125, |
|
"logps/rejected": -432.98992919921875, |
|
"loss": 0.011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8949478268623352, |
|
"rewards/margins": 11.059711456298828, |
|
"rewards/rejected": -10.164762496948242, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 8.636039038324208e-08, |
|
"logits/chosen": 2.225471019744873, |
|
"logits/rejected": 3.3019492626190186, |
|
"logps/chosen": -387.1333923339844, |
|
"logps/rejected": -433.3799743652344, |
|
"loss": 0.0126, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5047739744186401, |
|
"rewards/margins": 12.810396194458008, |
|
"rewards/rejected": -12.305620193481445, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 8.624137110211854e-08, |
|
"logits/chosen": 2.510704517364502, |
|
"logits/rejected": 3.3782081604003906, |
|
"logps/chosen": -409.83270263671875, |
|
"logps/rejected": -438.6222229003906, |
|
"loss": 0.0084, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7321760654449463, |
|
"rewards/margins": 12.49064826965332, |
|
"rewards/rejected": -10.75847339630127, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 8.6122351820995e-08, |
|
"logits/chosen": 1.8151687383651733, |
|
"logits/rejected": 2.730388879776001, |
|
"logps/chosen": -369.6325378417969, |
|
"logps/rejected": -416.94476318359375, |
|
"loss": 0.0105, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7672537565231323, |
|
"rewards/margins": 10.405468940734863, |
|
"rewards/rejected": -9.638215065002441, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_logits/chosen": 1.7073872089385986, |
|
"eval_logits/rejected": 2.287001371383667, |
|
"eval_logps/chosen": -401.92828369140625, |
|
"eval_logps/rejected": -399.6173095703125, |
|
"eval_loss": 0.0646829605102539, |
|
"eval_rewards/accuracies": 0.96875, |
|
"eval_rewards/chosen": -0.2586807608604431, |
|
"eval_rewards/margins": 9.141975402832031, |
|
"eval_rewards/rejected": -9.400656700134277, |
|
"eval_runtime": 77.8864, |
|
"eval_samples_per_second": 12.839, |
|
"eval_steps_per_second": 0.411, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 8.600333253987145e-08, |
|
"logits/chosen": 2.291734218597412, |
|
"logits/rejected": 3.170888662338257, |
|
"logps/chosen": -423.4766540527344, |
|
"logps/rejected": -427.091796875, |
|
"loss": 0.0109, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.495410680770874, |
|
"rewards/margins": 11.364627838134766, |
|
"rewards/rejected": -9.869218826293945, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 8.58843132587479e-08, |
|
"logits/chosen": 2.2076480388641357, |
|
"logits/rejected": 2.9256691932678223, |
|
"logps/chosen": -432.29248046875, |
|
"logps/rejected": -417.88055419921875, |
|
"loss": 0.0098, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.4364680051803589, |
|
"rewards/margins": 11.401215553283691, |
|
"rewards/rejected": -9.964746475219727, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 8.576529397762438e-08, |
|
"logits/chosen": 2.705327272415161, |
|
"logits/rejected": 3.241112232208252, |
|
"logps/chosen": -381.64923095703125, |
|
"logps/rejected": -452.84588623046875, |
|
"loss": 0.0093, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8446656465530396, |
|
"rewards/margins": 11.237610816955566, |
|
"rewards/rejected": -10.392945289611816, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 8.564627469650083e-08, |
|
"logits/chosen": 2.1531457901000977, |
|
"logits/rejected": 3.033215284347534, |
|
"logps/chosen": -409.0959167480469, |
|
"logps/rejected": -468.179443359375, |
|
"loss": 0.0085, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9750633239746094, |
|
"rewards/margins": 11.535894393920898, |
|
"rewards/rejected": -10.560831069946289, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 8.552725541537729e-08, |
|
"logits/chosen": 2.3740274906158447, |
|
"logits/rejected": 3.0594983100891113, |
|
"logps/chosen": -408.73284912109375, |
|
"logps/rejected": -432.64154052734375, |
|
"loss": 0.0111, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.3420227766036987, |
|
"rewards/margins": 11.06401252746582, |
|
"rewards/rejected": -9.721988677978516, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 8.540823613425374e-08, |
|
"logits/chosen": 2.0250580310821533, |
|
"logits/rejected": 3.1984283924102783, |
|
"logps/chosen": -414.43865966796875, |
|
"logps/rejected": -414.484375, |
|
"loss": 0.0109, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.4251967668533325, |
|
"rewards/margins": 11.274666786193848, |
|
"rewards/rejected": -9.849469184875488, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 8.528921685313021e-08, |
|
"logits/chosen": 2.57132625579834, |
|
"logits/rejected": 3.3775405883789062, |
|
"logps/chosen": -492.7275390625, |
|
"logps/rejected": -453.9833984375, |
|
"loss": 0.0129, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 2.017164707183838, |
|
"rewards/margins": 12.133844375610352, |
|
"rewards/rejected": -10.116679191589355, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 8.517019757200666e-08, |
|
"logits/chosen": 2.3586134910583496, |
|
"logits/rejected": 3.1494510173797607, |
|
"logps/chosen": -419.758056640625, |
|
"logps/rejected": -436.7438049316406, |
|
"loss": 0.0125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.5768086910247803, |
|
"rewards/margins": 11.310070037841797, |
|
"rewards/rejected": -9.733260154724121, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 8.505117829088311e-08, |
|
"logits/chosen": 2.436389684677124, |
|
"logits/rejected": 3.2424912452697754, |
|
"logps/chosen": -443.7950744628906, |
|
"logps/rejected": -466.5213317871094, |
|
"loss": 0.0116, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.40640783309936523, |
|
"rewards/margins": 10.672332763671875, |
|
"rewards/rejected": -10.265925407409668, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 8.493215900975957e-08, |
|
"logits/chosen": 2.098741054534912, |
|
"logits/rejected": 2.9514975547790527, |
|
"logps/chosen": -424.73052978515625, |
|
"logps/rejected": -460.8973083496094, |
|
"loss": 0.0107, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6627575159072876, |
|
"rewards/margins": 12.264575004577637, |
|
"rewards/rejected": -11.60181713104248, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_logits/chosen": 1.8804820775985718, |
|
"eval_logits/rejected": 2.3825998306274414, |
|
"eval_logps/chosen": -409.8846130371094, |
|
"eval_logps/rejected": -406.9892578125, |
|
"eval_loss": 0.06976839900016785, |
|
"eval_rewards/accuracies": 0.9609375, |
|
"eval_rewards/chosen": -1.0543094873428345, |
|
"eval_rewards/margins": 9.083538055419922, |
|
"eval_rewards/rejected": -10.137847900390625, |
|
"eval_runtime": 78.0306, |
|
"eval_samples_per_second": 12.815, |
|
"eval_steps_per_second": 0.41, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 8.481313972863604e-08, |
|
"logits/chosen": 2.7101387977600098, |
|
"logits/rejected": 3.5361340045928955, |
|
"logps/chosen": -386.73162841796875, |
|
"logps/rejected": -412.102783203125, |
|
"loss": 0.01, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5167805552482605, |
|
"rewards/margins": 11.045450210571289, |
|
"rewards/rejected": -10.528669357299805, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 8.46941204475125e-08, |
|
"logits/chosen": 2.4018852710723877, |
|
"logits/rejected": 2.6615495681762695, |
|
"logps/chosen": -435.355712890625, |
|
"logps/rejected": -460.0689392089844, |
|
"loss": 0.0141, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7065870761871338, |
|
"rewards/margins": 11.625639915466309, |
|
"rewards/rejected": -9.919052124023438, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 8.457510116638895e-08, |
|
"logits/chosen": 2.355992078781128, |
|
"logits/rejected": 3.142991304397583, |
|
"logps/chosen": -436.5738830566406, |
|
"logps/rejected": -442.658447265625, |
|
"loss": 0.0084, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.008822202682495, |
|
"rewards/margins": 11.25963020324707, |
|
"rewards/rejected": -9.250809669494629, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 8.445608188526541e-08, |
|
"logits/chosen": 2.113661527633667, |
|
"logits/rejected": 3.376765727996826, |
|
"logps/chosen": -363.7850036621094, |
|
"logps/rejected": -417.59130859375, |
|
"loss": 0.0067, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0049993991851807, |
|
"rewards/margins": 10.82304573059082, |
|
"rewards/rejected": -9.818044662475586, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 8.433706260414186e-08, |
|
"logits/chosen": 2.2611918449401855, |
|
"logits/rejected": 2.977774143218994, |
|
"logps/chosen": -412.16259765625, |
|
"logps/rejected": -402.7505798339844, |
|
"loss": 0.0124, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.9179121255874634, |
|
"rewards/margins": 10.67682933807373, |
|
"rewards/rejected": -8.758916854858398, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 8.421804332301833e-08, |
|
"logits/chosen": 2.2166171073913574, |
|
"logits/rejected": 3.1910576820373535, |
|
"logps/chosen": -415.0889587402344, |
|
"logps/rejected": -410.3279724121094, |
|
"loss": 0.0128, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.147801399230957, |
|
"rewards/margins": 11.321202278137207, |
|
"rewards/rejected": -9.17340087890625, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 8.409902404189478e-08, |
|
"logits/chosen": 2.3937222957611084, |
|
"logits/rejected": 3.467958450317383, |
|
"logps/chosen": -410.2650451660156, |
|
"logps/rejected": -424.83074951171875, |
|
"loss": 0.0172, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3514193296432495, |
|
"rewards/margins": 11.13292121887207, |
|
"rewards/rejected": -9.781502723693848, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 8.398000476077123e-08, |
|
"logits/chosen": 2.627056360244751, |
|
"logits/rejected": 2.9553661346435547, |
|
"logps/chosen": -422.7137145996094, |
|
"logps/rejected": -420.8788146972656, |
|
"loss": 0.0098, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.3633685111999512, |
|
"rewards/margins": 10.64666748046875, |
|
"rewards/rejected": -9.283299446105957, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 8.386098547964769e-08, |
|
"logits/chosen": 2.7881388664245605, |
|
"logits/rejected": 2.7855353355407715, |
|
"logps/chosen": -463.8809509277344, |
|
"logps/rejected": -449.48004150390625, |
|
"loss": 0.0125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.032374382019043, |
|
"rewards/margins": 12.262084007263184, |
|
"rewards/rejected": -10.22970962524414, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 8.374196619852416e-08, |
|
"logits/chosen": 1.9589307308197021, |
|
"logits/rejected": 2.762117862701416, |
|
"logps/chosen": -450.49609375, |
|
"logps/rejected": -444.59149169921875, |
|
"loss": 0.0098, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.6292638778686523, |
|
"rewards/margins": 11.14265251159668, |
|
"rewards/rejected": -9.513387680053711, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_logits/chosen": 1.7084078788757324, |
|
"eval_logits/rejected": 2.194572687149048, |
|
"eval_logps/chosen": -397.28363037109375, |
|
"eval_logps/rejected": -393.7378845214844, |
|
"eval_loss": 0.06439676135778427, |
|
"eval_rewards/accuracies": 0.96875, |
|
"eval_rewards/chosen": 0.20578746497631073, |
|
"eval_rewards/margins": 9.018497467041016, |
|
"eval_rewards/rejected": -8.812708854675293, |
|
"eval_runtime": 78.0234, |
|
"eval_samples_per_second": 12.817, |
|
"eval_steps_per_second": 0.41, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 8.362294691740062e-08, |
|
"logits/chosen": 2.6022965908050537, |
|
"logits/rejected": 3.01656436920166, |
|
"logps/chosen": -399.0681457519531, |
|
"logps/rejected": -423.8030700683594, |
|
"loss": 0.0097, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.5128016471862793, |
|
"rewards/margins": 10.698533058166504, |
|
"rewards/rejected": -9.18572998046875, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 8.350392763627707e-08, |
|
"logits/chosen": 2.14322566986084, |
|
"logits/rejected": 3.216325044631958, |
|
"logps/chosen": -432.8180236816406, |
|
"logps/rejected": -448.30645751953125, |
|
"loss": 0.0107, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.6714401245117188, |
|
"rewards/margins": 11.98411750793457, |
|
"rewards/rejected": -10.312677383422852, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 8.338490835515353e-08, |
|
"logits/chosen": 2.0054235458374023, |
|
"logits/rejected": 3.5831961631774902, |
|
"logps/chosen": -402.33892822265625, |
|
"logps/rejected": -419.5140686035156, |
|
"loss": 0.0103, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3268228769302368, |
|
"rewards/margins": 11.421789169311523, |
|
"rewards/rejected": -10.094966888427734, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 8.326588907403e-08, |
|
"logits/chosen": 2.380002498626709, |
|
"logits/rejected": 3.5054984092712402, |
|
"logps/chosen": -452.48748779296875, |
|
"logps/rejected": -460.12786865234375, |
|
"loss": 0.0075, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.852749228477478, |
|
"rewards/margins": 12.324037551879883, |
|
"rewards/rejected": -10.471287727355957, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 8.314686979290645e-08, |
|
"logits/chosen": 2.2875988483428955, |
|
"logits/rejected": 3.5421195030212402, |
|
"logps/chosen": -428.4679260253906, |
|
"logps/rejected": -430.433349609375, |
|
"loss": 0.0082, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.6448265314102173, |
|
"rewards/margins": 11.238059997558594, |
|
"rewards/rejected": -9.593233108520508, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 8.30278505117829e-08, |
|
"logits/chosen": 2.450359582901001, |
|
"logits/rejected": 3.1287174224853516, |
|
"logps/chosen": -432.19329833984375, |
|
"logps/rejected": -433.89111328125, |
|
"loss": 0.0104, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.381823182106018, |
|
"rewards/margins": 12.04507064819336, |
|
"rewards/rejected": -10.663248062133789, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 8.290883123065935e-08, |
|
"logits/chosen": 2.806814670562744, |
|
"logits/rejected": 3.616931200027466, |
|
"logps/chosen": -421.779052734375, |
|
"logps/rejected": -449.85260009765625, |
|
"loss": 0.0113, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1789385080337524, |
|
"rewards/margins": 11.785478591918945, |
|
"rewards/rejected": -10.606538772583008, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 8.278981194953582e-08, |
|
"logits/chosen": 2.440140724182129, |
|
"logits/rejected": 3.0209367275238037, |
|
"logps/chosen": -454.8857421875, |
|
"logps/rejected": -450.9249572753906, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.156050443649292, |
|
"rewards/margins": 11.445282936096191, |
|
"rewards/rejected": -10.28923225402832, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 8.267079266841228e-08, |
|
"logits/chosen": 2.5684947967529297, |
|
"logits/rejected": 3.107997417449951, |
|
"logps/chosen": -408.620849609375, |
|
"logps/rejected": -425.724365234375, |
|
"loss": 0.008, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.03949282318353653, |
|
"rewards/margins": 10.728104591369629, |
|
"rewards/rejected": -10.767596244812012, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 8.255177338728874e-08, |
|
"logits/chosen": 2.5037713050842285, |
|
"logits/rejected": 3.1614301204681396, |
|
"logps/chosen": -447.5420837402344, |
|
"logps/rejected": -419.99981689453125, |
|
"loss": 0.0078, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0575414896011353, |
|
"rewards/margins": 11.080565452575684, |
|
"rewards/rejected": -10.02302360534668, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_logits/chosen": 1.8239837884902954, |
|
"eval_logits/rejected": 2.3104217052459717, |
|
"eval_logps/chosen": -404.14984130859375, |
|
"eval_logps/rejected": -404.53826904296875, |
|
"eval_loss": 0.06748179346323013, |
|
"eval_rewards/accuracies": 0.96875, |
|
"eval_rewards/chosen": -0.4808317720890045, |
|
"eval_rewards/margins": 9.411918640136719, |
|
"eval_rewards/rejected": -9.892749786376953, |
|
"eval_runtime": 78.0282, |
|
"eval_samples_per_second": 12.816, |
|
"eval_steps_per_second": 0.41, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 8.243275410616519e-08, |
|
"logits/chosen": 2.3226962089538574, |
|
"logits/rejected": 3.3303439617156982, |
|
"logps/chosen": -418.3297424316406, |
|
"logps/rejected": -439.53143310546875, |
|
"loss": 0.0057, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6141597032546997, |
|
"rewards/margins": 10.954813003540039, |
|
"rewards/rejected": -10.340652465820312, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 8.231373482504166e-08, |
|
"logits/chosen": 2.913877487182617, |
|
"logits/rejected": 3.016862392425537, |
|
"logps/chosen": -340.18499755859375, |
|
"logps/rejected": -396.21722412109375, |
|
"loss": 0.0071, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.8227875232696533, |
|
"rewards/margins": 11.620365142822266, |
|
"rewards/rejected": -9.797577857971191, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 8.219471554391812e-08, |
|
"logits/chosen": 2.501906633377075, |
|
"logits/rejected": 2.8194351196289062, |
|
"logps/chosen": -432.50787353515625, |
|
"logps/rejected": -426.0884704589844, |
|
"loss": 0.0083, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8617050647735596, |
|
"rewards/margins": 11.113122940063477, |
|
"rewards/rejected": -9.251418113708496, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 8.207569626279457e-08, |
|
"logits/chosen": 2.374406337738037, |
|
"logits/rejected": 3.2433886528015137, |
|
"logps/chosen": -430.30352783203125, |
|
"logps/rejected": -446.37286376953125, |
|
"loss": 0.0101, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8586057424545288, |
|
"rewards/margins": 11.883955001831055, |
|
"rewards/rejected": -10.025348663330078, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 8.195667698167103e-08, |
|
"logits/chosen": 2.630288600921631, |
|
"logits/rejected": 3.096122980117798, |
|
"logps/chosen": -458.97149658203125, |
|
"logps/rejected": -464.8585510253906, |
|
"loss": 0.0133, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.6575380563735962, |
|
"rewards/margins": 13.09735107421875, |
|
"rewards/rejected": -11.439813613891602, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 8.183765770054747e-08, |
|
"logits/chosen": 2.9291977882385254, |
|
"logits/rejected": 3.383643388748169, |
|
"logps/chosen": -426.6884765625, |
|
"logps/rejected": -418.20172119140625, |
|
"loss": 0.01, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3758227825164795, |
|
"rewards/margins": 11.36131477355957, |
|
"rewards/rejected": -9.985492706298828, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 8.171863841942394e-08, |
|
"logits/chosen": 2.36037278175354, |
|
"logits/rejected": 3.379338026046753, |
|
"logps/chosen": -409.152587890625, |
|
"logps/rejected": -405.7203063964844, |
|
"loss": 0.0081, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7479127049446106, |
|
"rewards/margins": 10.87302303314209, |
|
"rewards/rejected": -10.12511157989502, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 8.15996191383004e-08, |
|
"logits/chosen": 2.3077635765075684, |
|
"logits/rejected": 2.982712745666504, |
|
"logps/chosen": -440.3170471191406, |
|
"logps/rejected": -441.4231872558594, |
|
"loss": 0.0101, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2002421617507935, |
|
"rewards/margins": 11.192548751831055, |
|
"rewards/rejected": -9.99230670928955, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 8.148059985717686e-08, |
|
"logits/chosen": 2.3768467903137207, |
|
"logits/rejected": 3.2358105182647705, |
|
"logps/chosen": -450.10040283203125, |
|
"logps/rejected": -459.655517578125, |
|
"loss": 0.0068, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5886727571487427, |
|
"rewards/margins": 11.561334609985352, |
|
"rewards/rejected": -10.972661018371582, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 8.136158057605331e-08, |
|
"logits/chosen": 1.9187663793563843, |
|
"logits/rejected": 3.099363088607788, |
|
"logps/chosen": -489.19549560546875, |
|
"logps/rejected": -454.474609375, |
|
"loss": 0.0117, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.78690505027771, |
|
"rewards/margins": 11.946157455444336, |
|
"rewards/rejected": -10.159250259399414, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"eval_logits/chosen": 1.7307677268981934, |
|
"eval_logits/rejected": 2.2528133392333984, |
|
"eval_logps/chosen": -395.5159606933594, |
|
"eval_logps/rejected": -398.804443359375, |
|
"eval_loss": 0.06332825124263763, |
|
"eval_rewards/accuracies": 0.9609375, |
|
"eval_rewards/chosen": 0.38255468010902405, |
|
"eval_rewards/margins": 9.701919555664062, |
|
"eval_rewards/rejected": -9.319364547729492, |
|
"eval_runtime": 77.9753, |
|
"eval_samples_per_second": 12.825, |
|
"eval_steps_per_second": 0.41, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 8.124256129492978e-08, |
|
"logits/chosen": 1.937443494796753, |
|
"logits/rejected": 2.9953453540802, |
|
"logps/chosen": -452.47900390625, |
|
"logps/rejected": -460.998046875, |
|
"loss": 0.0145, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.4172687530517578, |
|
"rewards/margins": 12.328100204467773, |
|
"rewards/rejected": -10.910831451416016, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 8.112354201380624e-08, |
|
"logits/chosen": 2.033159017562866, |
|
"logits/rejected": 2.923642158508301, |
|
"logps/chosen": -385.84124755859375, |
|
"logps/rejected": -427.04095458984375, |
|
"loss": 0.0114, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.8549903631210327, |
|
"rewards/margins": 11.425082206726074, |
|
"rewards/rejected": -9.57009220123291, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 8.10045227326827e-08, |
|
"logits/chosen": 2.1730008125305176, |
|
"logits/rejected": 2.7437562942504883, |
|
"logps/chosen": -401.44744873046875, |
|
"logps/rejected": -461.93597412109375, |
|
"loss": 0.009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.0297939777374268, |
|
"rewards/margins": 13.02344799041748, |
|
"rewards/rejected": -10.993656158447266, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 8.088550345155915e-08, |
|
"logits/chosen": 2.1667115688323975, |
|
"logits/rejected": 2.8589937686920166, |
|
"logps/chosen": -411.73284912109375, |
|
"logps/rejected": -428.7911682128906, |
|
"loss": 0.0046, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2299811840057373, |
|
"rewards/margins": 11.306024551391602, |
|
"rewards/rejected": -10.076042175292969, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 8.076648417043561e-08, |
|
"logits/chosen": 2.363246202468872, |
|
"logits/rejected": 2.9583797454833984, |
|
"logps/chosen": -473.9364318847656, |
|
"logps/rejected": -445.84625244140625, |
|
"loss": 0.0158, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.21724534034729, |
|
"rewards/margins": 11.700216293334961, |
|
"rewards/rejected": -9.482972145080566, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 8.064746488931206e-08, |
|
"logits/chosen": 2.5506205558776855, |
|
"logits/rejected": 3.462920665740967, |
|
"logps/chosen": -411.14678955078125, |
|
"logps/rejected": -444.2726135253906, |
|
"loss": 0.0103, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6240037083625793, |
|
"rewards/margins": 11.176986694335938, |
|
"rewards/rejected": -10.552982330322266, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 8.052844560818852e-08, |
|
"logits/chosen": 2.1937804222106934, |
|
"logits/rejected": 3.3300259113311768, |
|
"logps/chosen": -457.53814697265625, |
|
"logps/rejected": -449.9571838378906, |
|
"loss": 0.0083, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.528435230255127, |
|
"rewards/margins": 11.644414901733398, |
|
"rewards/rejected": -10.115981101989746, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 8.040942632706498e-08, |
|
"logits/chosen": 2.55604887008667, |
|
"logits/rejected": 3.0880343914031982, |
|
"logps/chosen": -420.58782958984375, |
|
"logps/rejected": -388.7262268066406, |
|
"loss": 0.0075, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.4543565511703491, |
|
"rewards/margins": 10.79311752319336, |
|
"rewards/rejected": -9.338762283325195, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 8.029040704594145e-08, |
|
"logits/chosen": 2.547492504119873, |
|
"logits/rejected": 3.6884007453918457, |
|
"logps/chosen": -380.635009765625, |
|
"logps/rejected": -423.63018798828125, |
|
"loss": 0.0098, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.180540680885315, |
|
"rewards/margins": 11.04011344909668, |
|
"rewards/rejected": -9.859573364257812, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 8.01713877648179e-08, |
|
"logits/chosen": 2.499735116958618, |
|
"logits/rejected": 3.318908214569092, |
|
"logps/chosen": -406.8338623046875, |
|
"logps/rejected": -433.6927795410156, |
|
"loss": 0.0059, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4649948179721832, |
|
"rewards/margins": 11.194982528686523, |
|
"rewards/rejected": -10.729987144470215, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_logits/chosen": 1.8153432607650757, |
|
"eval_logits/rejected": 2.3447887897491455, |
|
"eval_logps/chosen": -408.9284973144531, |
|
"eval_logps/rejected": -409.33148193359375, |
|
"eval_loss": 0.07508327066898346, |
|
"eval_rewards/accuracies": 0.9609375, |
|
"eval_rewards/chosen": -0.9586971402168274, |
|
"eval_rewards/margins": 9.413373947143555, |
|
"eval_rewards/rejected": -10.3720703125, |
|
"eval_runtime": 78.0392, |
|
"eval_samples_per_second": 12.814, |
|
"eval_steps_per_second": 0.41, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 8.005236848369436e-08, |
|
"logits/chosen": 2.228560447692871, |
|
"logits/rejected": 3.2750918865203857, |
|
"logps/chosen": -443.6728515625, |
|
"logps/rejected": -464.92108154296875, |
|
"loss": 0.0084, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.657561182975769, |
|
"rewards/margins": 11.451318740844727, |
|
"rewards/rejected": -10.793758392333984, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 7.993334920257082e-08, |
|
"logits/chosen": 2.6553711891174316, |
|
"logits/rejected": 3.3680367469787598, |
|
"logps/chosen": -449.4634704589844, |
|
"logps/rejected": -419.002197265625, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8493421673774719, |
|
"rewards/margins": 11.447193145751953, |
|
"rewards/rejected": -10.597851753234863, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 7.981432992144727e-08, |
|
"logits/chosen": 2.665163278579712, |
|
"logits/rejected": 3.3812012672424316, |
|
"logps/chosen": -468.38714599609375, |
|
"logps/rejected": -457.9358825683594, |
|
"loss": 0.007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7476972341537476, |
|
"rewards/margins": 12.187990188598633, |
|
"rewards/rejected": -11.440293312072754, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 7.969531064032373e-08, |
|
"logits/chosen": 2.5799193382263184, |
|
"logits/rejected": 3.3646559715270996, |
|
"logps/chosen": -375.97174072265625, |
|
"logps/rejected": -422.7632751464844, |
|
"loss": 0.0115, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5605910420417786, |
|
"rewards/margins": 11.750158309936523, |
|
"rewards/rejected": -11.189568519592285, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 7.957629135920018e-08, |
|
"logits/chosen": 2.4693617820739746, |
|
"logits/rejected": 3.5580387115478516, |
|
"logps/chosen": -430.0306091308594, |
|
"logps/rejected": -447.8194274902344, |
|
"loss": 0.0089, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3709467649459839, |
|
"rewards/margins": 11.733332633972168, |
|
"rewards/rejected": -11.362385749816895, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 7.945727207807664e-08, |
|
"logits/chosen": 2.5020830631256104, |
|
"logits/rejected": 3.252861738204956, |
|
"logps/chosen": -435.47222900390625, |
|
"logps/rejected": -447.82373046875, |
|
"loss": 0.0088, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.05917937681078911, |
|
"rewards/margins": 11.433965682983398, |
|
"rewards/rejected": -11.493144035339355, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 7.93382527969531e-08, |
|
"logits/chosen": 2.116004705429077, |
|
"logits/rejected": 3.1918604373931885, |
|
"logps/chosen": -463.59344482421875, |
|
"logps/rejected": -442.08203125, |
|
"loss": 0.0137, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2367243766784668, |
|
"rewards/margins": 12.351614952087402, |
|
"rewards/rejected": -11.11489200592041, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 7.921923351582957e-08, |
|
"logits/chosen": 2.5514450073242188, |
|
"logits/rejected": 2.77209734916687, |
|
"logps/chosen": -384.43865966796875, |
|
"logps/rejected": -423.64892578125, |
|
"loss": 0.0034, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.14157016575336456, |
|
"rewards/margins": 11.351391792297363, |
|
"rewards/rejected": -11.209821701049805, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 7.910021423470602e-08, |
|
"logits/chosen": 2.9975619316101074, |
|
"logits/rejected": 2.824094295501709, |
|
"logps/chosen": -428.303466796875, |
|
"logps/rejected": -438.8485412597656, |
|
"loss": 0.0099, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5388720631599426, |
|
"rewards/margins": 12.036158561706543, |
|
"rewards/rejected": -11.497285842895508, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 7.898119495358248e-08, |
|
"logits/chosen": 2.3330235481262207, |
|
"logits/rejected": 3.021794080734253, |
|
"logps/chosen": -417.4193420410156, |
|
"logps/rejected": -474.170166015625, |
|
"loss": 0.0054, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.0910119041800499, |
|
"rewards/margins": 12.0770902633667, |
|
"rewards/rejected": -12.168102264404297, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_logits/chosen": 1.7441036701202393, |
|
"eval_logits/rejected": 2.28686785697937, |
|
"eval_logps/chosen": -404.1935119628906, |
|
"eval_logps/rejected": -408.94500732421875, |
|
"eval_loss": 0.0632321760058403, |
|
"eval_rewards/accuracies": 0.9609375, |
|
"eval_rewards/chosen": -0.48519980907440186, |
|
"eval_rewards/margins": 9.848224639892578, |
|
"eval_rewards/rejected": -10.333423614501953, |
|
"eval_runtime": 77.9801, |
|
"eval_samples_per_second": 12.824, |
|
"eval_steps_per_second": 0.41, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 7.886217567245894e-08, |
|
"logits/chosen": 2.2133631706237793, |
|
"logits/rejected": 3.3354735374450684, |
|
"logps/chosen": -428.5398864746094, |
|
"logps/rejected": -432.5113220214844, |
|
"loss": 0.0077, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9777836799621582, |
|
"rewards/margins": 12.670408248901367, |
|
"rewards/rejected": -11.692625045776367, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 7.874315639133539e-08, |
|
"logits/chosen": 2.4246087074279785, |
|
"logits/rejected": 3.052839756011963, |
|
"logps/chosen": -421.6343688964844, |
|
"logps/rejected": -421.95697021484375, |
|
"loss": 0.0077, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3704569339752197, |
|
"rewards/margins": 12.342972755432129, |
|
"rewards/rejected": -10.972516059875488, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 7.862413711021185e-08, |
|
"logits/chosen": 3.059401273727417, |
|
"logits/rejected": 3.639910936355591, |
|
"logps/chosen": -407.8876037597656, |
|
"logps/rejected": -447.3395080566406, |
|
"loss": 0.0123, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.906929612159729, |
|
"rewards/margins": 12.296318054199219, |
|
"rewards/rejected": -11.389389991760254, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 7.85051178290883e-08, |
|
"logits/chosen": 2.043600082397461, |
|
"logits/rejected": 2.944366931915283, |
|
"logps/chosen": -399.25140380859375, |
|
"logps/rejected": -418.093994140625, |
|
"loss": 0.0117, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.2023608684539795, |
|
"rewards/margins": 10.754437446594238, |
|
"rewards/rejected": -10.55207633972168, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 7.838609854796476e-08, |
|
"logits/chosen": 2.511198043823242, |
|
"logits/rejected": 3.7167916297912598, |
|
"logps/chosen": -428.31396484375, |
|
"logps/rejected": -459.7771911621094, |
|
"loss": 0.0065, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.24688346683979034, |
|
"rewards/margins": 11.848150253295898, |
|
"rewards/rejected": -11.601266860961914, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 7.826707926684123e-08, |
|
"logits/chosen": 2.4253451824188232, |
|
"logits/rejected": 3.1690335273742676, |
|
"logps/chosen": -426.9501953125, |
|
"logps/rejected": -453.51531982421875, |
|
"loss": 0.0053, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.7976835370063782, |
|
"rewards/margins": 12.751691818237305, |
|
"rewards/rejected": -11.954008102416992, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 7.814805998571769e-08, |
|
"logits/chosen": 2.323408365249634, |
|
"logits/rejected": 3.649256467819214, |
|
"logps/chosen": -410.73699951171875, |
|
"logps/rejected": -436.67669677734375, |
|
"loss": 0.0125, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.16351564228534698, |
|
"rewards/margins": 10.847258567810059, |
|
"rewards/rejected": -10.683743476867676, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 7.802904070459414e-08, |
|
"logits/chosen": 2.627110004425049, |
|
"logits/rejected": 3.4923622608184814, |
|
"logps/chosen": -434.0037536621094, |
|
"logps/rejected": -436.61669921875, |
|
"loss": 0.0092, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.193922996520996, |
|
"rewards/margins": 11.6904296875, |
|
"rewards/rejected": -10.496505737304688, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 7.79100214234706e-08, |
|
"logits/chosen": 2.3079676628112793, |
|
"logits/rejected": 3.4073116779327393, |
|
"logps/chosen": -419.6051330566406, |
|
"logps/rejected": -428.28326416015625, |
|
"loss": 0.008, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.7547308802604675, |
|
"rewards/margins": 11.233491897583008, |
|
"rewards/rejected": -10.478760719299316, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 7.779100214234706e-08, |
|
"logits/chosen": 2.747231960296631, |
|
"logits/rejected": 3.3032188415527344, |
|
"logps/chosen": -440.6018981933594, |
|
"logps/rejected": -423.44952392578125, |
|
"loss": 0.0083, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2705620527267456, |
|
"rewards/margins": 11.606225967407227, |
|
"rewards/rejected": -10.335662841796875, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_logits/chosen": 1.7818334102630615, |
|
"eval_logits/rejected": 2.3191139698028564, |
|
"eval_logps/chosen": -398.98333740234375, |
|
"eval_logps/rejected": -405.5218811035156, |
|
"eval_loss": 0.05957724153995514, |
|
"eval_rewards/accuracies": 0.953125, |
|
"eval_rewards/chosen": 0.03581659495830536, |
|
"eval_rewards/margins": 10.026926040649414, |
|
"eval_rewards/rejected": -9.991110801696777, |
|
"eval_runtime": 77.8303, |
|
"eval_samples_per_second": 12.848, |
|
"eval_steps_per_second": 0.411, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 7.767198286122351e-08, |
|
"logits/chosen": 2.7316629886627197, |
|
"logits/rejected": 3.3055825233459473, |
|
"logps/chosen": -377.58599853515625, |
|
"logps/rejected": -423.8103942871094, |
|
"loss": 0.0101, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7823238968849182, |
|
"rewards/margins": 11.870222091674805, |
|
"rewards/rejected": -11.087898254394531, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 7.755296358009997e-08, |
|
"logits/chosen": 2.6006321907043457, |
|
"logits/rejected": 3.5436511039733887, |
|
"logps/chosen": -359.8816833496094, |
|
"logps/rejected": -414.5054626464844, |
|
"loss": 0.0065, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.05306140333414078, |
|
"rewards/margins": 11.554253578186035, |
|
"rewards/rejected": -11.501191139221191, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 7.743394429897642e-08, |
|
"logits/chosen": 2.4547390937805176, |
|
"logits/rejected": 3.4132580757141113, |
|
"logps/chosen": -448.56866455078125, |
|
"logps/rejected": -452.8617248535156, |
|
"loss": 0.0109, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.098752737045288, |
|
"rewards/margins": 12.525891304016113, |
|
"rewards/rejected": -11.427138328552246, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 7.731492501785288e-08, |
|
"logits/chosen": 2.644763469696045, |
|
"logits/rejected": 3.5032525062561035, |
|
"logps/chosen": -416.0933532714844, |
|
"logps/rejected": -436.23486328125, |
|
"loss": 0.0095, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0677202939987183, |
|
"rewards/margins": 11.958089828491211, |
|
"rewards/rejected": -10.890369415283203, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 7.719590573672935e-08, |
|
"logits/chosen": 2.381120204925537, |
|
"logits/rejected": 3.223132371902466, |
|
"logps/chosen": -429.01861572265625, |
|
"logps/rejected": -421.361328125, |
|
"loss": 0.005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6816972494125366, |
|
"rewards/margins": 11.070103645324707, |
|
"rewards/rejected": -10.388406753540039, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 7.70768864556058e-08, |
|
"logits/chosen": 3.1485886573791504, |
|
"logits/rejected": 3.6244475841522217, |
|
"logps/chosen": -411.9215393066406, |
|
"logps/rejected": -425.89837646484375, |
|
"loss": 0.0052, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4478631615638733, |
|
"rewards/margins": 11.844661712646484, |
|
"rewards/rejected": -11.39680004119873, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 7.695786717448226e-08, |
|
"logits/chosen": 2.1998846530914307, |
|
"logits/rejected": 3.2429378032684326, |
|
"logps/chosen": -422.6412658691406, |
|
"logps/rejected": -422.876953125, |
|
"loss": 0.0085, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1410973072052002, |
|
"rewards/margins": 11.749425888061523, |
|
"rewards/rejected": -10.608327865600586, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 7.683884789335872e-08, |
|
"logits/chosen": 2.5578694343566895, |
|
"logits/rejected": 3.4331531524658203, |
|
"logps/chosen": -455.40301513671875, |
|
"logps/rejected": -470.500732421875, |
|
"loss": 0.0062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.343795657157898, |
|
"rewards/margins": 12.484710693359375, |
|
"rewards/rejected": -11.140914916992188, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 7.671982861223519e-08, |
|
"logits/chosen": 2.706578254699707, |
|
"logits/rejected": 3.628066301345825, |
|
"logps/chosen": -389.6455383300781, |
|
"logps/rejected": -412.09686279296875, |
|
"loss": 0.0101, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3088476657867432, |
|
"rewards/margins": 12.140149116516113, |
|
"rewards/rejected": -10.831302642822266, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 7.660080933111163e-08, |
|
"logits/chosen": 2.5364797115325928, |
|
"logits/rejected": 3.7345130443573, |
|
"logps/chosen": -440.4591369628906, |
|
"logps/rejected": -508.75164794921875, |
|
"loss": 0.0066, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2763676643371582, |
|
"rewards/margins": 16.67254638671875, |
|
"rewards/rejected": -15.3961763381958, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_logits/chosen": 1.9009253978729248, |
|
"eval_logits/rejected": 2.4084951877593994, |
|
"eval_logps/chosen": -403.50592041015625, |
|
"eval_logps/rejected": -410.25238037109375, |
|
"eval_loss": 0.06930559128522873, |
|
"eval_rewards/accuracies": 0.9453125, |
|
"eval_rewards/chosen": -0.4164417088031769, |
|
"eval_rewards/margins": 10.047719955444336, |
|
"eval_rewards/rejected": -10.464160919189453, |
|
"eval_runtime": 78.1903, |
|
"eval_samples_per_second": 12.789, |
|
"eval_steps_per_second": 0.409, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 7.648179004998809e-08, |
|
"logits/chosen": 2.4650321006774902, |
|
"logits/rejected": 2.9449849128723145, |
|
"logps/chosen": -446.25445556640625, |
|
"logps/rejected": -468.34954833984375, |
|
"loss": 0.0056, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0595293045043945, |
|
"rewards/margins": 12.140164375305176, |
|
"rewards/rejected": -11.080634117126465, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 7.636277076886454e-08, |
|
"logits/chosen": 2.7862162590026855, |
|
"logits/rejected": 3.3674285411834717, |
|
"logps/chosen": -447.1483459472656, |
|
"logps/rejected": -483.9112854003906, |
|
"loss": 0.0073, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.6543643474578857, |
|
"rewards/margins": 13.146313667297363, |
|
"rewards/rejected": -11.491949081420898, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 7.624375148774101e-08, |
|
"logits/chosen": 2.4357800483703613, |
|
"logits/rejected": 3.8338236808776855, |
|
"logps/chosen": -415.13055419921875, |
|
"logps/rejected": -419.103515625, |
|
"loss": 0.0108, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.2529149055480957, |
|
"rewards/margins": 11.368078231811523, |
|
"rewards/rejected": -10.11516284942627, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 7.612473220661747e-08, |
|
"logits/chosen": 3.0562214851379395, |
|
"logits/rejected": 2.8682100772857666, |
|
"logps/chosen": -413.2584533691406, |
|
"logps/rejected": -434.45428466796875, |
|
"loss": 0.0078, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3094024956226349, |
|
"rewards/margins": 11.557034492492676, |
|
"rewards/rejected": -11.24763298034668, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 7.600571292549393e-08, |
|
"logits/chosen": 2.3887839317321777, |
|
"logits/rejected": 3.3047709465026855, |
|
"logps/chosen": -442.767333984375, |
|
"logps/rejected": -466.8439025878906, |
|
"loss": 0.0107, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.5330932140350342, |
|
"rewards/margins": 12.568510055541992, |
|
"rewards/rejected": -11.035417556762695, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 7.588669364437038e-08, |
|
"logits/chosen": 2.524888515472412, |
|
"logits/rejected": 3.4284675121307373, |
|
"logps/chosen": -402.7549743652344, |
|
"logps/rejected": -445.7957458496094, |
|
"loss": 0.005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.39540719985961914, |
|
"rewards/margins": 11.571582794189453, |
|
"rewards/rejected": -11.176176071166992, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 7.576767436324685e-08, |
|
"logits/chosen": 2.311568260192871, |
|
"logits/rejected": 3.2848758697509766, |
|
"logps/chosen": -423.08416748046875, |
|
"logps/rejected": -438.42718505859375, |
|
"loss": 0.0098, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0096681118011475, |
|
"rewards/margins": 11.884648323059082, |
|
"rewards/rejected": -10.874979972839355, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 7.564865508212331e-08, |
|
"logits/chosen": 2.790484666824341, |
|
"logits/rejected": 3.298766613006592, |
|
"logps/chosen": -360.9853515625, |
|
"logps/rejected": -404.8863830566406, |
|
"loss": 0.0089, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.7994908094406128, |
|
"rewards/margins": 10.370939254760742, |
|
"rewards/rejected": -9.571449279785156, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 7.552963580099975e-08, |
|
"logits/chosen": 2.6981308460235596, |
|
"logits/rejected": 3.2417426109313965, |
|
"logps/chosen": -399.3218994140625, |
|
"logps/rejected": -431.1190490722656, |
|
"loss": 0.0105, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.13810133934021, |
|
"rewards/margins": 12.226466178894043, |
|
"rewards/rejected": -11.08836555480957, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 7.541061651987621e-08, |
|
"logits/chosen": 2.300840377807617, |
|
"logits/rejected": 3.730071544647217, |
|
"logps/chosen": -458.31536865234375, |
|
"logps/rejected": -440.55523681640625, |
|
"loss": 0.0125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.555953860282898, |
|
"rewards/margins": 11.838624954223633, |
|
"rewards/rejected": -10.282670974731445, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_logits/chosen": 1.8183174133300781, |
|
"eval_logits/rejected": 2.3427019119262695, |
|
"eval_logps/chosen": -399.7039794921875, |
|
"eval_logps/rejected": -408.1551513671875, |
|
"eval_loss": 0.059715636074543, |
|
"eval_rewards/accuracies": 0.953125, |
|
"eval_rewards/chosen": -0.03624638170003891, |
|
"eval_rewards/margins": 10.218188285827637, |
|
"eval_rewards/rejected": -10.254435539245605, |
|
"eval_runtime": 78.1202, |
|
"eval_samples_per_second": 12.801, |
|
"eval_steps_per_second": 0.41, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 7.529159723875268e-08, |
|
"logits/chosen": 2.5125820636749268, |
|
"logits/rejected": 2.929962635040283, |
|
"logps/chosen": -435.52734375, |
|
"logps/rejected": -480.02215576171875, |
|
"loss": 0.0104, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.0602343082427979, |
|
"rewards/margins": 12.658498764038086, |
|
"rewards/rejected": -11.59826374053955, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 7.517257795762913e-08, |
|
"logits/chosen": 2.5797853469848633, |
|
"logits/rejected": 3.4592716693878174, |
|
"logps/chosen": -427.9090881347656, |
|
"logps/rejected": -460.9290466308594, |
|
"loss": 0.0091, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.9765796661376953, |
|
"rewards/margins": 12.438034057617188, |
|
"rewards/rejected": -11.461454391479492, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 7.505355867650559e-08, |
|
"logits/chosen": 2.317451000213623, |
|
"logits/rejected": 3.6136322021484375, |
|
"logps/chosen": -426.43841552734375, |
|
"logps/rejected": -431.55279541015625, |
|
"loss": 0.0083, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.6562097072601318, |
|
"rewards/margins": 11.710824966430664, |
|
"rewards/rejected": -10.05461597442627, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 7.493453939538205e-08, |
|
"logits/chosen": 2.4118270874023438, |
|
"logits/rejected": 3.3411223888397217, |
|
"logps/chosen": -433.496337890625, |
|
"logps/rejected": -413.53826904296875, |
|
"loss": 0.0106, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.49976015090942383, |
|
"rewards/margins": 10.6139554977417, |
|
"rewards/rejected": -10.114194869995117, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 7.48155201142585e-08, |
|
"logits/chosen": 2.3743271827697754, |
|
"logits/rejected": 3.5587539672851562, |
|
"logps/chosen": -422.71014404296875, |
|
"logps/rejected": -442.51556396484375, |
|
"loss": 0.014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9052066802978516, |
|
"rewards/margins": 11.696832656860352, |
|
"rewards/rejected": -10.791627883911133, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 7.469650083313497e-08, |
|
"logits/chosen": 2.606020450592041, |
|
"logits/rejected": 3.443312168121338, |
|
"logps/chosen": -397.82208251953125, |
|
"logps/rejected": -423.3816833496094, |
|
"loss": 0.0037, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.306690514087677, |
|
"rewards/margins": 11.203126907348633, |
|
"rewards/rejected": -10.896435737609863, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 7.457748155201143e-08, |
|
"logits/chosen": 2.916489839553833, |
|
"logits/rejected": 3.762838840484619, |
|
"logps/chosen": -384.3777770996094, |
|
"logps/rejected": -457.49041748046875, |
|
"loss": 0.0061, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.04699459671974182, |
|
"rewards/margins": 11.909761428833008, |
|
"rewards/rejected": -11.86276626586914, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 7.445846227088787e-08, |
|
"logits/chosen": 2.0478157997131348, |
|
"logits/rejected": 3.478813886642456, |
|
"logps/chosen": -432.97686767578125, |
|
"logps/rejected": -439.83221435546875, |
|
"loss": 0.0053, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.5262401103973389, |
|
"rewards/margins": 12.083934783935547, |
|
"rewards/rejected": -10.557694435119629, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 7.433944298976433e-08, |
|
"logits/chosen": 2.548459053039551, |
|
"logits/rejected": 3.1747069358825684, |
|
"logps/chosen": -429.4046936035156, |
|
"logps/rejected": -495.275146484375, |
|
"loss": 0.0074, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3433806300163269, |
|
"rewards/margins": 12.296731948852539, |
|
"rewards/rejected": -11.953351974487305, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 7.42204237086408e-08, |
|
"logits/chosen": 2.3155176639556885, |
|
"logits/rejected": 3.0107157230377197, |
|
"logps/chosen": -458.78216552734375, |
|
"logps/rejected": -431.2632751464844, |
|
"loss": 0.0093, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.5119603872299194, |
|
"rewards/margins": 12.521995544433594, |
|
"rewards/rejected": -11.010034561157227, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_logits/chosen": 1.7790201902389526, |
|
"eval_logits/rejected": 2.328681468963623, |
|
"eval_logps/chosen": -406.3954772949219, |
|
"eval_logps/rejected": -412.999755859375, |
|
"eval_loss": 0.0663955956697464, |
|
"eval_rewards/accuracies": 0.9609375, |
|
"eval_rewards/chosen": -0.70539790391922, |
|
"eval_rewards/margins": 10.033500671386719, |
|
"eval_rewards/rejected": -10.738900184631348, |
|
"eval_runtime": 78.1747, |
|
"eval_samples_per_second": 12.792, |
|
"eval_steps_per_second": 0.409, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 7.410140442751725e-08, |
|
"logits/chosen": 2.371422290802002, |
|
"logits/rejected": 3.271979808807373, |
|
"logps/chosen": -402.83258056640625, |
|
"logps/rejected": -439.00079345703125, |
|
"loss": 0.0137, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6034157872200012, |
|
"rewards/margins": 12.108831405639648, |
|
"rewards/rejected": -11.505415916442871, |
|
"step": 3110 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 9336, |
|
"num_train_epochs": 6, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|