|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9992254066615027, |
|
"eval_steps": 100, |
|
"global_step": 726, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 6.84931506849315e-09, |
|
"logits/chosen": -2.3491616249084473, |
|
"logits/rejected": -2.418564796447754, |
|
"logps/chosen": -271.3881530761719, |
|
"logps/rejected": -208.9749298095703, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.84931506849315e-08, |
|
"logits/chosen": -2.4231245517730713, |
|
"logits/rejected": -2.3566601276397705, |
|
"logps/chosen": -293.38800048828125, |
|
"logps/rejected": -226.29283142089844, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.4548611044883728, |
|
"rewards/chosen": 0.003188559552654624, |
|
"rewards/margins": 0.0021638227626681328, |
|
"rewards/rejected": 0.0010247372556477785, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.36986301369863e-07, |
|
"logits/chosen": -2.431933879852295, |
|
"logits/rejected": -2.405198574066162, |
|
"logps/chosen": -278.5166931152344, |
|
"logps/rejected": -216.7791290283203, |
|
"loss": 0.6942, |
|
"rewards/accuracies": 0.504687488079071, |
|
"rewards/chosen": -0.000816329091321677, |
|
"rewards/margins": 0.0019420869648456573, |
|
"rewards/rejected": -0.0027584161143749952, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.054794520547945e-07, |
|
"logits/chosen": -2.389657497406006, |
|
"logits/rejected": -2.348972797393799, |
|
"logps/chosen": -252.9993438720703, |
|
"logps/rejected": -207.1633758544922, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.0014556010719388723, |
|
"rewards/margins": 0.0006705918349325657, |
|
"rewards/rejected": -0.0021261931397020817, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.73972602739726e-07, |
|
"logits/chosen": -2.460561752319336, |
|
"logits/rejected": -2.414844036102295, |
|
"logps/chosen": -283.7592468261719, |
|
"logps/rejected": -216.4773712158203, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.5484374761581421, |
|
"rewards/chosen": 0.004375931341201067, |
|
"rewards/margins": 0.00752140861004591, |
|
"rewards/rejected": -0.0031454775016754866, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.424657534246575e-07, |
|
"logits/chosen": -2.4366953372955322, |
|
"logits/rejected": -2.3972277641296387, |
|
"logps/chosen": -267.2607727050781, |
|
"logps/rejected": -223.6705780029297, |
|
"loss": 0.6897, |
|
"rewards/accuracies": 0.5406249761581421, |
|
"rewards/chosen": 0.004865794442594051, |
|
"rewards/margins": 0.007948420941829681, |
|
"rewards/rejected": -0.0030826255679130554, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.10958904109589e-07, |
|
"logits/chosen": -2.412304639816284, |
|
"logits/rejected": -2.3892178535461426, |
|
"logps/chosen": -266.85028076171875, |
|
"logps/rejected": -214.7494659423828, |
|
"loss": 0.6873, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.00616841483861208, |
|
"rewards/margins": 0.012422902509570122, |
|
"rewards/rejected": -0.006254489067941904, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.794520547945205e-07, |
|
"logits/chosen": -2.390881299972534, |
|
"logits/rejected": -2.3977627754211426, |
|
"logps/chosen": -254.04043579101562, |
|
"logps/rejected": -214.6400909423828, |
|
"loss": 0.6847, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.009625923819839954, |
|
"rewards/margins": 0.01777799427509308, |
|
"rewards/rejected": -0.008152070455253124, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.946401225114854e-07, |
|
"logits/chosen": -2.4295055866241455, |
|
"logits/rejected": -2.37807035446167, |
|
"logps/chosen": -265.05718994140625, |
|
"logps/rejected": -218.77059936523438, |
|
"loss": 0.6795, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": 0.013735203072428703, |
|
"rewards/margins": 0.028081998229026794, |
|
"rewards/rejected": -0.014346795156598091, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.869831546707504e-07, |
|
"logits/chosen": -2.4787497520446777, |
|
"logits/rejected": -2.422356128692627, |
|
"logps/chosen": -271.730224609375, |
|
"logps/rejected": -224.4839324951172, |
|
"loss": 0.6734, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": 0.022287212312221527, |
|
"rewards/margins": 0.04716240242123604, |
|
"rewards/rejected": -0.02487519010901451, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.793261868300153e-07, |
|
"logits/chosen": -2.417426586151123, |
|
"logits/rejected": -2.4039013385772705, |
|
"logps/chosen": -273.8762512207031, |
|
"logps/rejected": -227.797607421875, |
|
"loss": 0.6692, |
|
"rewards/accuracies": 0.659375011920929, |
|
"rewards/chosen": 0.025275733321905136, |
|
"rewards/margins": 0.057271964848041534, |
|
"rewards/rejected": -0.0319962315261364, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.7166921898928023e-07, |
|
"logits/chosen": -2.4509871006011963, |
|
"logits/rejected": -2.3908462524414062, |
|
"logps/chosen": -252.6064453125, |
|
"logps/rejected": -222.64639282226562, |
|
"loss": 0.6628, |
|
"rewards/accuracies": 0.667187511920929, |
|
"rewards/chosen": 0.024279529228806496, |
|
"rewards/margins": 0.06397499889135361, |
|
"rewards/rejected": -0.03969546779990196, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.640122511485451e-07, |
|
"logits/chosen": -2.412073850631714, |
|
"logits/rejected": -2.4017536640167236, |
|
"logps/chosen": -256.89813232421875, |
|
"logps/rejected": -219.8057098388672, |
|
"loss": 0.6581, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.029207896441221237, |
|
"rewards/margins": 0.07588861137628555, |
|
"rewards/rejected": -0.04668071120977402, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.563552833078101e-07, |
|
"logits/chosen": -2.4562458992004395, |
|
"logits/rejected": -2.3951973915100098, |
|
"logps/chosen": -265.9952697753906, |
|
"logps/rejected": -225.94125366210938, |
|
"loss": 0.6504, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.025836100801825523, |
|
"rewards/margins": 0.09206128865480423, |
|
"rewards/rejected": -0.06622518599033356, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.4869831546707505e-07, |
|
"logits/chosen": -2.460266590118408, |
|
"logits/rejected": -2.401520013809204, |
|
"logps/chosen": -270.29888916015625, |
|
"logps/rejected": -230.37539672851562, |
|
"loss": 0.6495, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": 0.03395112603902817, |
|
"rewards/margins": 0.11508414894342422, |
|
"rewards/rejected": -0.08113302290439606, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.4104134762633994e-07, |
|
"logits/chosen": -2.4852752685546875, |
|
"logits/rejected": -2.4318604469299316, |
|
"logps/chosen": -265.973388671875, |
|
"logps/rejected": -226.55484008789062, |
|
"loss": 0.6433, |
|
"rewards/accuracies": 0.692187488079071, |
|
"rewards/chosen": 0.03317371755838394, |
|
"rewards/margins": 0.12171275913715363, |
|
"rewards/rejected": -0.08853904157876968, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.333843797856049e-07, |
|
"logits/chosen": -2.4467227458953857, |
|
"logits/rejected": -2.4029695987701416, |
|
"logps/chosen": -279.33648681640625, |
|
"logps/rejected": -239.00009155273438, |
|
"loss": 0.6343, |
|
"rewards/accuracies": 0.6796875, |
|
"rewards/chosen": 0.04138711839914322, |
|
"rewards/margins": 0.1465190351009369, |
|
"rewards/rejected": -0.10513193905353546, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.257274119448698e-07, |
|
"logits/chosen": -2.4798355102539062, |
|
"logits/rejected": -2.452397108078003, |
|
"logps/chosen": -270.727783203125, |
|
"logps/rejected": -239.09780883789062, |
|
"loss": 0.6335, |
|
"rewards/accuracies": 0.653124988079071, |
|
"rewards/chosen": 0.024355659261345863, |
|
"rewards/margins": 0.1352422684431076, |
|
"rewards/rejected": -0.11088661849498749, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.180704441041347e-07, |
|
"logits/chosen": -2.4359121322631836, |
|
"logits/rejected": -2.388683795928955, |
|
"logps/chosen": -256.79022216796875, |
|
"logps/rejected": -226.1436767578125, |
|
"loss": 0.6367, |
|
"rewards/accuracies": 0.6640625, |
|
"rewards/chosen": 0.016556020826101303, |
|
"rewards/margins": 0.147763192653656, |
|
"rewards/rejected": -0.1312071532011032, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.1041347626339966e-07, |
|
"logits/chosen": -2.4478251934051514, |
|
"logits/rejected": -2.4065427780151367, |
|
"logps/chosen": -261.63702392578125, |
|
"logps/rejected": -213.1779327392578, |
|
"loss": 0.6269, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.02061801217496395, |
|
"rewards/margins": 0.17272573709487915, |
|
"rewards/rejected": -0.15210774540901184, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.027565084226646e-07, |
|
"logits/chosen": -2.4714255332946777, |
|
"logits/rejected": -2.414602279663086, |
|
"logps/chosen": -262.29486083984375, |
|
"logps/rejected": -218.0116424560547, |
|
"loss": 0.6175, |
|
"rewards/accuracies": 0.6890624761581421, |
|
"rewards/chosen": 0.027450546622276306, |
|
"rewards/margins": 0.19447624683380127, |
|
"rewards/rejected": -0.16702571511268616, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.9509954058192954e-07, |
|
"logits/chosen": -2.4752840995788574, |
|
"logits/rejected": -2.4354655742645264, |
|
"logps/chosen": -283.89959716796875, |
|
"logps/rejected": -231.7078399658203, |
|
"loss": 0.6161, |
|
"rewards/accuracies": 0.653124988079071, |
|
"rewards/chosen": 0.024292152374982834, |
|
"rewards/margins": 0.21237091720104218, |
|
"rewards/rejected": -0.18807876110076904, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.874425727411945e-07, |
|
"logits/chosen": -2.422091007232666, |
|
"logits/rejected": -2.40881609916687, |
|
"logps/chosen": -276.7785339355469, |
|
"logps/rejected": -229.2734832763672, |
|
"loss": 0.6144, |
|
"rewards/accuracies": 0.660937488079071, |
|
"rewards/chosen": 0.017709506675601006, |
|
"rewards/margins": 0.21948948502540588, |
|
"rewards/rejected": -0.20178000628948212, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.797856049004594e-07, |
|
"logits/chosen": -2.4441866874694824, |
|
"logits/rejected": -2.38869571685791, |
|
"logps/chosen": -264.10430908203125, |
|
"logps/rejected": -228.3271484375, |
|
"loss": 0.6117, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.029474353417754173, |
|
"rewards/margins": 0.22035422921180725, |
|
"rewards/rejected": -0.19087985157966614, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.7212863705972436e-07, |
|
"logits/chosen": -2.4633097648620605, |
|
"logits/rejected": -2.4186224937438965, |
|
"logps/chosen": -271.4654235839844, |
|
"logps/rejected": -222.46841430664062, |
|
"loss": 0.6125, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.024074096232652664, |
|
"rewards/margins": 0.2165375053882599, |
|
"rewards/rejected": -0.19246339797973633, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -2.144517421722412, |
|
"eval_logits/rejected": -2.0242087841033936, |
|
"eval_logps/chosen": -264.5997619628906, |
|
"eval_logps/rejected": -221.6983184814453, |
|
"eval_loss": 0.6057174205780029, |
|
"eval_rewards/accuracies": 0.6759999990463257, |
|
"eval_rewards/chosen": 0.007874858565628529, |
|
"eval_rewards/margins": 0.24786852300167084, |
|
"eval_rewards/rejected": -0.23999367654323578, |
|
"eval_runtime": 278.9133, |
|
"eval_samples_per_second": 7.171, |
|
"eval_steps_per_second": 0.448, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.6447166921898925e-07, |
|
"logits/chosen": -2.400252103805542, |
|
"logits/rejected": -2.3472890853881836, |
|
"logps/chosen": -257.4571838378906, |
|
"logps/rejected": -210.4391326904297, |
|
"loss": 0.6164, |
|
"rewards/accuracies": 0.6734374761581421, |
|
"rewards/chosen": 0.004229591693729162, |
|
"rewards/margins": 0.22420725226402283, |
|
"rewards/rejected": -0.21997769176959991, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.568147013782542e-07, |
|
"logits/chosen": -2.415523052215576, |
|
"logits/rejected": -2.3758111000061035, |
|
"logps/chosen": -261.9351501464844, |
|
"logps/rejected": -226.16259765625, |
|
"loss": 0.6006, |
|
"rewards/accuracies": 0.7265625, |
|
"rewards/chosen": 0.023142099380493164, |
|
"rewards/margins": 0.2655286490917206, |
|
"rewards/rejected": -0.24238653481006622, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.4915773353751913e-07, |
|
"logits/chosen": -2.429934024810791, |
|
"logits/rejected": -2.365861415863037, |
|
"logps/chosen": -278.4029846191406, |
|
"logps/rejected": -236.08688354492188, |
|
"loss": 0.5925, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": 0.02966948226094246, |
|
"rewards/margins": 0.33913469314575195, |
|
"rewards/rejected": -0.3094651699066162, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.41500765696784e-07, |
|
"logits/chosen": -2.4358582496643066, |
|
"logits/rejected": -2.396267890930176, |
|
"logps/chosen": -251.093017578125, |
|
"logps/rejected": -225.80685424804688, |
|
"loss": 0.6036, |
|
"rewards/accuracies": 0.692187488079071, |
|
"rewards/chosen": 0.010072538629174232, |
|
"rewards/margins": 0.24589493870735168, |
|
"rewards/rejected": -0.2358224093914032, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.33843797856049e-07, |
|
"logits/chosen": -2.408804416656494, |
|
"logits/rejected": -2.394888401031494, |
|
"logps/chosen": -283.15380859375, |
|
"logps/rejected": -228.33767700195312, |
|
"loss": 0.5915, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.008710218593478203, |
|
"rewards/margins": 0.3084966242313385, |
|
"rewards/rejected": -0.29978638887405396, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.2618683001531396e-07, |
|
"logits/chosen": -2.4084572792053223, |
|
"logits/rejected": -2.337435722351074, |
|
"logps/chosen": -261.3924865722656, |
|
"logps/rejected": -227.77651977539062, |
|
"loss": 0.588, |
|
"rewards/accuracies": 0.698437511920929, |
|
"rewards/chosen": 0.008477389812469482, |
|
"rewards/margins": 0.298746258020401, |
|
"rewards/rejected": -0.29026883840560913, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.1852986217457885e-07, |
|
"logits/chosen": -2.4575297832489014, |
|
"logits/rejected": -2.373924493789673, |
|
"logps/chosen": -261.287109375, |
|
"logps/rejected": -228.5553741455078, |
|
"loss": 0.5972, |
|
"rewards/accuracies": 0.6953125, |
|
"rewards/chosen": 0.002108477521687746, |
|
"rewards/margins": 0.2948620915412903, |
|
"rewards/rejected": -0.2927536368370056, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.108728943338438e-07, |
|
"logits/chosen": -2.4443328380584717, |
|
"logits/rejected": -2.4351658821105957, |
|
"logps/chosen": -252.80996704101562, |
|
"logps/rejected": -237.87631225585938, |
|
"loss": 0.5943, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -0.015406561084091663, |
|
"rewards/margins": 0.2515925168991089, |
|
"rewards/rejected": -0.2669990658760071, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.0321592649310873e-07, |
|
"logits/chosen": -2.424647092819214, |
|
"logits/rejected": -2.357273578643799, |
|
"logps/chosen": -253.7325897216797, |
|
"logps/rejected": -224.3144073486328, |
|
"loss": 0.587, |
|
"rewards/accuracies": 0.6859375238418579, |
|
"rewards/chosen": -0.0076437839306890965, |
|
"rewards/margins": 0.2989902198314667, |
|
"rewards/rejected": -0.30663400888442993, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.955589586523736e-07, |
|
"logits/chosen": -2.4427545070648193, |
|
"logits/rejected": -2.3824856281280518, |
|
"logps/chosen": -265.68939208984375, |
|
"logps/rejected": -226.4335174560547, |
|
"loss": 0.592, |
|
"rewards/accuracies": 0.7015625238418579, |
|
"rewards/chosen": -0.015530401840806007, |
|
"rewards/margins": 0.3260301351547241, |
|
"rewards/rejected": -0.3415605425834656, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.8790199081163856e-07, |
|
"logits/chosen": -2.439944267272949, |
|
"logits/rejected": -2.3695976734161377, |
|
"logps/chosen": -266.065673828125, |
|
"logps/rejected": -225.2880859375, |
|
"loss": 0.5939, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": -0.026788845658302307, |
|
"rewards/margins": 0.28384846448898315, |
|
"rewards/rejected": -0.31063732504844666, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.802450229709035e-07, |
|
"logits/chosen": -2.399728298187256, |
|
"logits/rejected": -2.3489761352539062, |
|
"logps/chosen": -254.9022216796875, |
|
"logps/rejected": -213.33193969726562, |
|
"loss": 0.5847, |
|
"rewards/accuracies": 0.667187511920929, |
|
"rewards/chosen": -0.04024948924779892, |
|
"rewards/margins": 0.29891303181648254, |
|
"rewards/rejected": -0.33916252851486206, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.725880551301684e-07, |
|
"logits/chosen": -2.462254047393799, |
|
"logits/rejected": -2.406602621078491, |
|
"logps/chosen": -274.6975402832031, |
|
"logps/rejected": -232.84591674804688, |
|
"loss": 0.5929, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.02199350856244564, |
|
"rewards/margins": 0.31067317724227905, |
|
"rewards/rejected": -0.33266669511795044, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.649310872894334e-07, |
|
"logits/chosen": -2.4482955932617188, |
|
"logits/rejected": -2.4154446125030518, |
|
"logps/chosen": -275.00775146484375, |
|
"logps/rejected": -223.1331787109375, |
|
"loss": 0.5816, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.020305102691054344, |
|
"rewards/margins": 0.37037259340286255, |
|
"rewards/rejected": -0.3906777501106262, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.572741194486983e-07, |
|
"logits/chosen": -2.448878765106201, |
|
"logits/rejected": -2.393206834793091, |
|
"logps/chosen": -273.81109619140625, |
|
"logps/rejected": -208.37985229492188, |
|
"loss": 0.5799, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.029856573790311813, |
|
"rewards/margins": 0.3645634055137634, |
|
"rewards/rejected": -0.3944200277328491, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.496171516079632e-07, |
|
"logits/chosen": -2.4658501148223877, |
|
"logits/rejected": -2.399857521057129, |
|
"logps/chosen": -293.2225341796875, |
|
"logps/rejected": -239.4982452392578, |
|
"loss": 0.5813, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": 0.0061371102929115295, |
|
"rewards/margins": 0.399463027715683, |
|
"rewards/rejected": -0.39332595467567444, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.4196018376722816e-07, |
|
"logits/chosen": -2.429685115814209, |
|
"logits/rejected": -2.4006247520446777, |
|
"logps/chosen": -278.5813903808594, |
|
"logps/rejected": -228.4702911376953, |
|
"loss": 0.5864, |
|
"rewards/accuracies": 0.6859375238418579, |
|
"rewards/chosen": -0.02374974638223648, |
|
"rewards/margins": 0.37017589807510376, |
|
"rewards/rejected": -0.39392566680908203, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.343032159264931e-07, |
|
"logits/chosen": -2.403900146484375, |
|
"logits/rejected": -2.3333194255828857, |
|
"logps/chosen": -268.872802734375, |
|
"logps/rejected": -224.37728881835938, |
|
"loss": 0.579, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.020599449053406715, |
|
"rewards/margins": 0.431951105594635, |
|
"rewards/rejected": -0.4525505602359772, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.26646248085758e-07, |
|
"logits/chosen": -2.383470058441162, |
|
"logits/rejected": -2.3353710174560547, |
|
"logps/chosen": -259.7237854003906, |
|
"logps/rejected": -217.79946899414062, |
|
"loss": 0.573, |
|
"rewards/accuracies": 0.723437488079071, |
|
"rewards/chosen": -0.0417955107986927, |
|
"rewards/margins": 0.39140504598617554, |
|
"rewards/rejected": -0.43320053815841675, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.1898928024502298e-07, |
|
"logits/chosen": -2.4446728229522705, |
|
"logits/rejected": -2.3874154090881348, |
|
"logps/chosen": -263.4950256347656, |
|
"logps/rejected": -221.4724578857422, |
|
"loss": 0.5753, |
|
"rewards/accuracies": 0.7015625238418579, |
|
"rewards/chosen": -0.02173582836985588, |
|
"rewards/margins": 0.39518997073173523, |
|
"rewards/rejected": -0.4169258177280426, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.113323124042879e-07, |
|
"logits/chosen": -2.4275262355804443, |
|
"logits/rejected": -2.3907971382141113, |
|
"logps/chosen": -271.2684326171875, |
|
"logps/rejected": -231.44381713867188, |
|
"loss": 0.5744, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.020908143371343613, |
|
"rewards/margins": 0.41243448853492737, |
|
"rewards/rejected": -0.4333426058292389, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.036753445635528e-07, |
|
"logits/chosen": -2.4363036155700684, |
|
"logits/rejected": -2.4147400856018066, |
|
"logps/chosen": -284.01824951171875, |
|
"logps/rejected": -238.273681640625, |
|
"loss": 0.569, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.018130071461200714, |
|
"rewards/margins": 0.4541456699371338, |
|
"rewards/rejected": -0.4722757339477539, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.9601837672281775e-07, |
|
"logits/chosen": -2.4180634021759033, |
|
"logits/rejected": -2.3854622840881348, |
|
"logps/chosen": -270.1515808105469, |
|
"logps/rejected": -236.3723907470703, |
|
"loss": 0.576, |
|
"rewards/accuracies": 0.7046874761581421, |
|
"rewards/chosen": -0.05924994498491287, |
|
"rewards/margins": 0.37609511613845825, |
|
"rewards/rejected": -0.4353450834751129, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.883614088820827e-07, |
|
"logits/chosen": -2.4381699562072754, |
|
"logits/rejected": -2.391515016555786, |
|
"logps/chosen": -268.735595703125, |
|
"logps/rejected": -224.8667755126953, |
|
"loss": 0.5849, |
|
"rewards/accuracies": 0.723437488079071, |
|
"rewards/chosen": -0.04678649455308914, |
|
"rewards/margins": 0.3817201852798462, |
|
"rewards/rejected": -0.42850667238235474, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_logits/chosen": -2.127939224243164, |
|
"eval_logits/rejected": -2.007131576538086, |
|
"eval_logps/chosen": -265.25634765625, |
|
"eval_logps/rejected": -224.01229858398438, |
|
"eval_loss": 0.5730655789375305, |
|
"eval_rewards/accuracies": 0.6899999976158142, |
|
"eval_rewards/chosen": -0.05778134614229202, |
|
"eval_rewards/margins": 0.4136123061180115, |
|
"eval_rewards/rejected": -0.4713936746120453, |
|
"eval_runtime": 277.7189, |
|
"eval_samples_per_second": 7.202, |
|
"eval_steps_per_second": 0.45, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.807044410413476e-07, |
|
"logits/chosen": -2.4097964763641357, |
|
"logits/rejected": -2.3763108253479004, |
|
"logps/chosen": -257.9292297363281, |
|
"logps/rejected": -236.3641815185547, |
|
"loss": 0.5772, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.05177872255444527, |
|
"rewards/margins": 0.39789050817489624, |
|
"rewards/rejected": -0.4496693015098572, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7304747320061255e-07, |
|
"logits/chosen": -2.4072229862213135, |
|
"logits/rejected": -2.4033942222595215, |
|
"logps/chosen": -263.5710754394531, |
|
"logps/rejected": -230.6610107421875, |
|
"loss": 0.5772, |
|
"rewards/accuracies": 0.707812488079071, |
|
"rewards/chosen": -0.019381705671548843, |
|
"rewards/margins": 0.4000469744205475, |
|
"rewards/rejected": -0.41942867636680603, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.6539050535987747e-07, |
|
"logits/chosen": -2.4798319339752197, |
|
"logits/rejected": -2.370913028717041, |
|
"logps/chosen": -270.12432861328125, |
|
"logps/rejected": -225.058349609375, |
|
"loss": 0.5712, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.039138875901699066, |
|
"rewards/margins": 0.4365014135837555, |
|
"rewards/rejected": -0.47564029693603516, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.5773353751914243e-07, |
|
"logits/chosen": -2.4861385822296143, |
|
"logits/rejected": -2.425265312194824, |
|
"logps/chosen": -284.8677673339844, |
|
"logps/rejected": -229.98681640625, |
|
"loss": 0.5715, |
|
"rewards/accuracies": 0.7359374761581421, |
|
"rewards/chosen": -0.024127285927534103, |
|
"rewards/margins": 0.4679562449455261, |
|
"rewards/rejected": -0.4920835494995117, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.5007656967840735e-07, |
|
"logits/chosen": -2.383533000946045, |
|
"logits/rejected": -2.3430206775665283, |
|
"logps/chosen": -254.0509796142578, |
|
"logps/rejected": -230.5810089111328, |
|
"loss": 0.5677, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.039885733276605606, |
|
"rewards/margins": 0.4442899823188782, |
|
"rewards/rejected": -0.4841756820678711, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.4241960183767226e-07, |
|
"logits/chosen": -2.4291586875915527, |
|
"logits/rejected": -2.372559070587158, |
|
"logps/chosen": -282.87982177734375, |
|
"logps/rejected": -235.8987274169922, |
|
"loss": 0.573, |
|
"rewards/accuracies": 0.692187488079071, |
|
"rewards/chosen": -0.05947133153676987, |
|
"rewards/margins": 0.41908422112464905, |
|
"rewards/rejected": -0.4785555303096771, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.347626339969372e-07, |
|
"logits/chosen": -2.423152208328247, |
|
"logits/rejected": -2.3877062797546387, |
|
"logps/chosen": -270.82269287109375, |
|
"logps/rejected": -242.1062469482422, |
|
"loss": 0.5759, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.01524378638714552, |
|
"rewards/margins": 0.40917444229125977, |
|
"rewards/rejected": -0.42441821098327637, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.2710566615620215e-07, |
|
"logits/chosen": -2.3735625743865967, |
|
"logits/rejected": -2.327951431274414, |
|
"logps/chosen": -274.332763671875, |
|
"logps/rejected": -225.1637420654297, |
|
"loss": 0.5594, |
|
"rewards/accuracies": 0.707812488079071, |
|
"rewards/chosen": -0.03534569963812828, |
|
"rewards/margins": 0.4445571005344391, |
|
"rewards/rejected": -0.47990283370018005, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.1944869831546706e-07, |
|
"logits/chosen": -2.3997702598571777, |
|
"logits/rejected": -2.3793346881866455, |
|
"logps/chosen": -267.025390625, |
|
"logps/rejected": -238.75692749023438, |
|
"loss": 0.5724, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.07640588283538818, |
|
"rewards/margins": 0.4082149565219879, |
|
"rewards/rejected": -0.4846208095550537, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.11791730474732e-07, |
|
"logits/chosen": -2.4132089614868164, |
|
"logits/rejected": -2.3745548725128174, |
|
"logps/chosen": -262.74658203125, |
|
"logps/rejected": -226.48898315429688, |
|
"loss": 0.5658, |
|
"rewards/accuracies": 0.7359374761581421, |
|
"rewards/chosen": -0.07082664221525192, |
|
"rewards/margins": 0.48243194818496704, |
|
"rewards/rejected": -0.5532585382461548, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0413476263399694e-07, |
|
"logits/chosen": -2.451371669769287, |
|
"logits/rejected": -2.407169818878174, |
|
"logps/chosen": -269.4725646972656, |
|
"logps/rejected": -219.11929321289062, |
|
"loss": 0.5664, |
|
"rewards/accuracies": 0.729687511920929, |
|
"rewards/chosen": -0.07148631662130356, |
|
"rewards/margins": 0.4374977946281433, |
|
"rewards/rejected": -0.5089840888977051, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.647779479326186e-08, |
|
"logits/chosen": -2.3929615020751953, |
|
"logits/rejected": -2.3882527351379395, |
|
"logps/chosen": -251.06576538085938, |
|
"logps/rejected": -224.4808807373047, |
|
"loss": 0.5768, |
|
"rewards/accuracies": 0.667187511920929, |
|
"rewards/chosen": -0.06799022853374481, |
|
"rewards/margins": 0.3539872467517853, |
|
"rewards/rejected": -0.42197751998901367, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.88208269525268e-08, |
|
"logits/chosen": -2.3815102577209473, |
|
"logits/rejected": -2.3912739753723145, |
|
"logps/chosen": -260.7129821777344, |
|
"logps/rejected": -223.4461212158203, |
|
"loss": 0.566, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.06933742761611938, |
|
"rewards/margins": 0.41847410798072815, |
|
"rewards/rejected": -0.48781150579452515, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.116385911179173e-08, |
|
"logits/chosen": -2.3711135387420654, |
|
"logits/rejected": -2.3626675605773926, |
|
"logps/chosen": -279.54461669921875, |
|
"logps/rejected": -219.48974609375, |
|
"loss": 0.5701, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.023505648598074913, |
|
"rewards/margins": 0.4627605378627777, |
|
"rewards/rejected": -0.4862661361694336, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.350689127105667e-08, |
|
"logits/chosen": -2.4528985023498535, |
|
"logits/rejected": -2.3787388801574707, |
|
"logps/chosen": -272.67572021484375, |
|
"logps/rejected": -232.7178192138672, |
|
"loss": 0.5603, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.0497988685965538, |
|
"rewards/margins": 0.46719294786453247, |
|
"rewards/rejected": -0.5169917941093445, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.584992343032159e-08, |
|
"logits/chosen": -2.355626106262207, |
|
"logits/rejected": -2.342153549194336, |
|
"logps/chosen": -263.1979675292969, |
|
"logps/rejected": -229.1007843017578, |
|
"loss": 0.5752, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": -0.04885732755064964, |
|
"rewards/margins": 0.42579683661460876, |
|
"rewards/rejected": -0.4746541380882263, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.819295558958652e-08, |
|
"logits/chosen": -2.3994874954223633, |
|
"logits/rejected": -2.34912109375, |
|
"logps/chosen": -288.324462890625, |
|
"logps/rejected": -222.3997344970703, |
|
"loss": 0.5646, |
|
"rewards/accuracies": 0.7484375238418579, |
|
"rewards/chosen": -0.021456807851791382, |
|
"rewards/margins": 0.5244570374488831, |
|
"rewards/rejected": -0.545913815498352, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 5.0535987748851455e-08, |
|
"logits/chosen": -2.4647653102874756, |
|
"logits/rejected": -2.4115538597106934, |
|
"logps/chosen": -275.77947998046875, |
|
"logps/rejected": -231.9734649658203, |
|
"loss": 0.5621, |
|
"rewards/accuracies": 0.7015625238418579, |
|
"rewards/chosen": -0.07127931714057922, |
|
"rewards/margins": 0.41370710730552673, |
|
"rewards/rejected": -0.48498645424842834, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.287901990811638e-08, |
|
"logits/chosen": -2.4428927898406982, |
|
"logits/rejected": -2.3569588661193848, |
|
"logps/chosen": -268.5105285644531, |
|
"logps/rejected": -244.9532928466797, |
|
"loss": 0.5636, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -0.048072461038827896, |
|
"rewards/margins": 0.46722808480262756, |
|
"rewards/rejected": -0.5153006315231323, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.522205206738132e-08, |
|
"logits/chosen": -2.4071362018585205, |
|
"logits/rejected": -2.3730602264404297, |
|
"logps/chosen": -275.3606872558594, |
|
"logps/rejected": -230.1616668701172, |
|
"loss": 0.5682, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.076107919216156, |
|
"rewards/margins": 0.44682103395462036, |
|
"rewards/rejected": -0.5229289531707764, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.7565084226646246e-08, |
|
"logits/chosen": -2.3889846801757812, |
|
"logits/rejected": -2.376112937927246, |
|
"logps/chosen": -264.30804443359375, |
|
"logps/rejected": -236.71640014648438, |
|
"loss": 0.5747, |
|
"rewards/accuracies": 0.7109375, |
|
"rewards/chosen": -0.04645932838320732, |
|
"rewards/margins": 0.4625559449195862, |
|
"rewards/rejected": -0.5090152621269226, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.9908116385911178e-08, |
|
"logits/chosen": -2.4064643383026123, |
|
"logits/rejected": -2.388768434524536, |
|
"logps/chosen": -260.59393310546875, |
|
"logps/rejected": -227.6177215576172, |
|
"loss": 0.5718, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.09053535759449005, |
|
"rewards/margins": 0.3823908865451813, |
|
"rewards/rejected": -0.47292619943618774, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.225114854517611e-08, |
|
"logits/chosen": -2.436859130859375, |
|
"logits/rejected": -2.3719522953033447, |
|
"logps/chosen": -280.87774658203125, |
|
"logps/rejected": -221.83944702148438, |
|
"loss": 0.5696, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.06765580177307129, |
|
"rewards/margins": 0.4343256950378418, |
|
"rewards/rejected": -0.5019814968109131, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.594180704441042e-09, |
|
"logits/chosen": -2.4007372856140137, |
|
"logits/rejected": -2.378627300262451, |
|
"logps/chosen": -257.9851989746094, |
|
"logps/rejected": -225.0294189453125, |
|
"loss": 0.5671, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.053695209324359894, |
|
"rewards/margins": 0.4172247052192688, |
|
"rewards/rejected": -0.4709199070930481, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_logits/chosen": -2.1214964389801025, |
|
"eval_logits/rejected": -2.000164031982422, |
|
"eval_logps/chosen": -265.42364501953125, |
|
"eval_logps/rejected": -224.6269989013672, |
|
"eval_loss": 0.5655443072319031, |
|
"eval_rewards/accuracies": 0.699999988079071, |
|
"eval_rewards/chosen": -0.07451467216014862, |
|
"eval_rewards/margins": 0.4583480656147003, |
|
"eval_rewards/rejected": -0.5328627228736877, |
|
"eval_runtime": 278.3863, |
|
"eval_samples_per_second": 7.184, |
|
"eval_steps_per_second": 0.449, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 726, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6037390495627379, |
|
"train_runtime": 36278.6969, |
|
"train_samples_per_second": 5.124, |
|
"train_steps_per_second": 0.02 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 726, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|