{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9980364656381484,
  "eval_steps": 100,
  "global_step": 2004,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04488078541374474,
      "grad_norm": 4.790558815002441,
      "learning_rate": 9.850299401197606e-05,
      "logits/chosen": -3.3742988109588623,
      "logits/rejected": -3.0817112922668457,
      "logps/chosen": -273.48614501953125,
      "logps/rejected": -234.3329315185547,
      "loss": 0.6831,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": 0.0712718814611435,
      "rewards/margins": 0.024287192150950432,
      "rewards/rejected": 0.04698468744754791,
      "step": 30
    },
    {
      "epoch": 0.08976157082748948,
      "grad_norm": 5.551278591156006,
      "learning_rate": 9.700598802395209e-05,
      "logits/chosen": -3.378220558166504,
      "logits/rejected": -3.129826307296753,
      "logps/chosen": -267.0759582519531,
      "logps/rejected": -238.60873413085938,
      "loss": 0.6691,
      "rewards/accuracies": 0.590624988079071,
      "rewards/chosen": 0.21243497729301453,
      "rewards/margins": 0.07040555775165558,
      "rewards/rejected": 0.14202943444252014,
      "step": 60
    },
    {
      "epoch": 0.13464235624123422,
      "grad_norm": 7.342463493347168,
      "learning_rate": 9.550898203592816e-05,
      "logits/chosen": -3.3940162658691406,
      "logits/rejected": -3.142778158187866,
      "logps/chosen": -267.77581787109375,
      "logps/rejected": -233.1001434326172,
      "loss": 0.6586,
      "rewards/accuracies": 0.6145833134651184,
      "rewards/chosen": 0.2707298994064331,
      "rewards/margins": 0.12286876887083054,
      "rewards/rejected": 0.14786113798618317,
      "step": 90
    },
    {
      "epoch": 0.1496026180458158,
      "eval_logits/chosen": -3.403446674346924,
      "eval_logits/rejected": -3.1215860843658447,
      "eval_logps/chosen": -268.9533386230469,
      "eval_logps/rejected": -229.84756469726562,
      "eval_loss": 0.6490052342414856,
      "eval_rewards/accuracies": 0.6090127229690552,
      "eval_rewards/chosen": 0.2928723990917206,
      "eval_rewards/margins": 0.17060735821723938,
      "eval_rewards/rejected": 0.12226507067680359,
      "eval_runtime": 1689.4226,
      "eval_samples_per_second": 3.166,
      "eval_steps_per_second": 3.166,
      "step": 100
    },
    {
      "epoch": 0.17952314165497896,
      "grad_norm": 7.352132797241211,
      "learning_rate": 9.40119760479042e-05,
      "logits/chosen": -3.416881561279297,
      "logits/rejected": -3.130246877670288,
      "logps/chosen": -271.2243347167969,
      "logps/rejected": -233.06614685058594,
      "loss": 0.6385,
      "rewards/accuracies": 0.6312500238418579,
      "rewards/chosen": 0.295549601316452,
      "rewards/margins": 0.20651350915431976,
      "rewards/rejected": 0.08903612196445465,
      "step": 120
    },
    {
      "epoch": 0.2244039270687237,
      "grad_norm": 7.124303340911865,
      "learning_rate": 9.251497005988024e-05,
      "logits/chosen": -3.3911712169647217,
      "logits/rejected": -3.1303460597991943,
      "logps/chosen": -265.11651611328125,
      "logps/rejected": -234.63478088378906,
      "loss": 0.6436,
      "rewards/accuracies": 0.6322916746139526,
      "rewards/chosen": 0.23116879165172577,
      "rewards/margins": 0.23007448017597198,
      "rewards/rejected": 0.0010943154338747263,
      "step": 150
    },
    {
      "epoch": 0.26928471248246844,
      "grad_norm": 6.228757381439209,
      "learning_rate": 9.101796407185628e-05,
      "logits/chosen": -3.4022183418273926,
      "logits/rejected": -3.151683807373047,
      "logps/chosen": -267.1882019042969,
      "logps/rejected": -232.48471069335938,
      "loss": 0.6485,
      "rewards/accuracies": 0.6322916746139526,
      "rewards/chosen": 0.3013507127761841,
      "rewards/margins": 0.20289267599582672,
      "rewards/rejected": 0.09845803678035736,
      "step": 180
    },
    {
      "epoch": 0.2992052360916316,
      "eval_logits/chosen": -3.4198923110961914,
      "eval_logits/rejected": -3.143324613571167,
      "eval_logps/chosen": -268.99163818359375,
      "eval_logps/rejected": -230.40538024902344,
      "eval_loss": 0.6396481394767761,
      "eval_rewards/accuracies": 0.6142483353614807,
      "eval_rewards/chosen": 0.2890413999557495,
      "eval_rewards/margins": 0.22255805134773254,
      "eval_rewards/rejected": 0.06648338586091995,
      "eval_runtime": 1688.7699,
      "eval_samples_per_second": 3.167,
      "eval_steps_per_second": 3.167,
      "step": 200
    },
    {
      "epoch": 0.3141654978962132,
      "grad_norm": 6.4461493492126465,
      "learning_rate": 8.952095808383235e-05,
      "logits/chosen": -3.434596300125122,
      "logits/rejected": -3.132395029067993,
      "logps/chosen": -267.54437255859375,
      "logps/rejected": -224.43540954589844,
      "loss": 0.638,
      "rewards/accuracies": 0.6197916865348816,
      "rewards/chosen": 0.2697771489620209,
      "rewards/margins": 0.2309388816356659,
      "rewards/rejected": 0.03883826732635498,
      "step": 210
    },
    {
      "epoch": 0.3590462833099579,
      "grad_norm": 4.610179424285889,
      "learning_rate": 8.80239520958084e-05,
      "logits/chosen": -3.428438901901245,
      "logits/rejected": -3.169167995452881,
      "logps/chosen": -265.898193359375,
      "logps/rejected": -230.3724822998047,
      "loss": 0.6406,
      "rewards/accuracies": 0.6166666746139526,
      "rewards/chosen": 0.28266531229019165,
      "rewards/margins": 0.23549547791481018,
      "rewards/rejected": 0.04716984182596207,
      "step": 240
    },
    {
      "epoch": 0.40392706872370265,
      "grad_norm": 5.838982582092285,
      "learning_rate": 8.652694610778443e-05,
      "logits/chosen": -3.4263041019439697,
      "logits/rejected": -3.1684255599975586,
      "logps/chosen": -267.129150390625,
      "logps/rejected": -233.6484832763672,
      "loss": 0.6251,
      "rewards/accuracies": 0.6302083134651184,
      "rewards/chosen": 0.1708066761493683,
      "rewards/margins": 0.28462281823158264,
      "rewards/rejected": -0.11381613463163376,
      "step": 270
    },
    {
      "epoch": 0.4488078541374474,
      "grad_norm": 5.045168876647949,
      "learning_rate": 8.502994011976048e-05,
      "logits/chosen": -3.439959764480591,
      "logits/rejected": -3.173079490661621,
      "logps/chosen": -273.4431457519531,
      "logps/rejected": -236.24371337890625,
      "loss": 0.6327,
      "rewards/accuracies": 0.6312500238418579,
      "rewards/chosen": 0.17748431861400604,
      "rewards/margins": 0.269964337348938,
      "rewards/rejected": -0.09247999638319016,
      "step": 300
    },
    {
      "epoch": 0.4488078541374474,
      "eval_logits/chosen": -3.424139976501465,
      "eval_logits/rejected": -3.150641918182373,
      "eval_logps/chosen": -269.5113525390625,
      "eval_logps/rejected": -231.47146606445312,
      "eval_loss": 0.6353974938392639,
      "eval_rewards/accuracies": 0.6181750297546387,
      "eval_rewards/chosen": 0.23706810176372528,
      "eval_rewards/margins": 0.2771916091442108,
      "eval_rewards/rejected": -0.04012349247932434,
      "eval_runtime": 1688.647,
      "eval_samples_per_second": 3.167,
      "eval_steps_per_second": 3.167,
      "step": 300
    },
    {
      "epoch": 0.49368863955119213,
      "grad_norm": 5.068808555603027,
      "learning_rate": 8.353293413173653e-05,
      "logits/chosen": -3.4144017696380615,
      "logits/rejected": -3.1683106422424316,
      "logps/chosen": -272.5228271484375,
      "logps/rejected": -239.20681762695312,
      "loss": 0.646,
      "rewards/accuracies": 0.6208333373069763,
      "rewards/chosen": 0.22192205488681793,
      "rewards/margins": 0.24012483656406403,
      "rewards/rejected": -0.018202781677246094,
      "step": 330
    },
    {
      "epoch": 0.5385694249649369,
      "grad_norm": 6.0258941650390625,
      "learning_rate": 8.203592814371259e-05,
      "logits/chosen": -3.4079012870788574,
      "logits/rejected": -3.1440396308898926,
      "logps/chosen": -276.3011474609375,
      "logps/rejected": -235.62054443359375,
      "loss": 0.6228,
      "rewards/accuracies": 0.6270833611488342,
      "rewards/chosen": 0.09719991683959961,
      "rewards/margins": 0.2948659658432007,
      "rewards/rejected": -0.19766603410243988,
      "step": 360
    },
    {
      "epoch": 0.5834502103786816,
      "grad_norm": 5.713747024536133,
      "learning_rate": 8.053892215568862e-05,
      "logits/chosen": -3.3723533153533936,
      "logits/rejected": -3.1148476600646973,
      "logps/chosen": -274.2776794433594,
      "logps/rejected": -234.34136962890625,
      "loss": 0.6342,
      "rewards/accuracies": 0.6270833611488342,
      "rewards/chosen": 0.17114956676959991,
      "rewards/margins": 0.29201894998550415,
      "rewards/rejected": -0.12086938321590424,
      "step": 390
    },
    {
      "epoch": 0.5984104721832632,
      "eval_logits/chosen": -3.390080451965332,
      "eval_logits/rejected": -3.111392021179199,
      "eval_logps/chosen": -268.8110656738281,
      "eval_logps/rejected": -230.63925170898438,
      "eval_loss": 0.6309967041015625,
      "eval_rewards/accuracies": 0.630142092704773,
      "eval_rewards/chosen": 0.30709749460220337,
      "eval_rewards/margins": 0.2640005946159363,
      "eval_rewards/rejected": 0.04309689626097679,
      "eval_runtime": 1688.4508,
      "eval_samples_per_second": 3.167,
      "eval_steps_per_second": 3.167,
      "step": 400
    },
    {
      "epoch": 0.6283309957924264,
      "grad_norm": 5.401741027832031,
      "learning_rate": 7.904191616766467e-05,
      "logits/chosen": -3.389784574508667,
      "logits/rejected": -3.113330602645874,
      "logps/chosen": -270.7179260253906,
      "logps/rejected": -234.7329864501953,
      "loss": 0.6352,
      "rewards/accuracies": 0.6260416507720947,
      "rewards/chosen": 0.2668881416320801,
      "rewards/margins": 0.25750893354415894,
      "rewards/rejected": 0.009379198774695396,
      "step": 420
    },
    {
      "epoch": 0.6732117812061711,
      "grad_norm": 6.569407939910889,
      "learning_rate": 7.754491017964072e-05,
      "logits/chosen": -3.420933246612549,
      "logits/rejected": -3.107722520828247,
      "logps/chosen": -279.6606750488281,
      "logps/rejected": -232.16326904296875,
      "loss": 0.6152,
      "rewards/accuracies": 0.6395833492279053,
      "rewards/chosen": 0.23483921587467194,
      "rewards/margins": 0.30834510922431946,
      "rewards/rejected": -0.07350588589906693,
      "step": 450
    },
    {
      "epoch": 0.7180925666199158,
      "grad_norm": 4.889843463897705,
      "learning_rate": 7.604790419161677e-05,
      "logits/chosen": -3.4380805492401123,
      "logits/rejected": -3.1253116130828857,
      "logps/chosen": -279.8207092285156,
      "logps/rejected": -233.9474639892578,
      "loss": 0.612,
      "rewards/accuracies": 0.6697916388511658,
      "rewards/chosen": 0.06458248198032379,
      "rewards/margins": 0.378538578748703,
      "rewards/rejected": -0.3139561414718628,
      "step": 480
    },
    {
      "epoch": 0.748013090229079,
      "eval_logits/chosen": -3.419874429702759,
      "eval_logits/rejected": -3.1424779891967773,
      "eval_logps/chosen": -270.2199401855469,
      "eval_logps/rejected": -232.4933319091797,
      "eval_loss": 0.6269693374633789,
      "eval_rewards/accuracies": 0.627711296081543,
      "eval_rewards/chosen": 0.16621026396751404,
      "eval_rewards/margins": 0.3085208237171173,
      "eval_rewards/rejected": -0.14231054484844208,
      "eval_runtime": 1688.3228,
      "eval_samples_per_second": 3.168,
      "eval_steps_per_second": 3.168,
      "step": 500
    },
    {
      "epoch": 0.7629733520336606,
      "grad_norm": 4.36655330657959,
      "learning_rate": 7.455089820359282e-05,
      "logits/chosen": -3.4343178272247314,
      "logits/rejected": -3.1612308025360107,
      "logps/chosen": -272.99578857421875,
      "logps/rejected": -234.4145965576172,
      "loss": 0.629,
      "rewards/accuracies": 0.6427083611488342,
      "rewards/chosen": 0.13686993718147278,
      "rewards/margins": 0.3045249283313751,
      "rewards/rejected": -0.16765499114990234,
      "step": 510
    },
    {
      "epoch": 0.8078541374474053,
      "grad_norm": 4.971264839172363,
      "learning_rate": 7.305389221556886e-05,
      "logits/chosen": -3.4246087074279785,
      "logits/rejected": -3.172884464263916,
      "logps/chosen": -267.14556884765625,
      "logps/rejected": -233.85691833496094,
      "loss": 0.6269,
      "rewards/accuracies": 0.6333333253860474,
      "rewards/chosen": 0.22364649176597595,
      "rewards/margins": 0.28497254848480225,
      "rewards/rejected": -0.06132606416940689,
      "step": 540
    },
    {
      "epoch": 0.85273492286115,
      "grad_norm": 5.077197074890137,
      "learning_rate": 7.155688622754491e-05,
      "logits/chosen": -3.4349772930145264,
      "logits/rejected": -3.1722497940063477,
      "logps/chosen": -268.02630615234375,
      "logps/rejected": -231.99020385742188,
      "loss": 0.63,
      "rewards/accuracies": 0.621874988079071,
      "rewards/chosen": 0.2208840399980545,
      "rewards/margins": 0.2860158383846283,
      "rewards/rejected": -0.06513180583715439,
      "step": 570
    },
    {
      "epoch": 0.8976157082748948,
      "grad_norm": 4.760651111602783,
      "learning_rate": 7.005988023952096e-05,
      "logits/chosen": -3.4018094539642334,
      "logits/rejected": -3.1606853008270264,
      "logps/chosen": -268.86090087890625,
      "logps/rejected": -233.84007263183594,
      "loss": 0.6432,
      "rewards/accuracies": 0.6208333373069763,
      "rewards/chosen": 0.25363439321517944,
      "rewards/margins": 0.2553554177284241,
      "rewards/rejected": -0.0017210314981639385,
      "step": 600
    },
    {
      "epoch": 0.8976157082748948,
      "eval_logits/chosen": -3.4228434562683105,
      "eval_logits/rejected": -3.145069122314453,
      "eval_logps/chosen": -269.40740966796875,
      "eval_logps/rejected": -231.58685302734375,
      "eval_loss": 0.6246524453163147,
      "eval_rewards/accuracies": 0.6312640309333801,
      "eval_rewards/chosen": 0.24746553599834442,
      "eval_rewards/margins": 0.2991257905960083,
      "eval_rewards/rejected": -0.051660239696502686,
      "eval_runtime": 1688.5162,
      "eval_samples_per_second": 3.167,
      "eval_steps_per_second": 3.167,
      "step": 600
    },
    {
      "epoch": 0.9424964936886395,
      "grad_norm": 5.144285678863525,
      "learning_rate": 6.856287425149701e-05,
      "logits/chosen": -3.4329488277435303,
      "logits/rejected": -3.1452276706695557,
      "logps/chosen": -269.5411376953125,
      "logps/rejected": -228.07046508789062,
      "loss": 0.6185,
      "rewards/accuracies": 0.6260416507720947,
      "rewards/chosen": 0.26634886860847473,
      "rewards/margins": 0.3162167966365814,
      "rewards/rejected": -0.049867913126945496,
      "step": 630
    },
    {
      "epoch": 0.9873772791023843,
      "grad_norm": 4.551113128662109,
      "learning_rate": 6.706586826347305e-05,
      "logits/chosen": -3.435673713684082,
      "logits/rejected": -3.1743414402008057,
      "logps/chosen": -273.6510314941406,
      "logps/rejected": -241.5527801513672,
      "loss": 0.6236,
      "rewards/accuracies": 0.6364583373069763,
      "rewards/chosen": 0.11331641674041748,
      "rewards/margins": 0.32483571767807007,
      "rewards/rejected": -0.2115192860364914,
      "step": 660
    },
    {
      "epoch": 1.032258064516129,
      "grad_norm": 4.672567844390869,
      "learning_rate": 6.55688622754491e-05,
      "logits/chosen": -3.4276251792907715,
      "logits/rejected": -3.1490509510040283,
      "logps/chosen": -269.5851135253906,
      "logps/rejected": -237.02989196777344,
      "loss": 0.5554,
      "rewards/accuracies": 0.7302083373069763,
      "rewards/chosen": 0.08210794627666473,
      "rewards/margins": 0.49889788031578064,
      "rewards/rejected": -0.4167899191379547,
      "step": 690
    },
    {
      "epoch": 1.0472183263207107,
      "eval_logits/chosen": -3.4185428619384766,
      "eval_logits/rejected": -3.1414475440979004,
      "eval_logps/chosen": -270.3139343261719,
      "eval_logps/rejected": -232.80120849609375,
      "eval_loss": 0.6221644282341003,
      "eval_rewards/accuracies": 0.6338818073272705,
      "eval_rewards/chosen": 0.15681201219558716,
      "eval_rewards/margins": 0.3299100995063782,
      "eval_rewards/rejected": -0.17309808731079102,
      "eval_runtime": 1688.7954,
      "eval_samples_per_second": 3.167,
      "eval_steps_per_second": 3.167,
      "step": 700
    },
    {
      "epoch": 1.0771388499298737,
      "grad_norm": 4.2797369956970215,
      "learning_rate": 6.407185628742515e-05,
      "logits/chosen": -3.445012092590332,
      "logits/rejected": -3.1330997943878174,
      "logps/chosen": -266.95782470703125,
      "logps/rejected": -227.27879333496094,
      "loss": 0.5249,
      "rewards/accuracies": 0.7635416388511658,
      "rewards/chosen": 0.25232160091400146,
      "rewards/margins": 0.5462218523025513,
      "rewards/rejected": -0.2939002215862274,
      "step": 720
    },
    {
      "epoch": 1.1220196353436185,
      "grad_norm": 5.101881980895996,
      "learning_rate": 6.25748502994012e-05,
      "logits/chosen": -3.425431728363037,
      "logits/rejected": -3.1551194190979004,
      "logps/chosen": -271.9197082519531,
      "logps/rejected": -233.2086639404297,
      "loss": 0.5308,
      "rewards/accuracies": 0.7385416626930237,
      "rewards/chosen": 0.2957630157470703,
      "rewards/margins": 0.5949270129203796,
      "rewards/rejected": -0.29916396737098694,
      "step": 750
    },
    {
      "epoch": 1.1669004207573632,
      "grad_norm": 5.360141754150391,
      "learning_rate": 6.107784431137725e-05,
      "logits/chosen": -3.4009079933166504,
      "logits/rejected": -3.1200320720672607,
      "logps/chosen": -272.1022644042969,
      "logps/rejected": -236.18499755859375,
      "loss": 0.5226,
      "rewards/accuracies": 0.7520833611488342,
      "rewards/chosen": 0.30986490845680237,
      "rewards/margins": 0.59710294008255,
      "rewards/rejected": -0.2872380018234253,
      "step": 780
    },
    {
      "epoch": 1.1968209443665265,
      "eval_logits/chosen": -3.405834674835205,
      "eval_logits/rejected": -3.1333200931549072,
      "eval_logps/chosen": -272.1116638183594,
      "eval_logps/rejected": -235.3762664794922,
      "eval_loss": 0.6281805038452148,
      "eval_rewards/accuracies": 0.6335078477859497,
      "eval_rewards/chosen": -0.022958112880587578,
      "eval_rewards/margins": 0.40764307975769043,
      "eval_rewards/rejected": -0.43060120940208435,
      "eval_runtime": 1688.3487,
      "eval_samples_per_second": 3.168,
      "eval_steps_per_second": 3.168,
      "step": 800
    },
    {
      "epoch": 1.211781206171108,
      "grad_norm": 5.8285088539123535,
      "learning_rate": 5.95808383233533e-05,
      "logits/chosen": -3.3979651927948,
      "logits/rejected": -3.1520321369171143,
      "logps/chosen": -274.0641174316406,
      "logps/rejected": -240.42205810546875,
      "loss": 0.5402,
      "rewards/accuracies": 0.7354166507720947,
      "rewards/chosen": 0.1310122311115265,
      "rewards/margins": 0.5752567052841187,
      "rewards/rejected": -0.44424447417259216,
      "step": 810
    },
    {
      "epoch": 1.2566619915848527,
      "grad_norm": 5.5216851234436035,
      "learning_rate": 5.808383233532935e-05,
      "logits/chosen": -3.4025676250457764,
      "logits/rejected": -3.1448330879211426,
      "logps/chosen": -274.1934509277344,
      "logps/rejected": -243.01490783691406,
      "loss": 0.5201,
      "rewards/accuracies": 0.7552083134651184,
      "rewards/chosen": 0.14752289652824402,
      "rewards/margins": 0.626766562461853,
      "rewards/rejected": -0.479243665933609,
      "step": 840
    },
    {
      "epoch": 1.3015427769985974,
      "grad_norm": 5.673742294311523,
      "learning_rate": 5.6586826347305385e-05,
      "logits/chosen": -3.3895277976989746,
      "logits/rejected": -3.1401288509368896,
      "logps/chosen": -273.1130676269531,
      "logps/rejected": -241.987060546875,
      "loss": 0.5497,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": 0.02567141316831112,
      "rewards/margins": 0.5613437294960022,
      "rewards/rejected": -0.5356722474098206,
      "step": 870
    },
    {
      "epoch": 1.3464235624123422,
      "grad_norm": 6.6557440757751465,
      "learning_rate": 5.508982035928144e-05,
      "logits/chosen": -3.3836898803710938,
      "logits/rejected": -3.1433603763580322,
      "logps/chosen": -266.1312561035156,
      "logps/rejected": -238.70309448242188,
      "loss": 0.5474,
      "rewards/accuracies": 0.7322916388511658,
      "rewards/chosen": 0.016369260847568512,
      "rewards/margins": 0.563011109828949,
      "rewards/rejected": -0.5466418862342834,
      "step": 900
    },
    {
      "epoch": 1.3464235624123422,
      "eval_logits/chosen": -3.3754773139953613,
      "eval_logits/rejected": -3.106959819793701,
      "eval_logps/chosen": -271.97869873046875,
      "eval_logps/rejected": -234.78985595703125,
      "eval_loss": 0.629611074924469,
      "eval_rewards/accuracies": 0.6299551129341125,
      "eval_rewards/chosen": -0.009662697091698647,
      "eval_rewards/margins": 0.3622985780239105,
      "eval_rewards/rejected": -0.3719612658023834,
      "eval_runtime": 1688.3293,
      "eval_samples_per_second": 3.168,
      "eval_steps_per_second": 3.168,
      "step": 900
    },
    {
      "epoch": 1.391304347826087,
      "grad_norm": 5.2359724044799805,
      "learning_rate": 5.359281437125748e-05,
      "logits/chosen": -3.3651976585388184,
      "logits/rejected": -3.123444080352783,
      "logps/chosen": -271.6989440917969,
      "logps/rejected": -236.84417724609375,
      "loss": 0.54,
      "rewards/accuracies": 0.7416666746139526,
      "rewards/chosen": 0.1108192428946495,
      "rewards/margins": 0.5575817823410034,
      "rewards/rejected": -0.4467625319957733,
      "step": 930
    },
    {
      "epoch": 1.4361851332398317,
      "grad_norm": 5.669713497161865,
      "learning_rate": 5.209580838323354e-05,
      "logits/chosen": -3.3611793518066406,
      "logits/rejected": -3.099807024002075,
      "logps/chosen": -274.7862243652344,
      "logps/rejected": -237.52139282226562,
      "loss": 0.5405,
      "rewards/accuracies": 0.7260416746139526,
      "rewards/chosen": 0.027847904711961746,
      "rewards/margins": 0.5692722797393799,
      "rewards/rejected": -0.5414243936538696,
      "step": 960
    },
    {
      "epoch": 1.4810659186535764,
      "grad_norm": 6.500946044921875,
      "learning_rate": 5.059880239520959e-05,
      "logits/chosen": -3.3827614784240723,
      "logits/rejected": -3.09436297416687,
      "logps/chosen": -276.1948547363281,
      "logps/rejected": -238.23243713378906,
      "loss": 0.5235,
      "rewards/accuracies": 0.7593749761581421,
      "rewards/chosen": -0.051430441439151764,
      "rewards/margins": 0.6151652932167053,
      "rewards/rejected": -0.6665957570075989,
      "step": 990
    },
    {
      "epoch": 1.496026180458158,
      "eval_logits/chosen": -3.366751194000244,
      "eval_logits/rejected": -3.1013710498809814,
      "eval_logps/chosen": -272.0386047363281,
      "eval_logps/rejected": -234.93768310546875,
      "eval_loss": 0.628265380859375,
      "eval_rewards/accuracies": 0.6325729489326477,
      "eval_rewards/chosen": -0.01565566658973694,
      "eval_rewards/margins": 0.37109050154685974,
      "eval_rewards/rejected": -0.38674619793891907,
      "eval_runtime": 1688.5815,
      "eval_samples_per_second": 3.167,
      "eval_steps_per_second": 3.167,
      "step": 1000
    },
    {
      "epoch": 1.5259467040673211,
      "grad_norm": 5.458423614501953,
      "learning_rate": 4.910179640718563e-05,
      "logits/chosen": -3.3471567630767822,
      "logits/rejected": -3.1277146339416504,
      "logps/chosen": -269.085205078125,
      "logps/rejected": -243.0161895751953,
      "loss": 0.5338,
      "rewards/accuracies": 0.7489583492279053,
      "rewards/chosen": 0.08274559676647186,
      "rewards/margins": 0.5906849503517151,
      "rewards/rejected": -0.5079393982887268,
      "step": 1020
    },
    {
      "epoch": 1.5708274894810659,
      "grad_norm": 7.57076358795166,
      "learning_rate": 4.7604790419161675e-05,
      "logits/chosen": -3.3720383644104004,
      "logits/rejected": -3.0789096355438232,
      "logps/chosen": -266.96270751953125,
      "logps/rejected": -228.08006286621094,
      "loss": 0.5331,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": 0.014148912392556667,
      "rewards/margins": 0.5797646045684814,
      "rewards/rejected": -0.5656156539916992,
      "step": 1050
    },
    {
      "epoch": 1.6157082748948106,
      "grad_norm": 5.330569744110107,
      "learning_rate": 4.610778443113773e-05,
      "logits/chosen": -3.359792947769165,
      "logits/rejected": -3.09041166305542,
      "logps/chosen": -276.38226318359375,
      "logps/rejected": -243.1844024658203,
      "loss": 0.5232,
      "rewards/accuracies": 0.7395833134651184,
      "rewards/chosen": 0.06125294789671898,
      "rewards/margins": 0.6237131953239441,
      "rewards/rejected": -0.56246018409729,
      "step": 1080
    },
    {
      "epoch": 1.645628798503974,
      "eval_logits/chosen": -3.3514223098754883,
      "eval_logits/rejected": -3.0870354175567627,
      "eval_logps/chosen": -271.9643859863281,
      "eval_logps/rejected": -234.57383728027344,
      "eval_loss": 0.6333222389221191,
      "eval_rewards/accuracies": 0.620792806148529,
      "eval_rewards/chosen": -0.008234047330915928,
      "eval_rewards/margins": 0.34212782979011536,
      "eval_rewards/rejected": -0.3503618538379669,
      "eval_runtime": 1688.4687,
      "eval_samples_per_second": 3.167,
      "eval_steps_per_second": 3.167,
      "step": 1100
    },
    {
      "epoch": 1.6605890603085554,
      "grad_norm": 6.506576061248779,
      "learning_rate": 4.4610778443113777e-05,
      "logits/chosen": -3.3484690189361572,
      "logits/rejected": -3.130286455154419,
      "logps/chosen": -273.8847961425781,
      "logps/rejected": -247.9475860595703,
      "loss": 0.5384,
      "rewards/accuracies": 0.7322916388511658,
      "rewards/chosen": 0.10436714440584183,
      "rewards/margins": 0.5445392727851868,
      "rewards/rejected": -0.44017213582992554,
      "step": 1110
    },
    {
      "epoch": 1.7054698457223,
      "grad_norm": 6.028050422668457,
      "learning_rate": 4.311377245508982e-05,
      "logits/chosen": -3.3555943965911865,
      "logits/rejected": -3.125157356262207,
      "logps/chosen": -267.73516845703125,
      "logps/rejected": -236.43356323242188,
      "loss": 0.5549,
      "rewards/accuracies": 0.7354166507720947,
      "rewards/chosen": -0.052730146795511246,
      "rewards/margins": 0.5250240564346313,
      "rewards/rejected": -0.5777541995048523,
      "step": 1140
    },
    {
      "epoch": 1.7503506311360448,
      "grad_norm": 6.8205647468566895,
      "learning_rate": 4.161676646706587e-05,
      "logits/chosen": -3.383364677429199,
      "logits/rejected": -3.1156816482543945,
      "logps/chosen": -273.9105529785156,
      "logps/rejected": -237.84432983398438,
      "loss": 0.523,
      "rewards/accuracies": 0.746874988079071,
      "rewards/chosen": -0.06969426572322845,
      "rewards/margins": 0.6354466676712036,
      "rewards/rejected": -0.705141007900238,
      "step": 1170
    },
    {
      "epoch": 1.7952314165497896,
      "grad_norm": 6.074138641357422,
      "learning_rate": 4.0119760479041915e-05,
      "logits/chosen": -3.391815185546875,
      "logits/rejected": -3.1276473999023438,
      "logps/chosen": -279.9575500488281,
      "logps/rejected": -244.89010620117188,
      "loss": 0.5156,
      "rewards/accuracies": 0.7479166388511658,
      "rewards/chosen": -0.07707042992115021,
      "rewards/margins": 0.6421669125556946,
      "rewards/rejected": -0.7192373871803284,
      "step": 1200
    },
    {
      "epoch": 1.7952314165497896,
      "eval_logits/chosen": -3.379970073699951,
      "eval_logits/rejected": -3.1176722049713135,
      "eval_logps/chosen": -274.0703430175781,
      "eval_logps/rejected": -237.28604125976562,
      "eval_loss": 0.6306100487709045,
      "eval_rewards/accuracies": 0.6350037455558777,
      "eval_rewards/chosen": -0.21883098781108856,
      "eval_rewards/margins": 0.4027484953403473,
      "eval_rewards/rejected": -0.6215794086456299,
      "eval_runtime": 1688.6398,
      "eval_samples_per_second": 3.167,
      "eval_steps_per_second": 3.167,
      "step": 1200
    },
    {
      "epoch": 1.8401122019635343,
      "grad_norm": 5.888485431671143,
      "learning_rate": 3.8622754491017966e-05,
      "logits/chosen": -3.3851656913757324,
      "logits/rejected": -3.1222336292266846,
      "logps/chosen": -272.52117919921875,
      "logps/rejected": -237.61158752441406,
      "loss": 0.5372,
      "rewards/accuracies": 0.7416666746139526,
      "rewards/chosen": -0.14584079384803772,
      "rewards/margins": 0.6132307052612305,
      "rewards/rejected": -0.7590714693069458,
      "step": 1230
    },
    {
      "epoch": 1.884992987377279,
      "grad_norm": 6.371288299560547,
      "learning_rate": 3.712574850299401e-05,
      "logits/chosen": -3.379087209701538,
      "logits/rejected": -3.1119582653045654,
      "logps/chosen": -273.9693603515625,
      "logps/rejected": -238.9442901611328,
      "loss": 0.5142,
      "rewards/accuracies": 0.7614583373069763,
      "rewards/chosen": -0.13823945820331573,
      "rewards/margins": 0.6301066279411316,
      "rewards/rejected": -0.7683460116386414,
      "step": 1260
    },
    {
      "epoch": 1.9298737727910238,
      "grad_norm": 6.35048246383667,
      "learning_rate": 3.562874251497006e-05,
      "logits/chosen": -3.3981616497039795,
      "logits/rejected": -3.162132740020752,
      "logps/chosen": -268.2223205566406,
      "logps/rejected": -237.77635192871094,
      "loss": 0.5352,
      "rewards/accuracies": 0.7364583611488342,
      "rewards/chosen": -0.19809262454509735,
      "rewards/margins": 0.6094833016395569,
      "rewards/rejected": -0.8075758814811707,
      "step": 1290
    },
    {
      "epoch": 1.9448340345956054,
      "eval_logits/chosen": -3.3784019947052,
      "eval_logits/rejected": -3.116711378097534,
      "eval_logps/chosen": -274.5598449707031,
      "eval_logps/rejected": -237.6013946533203,
      "eval_loss": 0.6299869418144226,
      "eval_rewards/accuracies": 0.6327599287033081,
      "eval_rewards/chosen": -0.2677817940711975,
      "eval_rewards/margins": 0.38533419370651245,
      "eval_rewards/rejected": -0.6531160473823547,
      "eval_runtime": 1688.4156,
      "eval_samples_per_second": 3.167,
      "eval_steps_per_second": 3.167,
      "step": 1300
    },
    {
      "epoch": 1.9747545582047685,
      "grad_norm": 6.9590959548950195,
      "learning_rate": 3.413173652694611e-05,
      "logits/chosen": -3.376148223876953,
      "logits/rejected": -3.1122324466705322,
      "logps/chosen": -282.5127258300781,
      "logps/rejected": -247.69212341308594,
      "loss": 0.5232,
      "rewards/accuracies": 0.7333333492279053,
      "rewards/chosen": -0.10552702099084854,
      "rewards/margins": 0.6293079257011414,
      "rewards/rejected": -0.7348350286483765,
      "step": 1320
    },
    {
      "epoch": 2.0196353436185133,
      "grad_norm": 5.992002010345459,
      "learning_rate": 3.263473053892216e-05,
      "logits/chosen": -3.397113800048828,
      "logits/rejected": -3.136545419692993,
      "logps/chosen": -278.75390625,
      "logps/rejected": -246.4496307373047,
      "loss": 0.5015,
      "rewards/accuracies": 0.765625,
      "rewards/chosen": -0.11011376976966858,
      "rewards/margins": 0.6908566355705261,
      "rewards/rejected": -0.8009704351425171,
      "step": 1350
    },
    {
      "epoch": 2.064516129032258,
      "grad_norm": 7.003468990325928,
      "learning_rate": 3.1137724550898205e-05,
      "logits/chosen": -3.370246410369873,
      "logits/rejected": -3.093019723892212,
      "logps/chosen": -279.3021240234375,
      "logps/rejected": -242.58258056640625,
      "loss": 0.446,
      "rewards/accuracies": 0.8114583492279053,
      "rewards/chosen": -0.05044478550553322,
      "rewards/margins": 0.8847902417182922,
      "rewards/rejected": -0.9352350234985352,
      "step": 1380
    },
    {
      "epoch": 2.0944366526414213,
      "eval_logits/chosen": -3.3703560829162598,
      "eval_logits/rejected": -3.111078977584839,
      "eval_logps/chosen": -274.6524353027344,
      "eval_logps/rejected": -237.6984405517578,
      "eval_loss": 0.6312919855117798,
      "eval_rewards/accuracies": 0.6325729489326477,
      "eval_rewards/chosen": -0.27703869342803955,
      "eval_rewards/margins": 0.38578376173973083,
      "eval_rewards/rejected": -0.6628224849700928,
      "eval_runtime": 1685.7321,
      "eval_samples_per_second": 3.173,
      "eval_steps_per_second": 3.173,
      "step": 1400
    },
    {
      "epoch": 2.1093969144460027,
      "grad_norm": 5.607975959777832,
      "learning_rate": 2.9640718562874252e-05,
      "logits/chosen": -3.365170955657959,
      "logits/rejected": -3.1272387504577637,
      "logps/chosen": -271.7861022949219,
      "logps/rejected": -242.59573364257812,
      "loss": 0.4698,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": -0.10354464501142502,
      "rewards/margins": 0.7553961277008057,
      "rewards/rejected": -0.8589407801628113,
      "step": 1410
    },
    {
      "epoch": 2.1542776998597475,
      "grad_norm": 5.758065223693848,
      "learning_rate": 2.81437125748503e-05,
      "logits/chosen": -3.362076997756958,
      "logits/rejected": -3.1057257652282715,
      "logps/chosen": -267.41705322265625,
      "logps/rejected": -235.0531463623047,
      "loss": 0.4586,
      "rewards/accuracies": 0.8166666626930237,
      "rewards/chosen": -0.11131696403026581,
      "rewards/margins": 0.785390317440033,
      "rewards/rejected": -0.89670729637146,
      "step": 1440
    },
    {
      "epoch": 2.1991584852734922,
      "grad_norm": 6.8294501304626465,
      "learning_rate": 2.6646706586826347e-05,
      "logits/chosen": -3.368708610534668,
      "logits/rejected": -3.0964319705963135,
      "logps/chosen": -269.26409912109375,
      "logps/rejected": -237.1984405517578,
      "loss": 0.4496,
      "rewards/accuracies": 0.8145833611488342,
      "rewards/chosen": -0.10699882358312607,
      "rewards/margins": 0.8314520120620728,
      "rewards/rejected": -0.938450813293457,
      "step": 1470
    },
    {
      "epoch": 2.244039270687237,
      "grad_norm": 6.356990337371826,
      "learning_rate": 2.5149700598802394e-05,
      "logits/chosen": -3.374453067779541,
      "logits/rejected": -3.129500389099121,
      "logps/chosen": -271.7542724609375,
      "logps/rejected": -241.45423889160156,
      "loss": 0.4552,
      "rewards/accuracies": 0.8135416507720947,
      "rewards/chosen": -0.20287248492240906,
      "rewards/margins": 0.8104608058929443,
      "rewards/rejected": -1.0133334398269653,
      "step": 1500
    },
    {
      "epoch": 2.244039270687237,
      "eval_logits/chosen": -3.360283613204956,
      "eval_logits/rejected": -3.1080915927886963,
      "eval_logps/chosen": -276.3040466308594,
      "eval_logps/rejected": -239.7833251953125,
      "eval_loss": 0.6368128657341003,
      "eval_rewards/accuracies": 0.6351907253265381,
      "eval_rewards/chosen": -0.44220101833343506,
      "eval_rewards/margins": 0.4291093647480011,
      "eval_rewards/rejected": -0.871310293674469,
      "eval_runtime": 1686.3289,
      "eval_samples_per_second": 3.171,
      "eval_steps_per_second": 3.171,
      "step": 1500
    },
    {
      "epoch": 2.2889200561009817,
      "grad_norm": 6.016663551330566,
      "learning_rate": 2.3652694610778445e-05,
      "logits/chosen": -3.3569111824035645,
      "logits/rejected": -3.123525857925415,
      "logps/chosen": -274.6582946777344,
      "logps/rejected": -241.02252197265625,
      "loss": 0.4577,
      "rewards/accuracies": 0.7947916388511658,
      "rewards/chosen": -0.20317865908145905,
      "rewards/margins": 0.8162151575088501,
      "rewards/rejected": -1.0193939208984375,
      "step": 1530
    },
    {
      "epoch": 2.3338008415147264,
      "grad_norm": 5.684780120849609,
      "learning_rate": 2.2155688622754492e-05,
      "logits/chosen": -3.3533644676208496,
      "logits/rejected": -3.146190881729126,
      "logps/chosen": -271.4990234375,
      "logps/rejected": -242.20486450195312,
      "loss": 0.4674,
      "rewards/accuracies": 0.7989583611488342,
      "rewards/chosen": -0.12297000735998154,
      "rewards/margins": 0.8095114827156067,
      "rewards/rejected": -0.9324816465377808,
      "step": 1560
    },
    {
      "epoch": 2.378681626928471,
      "grad_norm": 7.419367790222168,
      "learning_rate": 2.065868263473054e-05,
      "logits/chosen": -3.364116907119751,
      "logits/rejected": -3.092254638671875,
      "logps/chosen": -270.5090026855469,
      "logps/rejected": -237.64964294433594,
      "loss": 0.4443,
      "rewards/accuracies": 0.8177083134651184,
      "rewards/chosen": -0.22570447623729706,
      "rewards/margins": 0.84433513879776,
      "rewards/rejected": -1.0700395107269287,
      "step": 1590
    },
    {
      "epoch": 2.393641888733053,
      "eval_logits/chosen": -3.354207992553711,
      "eval_logits/rejected": -3.103837013244629,
      "eval_logps/chosen": -276.166015625,
      "eval_logps/rejected": -239.59542846679688,
      "eval_loss": 0.6390828490257263,
      "eval_rewards/accuracies": 0.6344428062438965,
      "eval_rewards/chosen": -0.4283973276615143,
      "eval_rewards/margins": 0.42412257194519043,
      "eval_rewards/rejected": -0.8525198101997375,
      "eval_runtime": 1685.5337,
      "eval_samples_per_second": 3.173,
      "eval_steps_per_second": 3.173,
      "step": 1600
    },
    {
      "epoch": 2.423562412342216,
      "grad_norm": 7.774267196655273,
      "learning_rate": 1.916167664670659e-05,
      "logits/chosen": -3.355332851409912,
      "logits/rejected": -3.1059510707855225,
      "logps/chosen": -277.3658752441406,
      "logps/rejected": -247.025146484375,
      "loss": 0.4466,
      "rewards/accuracies": 0.8052083253860474,
      "rewards/chosen": -0.20841935276985168,
      "rewards/margins": 0.860171377658844,
      "rewards/rejected": -1.0685906410217285,
      "step": 1620
    },
    {
      "epoch": 2.4684431977559607,
      "grad_norm": 7.505038738250732,
      "learning_rate": 1.7664670658682637e-05,
      "logits/chosen": -3.345045804977417,
      "logits/rejected": -3.1149702072143555,
      "logps/chosen": -278.6654968261719,
      "logps/rejected": -250.63827514648438,
      "loss": 0.4452,
      "rewards/accuracies": 0.828125,
      "rewards/chosen": -0.24601925909519196,
      "rewards/margins": 0.8955973982810974,
      "rewards/rejected": -1.141616702079773,
      "step": 1650
    },
    {
      "epoch": 2.5133239831697054,
      "grad_norm": 7.055671215057373,
      "learning_rate": 1.6167664670658684e-05,
      "logits/chosen": -3.3556442260742188,
      "logits/rejected": -3.086568593978882,
      "logps/chosen": -275.1831359863281,
      "logps/rejected": -240.82598876953125,
      "loss": 0.4564,
      "rewards/accuracies": 0.8052083253860474,
      "rewards/chosen": -0.29922303557395935,
      "rewards/margins": 0.8765833377838135,
      "rewards/rejected": -1.1758064031600952,
      "step": 1680
    },
    {
      "epoch": 2.5432445067788687,
      "eval_logits/chosen": -3.349193811416626,
      "eval_logits/rejected": -3.098674774169922,
      "eval_logps/chosen": -277.01483154296875,
      "eval_logps/rejected": -240.51805114746094,
      "eval_loss": 0.642005980014801,
      "eval_rewards/accuracies": 0.6297681331634521,
      "eval_rewards/chosen": -0.5132736563682556,
      "eval_rewards/margins": 0.4315095543861389,
      "eval_rewards/rejected": -0.9447831511497498,
      "eval_runtime": 1686.0549,
      "eval_samples_per_second": 3.172,
      "eval_steps_per_second": 3.172,
      "step": 1700
    },
    {
      "epoch": 2.55820476858345,
      "grad_norm": 7.247310638427734,
      "learning_rate": 1.467065868263473e-05,
      "logits/chosen": -3.3303916454315186,
      "logits/rejected": -3.118966817855835,
      "logps/chosen": -276.04510498046875,
      "logps/rejected": -250.57984924316406,
      "loss": 0.4615,
      "rewards/accuracies": 0.8072916865348816,
      "rewards/chosen": -0.27220281958580017,
      "rewards/margins": 0.8313066363334656,
      "rewards/rejected": -1.103509545326233,
      "step": 1710
    },
    {
      "epoch": 2.603085553997195,
      "grad_norm": 6.719433784484863,
      "learning_rate": 1.317365269461078e-05,
      "logits/chosen": -3.3551132678985596,
      "logits/rejected": -3.1186678409576416,
      "logps/chosen": -277.4861755371094,
      "logps/rejected": -251.39437866210938,
      "loss": 0.455,
      "rewards/accuracies": 0.8031250238418579,
      "rewards/chosen": -0.23053057491779327,
      "rewards/margins": 0.8569380640983582,
      "rewards/rejected": -1.0874686241149902,
      "step": 1740
    },
    {
      "epoch": 2.6479663394109396,
      "grad_norm": 6.049899101257324,
      "learning_rate": 1.1676646706586828e-05,
      "logits/chosen": -3.3462002277374268,
      "logits/rejected": -3.0950281620025635,
      "logps/chosen": -279.08447265625,
      "logps/rejected": -243.8069305419922,
      "loss": 0.4414,
      "rewards/accuracies": 0.8072916865348816,
      "rewards/chosen": -0.24068358540534973,
      "rewards/margins": 0.8998420238494873,
      "rewards/rejected": -1.1405255794525146,
      "step": 1770
    },
    {
      "epoch": 2.6928471248246844,
      "grad_norm": 7.545809268951416,
      "learning_rate": 1.0179640718562875e-05,
      "logits/chosen": -3.346256732940674,
      "logits/rejected": -3.112372875213623,
      "logps/chosen": -270.18499755859375,
      "logps/rejected": -240.69723510742188,
      "loss": 0.4603,
      "rewards/accuracies": 0.8083333373069763,
      "rewards/chosen": -0.25966814160346985,
      "rewards/margins": 0.8120385408401489,
      "rewards/rejected": -1.071706771850586,
      "step": 1800
    },
    {
      "epoch": 2.6928471248246844,
      "eval_logits/chosen": -3.3438971042633057,
      "eval_logits/rejected": -3.0931475162506104,
      "eval_logps/chosen": -276.7391662597656,
      "eval_logps/rejected": -240.1473388671875,
      "eval_loss": 0.6427502036094666,
      "eval_rewards/accuracies": 0.6297681331634521,
      "eval_rewards/chosen": -0.48571139574050903,
      "eval_rewards/margins": 0.4220017194747925,
      "eval_rewards/rejected": -0.9077131152153015,
      "eval_runtime": 1686.025,
      "eval_samples_per_second": 3.172,
      "eval_steps_per_second": 3.172,
      "step": 1800
    },
    {
      "epoch": 2.737727910238429,
      "grad_norm": 5.611355304718018,
      "learning_rate": 8.682634730538922e-06,
      "logits/chosen": -3.347557306289673,
      "logits/rejected": -3.109966993331909,
      "logps/chosen": -275.6930236816406,
      "logps/rejected": -247.47406005859375,
      "loss": 0.4457,
      "rewards/accuracies": 0.8291666507720947,
      "rewards/chosen": -0.3010416030883789,
      "rewards/margins": 0.8664290308952332,
      "rewards/rejected": -1.1674706935882568,
      "step": 1830
    },
    {
      "epoch": 2.782608695652174,
      "grad_norm": 8.53209114074707,
      "learning_rate": 7.18562874251497e-06,
      "logits/chosen": -3.3400204181671143,
      "logits/rejected": -3.103865623474121,
      "logps/chosen": -285.2029724121094,
      "logps/rejected": -255.09078979492188,
      "loss": 0.4524,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": -0.22924675047397614,
      "rewards/margins": 0.8349610567092896,
      "rewards/rejected": -1.0642077922821045,
      "step": 1860
    },
    {
      "epoch": 2.8274894810659186,
      "grad_norm": 7.011772155761719,
      "learning_rate": 5.688622754491018e-06,
      "logits/chosen": -3.3375208377838135,
      "logits/rejected": -3.0882365703582764,
      "logps/chosen": -269.7694091796875,
      "logps/rejected": -238.83042907714844,
      "loss": 0.4511,
      "rewards/accuracies": 0.8031250238418579,
      "rewards/chosen": -0.3288494944572449,
      "rewards/margins": 0.8713601231575012,
      "rewards/rejected": -1.2002094984054565,
      "step": 1890
    },
    {
      "epoch": 2.8424497428705005,
      "eval_logits/chosen": -3.3421826362609863,
      "eval_logits/rejected": -3.0923619270324707,
      "eval_logps/chosen": -277.226806640625,
      "eval_logps/rejected": -240.6798858642578,
      "eval_loss": 0.6432516574859619,
      "eval_rewards/accuracies": 0.6295811533927917,
      "eval_rewards/chosen": -0.5344744324684143,
      "eval_rewards/margins": 0.42649218440055847,
      "eval_rewards/rejected": -0.9609667062759399,
      "eval_runtime": 1686.3389,
      "eval_samples_per_second": 3.171,
      "eval_steps_per_second": 3.171,
      "step": 1900
    },
    {
      "epoch": 2.8723702664796633,
      "grad_norm": 7.099593162536621,
      "learning_rate": 4.191616766467066e-06,
      "logits/chosen": -3.359609365463257,
      "logits/rejected": -3.0945444107055664,
      "logps/chosen": -280.75030517578125,
      "logps/rejected": -245.13504028320312,
      "loss": 0.4418,
      "rewards/accuracies": 0.8197916746139526,
      "rewards/chosen": -0.30151474475860596,
      "rewards/margins": 0.8953721523284912,
      "rewards/rejected": -1.1968867778778076,
      "step": 1920
    },
    {
      "epoch": 2.917251051893408,
      "grad_norm": 7.788060188293457,
      "learning_rate": 2.6946107784431138e-06,
      "logits/chosen": -3.3403496742248535,
      "logits/rejected": -3.091184139251709,
      "logps/chosen": -280.9390869140625,
      "logps/rejected": -247.351806640625,
      "loss": 0.444,
      "rewards/accuracies": 0.8302083611488342,
      "rewards/chosen": -0.2591714859008789,
      "rewards/margins": 0.885311484336853,
      "rewards/rejected": -1.1444830894470215,
      "step": 1950
    },
    {
      "epoch": 2.962131837307153,
      "grad_norm": 7.973437786102295,
      "learning_rate": 1.1976047904191619e-06,
      "logits/chosen": -3.328507900238037,
      "logits/rejected": -3.088214635848999,
      "logps/chosen": -271.0534362792969,
      "logps/rejected": -242.70079040527344,
      "loss": 0.4531,
      "rewards/accuracies": 0.815625011920929,
      "rewards/chosen": -0.357342392206192,
      "rewards/margins": 0.865265429019928,
      "rewards/rejected": -1.2226077318191528,
      "step": 1980
    },
    {
      "epoch": 2.992052360916316,
      "eval_logits/chosen": -3.342743158340454,
      "eval_logits/rejected": -3.0929155349731445,
      "eval_logps/chosen": -277.3058166503906,
      "eval_logps/rejected": -240.79270935058594,
      "eval_loss": 0.6429719924926758,
      "eval_rewards/accuracies": 0.6299551129341125,
      "eval_rewards/chosen": -0.5423800349235535,
      "eval_rewards/margins": 0.42986956238746643,
      "eval_rewards/rejected": -0.9722495079040527,
      "eval_runtime": 1684.4785,
      "eval_samples_per_second": 3.175,
      "eval_steps_per_second": 3.175,
      "step": 2000
    }
  ],
  "logging_steps": 30,
  "max_steps": 2004,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "total_flos": 0.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}