|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 3821, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00026171159382360636, |
|
"grad_norm": 2.427435874938965, |
|
"learning_rate": 1.3054830287206268e-08, |
|
"logits/chosen": -2.452890634536743, |
|
"logits/rejected": -2.3576245307922363, |
|
"logps/chosen": -290.49053955078125, |
|
"logps/rejected": -374.69940185546875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0026171159382360636, |
|
"grad_norm": 2.4065892696380615, |
|
"learning_rate": 1.3054830287206266e-07, |
|
"logits/chosen": -2.280916452407837, |
|
"logits/rejected": -2.18080735206604, |
|
"logps/chosen": -279.5721435546875, |
|
"logps/rejected": -245.38124084472656, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4236111044883728, |
|
"rewards/chosen": 0.0002959521661978215, |
|
"rewards/margins": 4.458064722712152e-05, |
|
"rewards/rejected": 0.0002513715880922973, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.005234231876472127, |
|
"grad_norm": 2.543537139892578, |
|
"learning_rate": 2.610966057441253e-07, |
|
"logits/chosen": -2.286400318145752, |
|
"logits/rejected": -2.1322734355926514, |
|
"logps/chosen": -305.47900390625, |
|
"logps/rejected": -237.6411895751953, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0033905524760484695, |
|
"rewards/margins": 0.0010894734878093004, |
|
"rewards/rejected": 0.002301078988239169, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.007851347814708191, |
|
"grad_norm": 2.317607879638672, |
|
"learning_rate": 3.9164490861618804e-07, |
|
"logits/chosen": -2.2721304893493652, |
|
"logits/rejected": -2.2249627113342285, |
|
"logps/chosen": -251.0873565673828, |
|
"logps/rejected": -251.26864624023438, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.012264861725270748, |
|
"rewards/margins": 0.0016630779718980193, |
|
"rewards/rejected": 0.010601785033941269, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.010468463752944255, |
|
"grad_norm": 1.9544142484664917, |
|
"learning_rate": 5.221932114882506e-07, |
|
"logits/chosen": -2.1681597232818604, |
|
"logits/rejected": -2.1325502395629883, |
|
"logps/chosen": -216.1050262451172, |
|
"logps/rejected": -221.6034698486328, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.019591109827160835, |
|
"rewards/margins": 0.00413005193695426, |
|
"rewards/rejected": 0.015461057424545288, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01308557969118032, |
|
"grad_norm": 2.0900888442993164, |
|
"learning_rate": 6.527415143603135e-07, |
|
"logits/chosen": -2.2135119438171387, |
|
"logits/rejected": -2.1745445728302, |
|
"logps/chosen": -266.76007080078125, |
|
"logps/rejected": -234.2284698486328, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.02980896458029747, |
|
"rewards/margins": 0.005023510195314884, |
|
"rewards/rejected": 0.02478545531630516, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.015702695629416383, |
|
"grad_norm": 2.1390092372894287, |
|
"learning_rate": 7.832898172323761e-07, |
|
"logits/chosen": -2.1692872047424316, |
|
"logits/rejected": -2.1056342124938965, |
|
"logps/chosen": -252.186767578125, |
|
"logps/rejected": -226.5349884033203, |
|
"loss": 0.6901, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.03267771750688553, |
|
"rewards/margins": 0.0062465183436870575, |
|
"rewards/rejected": 0.026431197300553322, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.018319811567652448, |
|
"grad_norm": 2.0599091053009033, |
|
"learning_rate": 9.138381201044387e-07, |
|
"logits/chosen": -2.309943675994873, |
|
"logits/rejected": -2.187107563018799, |
|
"logps/chosen": -271.86541748046875, |
|
"logps/rejected": -246.50680541992188, |
|
"loss": 0.6877, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.04242750257253647, |
|
"rewards/margins": 0.011090461164712906, |
|
"rewards/rejected": 0.031337037682533264, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02093692750588851, |
|
"grad_norm": 2.3880298137664795, |
|
"learning_rate": 1.0443864229765013e-06, |
|
"logits/chosen": -2.2041609287261963, |
|
"logits/rejected": -2.1138315200805664, |
|
"logps/chosen": -257.4185485839844, |
|
"logps/rejected": -246.7639923095703, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.03884550929069519, |
|
"rewards/margins": 0.011877561919391155, |
|
"rewards/rejected": 0.02696794643998146, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.023554043444124574, |
|
"grad_norm": 2.3031389713287354, |
|
"learning_rate": 1.1749347258485642e-06, |
|
"logits/chosen": -2.208482265472412, |
|
"logits/rejected": -2.1343834400177, |
|
"logps/chosen": -249.96255493164062, |
|
"logps/rejected": -234.4242706298828, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.043103523552417755, |
|
"rewards/margins": 0.018813790753483772, |
|
"rewards/rejected": 0.024289730936288834, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02617115938236064, |
|
"grad_norm": 2.119929075241089, |
|
"learning_rate": 1.305483028720627e-06, |
|
"logits/chosen": -2.2504467964172363, |
|
"logits/rejected": -2.178734540939331, |
|
"logps/chosen": -246.7833251953125, |
|
"logps/rejected": -230.8575897216797, |
|
"loss": 0.6807, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.04890027642250061, |
|
"rewards/margins": 0.025924110785126686, |
|
"rewards/rejected": 0.022976163774728775, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02617115938236064, |
|
"eval_logits/chosen": -2.1481568813323975, |
|
"eval_logits/rejected": -2.055117607116699, |
|
"eval_logps/chosen": -259.46044921875, |
|
"eval_logps/rejected": -242.01309204101562, |
|
"eval_loss": 0.6808694005012512, |
|
"eval_rewards/accuracies": 0.6554999947547913, |
|
"eval_rewards/chosen": 0.05141494795680046, |
|
"eval_rewards/margins": 0.025842413306236267, |
|
"eval_rewards/rejected": 0.025572534650564194, |
|
"eval_runtime": 1599.8543, |
|
"eval_samples_per_second": 1.25, |
|
"eval_steps_per_second": 0.156, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.028788275320596704, |
|
"grad_norm": 2.4198131561279297, |
|
"learning_rate": 1.4360313315926894e-06, |
|
"logits/chosen": -2.2423720359802246, |
|
"logits/rejected": -2.1254634857177734, |
|
"logps/chosen": -284.2754821777344, |
|
"logps/rejected": -239.1751251220703, |
|
"loss": 0.677, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.050642453134059906, |
|
"rewards/margins": 0.03400001674890518, |
|
"rewards/rejected": 0.016642430797219276, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.031405391258832765, |
|
"grad_norm": 2.272566556930542, |
|
"learning_rate": 1.5665796344647521e-06, |
|
"logits/chosen": -2.273714303970337, |
|
"logits/rejected": -2.160338878631592, |
|
"logps/chosen": -287.36285400390625, |
|
"logps/rejected": -272.5426025390625, |
|
"loss": 0.6696, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.05583573505282402, |
|
"rewards/margins": 0.04974224418401718, |
|
"rewards/rejected": 0.006093493662774563, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03402250719706883, |
|
"grad_norm": 2.827535390853882, |
|
"learning_rate": 1.6971279373368146e-06, |
|
"logits/chosen": -2.2895429134368896, |
|
"logits/rejected": -2.1921463012695312, |
|
"logps/chosen": -250.36807250976562, |
|
"logps/rejected": -254.2834930419922, |
|
"loss": 0.664, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.04658503085374832, |
|
"rewards/margins": 0.06275991350412369, |
|
"rewards/rejected": -0.01617487706243992, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.036639623135304895, |
|
"grad_norm": 2.8360142707824707, |
|
"learning_rate": 1.8276762402088774e-06, |
|
"logits/chosen": -2.285165309906006, |
|
"logits/rejected": -2.075157880783081, |
|
"logps/chosen": -272.5437927246094, |
|
"logps/rejected": -229.80880737304688, |
|
"loss": 0.662, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.021640608087182045, |
|
"rewards/margins": 0.06897087395191193, |
|
"rewards/rejected": -0.047330256551504135, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03925673907354096, |
|
"grad_norm": 3.0254032611846924, |
|
"learning_rate": 1.9582245430809403e-06, |
|
"logits/chosen": -2.289304494857788, |
|
"logits/rejected": -2.16825532913208, |
|
"logps/chosen": -283.7846984863281, |
|
"logps/rejected": -248.1438446044922, |
|
"loss": 0.6606, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.0013313032686710358, |
|
"rewards/margins": 0.07430683076381683, |
|
"rewards/rejected": -0.0729755312204361, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04187385501177702, |
|
"grad_norm": 3.2089273929595947, |
|
"learning_rate": 2.0887728459530026e-06, |
|
"logits/chosen": -2.209859609603882, |
|
"logits/rejected": -2.1506431102752686, |
|
"logps/chosen": -262.52569580078125, |
|
"logps/rejected": -270.04766845703125, |
|
"loss": 0.6632, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.05757613852620125, |
|
"rewards/margins": 0.07334659993648529, |
|
"rewards/rejected": -0.13092274963855743, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04449097095001309, |
|
"grad_norm": 3.7007648944854736, |
|
"learning_rate": 2.2193211488250653e-06, |
|
"logits/chosen": -2.190873384475708, |
|
"logits/rejected": -2.1105270385742188, |
|
"logps/chosen": -227.3632049560547, |
|
"logps/rejected": -236.8821563720703, |
|
"loss": 0.6635, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.0691681057214737, |
|
"rewards/margins": 0.0719941109418869, |
|
"rewards/rejected": -0.1411622166633606, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.04710808688824915, |
|
"grad_norm": 5.2089338302612305, |
|
"learning_rate": 2.3498694516971284e-06, |
|
"logits/chosen": -2.2085936069488525, |
|
"logits/rejected": -2.1274473667144775, |
|
"logps/chosen": -273.2780456542969, |
|
"logps/rejected": -269.22747802734375, |
|
"loss": 0.6559, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.17684438824653625, |
|
"rewards/margins": 0.0919983834028244, |
|
"rewards/rejected": -0.26884278655052185, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04972520282648522, |
|
"grad_norm": 4.031327724456787, |
|
"learning_rate": 2.4804177545691907e-06, |
|
"logits/chosen": -2.2836358547210693, |
|
"logits/rejected": -2.1660006046295166, |
|
"logps/chosen": -281.2835388183594, |
|
"logps/rejected": -266.60821533203125, |
|
"loss": 0.637, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.11275825649499893, |
|
"rewards/margins": 0.13688938319683075, |
|
"rewards/rejected": -0.24964764714241028, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05234231876472128, |
|
"grad_norm": 6.425544261932373, |
|
"learning_rate": 2.610966057441254e-06, |
|
"logits/chosen": -2.2068774700164795, |
|
"logits/rejected": -2.0849859714508057, |
|
"logps/chosen": -266.81500244140625, |
|
"logps/rejected": -241.287353515625, |
|
"loss": 0.6438, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.1718008667230606, |
|
"rewards/margins": 0.12619325518608093, |
|
"rewards/rejected": -0.29799407720565796, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05234231876472128, |
|
"eval_logits/chosen": -2.0999979972839355, |
|
"eval_logits/rejected": -2.011294364929199, |
|
"eval_logps/chosen": -283.4154357910156, |
|
"eval_logps/rejected": -278.46148681640625, |
|
"eval_loss": 0.6356053948402405, |
|
"eval_rewards/accuracies": 0.6759999990463257, |
|
"eval_rewards/chosen": -0.18813487887382507, |
|
"eval_rewards/margins": 0.15077635645866394, |
|
"eval_rewards/rejected": -0.3389112055301666, |
|
"eval_runtime": 1598.5625, |
|
"eval_samples_per_second": 1.251, |
|
"eval_steps_per_second": 0.156, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05495943470295734, |
|
"grad_norm": 4.150570869445801, |
|
"learning_rate": 2.741514360313316e-06, |
|
"logits/chosen": -2.255913734436035, |
|
"logits/rejected": -2.120842456817627, |
|
"logps/chosen": -280.205810546875, |
|
"logps/rejected": -268.5466613769531, |
|
"loss": 0.6131, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.18164019286632538, |
|
"rewards/margins": 0.19993841648101807, |
|
"rewards/rejected": -0.38157862424850464, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.05757655064119341, |
|
"grad_norm": 4.034811496734619, |
|
"learning_rate": 2.872062663185379e-06, |
|
"logits/chosen": -2.1413655281066895, |
|
"logits/rejected": -2.093209743499756, |
|
"logps/chosen": -298.56549072265625, |
|
"logps/rejected": -289.4757995605469, |
|
"loss": 0.6254, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.4465310573577881, |
|
"rewards/margins": 0.17338070273399353, |
|
"rewards/rejected": -0.619911789894104, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06019366657942947, |
|
"grad_norm": 6.063634395599365, |
|
"learning_rate": 3.0026109660574416e-06, |
|
"logits/chosen": -2.266742706298828, |
|
"logits/rejected": -2.164170742034912, |
|
"logps/chosen": -352.3594665527344, |
|
"logps/rejected": -333.99053955078125, |
|
"loss": 0.6479, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.46258726716041565, |
|
"rewards/margins": 0.17155149579048157, |
|
"rewards/rejected": -0.6341387033462524, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.06281078251766553, |
|
"grad_norm": 5.352989673614502, |
|
"learning_rate": 3.1331592689295043e-06, |
|
"logits/chosen": -2.136970281600952, |
|
"logits/rejected": -2.0645029544830322, |
|
"logps/chosen": -342.98870849609375, |
|
"logps/rejected": -350.2208251953125, |
|
"loss": 0.6237, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.4678193926811218, |
|
"rewards/margins": 0.2228115350008011, |
|
"rewards/rejected": -0.6906309127807617, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.06542789845590159, |
|
"grad_norm": 6.087672233581543, |
|
"learning_rate": 3.263707571801567e-06, |
|
"logits/chosen": -2.162543296813965, |
|
"logits/rejected": -2.1394383907318115, |
|
"logps/chosen": -298.310302734375, |
|
"logps/rejected": -299.23260498046875, |
|
"loss": 0.6014, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3882629871368408, |
|
"rewards/margins": 0.26374003291130066, |
|
"rewards/rejected": -0.6520029902458191, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.06804501439413765, |
|
"grad_norm": 9.075284004211426, |
|
"learning_rate": 3.3942558746736293e-06, |
|
"logits/chosen": -2.2069649696350098, |
|
"logits/rejected": -2.0832362174987793, |
|
"logps/chosen": -315.36358642578125, |
|
"logps/rejected": -315.8193664550781, |
|
"loss": 0.6231, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.43095770478248596, |
|
"rewards/margins": 0.2242399901151657, |
|
"rewards/rejected": -0.6551976203918457, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.07066213033237373, |
|
"grad_norm": 4.0352067947387695, |
|
"learning_rate": 3.524804177545692e-06, |
|
"logits/chosen": -2.1507174968719482, |
|
"logits/rejected": -2.084745407104492, |
|
"logps/chosen": -323.89361572265625, |
|
"logps/rejected": -321.73907470703125, |
|
"loss": 0.5962, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6326006650924683, |
|
"rewards/margins": 0.293195515871048, |
|
"rewards/rejected": -0.9257962107658386, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.07327924627060979, |
|
"grad_norm": 6.033257007598877, |
|
"learning_rate": 3.6553524804177547e-06, |
|
"logits/chosen": -2.17421293258667, |
|
"logits/rejected": -2.054452896118164, |
|
"logps/chosen": -340.52557373046875, |
|
"logps/rejected": -340.84259033203125, |
|
"loss": 0.627, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.8170977830886841, |
|
"rewards/margins": 0.2173783779144287, |
|
"rewards/rejected": -1.0344761610031128, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.07589636220884585, |
|
"grad_norm": 6.212845325469971, |
|
"learning_rate": 3.7859007832898174e-06, |
|
"logits/chosen": -2.1532936096191406, |
|
"logits/rejected": -2.0849764347076416, |
|
"logps/chosen": -353.55975341796875, |
|
"logps/rejected": -354.45782470703125, |
|
"loss": 0.6139, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6797887086868286, |
|
"rewards/margins": 0.26041343808174133, |
|
"rewards/rejected": -0.9402019381523132, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.07851347814708191, |
|
"grad_norm": 9.043365478515625, |
|
"learning_rate": 3.9164490861618806e-06, |
|
"logits/chosen": -2.1526737213134766, |
|
"logits/rejected": -2.0208210945129395, |
|
"logps/chosen": -306.4209899902344, |
|
"logps/rejected": -320.6431884765625, |
|
"loss": 0.6073, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.6231580972671509, |
|
"rewards/margins": 0.2800864577293396, |
|
"rewards/rejected": -0.9032446146011353, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07851347814708191, |
|
"eval_logits/chosen": -2.0781641006469727, |
|
"eval_logits/rejected": -1.9948630332946777, |
|
"eval_logps/chosen": -333.25830078125, |
|
"eval_logps/rejected": -342.0091247558594, |
|
"eval_loss": 0.6053693890571594, |
|
"eval_rewards/accuracies": 0.6815000176429749, |
|
"eval_rewards/chosen": -0.6865635514259338, |
|
"eval_rewards/margins": 0.2878238558769226, |
|
"eval_rewards/rejected": -0.9743873476982117, |
|
"eval_runtime": 1598.3515, |
|
"eval_samples_per_second": 1.251, |
|
"eval_steps_per_second": 0.156, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08113059408531798, |
|
"grad_norm": 8.154093742370605, |
|
"learning_rate": 4.046997389033943e-06, |
|
"logits/chosen": -2.2768380641937256, |
|
"logits/rejected": -2.16288161277771, |
|
"logps/chosen": -355.626953125, |
|
"logps/rejected": -341.3152770996094, |
|
"loss": 0.5637, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.6677303910255432, |
|
"rewards/margins": 0.39315730333328247, |
|
"rewards/rejected": -1.0608876943588257, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.08374771002355404, |
|
"grad_norm": 12.870465278625488, |
|
"learning_rate": 4.177545691906005e-06, |
|
"logits/chosen": -2.2009758949279785, |
|
"logits/rejected": -2.101364850997925, |
|
"logps/chosen": -330.45904541015625, |
|
"logps/rejected": -343.94305419921875, |
|
"loss": 0.6039, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.7225368022918701, |
|
"rewards/margins": 0.30548617243766785, |
|
"rewards/rejected": -1.0280230045318604, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.08636482596179011, |
|
"grad_norm": 5.590673923492432, |
|
"learning_rate": 4.308093994778068e-06, |
|
"logits/chosen": -2.086026668548584, |
|
"logits/rejected": -2.0480690002441406, |
|
"logps/chosen": -313.5097351074219, |
|
"logps/rejected": -321.5640869140625, |
|
"loss": 0.5895, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.5396815538406372, |
|
"rewards/margins": 0.322670042514801, |
|
"rewards/rejected": -0.8623515963554382, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.08898194190002617, |
|
"grad_norm": 7.664637088775635, |
|
"learning_rate": 4.4386422976501306e-06, |
|
"logits/chosen": -2.1061110496520996, |
|
"logits/rejected": -2.049543857574463, |
|
"logps/chosen": -358.71868896484375, |
|
"logps/rejected": -383.11328125, |
|
"loss": 0.5694, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.6914780735969543, |
|
"rewards/margins": 0.45771294832229614, |
|
"rewards/rejected": -1.1491910219192505, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.09159905783826224, |
|
"grad_norm": 10.34107780456543, |
|
"learning_rate": 4.569190600522193e-06, |
|
"logits/chosen": -1.9783916473388672, |
|
"logits/rejected": -1.901391625404358, |
|
"logps/chosen": -401.55120849609375, |
|
"logps/rejected": -426.84832763671875, |
|
"loss": 0.6179, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.1243749856948853, |
|
"rewards/margins": 0.3678717613220215, |
|
"rewards/rejected": -1.4922468662261963, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.0942161737764983, |
|
"grad_norm": 6.533565044403076, |
|
"learning_rate": 4.699738903394257e-06, |
|
"logits/chosen": -1.9793760776519775, |
|
"logits/rejected": -1.922286033630371, |
|
"logps/chosen": -367.68817138671875, |
|
"logps/rejected": -385.93798828125, |
|
"loss": 0.5861, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2674810886383057, |
|
"rewards/margins": 0.39134687185287476, |
|
"rewards/rejected": -1.6588280200958252, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.09683328971473436, |
|
"grad_norm": 9.993318557739258, |
|
"learning_rate": 4.8302872062663196e-06, |
|
"logits/chosen": -1.9993594884872437, |
|
"logits/rejected": -1.8775148391723633, |
|
"logps/chosen": -397.10125732421875, |
|
"logps/rejected": -394.6053771972656, |
|
"loss": 0.5852, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2026114463806152, |
|
"rewards/margins": 0.4132401943206787, |
|
"rewards/rejected": -1.615851640701294, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.09945040565297043, |
|
"grad_norm": 8.581938743591309, |
|
"learning_rate": 4.9608355091383814e-06, |
|
"logits/chosen": -1.9525811672210693, |
|
"logits/rejected": -1.7932708263397217, |
|
"logps/chosen": -388.2157287597656, |
|
"logps/rejected": -399.0534973144531, |
|
"loss": 0.5682, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.9025002717971802, |
|
"rewards/margins": 0.44248518347740173, |
|
"rewards/rejected": -1.3449854850769043, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1020675215912065, |
|
"grad_norm": 17.113487243652344, |
|
"learning_rate": 4.9999488562447675e-06, |
|
"logits/chosen": -1.920275330543518, |
|
"logits/rejected": -1.8368419408798218, |
|
"logps/chosen": -342.7761535644531, |
|
"logps/rejected": -365.7699279785156, |
|
"loss": 0.5702, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.6181488633155823, |
|
"rewards/margins": 0.4308691620826721, |
|
"rewards/rejected": -1.0490180253982544, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"grad_norm": 18.049278259277344, |
|
"learning_rate": 4.999698361256577e-06, |
|
"logits/chosen": -1.844530463218689, |
|
"logits/rejected": -1.724854826927185, |
|
"logps/chosen": -372.54058837890625, |
|
"logps/rejected": -370.39080810546875, |
|
"loss": 0.5956, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.0718820095062256, |
|
"rewards/margins": 0.3849504590034485, |
|
"rewards/rejected": -1.4568325281143188, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"eval_logits/chosen": -1.6757981777191162, |
|
"eval_logits/rejected": -1.5843830108642578, |
|
"eval_logps/chosen": -409.4522399902344, |
|
"eval_logps/rejected": -440.5653381347656, |
|
"eval_loss": 0.5824012160301208, |
|
"eval_rewards/accuracies": 0.6830000281333923, |
|
"eval_rewards/chosen": -1.448502540588379, |
|
"eval_rewards/margins": 0.5114473700523376, |
|
"eval_rewards/rejected": -1.9599499702453613, |
|
"eval_runtime": 1596.5137, |
|
"eval_samples_per_second": 1.253, |
|
"eval_steps_per_second": 0.157, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.10730175346767862, |
|
"grad_norm": 7.473143577575684, |
|
"learning_rate": 4.999239142174581e-06, |
|
"logits/chosen": -1.7772619724273682, |
|
"logits/rejected": -1.71932053565979, |
|
"logps/chosen": -362.2901306152344, |
|
"logps/rejected": -392.97271728515625, |
|
"loss": 0.6257, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.2004709243774414, |
|
"rewards/margins": 0.3244817852973938, |
|
"rewards/rejected": -1.5249526500701904, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.10991886940591468, |
|
"grad_norm": 5.672206401824951, |
|
"learning_rate": 4.99857123734344e-06, |
|
"logits/chosen": -1.798180341720581, |
|
"logits/rejected": -1.6758928298950195, |
|
"logps/chosen": -342.5751037597656, |
|
"logps/rejected": -385.69757080078125, |
|
"loss": 0.5303, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.1391090154647827, |
|
"rewards/margins": 0.5212605595588684, |
|
"rewards/rejected": -1.660369634628296, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.11253598534415074, |
|
"grad_norm": 7.920849800109863, |
|
"learning_rate": 4.997694702533016e-06, |
|
"logits/chosen": -1.839582085609436, |
|
"logits/rejected": -1.7662830352783203, |
|
"logps/chosen": -396.0287170410156, |
|
"logps/rejected": -421.93707275390625, |
|
"loss": 0.542, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1852526664733887, |
|
"rewards/margins": 0.49562257528305054, |
|
"rewards/rejected": -1.6808754205703735, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.11515310128238682, |
|
"grad_norm": 9.021227836608887, |
|
"learning_rate": 4.996609610933713e-06, |
|
"logits/chosen": -1.9095804691314697, |
|
"logits/rejected": -1.8493105173110962, |
|
"logps/chosen": -372.8834533691406, |
|
"logps/rejected": -393.52532958984375, |
|
"loss": 0.575, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.0081276893615723, |
|
"rewards/margins": 0.4901418089866638, |
|
"rewards/rejected": -1.4982694387435913, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.11777021722062288, |
|
"grad_norm": 8.462127685546875, |
|
"learning_rate": 4.995316053150366e-06, |
|
"logits/chosen": -1.7473382949829102, |
|
"logits/rejected": -1.6900889873504639, |
|
"logps/chosen": -388.23175048828125, |
|
"logps/rejected": -426.498046875, |
|
"loss": 0.5359, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.2048237323760986, |
|
"rewards/margins": 0.612975537776947, |
|
"rewards/rejected": -1.8177993297576904, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.12038733315885894, |
|
"grad_norm": 12.204924583435059, |
|
"learning_rate": 4.9938141371946815e-06, |
|
"logits/chosen": -1.7278436422348022, |
|
"logits/rejected": -1.6576976776123047, |
|
"logps/chosen": -472.71600341796875, |
|
"logps/rejected": -530.8753662109375, |
|
"loss": 0.5586, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.0762267112731934, |
|
"rewards/margins": 0.7278280258178711, |
|
"rewards/rejected": -2.8040547370910645, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.123004449097095, |
|
"grad_norm": 6.473967552185059, |
|
"learning_rate": 4.992103988476206e-06, |
|
"logits/chosen": -1.8171007633209229, |
|
"logits/rejected": -1.712386131286621, |
|
"logps/chosen": -393.3753662109375, |
|
"logps/rejected": -441.9925231933594, |
|
"loss": 0.5643, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4961185455322266, |
|
"rewards/margins": 0.6115677356719971, |
|
"rewards/rejected": -2.1076862812042236, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.12562156503533106, |
|
"grad_norm": 5.5269598960876465, |
|
"learning_rate": 4.990185749791866e-06, |
|
"logits/chosen": -1.9438987970352173, |
|
"logits/rejected": -1.8433564901351929, |
|
"logps/chosen": -341.74261474609375, |
|
"logps/rejected": -403.5177917480469, |
|
"loss": 0.5323, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8496583104133606, |
|
"rewards/margins": 0.5994860529899597, |
|
"rewards/rejected": -1.4491443634033203, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.12823868097356714, |
|
"grad_norm": 9.541817665100098, |
|
"learning_rate": 4.9880595813140395e-06, |
|
"logits/chosen": -1.9532935619354248, |
|
"logits/rejected": -1.8457939624786377, |
|
"logps/chosen": -374.6437072753906, |
|
"logps/rejected": -403.85614013671875, |
|
"loss": 0.5377, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.8984916806221008, |
|
"rewards/margins": 0.5953295826911926, |
|
"rewards/rejected": -1.493821144104004, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.13085579691180318, |
|
"grad_norm": 13.012099266052246, |
|
"learning_rate": 4.985725660577184e-06, |
|
"logits/chosen": -1.8454961776733398, |
|
"logits/rejected": -1.7134668827056885, |
|
"logps/chosen": -394.9266052246094, |
|
"logps/rejected": -415.48138427734375, |
|
"loss": 0.5643, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.2125742435455322, |
|
"rewards/margins": 0.6179044842720032, |
|
"rewards/rejected": -1.8304786682128906, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13085579691180318, |
|
"eval_logits/chosen": -1.6658297777175903, |
|
"eval_logits/rejected": -1.56244695186615, |
|
"eval_logps/chosen": -379.18035888671875, |
|
"eval_logps/rejected": -420.46356201171875, |
|
"eval_loss": 0.5725830793380737, |
|
"eval_rewards/accuracies": 0.6915000081062317, |
|
"eval_rewards/chosen": -1.1457839012145996, |
|
"eval_rewards/margins": 0.6131481528282166, |
|
"eval_rewards/rejected": -1.758932113647461, |
|
"eval_runtime": 1597.6305, |
|
"eval_samples_per_second": 1.252, |
|
"eval_steps_per_second": 0.156, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13347291285003926, |
|
"grad_norm": 9.327789306640625, |
|
"learning_rate": 4.983184182463009e-06, |
|
"logits/chosen": -1.7511212825775146, |
|
"logits/rejected": -1.6526283025741577, |
|
"logps/chosen": -403.2107238769531, |
|
"logps/rejected": -449.2831115722656, |
|
"loss": 0.5133, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.2511955499649048, |
|
"rewards/margins": 0.830196738243103, |
|
"rewards/rejected": -2.0813920497894287, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.1360900287882753, |
|
"grad_norm": 20.807098388671875, |
|
"learning_rate": 4.980435359184203e-06, |
|
"logits/chosen": -1.7725025415420532, |
|
"logits/rejected": -1.7354393005371094, |
|
"logps/chosen": -416.3721618652344, |
|
"logps/rejected": -457.4166564941406, |
|
"loss": 0.5914, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4335078001022339, |
|
"rewards/margins": 0.5734153985977173, |
|
"rewards/rejected": -2.006923198699951, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.13870714472651138, |
|
"grad_norm": 9.319575309753418, |
|
"learning_rate": 4.9774794202667236e-06, |
|
"logits/chosen": -1.8060506582260132, |
|
"logits/rejected": -1.7954254150390625, |
|
"logps/chosen": -347.8722229003906, |
|
"logps/rejected": -408.69842529296875, |
|
"loss": 0.568, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.8692201375961304, |
|
"rewards/margins": 0.5232836008071899, |
|
"rewards/rejected": -1.3925037384033203, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.14132426066474746, |
|
"grad_norm": 18.703466415405273, |
|
"learning_rate": 4.974316612530615e-06, |
|
"logits/chosen": -1.718746542930603, |
|
"logits/rejected": -1.6051177978515625, |
|
"logps/chosen": -397.3692932128906, |
|
"logps/rejected": -437.828125, |
|
"loss": 0.4615, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.1902121305465698, |
|
"rewards/margins": 0.8715072870254517, |
|
"rewards/rejected": -2.0617194175720215, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.1439413766029835, |
|
"grad_norm": 13.324700355529785, |
|
"learning_rate": 4.970947200069416e-06, |
|
"logits/chosen": -1.5196049213409424, |
|
"logits/rejected": -1.463122010231018, |
|
"logps/chosen": -523.3265380859375, |
|
"logps/rejected": -561.0288696289062, |
|
"loss": 0.6068, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.449286937713623, |
|
"rewards/margins": 0.5750513076782227, |
|
"rewards/rejected": -3.0243382453918457, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.14655849254121958, |
|
"grad_norm": 7.8647990226745605, |
|
"learning_rate": 4.967371464228096e-06, |
|
"logits/chosen": -1.6284644603729248, |
|
"logits/rejected": -1.54505455493927, |
|
"logps/chosen": -521.0708618164062, |
|
"logps/rejected": -584.5469970703125, |
|
"loss": 0.5398, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.636112689971924, |
|
"rewards/margins": 0.6315088868141174, |
|
"rewards/rejected": -3.2676215171813965, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.14917560847945563, |
|
"grad_norm": 8.891359329223633, |
|
"learning_rate": 4.963589703579569e-06, |
|
"logits/chosen": -1.7360155582427979, |
|
"logits/rejected": -1.6161645650863647, |
|
"logps/chosen": -575.2686767578125, |
|
"logps/rejected": -591.63671875, |
|
"loss": 0.6015, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.752664566040039, |
|
"rewards/margins": 0.5114163160324097, |
|
"rewards/rejected": -3.264080762863159, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.1517927244176917, |
|
"grad_norm": 13.137563705444336, |
|
"learning_rate": 4.9596022338997615e-06, |
|
"logits/chosen": -1.8721704483032227, |
|
"logits/rejected": -1.7023900747299194, |
|
"logps/chosen": -481.35992431640625, |
|
"logps/rejected": -496.80792236328125, |
|
"loss": 0.5479, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.8363927602767944, |
|
"rewards/margins": 0.565157949924469, |
|
"rewards/rejected": -2.401550769805908, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.15440984035592778, |
|
"grad_norm": 9.32674789428711, |
|
"learning_rate": 4.955409388141243e-06, |
|
"logits/chosen": -1.9033949375152588, |
|
"logits/rejected": -1.81440007686615, |
|
"logps/chosen": -351.5168762207031, |
|
"logps/rejected": -374.45159912109375, |
|
"loss": 0.5774, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.9435514211654663, |
|
"rewards/margins": 0.46548739075660706, |
|
"rewards/rejected": -1.4090386629104614, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.15702695629416383, |
|
"grad_norm": 8.5282621383667, |
|
"learning_rate": 4.951011516405429e-06, |
|
"logits/chosen": -1.9967750310897827, |
|
"logits/rejected": -1.959011435508728, |
|
"logps/chosen": -324.1109924316406, |
|
"logps/rejected": -367.7194519042969, |
|
"loss": 0.5373, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.7343847155570984, |
|
"rewards/margins": 0.5855700373649597, |
|
"rewards/rejected": -1.319954752922058, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.15702695629416383, |
|
"eval_logits/chosen": -1.7954951524734497, |
|
"eval_logits/rejected": -1.6945267915725708, |
|
"eval_logps/chosen": -377.46051025390625, |
|
"eval_logps/rejected": -426.21209716796875, |
|
"eval_loss": 0.5631101727485657, |
|
"eval_rewards/accuracies": 0.703000009059906, |
|
"eval_rewards/chosen": -1.1285854578018188, |
|
"eval_rewards/margins": 0.6878318190574646, |
|
"eval_rewards/rejected": -1.8164173364639282, |
|
"eval_runtime": 1597.4817, |
|
"eval_samples_per_second": 1.252, |
|
"eval_steps_per_second": 0.156, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1596440722323999, |
|
"grad_norm": 8.468684196472168, |
|
"learning_rate": 4.946408985913344e-06, |
|
"logits/chosen": -1.8055137395858765, |
|
"logits/rejected": -1.7273778915405273, |
|
"logps/chosen": -379.7837829589844, |
|
"logps/rejected": -452.4205627441406, |
|
"loss": 0.5374, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.3405410051345825, |
|
"rewards/margins": 0.9033535122871399, |
|
"rewards/rejected": -2.243894577026367, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.16226118817063595, |
|
"grad_norm": 8.468870162963867, |
|
"learning_rate": 4.941602180974958e-06, |
|
"logits/chosen": -1.712264060974121, |
|
"logits/rejected": -1.5265555381774902, |
|
"logps/chosen": -442.0708923339844, |
|
"logps/rejected": -476.46112060546875, |
|
"loss": 0.5459, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.5436513423919678, |
|
"rewards/margins": 0.9296489953994751, |
|
"rewards/rejected": -2.4733004570007324, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.16487830410887203, |
|
"grad_norm": 8.517386436462402, |
|
"learning_rate": 4.936591502957101e-06, |
|
"logits/chosen": -1.650029182434082, |
|
"logits/rejected": -1.5338895320892334, |
|
"logps/chosen": -383.0647888183594, |
|
"logps/rejected": -462.663330078125, |
|
"loss": 0.5098, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.3505860567092896, |
|
"rewards/margins": 0.8982070684432983, |
|
"rewards/rejected": -2.248793125152588, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.16749542004710807, |
|
"grad_norm": 7.095081329345703, |
|
"learning_rate": 4.931377370249946e-06, |
|
"logits/chosen": -1.6547809839248657, |
|
"logits/rejected": -1.4826760292053223, |
|
"logps/chosen": -434.3949279785156, |
|
"logps/rejected": -483.05657958984375, |
|
"loss": 0.5251, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.691422462463379, |
|
"rewards/margins": 0.7445409297943115, |
|
"rewards/rejected": -2.4359633922576904, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.17011253598534415, |
|
"grad_norm": 16.53241539001465, |
|
"learning_rate": 4.925960218232073e-06, |
|
"logits/chosen": -1.586260199546814, |
|
"logits/rejected": -1.4878358840942383, |
|
"logps/chosen": -451.49871826171875, |
|
"logps/rejected": -535.3369750976562, |
|
"loss": 0.5454, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.9628738164901733, |
|
"rewards/margins": 0.9344732165336609, |
|
"rewards/rejected": -2.8973469734191895, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.17272965192358022, |
|
"grad_norm": 12.95315170288086, |
|
"learning_rate": 4.920340499234116e-06, |
|
"logits/chosen": -1.5459370613098145, |
|
"logits/rejected": -1.3826103210449219, |
|
"logps/chosen": -457.12640380859375, |
|
"logps/rejected": -501.2940368652344, |
|
"loss": 0.5561, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.9127668142318726, |
|
"rewards/margins": 0.7934447526931763, |
|
"rewards/rejected": -2.706211566925049, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.17534676786181627, |
|
"grad_norm": 13.084680557250977, |
|
"learning_rate": 4.914518682500995e-06, |
|
"logits/chosen": -1.6595103740692139, |
|
"logits/rejected": -1.5149381160736084, |
|
"logps/chosen": -505.25360107421875, |
|
"logps/rejected": -557.6201782226562, |
|
"loss": 0.5516, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -2.2182066440582275, |
|
"rewards/margins": 0.930493950843811, |
|
"rewards/rejected": -3.1487009525299072, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.17796388380005235, |
|
"grad_norm": 9.387535095214844, |
|
"learning_rate": 4.9084952541527315e-06, |
|
"logits/chosen": -1.5597246885299683, |
|
"logits/rejected": -1.4202911853790283, |
|
"logps/chosen": -468.02777099609375, |
|
"logps/rejected": -508.26483154296875, |
|
"loss": 0.5279, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.9475713968276978, |
|
"rewards/margins": 0.8734270334243774, |
|
"rewards/rejected": -2.820998430252075, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.1805809997382884, |
|
"grad_norm": 10.536702156066895, |
|
"learning_rate": 4.902270717143858e-06, |
|
"logits/chosen": -1.6048400402069092, |
|
"logits/rejected": -1.5331923961639404, |
|
"logps/chosen": -395.47686767578125, |
|
"logps/rejected": -517.7385864257812, |
|
"loss": 0.4506, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.5932337045669556, |
|
"rewards/margins": 1.1062676906585693, |
|
"rewards/rejected": -2.6995015144348145, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.18319811567652447, |
|
"grad_norm": 6.061295509338379, |
|
"learning_rate": 4.895845591221427e-06, |
|
"logits/chosen": -1.5165598392486572, |
|
"logits/rejected": -1.4684141874313354, |
|
"logps/chosen": -450.1396484375, |
|
"logps/rejected": -525.9497680664062, |
|
"loss": 0.5394, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.9692981243133545, |
|
"rewards/margins": 0.802649199962616, |
|
"rewards/rejected": -2.7719473838806152, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.18319811567652447, |
|
"eval_logits/chosen": -1.271895408630371, |
|
"eval_logits/rejected": -1.1628035306930542, |
|
"eval_logps/chosen": -491.6012268066406, |
|
"eval_logps/rejected": -551.1991577148438, |
|
"eval_loss": 0.5473812222480774, |
|
"eval_rewards/accuracies": 0.7039999961853027, |
|
"eval_rewards/chosen": -2.2699923515319824, |
|
"eval_rewards/margins": 0.7962960004806519, |
|
"eval_rewards/rejected": -3.066288471221924, |
|
"eval_runtime": 1597.7171, |
|
"eval_samples_per_second": 1.252, |
|
"eval_steps_per_second": 0.156, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.18581523161476055, |
|
"grad_norm": 13.924752235412598, |
|
"learning_rate": 4.8892204128816e-06, |
|
"logits/chosen": -1.3581098318099976, |
|
"logits/rejected": -1.2599608898162842, |
|
"logps/chosen": -515.8197631835938, |
|
"logps/rejected": -587.2666625976562, |
|
"loss": 0.5115, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.4833767414093018, |
|
"rewards/margins": 0.8504128456115723, |
|
"rewards/rejected": -3.333789348602295, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.1884323475529966, |
|
"grad_norm": 9.558119773864746, |
|
"learning_rate": 4.882395735324864e-06, |
|
"logits/chosen": -1.140836477279663, |
|
"logits/rejected": -0.9971574544906616, |
|
"logps/chosen": -565.4805908203125, |
|
"logps/rejected": -648.9151611328125, |
|
"loss": 0.4868, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.9943554401397705, |
|
"rewards/margins": 1.006219744682312, |
|
"rewards/rejected": -4.000575065612793, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.19104946349123267, |
|
"grad_norm": 16.967636108398438, |
|
"learning_rate": 4.87537212840983e-06, |
|
"logits/chosen": -1.0284086465835571, |
|
"logits/rejected": -0.9112738370895386, |
|
"logps/chosen": -624.4201049804688, |
|
"logps/rejected": -673.4655151367188, |
|
"loss": 0.6183, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.582063674926758, |
|
"rewards/margins": 0.7967410087585449, |
|
"rewards/rejected": -4.378804683685303, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.19366657942946872, |
|
"grad_norm": 17.04477310180664, |
|
"learning_rate": 4.8681501786056545e-06, |
|
"logits/chosen": -1.1650705337524414, |
|
"logits/rejected": -1.0339478254318237, |
|
"logps/chosen": -438.68585205078125, |
|
"logps/rejected": -490.553955078125, |
|
"loss": 0.497, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.1054441928863525, |
|
"rewards/margins": 0.8412445187568665, |
|
"rewards/rejected": -2.946688652038574, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.1962836953677048, |
|
"grad_norm": 20.108728408813477, |
|
"learning_rate": 4.860730488943068e-06, |
|
"logits/chosen": -1.2052314281463623, |
|
"logits/rejected": -1.157947301864624, |
|
"logps/chosen": -439.97589111328125, |
|
"logps/rejected": -518.9736328125, |
|
"loss": 0.5097, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.0552916526794434, |
|
"rewards/margins": 0.809428870677948, |
|
"rewards/rejected": -2.864720106124878, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.19890081130594087, |
|
"grad_norm": 6.034134387969971, |
|
"learning_rate": 4.853113678964022e-06, |
|
"logits/chosen": -1.3641645908355713, |
|
"logits/rejected": -1.3035409450531006, |
|
"logps/chosen": -421.71343994140625, |
|
"logps/rejected": -494.55810546875, |
|
"loss": 0.5004, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4936286211013794, |
|
"rewards/margins": 0.8079965710639954, |
|
"rewards/rejected": -2.3016250133514404, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.20151792724417691, |
|
"grad_norm": 5.774670124053955, |
|
"learning_rate": 4.845300384669958e-06, |
|
"logits/chosen": -1.4722392559051514, |
|
"logits/rejected": -1.3768599033355713, |
|
"logps/chosen": -374.771484375, |
|
"logps/rejected": -418.16656494140625, |
|
"loss": 0.5331, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.2165186405181885, |
|
"rewards/margins": 0.6629087328910828, |
|
"rewards/rejected": -1.8794273138046265, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.204135043182413, |
|
"grad_norm": 8.666418075561523, |
|
"learning_rate": 4.837291258468701e-06, |
|
"logits/chosen": -1.63046395778656, |
|
"logits/rejected": -1.5205990076065063, |
|
"logps/chosen": -421.2227478027344, |
|
"logps/rejected": -473.2460021972656, |
|
"loss": 0.5528, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.3603990077972412, |
|
"rewards/margins": 0.7691534161567688, |
|
"rewards/rejected": -2.1295523643493652, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.20675215912064904, |
|
"grad_norm": 15.76352310180664, |
|
"learning_rate": 4.829086969119984e-06, |
|
"logits/chosen": -1.4890462160110474, |
|
"logits/rejected": -1.4975069761276245, |
|
"logps/chosen": -429.3702087402344, |
|
"logps/rejected": -498.40496826171875, |
|
"loss": 0.5898, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.7876300811767578, |
|
"rewards/margins": 0.6727088093757629, |
|
"rewards/rejected": -2.460339069366455, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"grad_norm": 14.872965812683105, |
|
"learning_rate": 4.820688201679605e-06, |
|
"logits/chosen": -1.6982934474945068, |
|
"logits/rejected": -1.4959887266159058, |
|
"logps/chosen": -433.38470458984375, |
|
"logps/rejected": -451.4923400878906, |
|
"loss": 0.4983, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.708482027053833, |
|
"rewards/margins": 0.8051029443740845, |
|
"rewards/rejected": -2.513584852218628, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"eval_logits/chosen": -1.599565863609314, |
|
"eval_logits/rejected": -1.510375738143921, |
|
"eval_logps/chosen": -420.76544189453125, |
|
"eval_logps/rejected": -474.2269287109375, |
|
"eval_loss": 0.5322815179824829, |
|
"eval_rewards/accuracies": 0.7225000262260437, |
|
"eval_rewards/chosen": -1.5616350173950195, |
|
"eval_rewards/margins": 0.7349306344985962, |
|
"eval_rewards/rejected": -2.2965660095214844, |
|
"eval_runtime": 1595.9174, |
|
"eval_samples_per_second": 1.253, |
|
"eval_steps_per_second": 0.157, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.21198639099712116, |
|
"grad_norm": 6.158263683319092, |
|
"learning_rate": 4.8120956574422315e-06, |
|
"logits/chosen": -1.8132537603378296, |
|
"logits/rejected": -1.8140255212783813, |
|
"logps/chosen": -400.74359130859375, |
|
"logps/rejected": -452.96392822265625, |
|
"loss": 0.58, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.2691619396209717, |
|
"rewards/margins": 0.5926799178123474, |
|
"rewards/rejected": -1.8618419170379639, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.21460350693535724, |
|
"grad_norm": 14.991472244262695, |
|
"learning_rate": 4.803310053882831e-06, |
|
"logits/chosen": -1.883763313293457, |
|
"logits/rejected": -1.9024746417999268, |
|
"logps/chosen": -327.5868225097656, |
|
"logps/rejected": -404.1509704589844, |
|
"loss": 0.5258, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9834963083267212, |
|
"rewards/margins": 0.6419707536697388, |
|
"rewards/rejected": -1.6254669427871704, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.2172206228735933, |
|
"grad_norm": 10.492687225341797, |
|
"learning_rate": 4.794332124596775e-06, |
|
"logits/chosen": -1.8571357727050781, |
|
"logits/rejected": -1.813534140586853, |
|
"logps/chosen": -396.38372802734375, |
|
"logps/rejected": -451.81634521484375, |
|
"loss": 0.5868, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.2799537181854248, |
|
"rewards/margins": 0.5913030505180359, |
|
"rewards/rejected": -1.8712568283081055, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.21983773881182936, |
|
"grad_norm": 10.114534378051758, |
|
"learning_rate": 4.785162619238575e-06, |
|
"logits/chosen": -1.8008487224578857, |
|
"logits/rejected": -1.698720932006836, |
|
"logps/chosen": -378.2573547363281, |
|
"logps/rejected": -424.20782470703125, |
|
"loss": 0.5402, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.244909405708313, |
|
"rewards/margins": 0.7323002815246582, |
|
"rewards/rejected": -1.9772096872329712, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.22245485475006543, |
|
"grad_norm": 7.237858295440674, |
|
"learning_rate": 4.775802303459288e-06, |
|
"logits/chosen": -1.6909987926483154, |
|
"logits/rejected": -1.64451003074646, |
|
"logps/chosen": -384.024169921875, |
|
"logps/rejected": -455.19024658203125, |
|
"loss": 0.5251, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3631868362426758, |
|
"rewards/margins": 0.7588311433792114, |
|
"rewards/rejected": -2.1220178604125977, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.22507197068830148, |
|
"grad_norm": 20.08100700378418, |
|
"learning_rate": 4.766251958842589e-06, |
|
"logits/chosen": -1.5939120054244995, |
|
"logits/rejected": -1.5511482954025269, |
|
"logps/chosen": -447.6766662597656, |
|
"logps/rejected": -501.3028259277344, |
|
"loss": 0.569, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.7521028518676758, |
|
"rewards/margins": 0.6410677433013916, |
|
"rewards/rejected": -2.3931705951690674, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.22768908662653756, |
|
"grad_norm": 8.703922271728516, |
|
"learning_rate": 4.7565123828395066e-06, |
|
"logits/chosen": -1.5731687545776367, |
|
"logits/rejected": -1.5117802619934082, |
|
"logps/chosen": -424.31463623046875, |
|
"logps/rejected": -501.74725341796875, |
|
"loss": 0.5338, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.6849206686019897, |
|
"rewards/margins": 0.7798541784286499, |
|
"rewards/rejected": -2.4647748470306396, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.23030620256477363, |
|
"grad_norm": 7.03041410446167, |
|
"learning_rate": 4.746584388701831e-06, |
|
"logits/chosen": -1.6322410106658936, |
|
"logits/rejected": -1.613351583480835, |
|
"logps/chosen": -416.93463134765625, |
|
"logps/rejected": -476.6209411621094, |
|
"loss": 0.5234, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.5381996631622314, |
|
"rewards/margins": 0.7384254932403564, |
|
"rewards/rejected": -2.276625156402588, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.23292331850300968, |
|
"grad_norm": 7.439998149871826, |
|
"learning_rate": 4.736468805414218e-06, |
|
"logits/chosen": -1.6738321781158447, |
|
"logits/rejected": -1.6679372787475586, |
|
"logps/chosen": -351.4508056640625, |
|
"logps/rejected": -435.82196044921875, |
|
"loss": 0.546, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0406602621078491, |
|
"rewards/margins": 0.7058707475662231, |
|
"rewards/rejected": -1.7465311288833618, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.23554043444124576, |
|
"grad_norm": 14.334321975708008, |
|
"learning_rate": 4.7261664776249595e-06, |
|
"logits/chosen": -1.5745285749435425, |
|
"logits/rejected": -1.511311650276184, |
|
"logps/chosen": -331.2271423339844, |
|
"logps/rejected": -421.39752197265625, |
|
"loss": 0.4763, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.024084210395813, |
|
"rewards/margins": 0.9915010333061218, |
|
"rewards/rejected": -2.015585422515869, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.23554043444124576, |
|
"eval_logits/chosen": -1.4989054203033447, |
|
"eval_logits/rejected": -1.415571928024292, |
|
"eval_logps/chosen": -425.90301513671875, |
|
"eval_logps/rejected": -485.7890319824219, |
|
"eval_loss": 0.5385720133781433, |
|
"eval_rewards/accuracies": 0.7160000205039978, |
|
"eval_rewards/chosen": -1.6130101680755615, |
|
"eval_rewards/margins": 0.7991763353347778, |
|
"eval_rewards/rejected": -2.412186622619629, |
|
"eval_runtime": 1597.0973, |
|
"eval_samples_per_second": 1.252, |
|
"eval_steps_per_second": 0.157, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.2381575503794818, |
|
"grad_norm": 12.946525573730469, |
|
"learning_rate": 4.715678265575463e-06, |
|
"logits/chosen": -1.5601496696472168, |
|
"logits/rejected": -1.4170053005218506, |
|
"logps/chosen": -460.2450256347656, |
|
"logps/rejected": -471.73272705078125, |
|
"loss": 0.5673, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.759927749633789, |
|
"rewards/margins": 0.6714185476303101, |
|
"rewards/rejected": -2.4313464164733887, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.24077466631771788, |
|
"grad_norm": 7.0246663093566895, |
|
"learning_rate": 4.705005045028415e-06, |
|
"logits/chosen": -1.5280827283859253, |
|
"logits/rejected": -1.4348738193511963, |
|
"logps/chosen": -420.05682373046875, |
|
"logps/rejected": -474.1004333496094, |
|
"loss": 0.5575, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.5702658891677856, |
|
"rewards/margins": 0.7144767642021179, |
|
"rewards/rejected": -2.284742832183838, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.24339178225595393, |
|
"grad_norm": 10.933419227600098, |
|
"learning_rate": 4.694147707194659e-06, |
|
"logits/chosen": -1.7351709604263306, |
|
"logits/rejected": -1.6752073764801025, |
|
"logps/chosen": -409.02301025390625, |
|
"logps/rejected": -459.7960510253906, |
|
"loss": 0.5172, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.3725465536117554, |
|
"rewards/margins": 0.7059783935546875, |
|
"rewards/rejected": -2.0785250663757324, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.24600889819419, |
|
"grad_norm": 9.32016372680664, |
|
"learning_rate": 4.683107158658782e-06, |
|
"logits/chosen": -1.6696386337280273, |
|
"logits/rejected": -1.5904427766799927, |
|
"logps/chosen": -443.6543884277344, |
|
"logps/rejected": -504.19293212890625, |
|
"loss": 0.4894, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.5346601009368896, |
|
"rewards/margins": 0.8928316235542297, |
|
"rewards/rejected": -2.4274916648864746, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.24862601413242608, |
|
"grad_norm": 15.52425765991211, |
|
"learning_rate": 4.671884321303407e-06, |
|
"logits/chosen": -1.7015752792358398, |
|
"logits/rejected": -1.627673864364624, |
|
"logps/chosen": -428.1681213378906, |
|
"logps/rejected": -498.12530517578125, |
|
"loss": 0.5229, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.8192806243896484, |
|
"rewards/margins": 0.843016505241394, |
|
"rewards/rejected": -2.662297248840332, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.2512431300706621, |
|
"grad_norm": 9.358636856079102, |
|
"learning_rate": 4.660480132232224e-06, |
|
"logits/chosen": -1.7376630306243896, |
|
"logits/rejected": -1.6711089611053467, |
|
"logps/chosen": -425.44720458984375, |
|
"logps/rejected": -465.4773864746094, |
|
"loss": 0.5612, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.5372945070266724, |
|
"rewards/margins": 0.6410431861877441, |
|
"rewards/rejected": -2.178337812423706, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.25386024600889817, |
|
"grad_norm": 42.93048095703125, |
|
"learning_rate": 4.6488955436917414e-06, |
|
"logits/chosen": -1.6823867559432983, |
|
"logits/rejected": -1.5265601873397827, |
|
"logps/chosen": -445.4508361816406, |
|
"logps/rejected": -489.4844665527344, |
|
"loss": 0.53, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.655534029006958, |
|
"rewards/margins": 0.9063774347305298, |
|
"rewards/rejected": -2.5619113445281982, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.2564773619471343, |
|
"grad_norm": 10.477679252624512, |
|
"learning_rate": 4.6371315229917644e-06, |
|
"logits/chosen": -1.5334604978561401, |
|
"logits/rejected": -1.4493004083633423, |
|
"logps/chosen": -509.8636779785156, |
|
"logps/rejected": -587.6615600585938, |
|
"loss": 0.4944, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.265300750732422, |
|
"rewards/margins": 0.9821793437004089, |
|
"rewards/rejected": -3.2474799156188965, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.2590944778853703, |
|
"grad_norm": 12.81888198852539, |
|
"learning_rate": 4.625189052424638e-06, |
|
"logits/chosen": -1.4550929069519043, |
|
"logits/rejected": -1.353437066078186, |
|
"logps/chosen": -502.6724548339844, |
|
"logps/rejected": -594.8109130859375, |
|
"loss": 0.4652, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.6365292072296143, |
|
"rewards/margins": 1.1491750478744507, |
|
"rewards/rejected": -3.7857041358947754, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.26171159382360637, |
|
"grad_norm": 12.723479270935059, |
|
"learning_rate": 4.613069129183218e-06, |
|
"logits/chosen": -1.5442698001861572, |
|
"logits/rejected": -1.4192157983779907, |
|
"logps/chosen": -524.4293212890625, |
|
"logps/rejected": -580.075439453125, |
|
"loss": 0.5266, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.2106876373291016, |
|
"rewards/margins": 0.9188516736030579, |
|
"rewards/rejected": -3.1295390129089355, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.26171159382360637, |
|
"eval_logits/chosen": -1.3050363063812256, |
|
"eval_logits/rejected": -1.2043476104736328, |
|
"eval_logps/chosen": -482.4830627441406, |
|
"eval_logps/rejected": -550.0311279296875, |
|
"eval_loss": 0.523389995098114, |
|
"eval_rewards/accuracies": 0.7279999852180481, |
|
"eval_rewards/chosen": -2.1788110733032227, |
|
"eval_rewards/margins": 0.8757960796356201, |
|
"eval_rewards/rejected": -3.0546071529388428, |
|
"eval_runtime": 1595.4607, |
|
"eval_samples_per_second": 1.254, |
|
"eval_steps_per_second": 0.157, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2643287097618425, |
|
"grad_norm": 10.429312705993652, |
|
"learning_rate": 4.600772765277607e-06, |
|
"logits/chosen": -1.3304941654205322, |
|
"logits/rejected": -1.263293743133545, |
|
"logps/chosen": -446.70635986328125, |
|
"logps/rejected": -535.6898193359375, |
|
"loss": 0.4782, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.131920099258423, |
|
"rewards/margins": 0.9416648745536804, |
|
"rewards/rejected": -3.073584794998169, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.2669458257000785, |
|
"grad_norm": 16.054344177246094, |
|
"learning_rate": 4.588300987450652e-06, |
|
"logits/chosen": -1.4466055631637573, |
|
"logits/rejected": -1.3539087772369385, |
|
"logps/chosen": -452.9022521972656, |
|
"logps/rejected": -500.742919921875, |
|
"loss": 0.5407, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.9927232265472412, |
|
"rewards/margins": 0.8132057189941406, |
|
"rewards/rejected": -2.8059287071228027, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.26956294163831457, |
|
"grad_norm": 7.860910415649414, |
|
"learning_rate": 4.5756548370922136e-06, |
|
"logits/chosen": -1.514672875404358, |
|
"logits/rejected": -1.447291374206543, |
|
"logps/chosen": -394.26031494140625, |
|
"logps/rejected": -461.3570251464844, |
|
"loss": 0.5174, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.536415934562683, |
|
"rewards/margins": 0.7608687281608582, |
|
"rewards/rejected": -2.2972846031188965, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.2721800575765506, |
|
"grad_norm": 13.006108283996582, |
|
"learning_rate": 4.562835370152206e-06, |
|
"logits/chosen": -1.559757113456726, |
|
"logits/rejected": -1.4329888820648193, |
|
"logps/chosen": -482.7850646972656, |
|
"logps/rejected": -568.3600463867188, |
|
"loss": 0.4777, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8161367177963257, |
|
"rewards/margins": 1.1671006679534912, |
|
"rewards/rejected": -2.9832375049591064, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.2747971735147867, |
|
"grad_norm": 10.500707626342773, |
|
"learning_rate": 4.54984365705243e-06, |
|
"logits/chosen": -1.5195045471191406, |
|
"logits/rejected": -1.4307035207748413, |
|
"logps/chosen": -455.4422912597656, |
|
"logps/rejected": -578.11181640625, |
|
"loss": 0.4731, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.8703197240829468, |
|
"rewards/margins": 1.292755126953125, |
|
"rewards/rejected": -3.1630749702453613, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.27741428945302277, |
|
"grad_norm": 8.591322898864746, |
|
"learning_rate": 4.536680782597191e-06, |
|
"logits/chosen": -1.5150272846221924, |
|
"logits/rejected": -1.4561691284179688, |
|
"logps/chosen": -402.49053955078125, |
|
"logps/rejected": -479.2098083496094, |
|
"loss": 0.5844, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.6649020910263062, |
|
"rewards/margins": 0.8239375352859497, |
|
"rewards/rejected": -2.488839626312256, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.2800314053912588, |
|
"grad_norm": 12.471731185913086, |
|
"learning_rate": 4.523347845882718e-06, |
|
"logits/chosen": -1.598962664604187, |
|
"logits/rejected": -1.4552241563796997, |
|
"logps/chosen": -423.9444274902344, |
|
"logps/rejected": -476.2308654785156, |
|
"loss": 0.4524, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.3588608503341675, |
|
"rewards/margins": 1.0158392190933228, |
|
"rewards/rejected": -2.3747003078460693, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.2826485213294949, |
|
"grad_norm": 8.733776092529297, |
|
"learning_rate": 4.50984596020539e-06, |
|
"logits/chosen": -1.2865254878997803, |
|
"logits/rejected": -1.242490530014038, |
|
"logps/chosen": -467.01776123046875, |
|
"logps/rejected": -511.63507080078125, |
|
"loss": 0.5623, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.8692843914031982, |
|
"rewards/margins": 0.7691918611526489, |
|
"rewards/rejected": -2.6384763717651367, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.28526563726773096, |
|
"grad_norm": 9.051119804382324, |
|
"learning_rate": 4.4961762529687745e-06, |
|
"logits/chosen": -1.2762900590896606, |
|
"logits/rejected": -1.1670513153076172, |
|
"logps/chosen": -481.23406982421875, |
|
"logps/rejected": -555.0721435546875, |
|
"loss": 0.5154, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.2087130546569824, |
|
"rewards/margins": 0.9038770794868469, |
|
"rewards/rejected": -3.1125903129577637, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.287882753205967, |
|
"grad_norm": 7.552842617034912, |
|
"learning_rate": 4.482339865589492e-06, |
|
"logits/chosen": -1.240541934967041, |
|
"logits/rejected": -1.0851424932479858, |
|
"logps/chosen": -478.9158630371094, |
|
"logps/rejected": -492.19097900390625, |
|
"loss": 0.59, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.127556562423706, |
|
"rewards/margins": 0.5717890858650208, |
|
"rewards/rejected": -2.699345827102661, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.287882753205967, |
|
"eval_logits/chosen": -1.1099953651428223, |
|
"eval_logits/rejected": -0.9899115562438965, |
|
"eval_logps/chosen": -433.97100830078125, |
|
"eval_logps/rejected": -478.8385314941406, |
|
"eval_loss": 0.5277644991874695, |
|
"eval_rewards/accuracies": 0.7300000190734863, |
|
"eval_rewards/chosen": -1.693690299987793, |
|
"eval_rewards/margins": 0.6489914059638977, |
|
"eval_rewards/rejected": -2.342681646347046, |
|
"eval_runtime": 1596.8217, |
|
"eval_samples_per_second": 1.252, |
|
"eval_steps_per_second": 0.157, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.2904998691442031, |
|
"grad_norm": 5.962321758270264, |
|
"learning_rate": 4.468337953401909e-06, |
|
"logits/chosen": -1.3310126066207886, |
|
"logits/rejected": -1.2818472385406494, |
|
"logps/chosen": -426.1422424316406, |
|
"logps/rejected": -471.98175048828125, |
|
"loss": 0.572, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.5579837560653687, |
|
"rewards/margins": 0.5076408982276917, |
|
"rewards/rejected": -2.065624713897705, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.29311698508243916, |
|
"grad_norm": 5.4544477462768555, |
|
"learning_rate": 4.45417168556166e-06, |
|
"logits/chosen": -1.3914196491241455, |
|
"logits/rejected": -1.305474877357483, |
|
"logps/chosen": -344.81463623046875, |
|
"logps/rejected": -411.66412353515625, |
|
"loss": 0.5147, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.0368053913116455, |
|
"rewards/margins": 0.6763306856155396, |
|
"rewards/rejected": -1.7131359577178955, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.2957341010206752, |
|
"grad_norm": 12.404394149780273, |
|
"learning_rate": 4.439842244948036e-06, |
|
"logits/chosen": -1.3826755285263062, |
|
"logits/rejected": -1.251558780670166, |
|
"logps/chosen": -408.0195007324219, |
|
"logps/rejected": -476.11114501953125, |
|
"loss": 0.5433, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.4701926708221436, |
|
"rewards/margins": 0.7369771003723145, |
|
"rewards/rejected": -2.207169771194458, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.29835121695891126, |
|
"grad_norm": 28.483901977539062, |
|
"learning_rate": 4.425350828065204e-06, |
|
"logits/chosen": -1.342377781867981, |
|
"logits/rejected": -1.165569543838501, |
|
"logps/chosen": -453.5723571777344, |
|
"logps/rejected": -509.80731201171875, |
|
"loss": 0.4798, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.6950212717056274, |
|
"rewards/margins": 1.041156530380249, |
|
"rewards/rejected": -2.736177921295166, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.30096833289714736, |
|
"grad_norm": 9.420351028442383, |
|
"learning_rate": 4.410698644942303e-06, |
|
"logits/chosen": -1.3900500535964966, |
|
"logits/rejected": -1.2638782262802124, |
|
"logps/chosen": -445.68646240234375, |
|
"logps/rejected": -537.0733642578125, |
|
"loss": 0.4689, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.7342853546142578, |
|
"rewards/margins": 1.1538056135177612, |
|
"rewards/rejected": -2.8880913257598877, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.3035854488353834, |
|
"grad_norm": 11.15323543548584, |
|
"learning_rate": 4.395886919032406e-06, |
|
"logits/chosen": -1.3719291687011719, |
|
"logits/rejected": -1.2517584562301636, |
|
"logps/chosen": -444.84375, |
|
"logps/rejected": -515.7125244140625, |
|
"loss": 0.5478, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.8111295700073242, |
|
"rewards/margins": 0.9737447500228882, |
|
"rewards/rejected": -2.784874439239502, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.30620256477361946, |
|
"grad_norm": 11.627602577209473, |
|
"learning_rate": 4.380916887110366e-06, |
|
"logits/chosen": -1.5677311420440674, |
|
"logits/rejected": -1.4189679622650146, |
|
"logps/chosen": -418.11309814453125, |
|
"logps/rejected": -474.2850646972656, |
|
"loss": 0.5335, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5903016328811646, |
|
"rewards/margins": 0.9656845331192017, |
|
"rewards/rejected": -2.555985927581787, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.30881968071185556, |
|
"grad_norm": 10.718832969665527, |
|
"learning_rate": 4.365789799169539e-06, |
|
"logits/chosen": -1.2974934577941895, |
|
"logits/rejected": -1.3446776866912842, |
|
"logps/chosen": -414.12152099609375, |
|
"logps/rejected": -495.92340087890625, |
|
"loss": 0.5394, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.6758350133895874, |
|
"rewards/margins": 0.7650316953659058, |
|
"rewards/rejected": -2.440866708755493, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.3114367966500916, |
|
"grad_norm": 8.005572319030762, |
|
"learning_rate": 4.350506918317416e-06, |
|
"logits/chosen": -1.4092941284179688, |
|
"logits/rejected": -1.264432668685913, |
|
"logps/chosen": -408.0471496582031, |
|
"logps/rejected": -483.52960205078125, |
|
"loss": 0.5148, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.605602502822876, |
|
"rewards/margins": 0.8261173963546753, |
|
"rewards/rejected": -2.431720018386841, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"grad_norm": 7.756717681884766, |
|
"learning_rate": 4.335069520670149e-06, |
|
"logits/chosen": -1.2672417163848877, |
|
"logits/rejected": -1.1932761669158936, |
|
"logps/chosen": -391.0654602050781, |
|
"logps/rejected": -470.7328186035156, |
|
"loss": 0.5724, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.6141713857650757, |
|
"rewards/margins": 0.7146965265274048, |
|
"rewards/rejected": -2.3288679122924805, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"eval_logits/chosen": -1.2473385334014893, |
|
"eval_logits/rejected": -1.1348552703857422, |
|
"eval_logps/chosen": -420.0863342285156, |
|
"eval_logps/rejected": -485.28948974609375, |
|
"eval_loss": 0.5071337819099426, |
|
"eval_rewards/accuracies": 0.7379999756813049, |
|
"eval_rewards/chosen": -1.5548440217971802, |
|
"eval_rewards/margins": 0.8523474335670471, |
|
"eval_rewards/rejected": -2.407191514968872, |
|
"eval_runtime": 1597.3419, |
|
"eval_samples_per_second": 1.252, |
|
"eval_steps_per_second": 0.157, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.3166710285265637, |
|
"grad_norm": 5.270259857177734, |
|
"learning_rate": 4.319478895246e-06, |
|
"logits/chosen": -1.3534616231918335, |
|
"logits/rejected": -1.1892000436782837, |
|
"logps/chosen": -396.1217346191406, |
|
"logps/rejected": -454.44378662109375, |
|
"loss": 0.5071, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.490412950515747, |
|
"rewards/margins": 0.8302758932113647, |
|
"rewards/rejected": -2.3206887245178223, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.3192881444647998, |
|
"grad_norm": 12.79388427734375, |
|
"learning_rate": 4.303736343857704e-06, |
|
"logits/chosen": -1.3265999555587769, |
|
"logits/rejected": -1.2219207286834717, |
|
"logps/chosen": -440.6163024902344, |
|
"logps/rejected": -568.152099609375, |
|
"loss": 0.4921, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.835211992263794, |
|
"rewards/margins": 1.1568537950515747, |
|
"rewards/rejected": -2.992065906524658, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.32190526040303585, |
|
"grad_norm": 13.922798156738281, |
|
"learning_rate": 4.287843181003772e-06, |
|
"logits/chosen": -1.3599532842636108, |
|
"logits/rejected": -1.237168550491333, |
|
"logps/chosen": -486.5572204589844, |
|
"logps/rejected": -522.0225219726562, |
|
"loss": 0.5566, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.9279435873031616, |
|
"rewards/margins": 0.8177730441093445, |
|
"rewards/rejected": -2.7457165718078613, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.3245223763412719, |
|
"grad_norm": 7.278261184692383, |
|
"learning_rate": 4.27180073375873e-06, |
|
"logits/chosen": -1.3576252460479736, |
|
"logits/rejected": -1.2608816623687744, |
|
"logps/chosen": -445.9983825683594, |
|
"logps/rejected": -495.5048828125, |
|
"loss": 0.4942, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.587226152420044, |
|
"rewards/margins": 0.910873293876648, |
|
"rewards/rejected": -2.4980995655059814, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.327139492279508, |
|
"grad_norm": 8.15560531616211, |
|
"learning_rate": 4.255610341662304e-06, |
|
"logits/chosen": -1.3735462427139282, |
|
"logits/rejected": -1.210055947303772, |
|
"logps/chosen": -444.36285400390625, |
|
"logps/rejected": -524.8583374023438, |
|
"loss": 0.5542, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.8914082050323486, |
|
"rewards/margins": 1.0044190883636475, |
|
"rewards/rejected": -2.895827293395996, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.32975660821774405, |
|
"grad_norm": 10.175187110900879, |
|
"learning_rate": 4.2392733566075764e-06, |
|
"logits/chosen": -1.4199110269546509, |
|
"logits/rejected": -1.317662000656128, |
|
"logps/chosen": -492.12139892578125, |
|
"logps/rejected": -561.2523803710938, |
|
"loss": 0.5923, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.361861228942871, |
|
"rewards/margins": 0.8468947410583496, |
|
"rewards/rejected": -3.2087559700012207, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.3323737241559801, |
|
"grad_norm": 8.98975944519043, |
|
"learning_rate": 4.2227911427280975e-06, |
|
"logits/chosen": -1.5209752321243286, |
|
"logits/rejected": -1.3899322748184204, |
|
"logps/chosen": -398.7652282714844, |
|
"logps/rejected": -442.14892578125, |
|
"loss": 0.5594, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.4862077236175537, |
|
"rewards/margins": 0.761127769947052, |
|
"rewards/rejected": -2.247335195541382, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.33499084009421615, |
|
"grad_norm": 9.442636489868164, |
|
"learning_rate": 4.206165076283983e-06, |
|
"logits/chosen": -1.5363355875015259, |
|
"logits/rejected": -1.428397536277771, |
|
"logps/chosen": -410.4297790527344, |
|
"logps/rejected": -474.6888122558594, |
|
"loss": 0.5101, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6462541818618774, |
|
"rewards/margins": 0.8527911305427551, |
|
"rewards/rejected": -2.4990451335906982, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.33760795603245225, |
|
"grad_norm": 9.517970085144043, |
|
"learning_rate": 4.189396545546995e-06, |
|
"logits/chosen": -1.4206923246383667, |
|
"logits/rejected": -1.3242676258087158, |
|
"logps/chosen": -455.42474365234375, |
|
"logps/rejected": -532.8533325195312, |
|
"loss": 0.5055, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.0629706382751465, |
|
"rewards/margins": 0.9739512205123901, |
|
"rewards/rejected": -3.036921977996826, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.3402250719706883, |
|
"grad_norm": 11.92896842956543, |
|
"learning_rate": 4.172486950684627e-06, |
|
"logits/chosen": -1.458961844444275, |
|
"logits/rejected": -1.4081146717071533, |
|
"logps/chosen": -429.62548828125, |
|
"logps/rejected": -503.53582763671875, |
|
"loss": 0.5457, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.7603120803833008, |
|
"rewards/margins": 0.771033525466919, |
|
"rewards/rejected": -2.5313456058502197, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.3402250719706883, |
|
"eval_logits/chosen": -1.3403185606002808, |
|
"eval_logits/rejected": -1.242436408996582, |
|
"eval_logps/chosen": -440.03851318359375, |
|
"eval_logps/rejected": -507.2138366699219, |
|
"eval_loss": 0.5013459920883179, |
|
"eval_rewards/accuracies": 0.7434999942779541, |
|
"eval_rewards/chosen": -1.7543656826019287, |
|
"eval_rewards/margins": 0.8720693588256836, |
|
"eval_rewards/rejected": -2.6264350414276123, |
|
"eval_runtime": 1597.2746, |
|
"eval_samples_per_second": 1.252, |
|
"eval_steps_per_second": 0.157, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.34284218790892435, |
|
"grad_norm": 7.771608352661133, |
|
"learning_rate": 4.155437703643182e-06, |
|
"logits/chosen": -1.5240622758865356, |
|
"logits/rejected": -1.3838030099868774, |
|
"logps/chosen": -408.34857177734375, |
|
"logps/rejected": -462.7867736816406, |
|
"loss": 0.5133, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.6171270608901978, |
|
"rewards/margins": 0.8352136611938477, |
|
"rewards/rejected": -2.452340602874756, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.34545930384716045, |
|
"grad_norm": 13.282800674438477, |
|
"learning_rate": 4.138250228029882e-06, |
|
"logits/chosen": -1.4314453601837158, |
|
"logits/rejected": -1.3674726486206055, |
|
"logps/chosen": -449.40045166015625, |
|
"logps/rejected": -546.0065307617188, |
|
"loss": 0.5029, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.9253730773925781, |
|
"rewards/margins": 0.9314772486686707, |
|
"rewards/rejected": -2.8568501472473145, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.3480764197853965, |
|
"grad_norm": 5.933595180511475, |
|
"learning_rate": 4.120925958993994e-06, |
|
"logits/chosen": -1.3017045259475708, |
|
"logits/rejected": -1.251571536064148, |
|
"logps/chosen": -431.25537109375, |
|
"logps/rejected": -517.0537109375, |
|
"loss": 0.557, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.9745893478393555, |
|
"rewards/margins": 0.9014847874641418, |
|
"rewards/rejected": -2.8760738372802734, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.35069353572363254, |
|
"grad_norm": 11.526862144470215, |
|
"learning_rate": 4.103466343106999e-06, |
|
"logits/chosen": -1.3678683042526245, |
|
"logits/rejected": -1.2736116647720337, |
|
"logps/chosen": -471.77728271484375, |
|
"logps/rejected": -537.7550048828125, |
|
"loss": 0.5154, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.0238394737243652, |
|
"rewards/margins": 0.9198586344718933, |
|
"rewards/rejected": -2.9436984062194824, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.35331065166186865, |
|
"grad_norm": 10.23589038848877, |
|
"learning_rate": 4.085872838241797e-06, |
|
"logits/chosen": -1.3284608125686646, |
|
"logits/rejected": -1.217164397239685, |
|
"logps/chosen": -450.1647033691406, |
|
"logps/rejected": -507.2977600097656, |
|
"loss": 0.569, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.8795936107635498, |
|
"rewards/margins": 0.7713474631309509, |
|
"rewards/rejected": -2.6509411334991455, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.3559277676001047, |
|
"grad_norm": 9.470162391662598, |
|
"learning_rate": 4.06814691345098e-06, |
|
"logits/chosen": -1.387274980545044, |
|
"logits/rejected": -1.2676749229431152, |
|
"logps/chosen": -405.6870422363281, |
|
"logps/rejected": -467.76708984375, |
|
"loss": 0.4982, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.5023095607757568, |
|
"rewards/margins": 0.8411477208137512, |
|
"rewards/rejected": -2.3434574604034424, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.35854488353834074, |
|
"grad_norm": 11.479103088378906, |
|
"learning_rate": 4.050290048844171e-06, |
|
"logits/chosen": -1.4606144428253174, |
|
"logits/rejected": -1.400508165359497, |
|
"logps/chosen": -422.5201721191406, |
|
"logps/rejected": -496.3780212402344, |
|
"loss": 0.5403, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.5399492979049683, |
|
"rewards/margins": 0.7900384068489075, |
|
"rewards/rejected": -2.3299877643585205, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.3611619994765768, |
|
"grad_norm": 9.197821617126465, |
|
"learning_rate": 4.032303735464422e-06, |
|
"logits/chosen": -1.6123807430267334, |
|
"logits/rejected": -1.459542989730835, |
|
"logps/chosen": -428.04095458984375, |
|
"logps/rejected": -507.18243408203125, |
|
"loss": 0.4608, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.5614588260650635, |
|
"rewards/margins": 1.0217015743255615, |
|
"rewards/rejected": -2.583160161972046, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.3637791154148129, |
|
"grad_norm": 13.988523483276367, |
|
"learning_rate": 4.014189475163727e-06, |
|
"logits/chosen": -1.4627307653427124, |
|
"logits/rejected": -1.3992432355880737, |
|
"logps/chosen": -429.89642333984375, |
|
"logps/rejected": -529.4677124023438, |
|
"loss": 0.4813, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.753618836402893, |
|
"rewards/margins": 1.0768356323242188, |
|
"rewards/rejected": -2.8304543495178223, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.36639623135304894, |
|
"grad_norm": 14.094236373901367, |
|
"learning_rate": 3.995948780477605e-06, |
|
"logits/chosen": -1.6445674896240234, |
|
"logits/rejected": -1.5534647703170776, |
|
"logps/chosen": -442.5335388183594, |
|
"logps/rejected": -506.78253173828125, |
|
"loss": 0.5423, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.7385400533676147, |
|
"rewards/margins": 0.8896828889846802, |
|
"rewards/rejected": -2.628222942352295, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.36639623135304894, |
|
"eval_logits/chosen": -1.586852788925171, |
|
"eval_logits/rejected": -1.5062702894210815, |
|
"eval_logps/chosen": -428.40972900390625, |
|
"eval_logps/rejected": -505.70770263671875, |
|
"eval_loss": 0.5131703019142151, |
|
"eval_rewards/accuracies": 0.7210000157356262, |
|
"eval_rewards/chosen": -1.6380778551101685, |
|
"eval_rewards/margins": 0.9732955098152161, |
|
"eval_rewards/rejected": -2.6113734245300293, |
|
"eval_runtime": 1597.5064, |
|
"eval_samples_per_second": 1.252, |
|
"eval_steps_per_second": 0.156, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.369013347291285, |
|
"grad_norm": 13.169591903686523, |
|
"learning_rate": 3.977583174498816e-06, |
|
"logits/chosen": -1.6033601760864258, |
|
"logits/rejected": -1.529076099395752, |
|
"logps/chosen": -446.4644470214844, |
|
"logps/rejected": -562.0938720703125, |
|
"loss": 0.4006, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8283302783966064, |
|
"rewards/margins": 1.3763213157653809, |
|
"rewards/rejected": -3.2046515941619873, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.3716304632295211, |
|
"grad_norm": 12.735097885131836, |
|
"learning_rate": 3.959094190750172e-06, |
|
"logits/chosen": -1.6629726886749268, |
|
"logits/rejected": -1.5727919340133667, |
|
"logps/chosen": -460.08013916015625, |
|
"logps/rejected": -542.1484375, |
|
"loss": 0.5034, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7491388320922852, |
|
"rewards/margins": 1.091073989868164, |
|
"rewards/rejected": -2.8402130603790283, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.37424757916775714, |
|
"grad_norm": 9.543456077575684, |
|
"learning_rate": 3.9404833730564975e-06, |
|
"logits/chosen": -1.5923559665679932, |
|
"logits/rejected": -1.5246838331222534, |
|
"logps/chosen": -397.0897216796875, |
|
"logps/rejected": -477.6280212402344, |
|
"loss": 0.5221, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.4408881664276123, |
|
"rewards/margins": 0.9207174181938171, |
|
"rewards/rejected": -2.361605405807495, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.3768646951059932, |
|
"grad_norm": 15.872808456420898, |
|
"learning_rate": 3.921752275415712e-06, |
|
"logits/chosen": -1.6502765417099, |
|
"logits/rejected": -1.5927629470825195, |
|
"logps/chosen": -403.17657470703125, |
|
"logps/rejected": -513.6209106445312, |
|
"loss": 0.4272, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.527881145477295, |
|
"rewards/margins": 1.3093104362487793, |
|
"rewards/rejected": -2.837191581726074, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.37948181104422923, |
|
"grad_norm": 13.932242393493652, |
|
"learning_rate": 3.902902461869079e-06, |
|
"logits/chosen": -1.6036418676376343, |
|
"logits/rejected": -1.5119550228118896, |
|
"logps/chosen": -423.63275146484375, |
|
"logps/rejected": -522.7612915039062, |
|
"loss": 0.5484, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.8413972854614258, |
|
"rewards/margins": 1.1561634540557861, |
|
"rewards/rejected": -2.997560501098633, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.38209892698246534, |
|
"grad_norm": 17.43979835510254, |
|
"learning_rate": 3.883935506370605e-06, |
|
"logits/chosen": -1.6155637502670288, |
|
"logits/rejected": -1.546623706817627, |
|
"logps/chosen": -433.8269958496094, |
|
"logps/rejected": -493.6173400878906, |
|
"loss": 0.5628, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8069992065429688, |
|
"rewards/margins": 0.8752773404121399, |
|
"rewards/rejected": -2.682276487350464, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.3847160429207014, |
|
"grad_norm": 6.709349155426025, |
|
"learning_rate": 3.864852992655617e-06, |
|
"logits/chosen": -1.6773513555526733, |
|
"logits/rejected": -1.6156046390533447, |
|
"logps/chosen": -408.98004150390625, |
|
"logps/rejected": -496.5398864746094, |
|
"loss": 0.4636, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.6042149066925049, |
|
"rewards/margins": 0.9984095692634583, |
|
"rewards/rejected": -2.6026244163513184, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.38733315885893743, |
|
"grad_norm": 5.378371715545654, |
|
"learning_rate": 3.845656514108516e-06, |
|
"logits/chosen": -1.6842008829116821, |
|
"logits/rejected": -1.5901457071304321, |
|
"logps/chosen": -467.2499084472656, |
|
"logps/rejected": -508.35491943359375, |
|
"loss": 0.5207, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.0820114612579346, |
|
"rewards/margins": 0.9600709080696106, |
|
"rewards/rejected": -3.0420823097229004, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.38995027479717354, |
|
"grad_norm": 14.073667526245117, |
|
"learning_rate": 3.826347673629738e-06, |
|
"logits/chosen": -1.6913728713989258, |
|
"logits/rejected": -1.5630801916122437, |
|
"logps/chosen": -425.8887634277344, |
|
"logps/rejected": -511.43194580078125, |
|
"loss": 0.4841, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.7265421152114868, |
|
"rewards/margins": 1.118648648262024, |
|
"rewards/rejected": -2.84519100189209, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.3925673907354096, |
|
"grad_norm": 15.713250160217285, |
|
"learning_rate": 3.8069280835019062e-06, |
|
"logits/chosen": -1.631744384765625, |
|
"logits/rejected": -1.5060975551605225, |
|
"logps/chosen": -430.0010681152344, |
|
"logps/rejected": -537.3772583007812, |
|
"loss": 0.4492, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.657666563987732, |
|
"rewards/margins": 1.2653546333312988, |
|
"rewards/rejected": -2.9230213165283203, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3925673907354096, |
|
"eval_logits/chosen": -1.5949609279632568, |
|
"eval_logits/rejected": -1.4971818923950195, |
|
"eval_logps/chosen": -423.41754150390625, |
|
"eval_logps/rejected": -503.4827575683594, |
|
"eval_loss": 0.5122300386428833, |
|
"eval_rewards/accuracies": 0.7260000109672546, |
|
"eval_rewards/chosen": -1.5881556272506714, |
|
"eval_rewards/margins": 1.0009682178497314, |
|
"eval_rewards/rejected": -2.5891237258911133, |
|
"eval_runtime": 1597.6931, |
|
"eval_samples_per_second": 1.252, |
|
"eval_steps_per_second": 0.156, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.39518450667364563, |
|
"grad_norm": 14.919211387634277, |
|
"learning_rate": 3.7873993652552077e-06, |
|
"logits/chosen": -1.6476099491119385, |
|
"logits/rejected": -1.5861533880233765, |
|
"logps/chosen": -370.7307434082031, |
|
"logps/rejected": -437.010986328125, |
|
"loss": 0.5933, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.370052695274353, |
|
"rewards/margins": 0.7218869924545288, |
|
"rewards/rejected": -2.091939926147461, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.39780162261188173, |
|
"grad_norm": 6.893444061279297, |
|
"learning_rate": 3.7677631495319953e-06, |
|
"logits/chosen": -1.7386844158172607, |
|
"logits/rejected": -1.6610181331634521, |
|
"logps/chosen": -358.55792236328125, |
|
"logps/rejected": -410.8779296875, |
|
"loss": 0.5166, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9495410919189453, |
|
"rewards/margins": 0.7035711407661438, |
|
"rewards/rejected": -1.6531124114990234, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.4004187385501178, |
|
"grad_norm": 10.669229507446289, |
|
"learning_rate": 3.748021075950633e-06, |
|
"logits/chosen": -1.6956592798233032, |
|
"logits/rejected": -1.634338617324829, |
|
"logps/chosen": -401.1066589355469, |
|
"logps/rejected": -450.43701171875, |
|
"loss": 0.5677, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.2650973796844482, |
|
"rewards/margins": 0.5999857187271118, |
|
"rewards/rejected": -1.8650833368301392, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.40303585448835383, |
|
"grad_norm": 13.775323867797852, |
|
"learning_rate": 3.7281747929685824e-06, |
|
"logits/chosen": -1.4220095872879028, |
|
"logits/rejected": -1.3044965267181396, |
|
"logps/chosen": -428.1224060058594, |
|
"logps/rejected": -506.9508361816406, |
|
"loss": 0.5483, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.9053666591644287, |
|
"rewards/margins": 0.9458476305007935, |
|
"rewards/rejected": -2.8512144088745117, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.4056529704265899, |
|
"grad_norm": 14.96325397491455, |
|
"learning_rate": 3.7082259577447604e-06, |
|
"logits/chosen": -1.4775934219360352, |
|
"logits/rejected": -1.3828151226043701, |
|
"logps/chosen": -484.23712158203125, |
|
"logps/rejected": -562.1600341796875, |
|
"loss": 0.4745, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.1311516761779785, |
|
"rewards/margins": 0.9796358942985535, |
|
"rewards/rejected": -3.1107876300811768, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.408270086364826, |
|
"grad_norm": 12.222273826599121, |
|
"learning_rate": 3.6881762360011688e-06, |
|
"logits/chosen": -1.5065914392471313, |
|
"logits/rejected": -1.3439867496490479, |
|
"logps/chosen": -507.56646728515625, |
|
"logps/rejected": -579.8803100585938, |
|
"loss": 0.5187, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.261667013168335, |
|
"rewards/margins": 1.0928138494491577, |
|
"rewards/rejected": -3.3544812202453613, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.410887202303062, |
|
"grad_norm": 9.984075546264648, |
|
"learning_rate": 3.668027301883802e-06, |
|
"logits/chosen": -1.5400466918945312, |
|
"logits/rejected": -1.42914617061615, |
|
"logps/chosen": -428.8944396972656, |
|
"logps/rejected": -516.1829833984375, |
|
"loss": 0.4994, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7761850357055664, |
|
"rewards/margins": 1.021864891052246, |
|
"rewards/rejected": -2.7980499267578125, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.4135043182412981, |
|
"grad_norm": 8.096105575561523, |
|
"learning_rate": 3.64778083782286e-06, |
|
"logits/chosen": -1.4683252573013306, |
|
"logits/rejected": -1.4568402767181396, |
|
"logps/chosen": -420.28662109375, |
|
"logps/rejected": -531.93310546875, |
|
"loss": 0.5363, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.67800772190094, |
|
"rewards/margins": 0.8264617919921875, |
|
"rewards/rejected": -2.504469394683838, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.4161214341795342, |
|
"grad_norm": 8.454867362976074, |
|
"learning_rate": 3.627438534392268e-06, |
|
"logits/chosen": -1.563232421875, |
|
"logits/rejected": -1.5413362979888916, |
|
"logps/chosen": -382.3138122558594, |
|
"logps/rejected": -473.07427978515625, |
|
"loss": 0.4969, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.4439641237258911, |
|
"rewards/margins": 0.8402025103569031, |
|
"rewards/rejected": -2.2841668128967285, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"grad_norm": 8.18990421295166, |
|
"learning_rate": 3.607002090168506e-06, |
|
"logits/chosen": -1.374459981918335, |
|
"logits/rejected": -1.3057047128677368, |
|
"logps/chosen": -437.67437744140625, |
|
"logps/rejected": -498.42022705078125, |
|
"loss": 0.5491, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.7346376180648804, |
|
"rewards/margins": 0.8208419680595398, |
|
"rewards/rejected": -2.5554797649383545, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"eval_logits/chosen": -1.2524830102920532, |
|
"eval_logits/rejected": -1.129266381263733, |
|
"eval_logps/chosen": -434.1912536621094, |
|
"eval_logps/rejected": -515.1350708007812, |
|
"eval_loss": 0.49564477801322937, |
|
"eval_rewards/accuracies": 0.7394999861717224, |
|
"eval_rewards/chosen": -1.6958929300308228, |
|
"eval_rewards/margins": 1.0097541809082031, |
|
"eval_rewards/rejected": -2.7056469917297363, |
|
"eval_runtime": 1597.9006, |
|
"eval_samples_per_second": 1.252, |
|
"eval_steps_per_second": 0.156, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.4213556660560063, |
|
"grad_norm": 5.6236395835876465, |
|
"learning_rate": 3.586473211588787e-06, |
|
"logits/chosen": -1.3308565616607666, |
|
"logits/rejected": -1.2487056255340576, |
|
"logps/chosen": -416.17218017578125, |
|
"logps/rejected": -541.766357421875, |
|
"loss": 0.4321, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.7147547006607056, |
|
"rewards/margins": 1.190341591835022, |
|
"rewards/rejected": -2.9050965309143066, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.4239727819942423, |
|
"grad_norm": 17.16832160949707, |
|
"learning_rate": 3.5658536128085623e-06, |
|
"logits/chosen": -1.3294860124588013, |
|
"logits/rejected": -1.1633020639419556, |
|
"logps/chosen": -514.1262817382812, |
|
"logps/rejected": -586.5099487304688, |
|
"loss": 0.5903, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.5135364532470703, |
|
"rewards/margins": 1.0195398330688477, |
|
"rewards/rejected": -3.533076047897339, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.4265898979324784, |
|
"grad_norm": 9.197494506835938, |
|
"learning_rate": 3.545145015558399e-06, |
|
"logits/chosen": -1.1497808694839478, |
|
"logits/rejected": -1.1394188404083252, |
|
"logps/chosen": -455.3041076660156, |
|
"logps/rejected": -552.9832763671875, |
|
"loss": 0.5097, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.2378275394439697, |
|
"rewards/margins": 1.1191097497940063, |
|
"rewards/rejected": -3.3569374084472656, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.42920701387071447, |
|
"grad_norm": 5.227996826171875, |
|
"learning_rate": 3.5243491490002056e-06, |
|
"logits/chosen": -1.344987154006958, |
|
"logits/rejected": -1.2656126022338867, |
|
"logps/chosen": -442.0404357910156, |
|
"logps/rejected": -519.5951538085938, |
|
"loss": 0.5867, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.8838779926300049, |
|
"rewards/margins": 0.8285346031188965, |
|
"rewards/rejected": -2.7124123573303223, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.4318241298089505, |
|
"grad_norm": 6.703832149505615, |
|
"learning_rate": 3.503467749582857e-06, |
|
"logits/chosen": -1.4452259540557861, |
|
"logits/rejected": -1.273822546005249, |
|
"logps/chosen": -400.95703125, |
|
"logps/rejected": -432.19775390625, |
|
"loss": 0.5857, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.4979441165924072, |
|
"rewards/margins": 0.6527599096298218, |
|
"rewards/rejected": -2.1507039070129395, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.4344412457471866, |
|
"grad_norm": 11.32523250579834, |
|
"learning_rate": 3.4825025608971947e-06, |
|
"logits/chosen": -1.386530876159668, |
|
"logits/rejected": -1.324684500694275, |
|
"logps/chosen": -355.1617736816406, |
|
"logps/rejected": -427.02001953125, |
|
"loss": 0.5318, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.3329139947891235, |
|
"rewards/margins": 0.6805238723754883, |
|
"rewards/rejected": -2.0134379863739014, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.43705836168542267, |
|
"grad_norm": 7.073009490966797, |
|
"learning_rate": 3.4614553335304407e-06, |
|
"logits/chosen": -1.3409563302993774, |
|
"logits/rejected": -1.1487702131271362, |
|
"logps/chosen": -427.2156677246094, |
|
"logps/rejected": -493.6348571777344, |
|
"loss": 0.4728, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.584120273590088, |
|
"rewards/margins": 0.9814236760139465, |
|
"rewards/rejected": -2.5655438899993896, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.4396754776236587, |
|
"grad_norm": 11.820454597473145, |
|
"learning_rate": 3.4403278249200222e-06, |
|
"logits/chosen": -1.262634038925171, |
|
"logits/rejected": -1.0713304281234741, |
|
"logps/chosen": -481.8815002441406, |
|
"logps/rejected": -564.7189331054688, |
|
"loss": 0.4399, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.9938242435455322, |
|
"rewards/margins": 1.2474219799041748, |
|
"rewards/rejected": -3.241246461868286, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.44229259356189476, |
|
"grad_norm": 19.43402671813965, |
|
"learning_rate": 3.4191217992068293e-06, |
|
"logits/chosen": -1.2867403030395508, |
|
"logits/rejected": -1.1040995121002197, |
|
"logps/chosen": -548.404296875, |
|
"logps/rejected": -614.572998046875, |
|
"loss": 0.5467, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.703883647918701, |
|
"rewards/margins": 1.1177372932434082, |
|
"rewards/rejected": -3.821620464324951, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.44490970950013087, |
|
"grad_norm": 18.481212615966797, |
|
"learning_rate": 3.3978390270879056e-06, |
|
"logits/chosen": -1.169626235961914, |
|
"logits/rejected": -1.0800492763519287, |
|
"logps/chosen": -525.806884765625, |
|
"logps/rejected": -649.9021606445312, |
|
"loss": 0.5408, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.1594154834747314, |
|
"rewards/margins": 1.1724364757537842, |
|
"rewards/rejected": -4.331852436065674, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.44490970950013087, |
|
"eval_logits/chosen": -1.1773556470870972, |
|
"eval_logits/rejected": -1.0519527196884155, |
|
"eval_logps/chosen": -568.2142333984375, |
|
"eval_logps/rejected": -668.4868774414062, |
|
"eval_loss": 0.5111355781555176, |
|
"eval_rewards/accuracies": 0.7304999828338623, |
|
"eval_rewards/chosen": -3.036123275756836, |
|
"eval_rewards/margins": 1.203041672706604, |
|
"eval_rewards/rejected": -4.23916482925415, |
|
"eval_runtime": 1598.229, |
|
"eval_samples_per_second": 1.251, |
|
"eval_steps_per_second": 0.156, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.4475268254383669, |
|
"grad_norm": 17.149526596069336, |
|
"learning_rate": 3.3764812856685995e-06, |
|
"logits/chosen": -1.3090331554412842, |
|
"logits/rejected": -1.292729139328003, |
|
"logps/chosen": -502.16497802734375, |
|
"logps/rejected": -622.8278198242188, |
|
"loss": 0.5266, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.7771124839782715, |
|
"rewards/margins": 1.0713131427764893, |
|
"rewards/rejected": -3.8484256267547607, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.45014394137660296, |
|
"grad_norm": 12.321800231933594, |
|
"learning_rate": 3.3550503583141726e-06, |
|
"logits/chosen": -1.5342642068862915, |
|
"logits/rejected": -1.4036284685134888, |
|
"logps/chosen": -507.0011291503906, |
|
"logps/rejected": -602.5591430664062, |
|
"loss": 0.497, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.3824760913848877, |
|
"rewards/margins": 1.1078113317489624, |
|
"rewards/rejected": -3.4902870655059814, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.45276105731483907, |
|
"grad_norm": 7.4277191162109375, |
|
"learning_rate": 3.3335480345008907e-06, |
|
"logits/chosen": -1.5070991516113281, |
|
"logits/rejected": -1.4292500019073486, |
|
"logps/chosen": -427.82818603515625, |
|
"logps/rejected": -509.09552001953125, |
|
"loss": 0.4635, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6776756048202515, |
|
"rewards/margins": 1.0597718954086304, |
|
"rewards/rejected": -2.737447500228882, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.4553781732530751, |
|
"grad_norm": 9.2946195602417, |
|
"learning_rate": 3.3119761096666055e-06, |
|
"logits/chosen": -1.5444905757904053, |
|
"logits/rejected": -1.4140485525131226, |
|
"logps/chosen": -468.56182861328125, |
|
"logps/rejected": -515.2850341796875, |
|
"loss": 0.5735, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.9366995096206665, |
|
"rewards/margins": 0.7699880599975586, |
|
"rewards/rejected": -2.7066876888275146, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.45799528919131116, |
|
"grad_norm": 7.504373073577881, |
|
"learning_rate": 3.290336385060832e-06, |
|
"logits/chosen": -1.724323034286499, |
|
"logits/rejected": -1.5582685470581055, |
|
"logps/chosen": -436.670166015625, |
|
"logps/rejected": -503.16705322265625, |
|
"loss": 0.5256, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.8786998987197876, |
|
"rewards/margins": 0.8662854433059692, |
|
"rewards/rejected": -2.7449851036071777, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.46061240512954726, |
|
"grad_norm": 11.6707181930542, |
|
"learning_rate": 3.268630667594348e-06, |
|
"logits/chosen": -1.552310824394226, |
|
"logits/rejected": -1.5216782093048096, |
|
"logps/chosen": -447.37835693359375, |
|
"logps/rejected": -520.7014770507812, |
|
"loss": 0.507, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.9179880619049072, |
|
"rewards/margins": 0.9757116436958313, |
|
"rewards/rejected": -2.8936996459960938, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.4632295210677833, |
|
"grad_norm": 10.357820510864258, |
|
"learning_rate": 3.2468607696883147e-06, |
|
"logits/chosen": -1.6096198558807373, |
|
"logits/rejected": -1.5589017868041992, |
|
"logps/chosen": -452.2684631347656, |
|
"logps/rejected": -553.5985717773438, |
|
"loss": 0.4902, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.9917962551116943, |
|
"rewards/margins": 0.9700411558151245, |
|
"rewards/rejected": -2.9618372917175293, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.46584663700601936, |
|
"grad_norm": 8.833939552307129, |
|
"learning_rate": 3.225028509122944e-06, |
|
"logits/chosen": -1.7435325384140015, |
|
"logits/rejected": -1.6519057750701904, |
|
"logps/chosen": -414.03179931640625, |
|
"logps/rejected": -483.228515625, |
|
"loss": 0.5437, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.7762641906738281, |
|
"rewards/margins": 0.8044807314872742, |
|
"rewards/rejected": -2.580744504928589, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.4684637529442554, |
|
"grad_norm": 9.815378189086914, |
|
"learning_rate": 3.2031357088857083e-06, |
|
"logits/chosen": -1.6989177465438843, |
|
"logits/rejected": -1.6307258605957031, |
|
"logps/chosen": -467.5003967285156, |
|
"logps/rejected": -558.3304443359375, |
|
"loss": 0.5356, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.923940896987915, |
|
"rewards/margins": 0.9426455497741699, |
|
"rewards/rejected": -2.866586446762085, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.4710808688824915, |
|
"grad_norm": 10.215496063232422, |
|
"learning_rate": 3.181184197019127e-06, |
|
"logits/chosen": -1.4613163471221924, |
|
"logits/rejected": -1.3699003458023071, |
|
"logps/chosen": -425.10430908203125, |
|
"logps/rejected": -576.7496948242188, |
|
"loss": 0.4705, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.9493353366851807, |
|
"rewards/margins": 1.254817247390747, |
|
"rewards/rejected": -3.2041525840759277, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.4710808688824915, |
|
"eval_logits/chosen": -1.4508103132247925, |
|
"eval_logits/rejected": -1.3478518724441528, |
|
"eval_logps/chosen": -476.96630859375, |
|
"eval_logps/rejected": -563.5121459960938, |
|
"eval_loss": 0.49488988518714905, |
|
"eval_rewards/accuracies": 0.7434999942779541, |
|
"eval_rewards/chosen": -2.123643636703491, |
|
"eval_rewards/margins": 1.0657742023468018, |
|
"eval_rewards/rejected": -3.189417839050293, |
|
"eval_runtime": 1598.3976, |
|
"eval_samples_per_second": 1.251, |
|
"eval_steps_per_second": 0.156, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.47369798482072756, |
|
"grad_norm": 19.882471084594727, |
|
"learning_rate": 3.159175806468126e-06, |
|
"logits/chosen": -1.4092731475830078, |
|
"logits/rejected": -1.2594877481460571, |
|
"logps/chosen": -483.1397399902344, |
|
"logps/rejected": -564.5753173828125, |
|
"loss": 0.4771, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.345776081085205, |
|
"rewards/margins": 1.1159356832504272, |
|
"rewards/rejected": -3.4617114067077637, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.4763151007589636, |
|
"grad_norm": 13.595392227172852, |
|
"learning_rate": 3.1371123749269804e-06, |
|
"logits/chosen": -1.4161027669906616, |
|
"logits/rejected": -1.3486690521240234, |
|
"logps/chosen": -579.3997192382812, |
|
"logps/rejected": -658.3812255859375, |
|
"loss": 0.588, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.961474895477295, |
|
"rewards/margins": 0.9501525163650513, |
|
"rewards/rejected": -3.9116275310516357, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.4789322166971997, |
|
"grad_norm": 16.07357406616211, |
|
"learning_rate": 3.114995744685877e-06, |
|
"logits/chosen": -1.3573070764541626, |
|
"logits/rejected": -1.3251278400421143, |
|
"logps/chosen": -537.996826171875, |
|
"logps/rejected": -629.4193115234375, |
|
"loss": 0.5297, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.90761399269104, |
|
"rewards/margins": 1.0439841747283936, |
|
"rewards/rejected": -3.9515984058380127, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.48154933263543576, |
|
"grad_norm": 6.550096035003662, |
|
"learning_rate": 3.0928277624770743e-06, |
|
"logits/chosen": -1.5519769191741943, |
|
"logits/rejected": -1.4119082689285278, |
|
"logps/chosen": -502.74102783203125, |
|
"logps/rejected": -586.8773193359375, |
|
"loss": 0.5118, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.1812124252319336, |
|
"rewards/margins": 1.1252543926239014, |
|
"rewards/rejected": -3.306466579437256, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.4841664485736718, |
|
"grad_norm": 7.670591354370117, |
|
"learning_rate": 3.070610279320708e-06, |
|
"logits/chosen": -1.464906930923462, |
|
"logits/rejected": -1.32064950466156, |
|
"logps/chosen": -504.308349609375, |
|
"logps/rejected": -599.1265869140625, |
|
"loss": 0.4443, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.1736900806427, |
|
"rewards/margins": 1.1892703771591187, |
|
"rewards/rejected": -3.3629603385925293, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.48678356451190785, |
|
"grad_norm": 9.109015464782715, |
|
"learning_rate": 3.0483451503702264e-06, |
|
"logits/chosen": -1.249629020690918, |
|
"logits/rejected": -1.168210744857788, |
|
"logps/chosen": -578.9190063476562, |
|
"logps/rejected": -669.9890747070312, |
|
"loss": 0.5669, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -2.928832530975342, |
|
"rewards/margins": 1.1118719577789307, |
|
"rewards/rejected": -4.040704250335693, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.48940068045014395, |
|
"grad_norm": 10.058290481567383, |
|
"learning_rate": 3.0260342347574916e-06, |
|
"logits/chosen": -1.2091599702835083, |
|
"logits/rejected": -1.0555975437164307, |
|
"logps/chosen": -534.0882568359375, |
|
"logps/rejected": -660.2562255859375, |
|
"loss": 0.4437, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -2.618621826171875, |
|
"rewards/margins": 1.4526309967041016, |
|
"rewards/rejected": -4.071252822875977, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.49201779638838, |
|
"grad_norm": 10.198968887329102, |
|
"learning_rate": 3.0036793954375358e-06, |
|
"logits/chosen": -1.303740382194519, |
|
"logits/rejected": -1.155242681503296, |
|
"logps/chosen": -507.42529296875, |
|
"logps/rejected": -583.2550048828125, |
|
"loss": 0.4895, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.374905824661255, |
|
"rewards/margins": 1.1803052425384521, |
|
"rewards/rejected": -3.555210828781128, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.49463491232661605, |
|
"grad_norm": 10.858015060424805, |
|
"learning_rate": 2.981282499033009e-06, |
|
"logits/chosen": -1.3813010454177856, |
|
"logits/rejected": -1.2690826654434204, |
|
"logps/chosen": -486.0970153808594, |
|
"logps/rejected": -569.85888671875, |
|
"loss": 0.5031, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.0988192558288574, |
|
"rewards/margins": 1.0918813943862915, |
|
"rewards/rejected": -3.1907010078430176, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.49725202826485215, |
|
"grad_norm": 10.221612930297852, |
|
"learning_rate": 2.9588454156783163e-06, |
|
"logits/chosen": -1.4239078760147095, |
|
"logits/rejected": -1.2654526233673096, |
|
"logps/chosen": -474.64825439453125, |
|
"logps/rejected": -587.3582763671875, |
|
"loss": 0.4447, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.9338430166244507, |
|
"rewards/margins": 1.3446595668792725, |
|
"rewards/rejected": -3.2785022258758545, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.49725202826485215, |
|
"eval_logits/chosen": -1.2950754165649414, |
|
"eval_logits/rejected": -1.1710810661315918, |
|
"eval_logps/chosen": -468.10113525390625, |
|
"eval_logps/rejected": -559.6229248046875, |
|
"eval_loss": 0.49835336208343506, |
|
"eval_rewards/accuracies": 0.7419999837875366, |
|
"eval_rewards/chosen": -2.0349912643432617, |
|
"eval_rewards/margins": 1.1155344247817993, |
|
"eval_rewards/rejected": -3.1505255699157715, |
|
"eval_runtime": 1598.3655, |
|
"eval_samples_per_second": 1.251, |
|
"eval_steps_per_second": 0.156, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.4998691442030882, |
|
"grad_norm": 12.752076148986816, |
|
"learning_rate": 2.9363700188634597e-06, |
|
"logits/chosen": -1.3872668743133545, |
|
"logits/rejected": -1.265047311782837, |
|
"logps/chosen": -489.17291259765625, |
|
"logps/rejected": -553.0140991210938, |
|
"loss": 0.5231, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.242345094680786, |
|
"rewards/margins": 1.008666753768921, |
|
"rewards/rejected": -3.251011610031128, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.5024862601413242, |
|
"grad_norm": 10.04758071899414, |
|
"learning_rate": 2.9138581852776053e-06, |
|
"logits/chosen": -1.368890404701233, |
|
"logits/rejected": -1.2567319869995117, |
|
"logps/chosen": -493.54638671875, |
|
"logps/rejected": -586.2965087890625, |
|
"loss": 0.5251, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.339406728744507, |
|
"rewards/margins": 1.0563156604766846, |
|
"rewards/rejected": -3.3957226276397705, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.5051033760795604, |
|
"grad_norm": 9.104912757873535, |
|
"learning_rate": 2.8913117946523805e-06, |
|
"logits/chosen": -1.4171912670135498, |
|
"logits/rejected": -1.2455161809921265, |
|
"logps/chosen": -490.3606872558594, |
|
"logps/rejected": -558.6239013671875, |
|
"loss": 0.4792, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.2399258613586426, |
|
"rewards/margins": 1.0637315511703491, |
|
"rewards/rejected": -3.3036580085754395, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.5077204920177963, |
|
"grad_norm": 11.299101829528809, |
|
"learning_rate": 2.8687327296049126e-06, |
|
"logits/chosen": -1.4033154249191284, |
|
"logits/rejected": -1.307733178138733, |
|
"logps/chosen": -491.1643981933594, |
|
"logps/rejected": -589.1052856445312, |
|
"loss": 0.5129, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.32118558883667, |
|
"rewards/margins": 1.0772285461425781, |
|
"rewards/rejected": -3.398413896560669, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.5103376079560324, |
|
"grad_norm": 18.843233108520508, |
|
"learning_rate": 2.8461228754806376e-06, |
|
"logits/chosen": -1.4243779182434082, |
|
"logits/rejected": -1.2675760984420776, |
|
"logps/chosen": -507.2261657714844, |
|
"logps/rejected": -582.9666748046875, |
|
"loss": 0.5307, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.3146495819091797, |
|
"rewards/margins": 1.0160276889801025, |
|
"rewards/rejected": -3.330677032470703, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.5129547238942685, |
|
"grad_norm": 9.132381439208984, |
|
"learning_rate": 2.823484120195865e-06, |
|
"logits/chosen": -1.4892734289169312, |
|
"logits/rejected": -1.310719609260559, |
|
"logps/chosen": -494.809814453125, |
|
"logps/rejected": -583.18798828125, |
|
"loss": 0.4256, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.1708030700683594, |
|
"rewards/margins": 1.2277300357818604, |
|
"rewards/rejected": -3.398533582687378, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.5155718398325045, |
|
"grad_norm": 13.682202339172363, |
|
"learning_rate": 2.8008183540801486e-06, |
|
"logits/chosen": -1.2465951442718506, |
|
"logits/rejected": -1.113993525505066, |
|
"logps/chosen": -583.8721313476562, |
|
"logps/rejected": -649.005126953125, |
|
"loss": 0.5177, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -3.103384494781494, |
|
"rewards/margins": 1.1054545640945435, |
|
"rewards/rejected": -4.208839416503906, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.5181889557707406, |
|
"grad_norm": 13.151285171508789, |
|
"learning_rate": 2.7781274697184353e-06, |
|
"logits/chosen": -1.1965068578720093, |
|
"logits/rejected": -1.2104531526565552, |
|
"logps/chosen": -526.9979248046875, |
|
"logps/rejected": -667.6734619140625, |
|
"loss": 0.5585, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.9773271083831787, |
|
"rewards/margins": 1.2037287950515747, |
|
"rewards/rejected": -4.181056022644043, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.5208060717089767, |
|
"grad_norm": 7.820137023925781, |
|
"learning_rate": 2.7554133617930397e-06, |
|
"logits/chosen": -1.4265800714492798, |
|
"logits/rejected": -1.3070918321609497, |
|
"logps/chosen": -457.6055603027344, |
|
"logps/rejected": -541.1328735351562, |
|
"loss": 0.5257, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.098328113555908, |
|
"rewards/margins": 0.9863009452819824, |
|
"rewards/rejected": -3.0846290588378906, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"grad_norm": 12.656340599060059, |
|
"learning_rate": 2.7326779269254363e-06, |
|
"logits/chosen": -1.6775627136230469, |
|
"logits/rejected": -1.5273171663284302, |
|
"logps/chosen": -465.67138671875, |
|
"logps/rejected": -513.0640869140625, |
|
"loss": 0.4561, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.7640680074691772, |
|
"rewards/margins": 1.0338157415390015, |
|
"rewards/rejected": -2.797883987426758, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"eval_logits/chosen": -1.4695755243301392, |
|
"eval_logits/rejected": -1.3557151556015015, |
|
"eval_logps/chosen": -461.283935546875, |
|
"eval_logps/rejected": -540.4462280273438, |
|
"eval_loss": 0.4929336607456207, |
|
"eval_rewards/accuracies": 0.7419999837875366, |
|
"eval_rewards/chosen": -1.9668195247650146, |
|
"eval_rewards/margins": 0.9919391870498657, |
|
"eval_rewards/rejected": -2.95875883102417, |
|
"eval_runtime": 1598.5788, |
|
"eval_samples_per_second": 1.251, |
|
"eval_steps_per_second": 0.156, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5260403035854488, |
|
"grad_norm": 10.136625289916992, |
|
"learning_rate": 2.7099230635178954e-06, |
|
"logits/chosen": -1.506194829940796, |
|
"logits/rejected": -1.4691191911697388, |
|
"logps/chosen": -475.35992431640625, |
|
"logps/rejected": -562.2814331054688, |
|
"loss": 0.5025, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.106142520904541, |
|
"rewards/margins": 0.9343917965888977, |
|
"rewards/rejected": -3.040534257888794, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.528657419523685, |
|
"grad_norm": 10.457864761352539, |
|
"learning_rate": 2.6871506715949608e-06, |
|
"logits/chosen": -1.5337005853652954, |
|
"logits/rejected": -1.3907279968261719, |
|
"logps/chosen": -485.59637451171875, |
|
"logps/rejected": -580.8443603515625, |
|
"loss": 0.4664, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.3269832134246826, |
|
"rewards/margins": 1.1330835819244385, |
|
"rewards/rejected": -3.4600670337677, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.5312745354619209, |
|
"grad_norm": 7.218365669250488, |
|
"learning_rate": 2.6643626526448063e-06, |
|
"logits/chosen": -1.4807672500610352, |
|
"logits/rejected": -1.308292031288147, |
|
"logps/chosen": -573.4922485351562, |
|
"logps/rejected": -676.8171997070312, |
|
"loss": 0.4288, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.787353038787842, |
|
"rewards/margins": 1.456699252128601, |
|
"rewards/rejected": -4.244051933288574, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.533891651400157, |
|
"grad_norm": 14.370759010314941, |
|
"learning_rate": 2.6415609094604562e-06, |
|
"logits/chosen": -1.194697380065918, |
|
"logits/rejected": -1.1315624713897705, |
|
"logps/chosen": -666.68896484375, |
|
"logps/rejected": -775.71240234375, |
|
"loss": 0.5124, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.9625601768493652, |
|
"rewards/margins": 1.2808544635772705, |
|
"rewards/rejected": -5.243414878845215, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.5365087673383931, |
|
"grad_norm": 12.651047706604004, |
|
"learning_rate": 2.618747345980904e-06, |
|
"logits/chosen": -1.1674007177352905, |
|
"logits/rejected": -0.9596608877182007, |
|
"logps/chosen": -729.123291015625, |
|
"logps/rejected": -793.8466796875, |
|
"loss": 0.547, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -4.807937145233154, |
|
"rewards/margins": 1.1525070667266846, |
|
"rewards/rejected": -5.960444450378418, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.5391258832766291, |
|
"grad_norm": 6.089327335357666, |
|
"learning_rate": 2.595923867132136e-06, |
|
"logits/chosen": -1.2956593036651611, |
|
"logits/rejected": -1.1621012687683105, |
|
"logps/chosen": -649.8438720703125, |
|
"logps/rejected": -758.2276000976562, |
|
"loss": 0.4846, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -3.689380645751953, |
|
"rewards/margins": 1.3314837217330933, |
|
"rewards/rejected": -5.020864009857178, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.5417429992148652, |
|
"grad_norm": 7.40191650390625, |
|
"learning_rate": 2.5730923786680672e-06, |
|
"logits/chosen": -1.2692714929580688, |
|
"logits/rejected": -1.248396635055542, |
|
"logps/chosen": -549.1704711914062, |
|
"logps/rejected": -643.1365966796875, |
|
"loss": 0.5449, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -2.976423740386963, |
|
"rewards/margins": 0.864033579826355, |
|
"rewards/rejected": -3.8404572010040283, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.5443601151531012, |
|
"grad_norm": 9.540526390075684, |
|
"learning_rate": 2.5502547870114137e-06, |
|
"logits/chosen": -1.3864257335662842, |
|
"logits/rejected": -1.2374814748764038, |
|
"logps/chosen": -515.8956909179688, |
|
"logps/rejected": -582.565185546875, |
|
"loss": 0.5054, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.6055984497070312, |
|
"rewards/margins": 0.9696043729782104, |
|
"rewards/rejected": -3.5752029418945312, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.5469772310913373, |
|
"grad_norm": 11.426345825195312, |
|
"learning_rate": 2.527412999094507e-06, |
|
"logits/chosen": -1.3803369998931885, |
|
"logits/rejected": -1.2163236141204834, |
|
"logps/chosen": -550.672607421875, |
|
"logps/rejected": -663.0771484375, |
|
"loss": 0.4469, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.5704407691955566, |
|
"rewards/margins": 1.2492586374282837, |
|
"rewards/rejected": -3.81969952583313, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.5495943470295734, |
|
"grad_norm": 10.400588035583496, |
|
"learning_rate": 2.504568922200064e-06, |
|
"logits/chosen": -1.3159221410751343, |
|
"logits/rejected": -1.1753368377685547, |
|
"logps/chosen": -523.4752197265625, |
|
"logps/rejected": -638.3160400390625, |
|
"loss": 0.5068, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.8226757049560547, |
|
"rewards/margins": 1.3093383312225342, |
|
"rewards/rejected": -4.132014274597168, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5495943470295734, |
|
"eval_logits/chosen": -1.2426347732543945, |
|
"eval_logits/rejected": -1.1150033473968506, |
|
"eval_logps/chosen": -579.1231079101562, |
|
"eval_logps/rejected": -680.8953857421875, |
|
"eval_loss": 0.49687275290489197, |
|
"eval_rewards/accuracies": 0.7350000143051147, |
|
"eval_rewards/chosen": -3.145211935043335, |
|
"eval_rewards/margins": 1.2180382013320923, |
|
"eval_rewards/rejected": -4.363250255584717, |
|
"eval_runtime": 1598.2336, |
|
"eval_samples_per_second": 1.251, |
|
"eval_steps_per_second": 0.156, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5522114629678094, |
|
"grad_norm": 13.339751243591309, |
|
"learning_rate": 2.4817244638019333e-06, |
|
"logits/chosen": -1.3830143213272095, |
|
"logits/rejected": -1.2277696132659912, |
|
"logps/chosen": -579.9473876953125, |
|
"logps/rejected": -659.97412109375, |
|
"loss": 0.5192, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -3.041943073272705, |
|
"rewards/margins": 1.2124286890029907, |
|
"rewards/rejected": -4.254371643066406, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.5548285789060455, |
|
"grad_norm": 10.70681095123291, |
|
"learning_rate": 2.4588815314058155e-06, |
|
"logits/chosen": -1.3805171251296997, |
|
"logits/rejected": -1.3095520734786987, |
|
"logps/chosen": -527.185302734375, |
|
"logps/rejected": -602.2315673828125, |
|
"loss": 0.4666, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.8171143531799316, |
|
"rewards/margins": 1.0927484035491943, |
|
"rewards/rejected": -3.909862518310547, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.5574456948442816, |
|
"grad_norm": 13.302124977111816, |
|
"learning_rate": 2.4360420323899922e-06, |
|
"logits/chosen": -1.4553115367889404, |
|
"logits/rejected": -1.3466088771820068, |
|
"logps/chosen": -556.26611328125, |
|
"logps/rejected": -608.5457763671875, |
|
"loss": 0.5919, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.8050436973571777, |
|
"rewards/margins": 0.8486245274543762, |
|
"rewards/rejected": -3.653668165206909, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.5600628107825176, |
|
"grad_norm": 9.734771728515625, |
|
"learning_rate": 2.4132078738460585e-06, |
|
"logits/chosen": -1.5308691263198853, |
|
"logits/rejected": -1.3970110416412354, |
|
"logps/chosen": -516.7899169921875, |
|
"logps/rejected": -586.8917236328125, |
|
"loss": 0.4794, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.5241827964782715, |
|
"rewards/margins": 1.0863929986953735, |
|
"rewards/rejected": -3.6105754375457764, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.5626799267207537, |
|
"grad_norm": 16.153608322143555, |
|
"learning_rate": 2.3903809624196826e-06, |
|
"logits/chosen": -1.4662330150604248, |
|
"logits/rejected": -1.3525559902191162, |
|
"logps/chosen": -481.68035888671875, |
|
"logps/rejected": -539.03515625, |
|
"loss": 0.5497, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.4799540042877197, |
|
"rewards/margins": 0.8992452621459961, |
|
"rewards/rejected": -3.379199266433716, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.5652970426589898, |
|
"grad_norm": 13.256193161010742, |
|
"learning_rate": 2.3675632041513978e-06, |
|
"logits/chosen": -1.5737719535827637, |
|
"logits/rejected": -1.3470098972320557, |
|
"logps/chosen": -527.2965698242188, |
|
"logps/rejected": -575.4271240234375, |
|
"loss": 0.4882, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.403984546661377, |
|
"rewards/margins": 1.1281957626342773, |
|
"rewards/rejected": -3.532180070877075, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.5679141585972258, |
|
"grad_norm": 13.135820388793945, |
|
"learning_rate": 2.3447565043174533e-06, |
|
"logits/chosen": -1.4236756563186646, |
|
"logits/rejected": -1.263068437576294, |
|
"logps/chosen": -507.3004455566406, |
|
"logps/rejected": -562.6768798828125, |
|
"loss": 0.5195, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.490262746810913, |
|
"rewards/margins": 0.9420592188835144, |
|
"rewards/rejected": -3.4323222637176514, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.5705312745354619, |
|
"grad_norm": 11.776784896850586, |
|
"learning_rate": 2.321962767270724e-06, |
|
"logits/chosen": -1.4430488348007202, |
|
"logits/rejected": -1.3176742792129517, |
|
"logps/chosen": -504.53009033203125, |
|
"logps/rejected": -551.927490234375, |
|
"loss": 0.5579, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.546835422515869, |
|
"rewards/margins": 0.8456700444221497, |
|
"rewards/rejected": -3.392504930496216, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.573148390473698, |
|
"grad_norm": 8.283063888549805, |
|
"learning_rate": 2.299183896281692e-06, |
|
"logits/chosen": -1.4085118770599365, |
|
"logits/rejected": -1.3014566898345947, |
|
"logps/chosen": -492.5560607910156, |
|
"logps/rejected": -586.7115478515625, |
|
"loss": 0.5087, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.376844882965088, |
|
"rewards/margins": 0.9645326733589172, |
|
"rewards/rejected": -3.3413777351379395, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.575765506411934, |
|
"grad_norm": 6.720417022705078, |
|
"learning_rate": 2.2764217933795297e-06, |
|
"logits/chosen": -1.529714822769165, |
|
"logits/rejected": -1.415728211402893, |
|
"logps/chosen": -480.97955322265625, |
|
"logps/rejected": -568.6060791015625, |
|
"loss": 0.4839, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.196598529815674, |
|
"rewards/margins": 1.081301212310791, |
|
"rewards/rejected": -3.2778995037078857, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.575765506411934, |
|
"eval_logits/chosen": -1.3886340856552124, |
|
"eval_logits/rejected": -1.2705532312393188, |
|
"eval_logps/chosen": -502.5681457519531, |
|
"eval_logps/rejected": -588.3314819335938, |
|
"eval_loss": 0.49266016483306885, |
|
"eval_rewards/accuracies": 0.7404999732971191, |
|
"eval_rewards/chosen": -2.379661798477173, |
|
"eval_rewards/margins": 1.0579497814178467, |
|
"eval_rewards/rejected": -3.4376115798950195, |
|
"eval_runtime": 1597.9898, |
|
"eval_samples_per_second": 1.252, |
|
"eval_steps_per_second": 0.156, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.5783826223501701, |
|
"grad_norm": 5.171483039855957, |
|
"learning_rate": 2.2536783591932786e-06, |
|
"logits/chosen": -1.5503586530685425, |
|
"logits/rejected": -1.394595742225647, |
|
"logps/chosen": -533.165771484375, |
|
"logps/rejected": -615.0496826171875, |
|
"loss": 0.4996, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.578037738800049, |
|
"rewards/margins": 1.0000585317611694, |
|
"rewards/rejected": -3.5780959129333496, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.5809997382884062, |
|
"grad_norm": 8.2252836227417, |
|
"learning_rate": 2.230955492793149e-06, |
|
"logits/chosen": -1.3457584381103516, |
|
"logits/rejected": -1.2957208156585693, |
|
"logps/chosen": -549.8411865234375, |
|
"logps/rejected": -641.9370727539062, |
|
"loss": 0.5669, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.760831356048584, |
|
"rewards/margins": 1.0501973628997803, |
|
"rewards/rejected": -3.811028242111206, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.5836168542266422, |
|
"grad_norm": 8.77107048034668, |
|
"learning_rate": 2.208255091531947e-06, |
|
"logits/chosen": -1.3714348077774048, |
|
"logits/rejected": -1.294524908065796, |
|
"logps/chosen": -516.0252685546875, |
|
"logps/rejected": -604.4722900390625, |
|
"loss": 0.4741, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.39119291305542, |
|
"rewards/margins": 1.2340974807739258, |
|
"rewards/rejected": -3.6252903938293457, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.5862339701648783, |
|
"grad_norm": 13.218793869018555, |
|
"learning_rate": 2.1855790508866435e-06, |
|
"logits/chosen": -1.4534399509429932, |
|
"logits/rejected": -1.355835199356079, |
|
"logps/chosen": -526.3766479492188, |
|
"logps/rejected": -626.1098022460938, |
|
"loss": 0.5044, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.2947020530700684, |
|
"rewards/margins": 1.1731860637664795, |
|
"rewards/rejected": -3.467888355255127, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.5888510861031143, |
|
"grad_norm": 7.438356876373291, |
|
"learning_rate": 2.162929264300107e-06, |
|
"logits/chosen": -1.4775898456573486, |
|
"logits/rejected": -1.375610113143921, |
|
"logps/chosen": -440.20208740234375, |
|
"logps/rejected": -556.2523193359375, |
|
"loss": 0.3962, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.712977647781372, |
|
"rewards/margins": 1.3840254545211792, |
|
"rewards/rejected": -3.0970029830932617, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.5914682020413504, |
|
"grad_norm": 13.876564025878906, |
|
"learning_rate": 2.1403076230230006e-06, |
|
"logits/chosen": -1.4080345630645752, |
|
"logits/rejected": -1.2920736074447632, |
|
"logps/chosen": -475.05548095703125, |
|
"logps/rejected": -532.099365234375, |
|
"loss": 0.5823, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.039393901824951, |
|
"rewards/margins": 0.848638653755188, |
|
"rewards/rejected": -2.8880326747894287, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.5940853179795865, |
|
"grad_norm": 10.589466094970703, |
|
"learning_rate": 2.11771601595586e-06, |
|
"logits/chosen": -1.447265386581421, |
|
"logits/rejected": -1.3221790790557861, |
|
"logps/chosen": -478.97149658203125, |
|
"logps/rejected": -525.7181396484375, |
|
"loss": 0.5126, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.9702937602996826, |
|
"rewards/margins": 0.9898223876953125, |
|
"rewards/rejected": -2.960115909576416, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.5967024339178225, |
|
"grad_norm": 13.875945091247559, |
|
"learning_rate": 2.0951563294913737e-06, |
|
"logits/chosen": -1.3917208909988403, |
|
"logits/rejected": -1.2089694738388062, |
|
"logps/chosen": -480.66595458984375, |
|
"logps/rejected": -556.7701416015625, |
|
"loss": 0.462, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.19204044342041, |
|
"rewards/margins": 1.0662552118301392, |
|
"rewards/rejected": -3.2582955360412598, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.5993195498560586, |
|
"grad_norm": 9.446721076965332, |
|
"learning_rate": 2.0726304473568693e-06, |
|
"logits/chosen": -1.3553434610366821, |
|
"logits/rejected": -1.238797903060913, |
|
"logps/chosen": -493.4701232910156, |
|
"logps/rejected": -565.8324584960938, |
|
"loss": 0.4824, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.3467798233032227, |
|
"rewards/margins": 1.046331524848938, |
|
"rewards/rejected": -3.393110990524292, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.6019366657942947, |
|
"grad_norm": 9.440372467041016, |
|
"learning_rate": 2.050140250457023e-06, |
|
"logits/chosen": -1.4484026432037354, |
|
"logits/rejected": -1.2413018941879272, |
|
"logps/chosen": -548.2637939453125, |
|
"logps/rejected": -634.7371826171875, |
|
"loss": 0.4729, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.7083754539489746, |
|
"rewards/margins": 1.2052128314971924, |
|
"rewards/rejected": -3.913588285446167, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6019366657942947, |
|
"eval_logits/chosen": -1.2145209312438965, |
|
"eval_logits/rejected": -1.0868196487426758, |
|
"eval_logps/chosen": -549.21240234375, |
|
"eval_logps/rejected": -656.666748046875, |
|
"eval_loss": 0.49239280819892883, |
|
"eval_rewards/accuracies": 0.7404999732971191, |
|
"eval_rewards/chosen": -2.846104383468628, |
|
"eval_rewards/margins": 1.2748597860336304, |
|
"eval_rewards/rejected": -4.120964050292969, |
|
"eval_runtime": 1598.0392, |
|
"eval_samples_per_second": 1.252, |
|
"eval_steps_per_second": 0.156, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6045537817325307, |
|
"grad_norm": 15.882525444030762, |
|
"learning_rate": 2.0276876167168042e-06, |
|
"logits/chosen": -1.1735626459121704, |
|
"logits/rejected": -1.07330322265625, |
|
"logps/chosen": -511.028564453125, |
|
"logps/rejected": -598.4305419921875, |
|
"loss": 0.5852, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.8660171031951904, |
|
"rewards/margins": 1.2002947330474854, |
|
"rewards/rejected": -4.066311836242676, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.6071708976707668, |
|
"grad_norm": 8.326827049255371, |
|
"learning_rate": 2.0052744209246682e-06, |
|
"logits/chosen": -1.3162415027618408, |
|
"logits/rejected": -1.1979725360870361, |
|
"logps/chosen": -522.6014404296875, |
|
"logps/rejected": -590.3125610351562, |
|
"loss": 0.5189, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.673841714859009, |
|
"rewards/margins": 1.005782127380371, |
|
"rewards/rejected": -3.6796233654022217, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.6097880136090029, |
|
"grad_norm": 14.694316864013672, |
|
"learning_rate": 1.9829025345760127e-06, |
|
"logits/chosen": -1.321624994277954, |
|
"logits/rejected": -1.2669956684112549, |
|
"logps/chosen": -527.9610595703125, |
|
"logps/rejected": -604.3615112304688, |
|
"loss": 0.5587, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.490408420562744, |
|
"rewards/margins": 0.8247787356376648, |
|
"rewards/rejected": -3.315187454223633, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.6124051295472389, |
|
"grad_norm": 10.311464309692383, |
|
"learning_rate": 1.9605738257169115e-06, |
|
"logits/chosen": -1.270620346069336, |
|
"logits/rejected": -1.120276927947998, |
|
"logps/chosen": -485.13226318359375, |
|
"logps/rejected": -588.9454956054688, |
|
"loss": 0.4972, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.5224430561065674, |
|
"rewards/margins": 1.1178371906280518, |
|
"rewards/rejected": -3.6402804851531982, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.615022245485475, |
|
"grad_norm": 10.49399471282959, |
|
"learning_rate": 1.9382901587881275e-06, |
|
"logits/chosen": -1.304638147354126, |
|
"logits/rejected": -1.1775546073913574, |
|
"logps/chosen": -500.91339111328125, |
|
"logps/rejected": -595.765625, |
|
"loss": 0.4158, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.42271089553833, |
|
"rewards/margins": 1.271071195602417, |
|
"rewards/rejected": -3.693782329559326, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.6176393614237111, |
|
"grad_norm": 10.7622652053833, |
|
"learning_rate": 1.916053394469437e-06, |
|
"logits/chosen": -1.3254437446594238, |
|
"logits/rejected": -1.1220028400421143, |
|
"logps/chosen": -544.5164184570312, |
|
"logps/rejected": -650.4244384765625, |
|
"loss": 0.5071, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.812866687774658, |
|
"rewards/margins": 1.2050769329071045, |
|
"rewards/rejected": -4.017943382263184, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.6202564773619471, |
|
"grad_norm": 9.485381126403809, |
|
"learning_rate": 1.8938653895242604e-06, |
|
"logits/chosen": -1.2691552639007568, |
|
"logits/rejected": -1.0826399326324463, |
|
"logps/chosen": -547.7079467773438, |
|
"logps/rejected": -651.6534423828125, |
|
"loss": 0.4229, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.8349509239196777, |
|
"rewards/margins": 1.3276488780975342, |
|
"rewards/rejected": -4.162599563598633, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.6228735933001832, |
|
"grad_norm": 13.950716018676758, |
|
"learning_rate": 1.8717279966446267e-06, |
|
"logits/chosen": -1.116172432899475, |
|
"logits/rejected": -1.0301882028579712, |
|
"logps/chosen": -559.43896484375, |
|
"logps/rejected": -674.9713134765625, |
|
"loss": 0.4381, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.109766721725464, |
|
"rewards/margins": 1.2503349781036377, |
|
"rewards/rejected": -4.360101699829102, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.6254907092384192, |
|
"grad_norm": 6.874395370483398, |
|
"learning_rate": 1.8496430642964698e-06, |
|
"logits/chosen": -1.1902521848678589, |
|
"logits/rejected": -1.0676952600479126, |
|
"logps/chosen": -581.3973999023438, |
|
"logps/rejected": -685.8600463867188, |
|
"loss": 0.4878, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.091068983078003, |
|
"rewards/margins": 1.280792474746704, |
|
"rewards/rejected": -4.371861457824707, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"grad_norm": 8.19883918762207, |
|
"learning_rate": 1.827612436565286e-06, |
|
"logits/chosen": -1.2159771919250488, |
|
"logits/rejected": -1.0594358444213867, |
|
"logps/chosen": -552.8912353515625, |
|
"logps/rejected": -668.69921875, |
|
"loss": 0.4501, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.900498390197754, |
|
"rewards/margins": 1.363520622253418, |
|
"rewards/rejected": -4.264018535614014, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"eval_logits/chosen": -1.125669240951538, |
|
"eval_logits/rejected": -0.9978408813476562, |
|
"eval_logps/chosen": -562.0332641601562, |
|
"eval_logps/rejected": -668.2345581054688, |
|
"eval_loss": 0.489955335855484, |
|
"eval_rewards/accuracies": 0.7429999709129333, |
|
"eval_rewards/chosen": -2.974313735961914, |
|
"eval_rewards/margins": 1.2623279094696045, |
|
"eval_rewards/rejected": -4.236640930175781, |
|
"eval_runtime": 1597.7712, |
|
"eval_samples_per_second": 1.252, |
|
"eval_steps_per_second": 0.156, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.6307249411148914, |
|
"grad_norm": 17.752099990844727, |
|
"learning_rate": 1.8056379530021492e-06, |
|
"logits/chosen": -1.2881437540054321, |
|
"logits/rejected": -1.1982684135437012, |
|
"logps/chosen": -541.344482421875, |
|
"logps/rejected": -619.2030029296875, |
|
"loss": 0.5127, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.9722681045532227, |
|
"rewards/margins": 1.0265535116195679, |
|
"rewards/rejected": -3.998821258544922, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.6333420570531274, |
|
"grad_norm": 8.487099647521973, |
|
"learning_rate": 1.7837214484701154e-06, |
|
"logits/chosen": -1.339787244796753, |
|
"logits/rejected": -1.2054253816604614, |
|
"logps/chosen": -510.6474609375, |
|
"logps/rejected": -616.0956420898438, |
|
"loss": 0.4786, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.5842533111572266, |
|
"rewards/margins": 1.2878291606903076, |
|
"rewards/rejected": -3.872082233428955, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.6359591729913635, |
|
"grad_norm": 14.060596466064453, |
|
"learning_rate": 1.7618647529910043e-06, |
|
"logits/chosen": -1.3402836322784424, |
|
"logits/rejected": -1.2104113101959229, |
|
"logps/chosen": -508.7401428222656, |
|
"logps/rejected": -625.2578735351562, |
|
"loss": 0.4786, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.520415782928467, |
|
"rewards/margins": 1.2837390899658203, |
|
"rewards/rejected": -3.804154872894287, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.6385762889295996, |
|
"grad_norm": 9.59952449798584, |
|
"learning_rate": 1.7400696915925996e-06, |
|
"logits/chosen": -1.3208459615707397, |
|
"logits/rejected": -1.141103982925415, |
|
"logps/chosen": -542.9054565429688, |
|
"logps/rejected": -597.6497802734375, |
|
"loss": 0.5332, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.7234339714050293, |
|
"rewards/margins": 1.1302305459976196, |
|
"rewards/rejected": -3.853663921356201, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.6411934048678356, |
|
"grad_norm": 12.0162992477417, |
|
"learning_rate": 1.718338084156254e-06, |
|
"logits/chosen": -1.309754490852356, |
|
"logits/rejected": -1.1585099697113037, |
|
"logps/chosen": -527.2728881835938, |
|
"logps/rejected": -612.1050415039062, |
|
"loss": 0.4561, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.4056506156921387, |
|
"rewards/margins": 1.2328300476074219, |
|
"rewards/rejected": -3.6384806632995605, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.6438105208060717, |
|
"grad_norm": 15.278785705566406, |
|
"learning_rate": 1.6966717452649372e-06, |
|
"logits/chosen": -1.446975588798523, |
|
"logits/rejected": -1.282833456993103, |
|
"logps/chosen": -502.6387634277344, |
|
"logps/rejected": -571.8133544921875, |
|
"loss": 0.4497, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.2578883171081543, |
|
"rewards/margins": 1.2107127904891968, |
|
"rewards/rejected": -3.4686012268066406, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.6464276367443078, |
|
"grad_norm": 8.051189422607422, |
|
"learning_rate": 1.6750724840517103e-06, |
|
"logits/chosen": -1.3973640203475952, |
|
"logits/rejected": -1.3273109197616577, |
|
"logps/chosen": -461.38665771484375, |
|
"logps/rejected": -567.72509765625, |
|
"loss": 0.5096, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.086717128753662, |
|
"rewards/margins": 1.0224206447601318, |
|
"rewards/rejected": -3.109138011932373, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.6490447526825438, |
|
"grad_norm": 11.723711967468262, |
|
"learning_rate": 1.6535421040486686e-06, |
|
"logits/chosen": -1.2049771547317505, |
|
"logits/rejected": -1.1100887060165405, |
|
"logps/chosen": -472.3949279785156, |
|
"logps/rejected": -566.0884399414062, |
|
"loss": 0.4321, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -2.191418409347534, |
|
"rewards/margins": 1.2799413204193115, |
|
"rewards/rejected": -3.4713597297668457, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.6516618686207799, |
|
"grad_norm": 18.185462951660156, |
|
"learning_rate": 1.6320824030363458e-06, |
|
"logits/chosen": -1.2619669437408447, |
|
"logits/rejected": -1.2079191207885742, |
|
"logps/chosen": -466.91845703125, |
|
"logps/rejected": -579.8604125976562, |
|
"loss": 0.4609, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.3208956718444824, |
|
"rewards/margins": 1.2844674587249756, |
|
"rewards/rejected": -3.605363130569458, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.654278984559016, |
|
"grad_norm": 14.226390838623047, |
|
"learning_rate": 1.6106951728936028e-06, |
|
"logits/chosen": -1.3449714183807373, |
|
"logits/rejected": -1.212172508239746, |
|
"logps/chosen": -500.9017028808594, |
|
"logps/rejected": -603.9783325195312, |
|
"loss": 0.4982, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.4372124671936035, |
|
"rewards/margins": 1.1106388568878174, |
|
"rewards/rejected": -3.547851085662842, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.654278984559016, |
|
"eval_logits/chosen": -1.1862049102783203, |
|
"eval_logits/rejected": -1.0531891584396362, |
|
"eval_logps/chosen": -510.45111083984375, |
|
"eval_logps/rejected": -612.1486206054688, |
|
"eval_loss": 0.4872073829174042, |
|
"eval_rewards/accuracies": 0.7419999837875366, |
|
"eval_rewards/chosen": -2.4584920406341553, |
|
"eval_rewards/margins": 1.2172898054122925, |
|
"eval_rewards/rejected": -3.675781488418579, |
|
"eval_runtime": 1597.3321, |
|
"eval_samples_per_second": 1.252, |
|
"eval_steps_per_second": 0.157, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.656896100497252, |
|
"grad_norm": 10.693954467773438, |
|
"learning_rate": 1.5893821994479996e-06, |
|
"logits/chosen": -1.3317979574203491, |
|
"logits/rejected": -1.2107937335968018, |
|
"logps/chosen": -518.4515380859375, |
|
"logps/rejected": -599.1994018554688, |
|
"loss": 0.479, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.410822629928589, |
|
"rewards/margins": 1.1930694580078125, |
|
"rewards/rejected": -3.6038920879364014, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.6595132164354881, |
|
"grad_norm": 10.660543441772461, |
|
"learning_rate": 1.5681452623266868e-06, |
|
"logits/chosen": -1.3140581846237183, |
|
"logits/rejected": -1.059616208076477, |
|
"logps/chosen": -549.7576293945312, |
|
"logps/rejected": -629.7240600585938, |
|
"loss": 0.4838, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.60799503326416, |
|
"rewards/margins": 1.3822691440582275, |
|
"rewards/rejected": -3.9902641773223877, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.6621303323737242, |
|
"grad_norm": 9.327082633972168, |
|
"learning_rate": 1.5469861348078014e-06, |
|
"logits/chosen": -1.315731167793274, |
|
"logits/rejected": -1.1592333316802979, |
|
"logps/chosen": -516.8220825195312, |
|
"logps/rejected": -651.5704956054688, |
|
"loss": 0.4178, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.7009530067443848, |
|
"rewards/margins": 1.4539804458618164, |
|
"rewards/rejected": -4.154933929443359, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.6647474483119602, |
|
"grad_norm": 14.643211364746094, |
|
"learning_rate": 1.5259065836724035e-06, |
|
"logits/chosen": -1.1933174133300781, |
|
"logits/rejected": -1.119011402130127, |
|
"logps/chosen": -520.9530639648438, |
|
"logps/rejected": -660.1638793945312, |
|
"loss": 0.4109, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.762756824493408, |
|
"rewards/margins": 1.4529297351837158, |
|
"rewards/rejected": -4.215685844421387, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.6673645642501963, |
|
"grad_norm": 23.931337356567383, |
|
"learning_rate": 1.5049083690569456e-06, |
|
"logits/chosen": -1.2723230123519897, |
|
"logits/rejected": -1.1774482727050781, |
|
"logps/chosen": -514.2723999023438, |
|
"logps/rejected": -640.9010009765625, |
|
"loss": 0.5081, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.768399477005005, |
|
"rewards/margins": 1.3103950023651123, |
|
"rewards/rejected": -4.078794002532959, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.6699816801884323, |
|
"grad_norm": 10.859006881713867, |
|
"learning_rate": 1.4839932443063057e-06, |
|
"logits/chosen": -1.286709189414978, |
|
"logits/rejected": -1.130676507949829, |
|
"logps/chosen": -553.838134765625, |
|
"logps/rejected": -631.344482421875, |
|
"loss": 0.459, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.619520425796509, |
|
"rewards/margins": 1.3386470079421997, |
|
"rewards/rejected": -3.958167314529419, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.6725987961266684, |
|
"grad_norm": 14.106218338012695, |
|
"learning_rate": 1.4631629558273803e-06, |
|
"logits/chosen": -1.3286449909210205, |
|
"logits/rejected": -1.205742597579956, |
|
"logps/chosen": -493.1719665527344, |
|
"logps/rejected": -586.0244140625, |
|
"loss": 0.5866, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -2.515252113342285, |
|
"rewards/margins": 1.044698715209961, |
|
"rewards/rejected": -3.559950590133667, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.6752159120649045, |
|
"grad_norm": 6.197361469268799, |
|
"learning_rate": 1.4424192429432657e-06, |
|
"logits/chosen": -1.4131492376327515, |
|
"logits/rejected": -1.3109676837921143, |
|
"logps/chosen": -469.304931640625, |
|
"logps/rejected": -598.4210815429688, |
|
"loss": 0.4614, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.13527250289917, |
|
"rewards/margins": 1.2581441402435303, |
|
"rewards/rejected": -3.3934166431427, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.6778330280031405, |
|
"grad_norm": 13.008953094482422, |
|
"learning_rate": 1.421763837748016e-06, |
|
"logits/chosen": -1.3615756034851074, |
|
"logits/rejected": -1.268027901649475, |
|
"logps/chosen": -481.23358154296875, |
|
"logps/rejected": -605.3997802734375, |
|
"loss": 0.4241, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -2.3075268268585205, |
|
"rewards/margins": 1.3260997533798218, |
|
"rewards/rejected": -3.6336264610290527, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.6804501439413766, |
|
"grad_norm": 10.1073637008667, |
|
"learning_rate": 1.401198464962021e-06, |
|
"logits/chosen": -1.3610389232635498, |
|
"logits/rejected": -1.198563814163208, |
|
"logps/chosen": -530.4500732421875, |
|
"logps/rejected": -605.023681640625, |
|
"loss": 0.4649, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.6202147006988525, |
|
"rewards/margins": 1.1309864521026611, |
|
"rewards/rejected": -3.7512009143829346, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6804501439413766, |
|
"eval_logits/chosen": -1.2114638090133667, |
|
"eval_logits/rejected": -1.079287052154541, |
|
"eval_logps/chosen": -522.1907958984375, |
|
"eval_logps/rejected": -632.8793334960938, |
|
"eval_loss": 0.48811665177345276, |
|
"eval_rewards/accuracies": 0.7450000047683716, |
|
"eval_rewards/chosen": -2.5758883953094482, |
|
"eval_rewards/margins": 1.307201623916626, |
|
"eval_rewards/rejected": -3.883090019226074, |
|
"eval_runtime": 1597.3459, |
|
"eval_samples_per_second": 1.252, |
|
"eval_steps_per_second": 0.157, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6830672598796127, |
|
"grad_norm": 9.867508888244629, |
|
"learning_rate": 1.3807248417879896e-06, |
|
"logits/chosen": -1.401888132095337, |
|
"logits/rejected": -1.276673436164856, |
|
"logps/chosen": -532.1458740234375, |
|
"logps/rejected": -652.4072265625, |
|
"loss": 0.4363, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.591766357421875, |
|
"rewards/margins": 1.404159426689148, |
|
"rewards/rejected": -3.9959254264831543, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.6856843758178487, |
|
"grad_norm": 28.170438766479492, |
|
"learning_rate": 1.3603446777675665e-06, |
|
"logits/chosen": -1.209559440612793, |
|
"logits/rejected": -1.0783087015151978, |
|
"logps/chosen": -564.7611694335938, |
|
"logps/rejected": -670.5452270507812, |
|
"loss": 0.5678, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -3.0164246559143066, |
|
"rewards/margins": 1.2787444591522217, |
|
"rewards/rejected": -4.295169353485107, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.6883014917560848, |
|
"grad_norm": 9.69133472442627, |
|
"learning_rate": 1.3400596746385817e-06, |
|
"logits/chosen": -1.3486610651016235, |
|
"logits/rejected": -1.1891324520111084, |
|
"logps/chosen": -538.7862548828125, |
|
"logps/rejected": -633.6087646484375, |
|
"loss": 0.4968, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.6963882446289062, |
|
"rewards/margins": 1.2195419073104858, |
|
"rewards/rejected": -3.9159302711486816, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.6909186076943209, |
|
"grad_norm": 11.340239524841309, |
|
"learning_rate": 1.3198715261929587e-06, |
|
"logits/chosen": -1.339261531829834, |
|
"logits/rejected": -1.186591386795044, |
|
"logps/chosen": -508.19189453125, |
|
"logps/rejected": -621.9693603515625, |
|
"loss": 0.4294, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.717308759689331, |
|
"rewards/margins": 1.2740724086761475, |
|
"rewards/rejected": -3.9913814067840576, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.6935357236325569, |
|
"grad_norm": 8.626651763916016, |
|
"learning_rate": 1.2997819181352823e-06, |
|
"logits/chosen": -1.3360286951065063, |
|
"logits/rejected": -1.1647727489471436, |
|
"logps/chosen": -564.6326904296875, |
|
"logps/rejected": -691.1988525390625, |
|
"loss": 0.4326, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.678508996963501, |
|
"rewards/margins": 1.511307954788208, |
|
"rewards/rejected": -4.189816474914551, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.696152839570793, |
|
"grad_norm": 21.902114868164062, |
|
"learning_rate": 1.2797925279420454e-06, |
|
"logits/chosen": -1.2936923503875732, |
|
"logits/rejected": -1.1569340229034424, |
|
"logps/chosen": -576.9588623046875, |
|
"logps/rejected": -705.35791015625, |
|
"loss": 0.4632, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -3.0516982078552246, |
|
"rewards/margins": 1.4037137031555176, |
|
"rewards/rejected": -4.4554123878479, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.6987699555090291, |
|
"grad_norm": 12.999093055725098, |
|
"learning_rate": 1.2599050247215764e-06, |
|
"logits/chosen": -1.2631757259368896, |
|
"logits/rejected": -1.1437580585479736, |
|
"logps/chosen": -550.2919921875, |
|
"logps/rejected": -659.2994384765625, |
|
"loss": 0.4911, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.9243297576904297, |
|
"rewards/margins": 1.3196732997894287, |
|
"rewards/rejected": -4.244002819061279, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.7013870714472651, |
|
"grad_norm": 15.106201171875, |
|
"learning_rate": 1.2401210690746705e-06, |
|
"logits/chosen": -1.2861721515655518, |
|
"logits/rejected": -1.1344573497772217, |
|
"logps/chosen": -562.5394287109375, |
|
"logps/rejected": -647.7611083984375, |
|
"loss": 0.5272, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.9545741081237793, |
|
"rewards/margins": 1.1845426559448242, |
|
"rewards/rejected": -4.1391167640686035, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.7040041873855012, |
|
"grad_norm": 9.087705612182617, |
|
"learning_rate": 1.2204423129559306e-06, |
|
"logits/chosen": -1.3246266841888428, |
|
"logits/rejected": -1.2674678564071655, |
|
"logps/chosen": -540.13037109375, |
|
"logps/rejected": -657.595947265625, |
|
"loss": 0.492, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.7469382286071777, |
|
"rewards/margins": 1.2162708044052124, |
|
"rewards/rejected": -3.963209629058838, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.7066213033237373, |
|
"grad_norm": 18.654743194580078, |
|
"learning_rate": 1.20087039953583e-06, |
|
"logits/chosen": -1.3834110498428345, |
|
"logits/rejected": -1.2643133401870728, |
|
"logps/chosen": -513.6646728515625, |
|
"logps/rejected": -604.9307250976562, |
|
"loss": 0.556, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.4955801963806152, |
|
"rewards/margins": 1.1764256954193115, |
|
"rewards/rejected": -3.6720058917999268, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.7066213033237373, |
|
"eval_logits/chosen": -1.2295472621917725, |
|
"eval_logits/rejected": -1.1003633737564087, |
|
"eval_logps/chosen": -498.9264831542969, |
|
"eval_logps/rejected": -595.6959228515625, |
|
"eval_loss": 0.4841165840625763, |
|
"eval_rewards/accuracies": 0.7459999918937683, |
|
"eval_rewards/chosen": -2.343245029449463, |
|
"eval_rewards/margins": 1.1680108308792114, |
|
"eval_rewards/rejected": -3.5112557411193848, |
|
"eval_runtime": 1597.0057, |
|
"eval_samples_per_second": 1.252, |
|
"eval_steps_per_second": 0.157, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.7092384192619733, |
|
"grad_norm": 19.703975677490234, |
|
"learning_rate": 1.181406963063507e-06, |
|
"logits/chosen": -1.3118432760238647, |
|
"logits/rejected": -1.2396559715270996, |
|
"logps/chosen": -480.10662841796875, |
|
"logps/rejected": -590.1580200195312, |
|
"loss": 0.4967, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.1435210704803467, |
|
"rewards/margins": 1.1070889234542847, |
|
"rewards/rejected": -3.250609874725342, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.7118555352002094, |
|
"grad_norm": 5.974539279937744, |
|
"learning_rate": 1.1620536287303052e-06, |
|
"logits/chosen": -1.4100855588912964, |
|
"logits/rejected": -1.2877388000488281, |
|
"logps/chosen": -500.7962951660156, |
|
"logps/rejected": -562.1715087890625, |
|
"loss": 0.5414, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.097153902053833, |
|
"rewards/margins": 0.9155322909355164, |
|
"rewards/rejected": -3.012686014175415, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.7144726511384454, |
|
"grad_norm": 9.851229667663574, |
|
"learning_rate": 1.1428120125340717e-06, |
|
"logits/chosen": -1.3525193929672241, |
|
"logits/rejected": -1.19950532913208, |
|
"logps/chosen": -445.181884765625, |
|
"logps/rejected": -556.6793212890625, |
|
"loss": 0.3931, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.911560297012329, |
|
"rewards/margins": 1.5294151306152344, |
|
"rewards/rejected": -3.4409751892089844, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.7170897670766815, |
|
"grad_norm": 9.149617195129395, |
|
"learning_rate": 1.123683721144223e-06, |
|
"logits/chosen": -1.3333719968795776, |
|
"logits/rejected": -1.2258195877075195, |
|
"logps/chosen": -494.1949157714844, |
|
"logps/rejected": -600.24658203125, |
|
"loss": 0.4166, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.106128692626953, |
|
"rewards/margins": 1.385012149810791, |
|
"rewards/rejected": -3.491140842437744, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.7197068830149176, |
|
"grad_norm": 7.582621097564697, |
|
"learning_rate": 1.1046703517675848e-06, |
|
"logits/chosen": -1.3422666788101196, |
|
"logits/rejected": -1.2579666376113892, |
|
"logps/chosen": -470.9822692871094, |
|
"logps/rejected": -587.7799072265625, |
|
"loss": 0.5163, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.159165620803833, |
|
"rewards/margins": 1.106806993484497, |
|
"rewards/rejected": -3.26597261428833, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.7223239989531536, |
|
"grad_norm": 12.026503562927246, |
|
"learning_rate": 1.085773492015028e-06, |
|
"logits/chosen": -1.3507201671600342, |
|
"logits/rejected": -1.1788911819458008, |
|
"logps/chosen": -452.8814392089844, |
|
"logps/rejected": -557.76416015625, |
|
"loss": 0.4254, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.065864324569702, |
|
"rewards/margins": 1.3924720287322998, |
|
"rewards/rejected": -3.458336353302002, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.7249411148913897, |
|
"grad_norm": 16.423751831054688, |
|
"learning_rate": 1.0669947197689034e-06, |
|
"logits/chosen": -1.3033778667449951, |
|
"logits/rejected": -1.1522514820098877, |
|
"logps/chosen": -498.0997619628906, |
|
"logps/rejected": -590.888427734375, |
|
"loss": 0.4935, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.2554616928100586, |
|
"rewards/margins": 1.2149760723114014, |
|
"rewards/rejected": -3.470437526702881, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.7275582308296258, |
|
"grad_norm": 8.218092918395996, |
|
"learning_rate": 1.048335603051291e-06, |
|
"logits/chosen": -1.2994598150253296, |
|
"logits/rejected": -1.1608821153640747, |
|
"logps/chosen": -527.0745849609375, |
|
"logps/rejected": -638.7930908203125, |
|
"loss": 0.4518, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.3942160606384277, |
|
"rewards/margins": 1.3985862731933594, |
|
"rewards/rejected": -3.792802333831787, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.7301753467678618, |
|
"grad_norm": 12.542786598205566, |
|
"learning_rate": 1.0297976998930665e-06, |
|
"logits/chosen": -1.2890033721923828, |
|
"logits/rejected": -1.1566731929779053, |
|
"logps/chosen": -490.1463928222656, |
|
"logps/rejected": -611.6651611328125, |
|
"loss": 0.4472, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.296391010284424, |
|
"rewards/margins": 1.492492437362671, |
|
"rewards/rejected": -3.7888832092285156, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"grad_norm": 10.247793197631836, |
|
"learning_rate": 1.0113825582038078e-06, |
|
"logits/chosen": -1.329484224319458, |
|
"logits/rejected": -1.2037220001220703, |
|
"logps/chosen": -503.18951416015625, |
|
"logps/rejected": -605.0428466796875, |
|
"loss": 0.4617, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.380659580230713, |
|
"rewards/margins": 1.1856248378753662, |
|
"rewards/rejected": -3.5662841796875, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"eval_logits/chosen": -1.195982813835144, |
|
"eval_logits/rejected": -1.0626633167266846, |
|
"eval_logps/chosen": -499.5495910644531, |
|
"eval_logps/rejected": -606.4032592773438, |
|
"eval_loss": 0.4832090735435486, |
|
"eval_rewards/accuracies": 0.7459999918937683, |
|
"eval_rewards/chosen": -2.3494763374328613, |
|
"eval_rewards/margins": 1.2688524723052979, |
|
"eval_rewards/rejected": -3.61832857131958, |
|
"eval_runtime": 1596.6931, |
|
"eval_samples_per_second": 1.253, |
|
"eval_steps_per_second": 0.157, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.735409578644334, |
|
"grad_norm": 20.173498153686523, |
|
"learning_rate": 9.930917156425477e-07, |
|
"logits/chosen": -1.2981245517730713, |
|
"logits/rejected": -1.1898633241653442, |
|
"logps/chosen": -514.4564819335938, |
|
"logps/rejected": -633.3460693359375, |
|
"loss": 0.5311, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.5339112281799316, |
|
"rewards/margins": 1.2178757190704346, |
|
"rewards/rejected": -3.751786708831787, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.73802669458257, |
|
"grad_norm": 20.3377742767334, |
|
"learning_rate": 9.749266994893756e-07, |
|
"logits/chosen": -1.2338041067123413, |
|
"logits/rejected": -1.084707498550415, |
|
"logps/chosen": -491.3687438964844, |
|
"logps/rejected": -575.6991577148438, |
|
"loss": 0.5842, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.5079922676086426, |
|
"rewards/margins": 0.9769840240478516, |
|
"rewards/rejected": -3.484976291656494, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.7406438105208061, |
|
"grad_norm": 10.66334056854248, |
|
"learning_rate": 9.56889026517913e-07, |
|
"logits/chosen": -1.2951042652130127, |
|
"logits/rejected": -1.1750590801239014, |
|
"logps/chosen": -511.2039489746094, |
|
"logps/rejected": -602.0439453125, |
|
"loss": 0.4889, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.499505043029785, |
|
"rewards/margins": 1.1682642698287964, |
|
"rewards/rejected": -3.66776967048645, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.7432609264590422, |
|
"grad_norm": 8.688248634338379, |
|
"learning_rate": 9.389802028686617e-07, |
|
"logits/chosen": -1.371790885925293, |
|
"logits/rejected": -1.270684003829956, |
|
"logps/chosen": -503.0506896972656, |
|
"logps/rejected": -553.471923828125, |
|
"loss": 0.5805, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.423825979232788, |
|
"rewards/margins": 0.8341992497444153, |
|
"rewards/rejected": -3.2580254077911377, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.7458780423972782, |
|
"grad_norm": 13.514219284057617, |
|
"learning_rate": 9.212017239232427e-07, |
|
"logits/chosen": -1.2997102737426758, |
|
"logits/rejected": -1.1494895219802856, |
|
"logps/chosen": -497.5753479003906, |
|
"logps/rejected": -604.2395629882812, |
|
"loss": 0.4702, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.2467424869537354, |
|
"rewards/margins": 1.2961435317993164, |
|
"rewards/rejected": -3.542886257171631, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.7484951583355143, |
|
"grad_norm": 10.475621223449707, |
|
"learning_rate": 9.03555074179533e-07, |
|
"logits/chosen": -1.2712374925613403, |
|
"logits/rejected": -1.236365795135498, |
|
"logps/chosen": -477.6539611816406, |
|
"logps/rejected": -613.723876953125, |
|
"loss": 0.4366, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.210069179534912, |
|
"rewards/margins": 1.3023548126220703, |
|
"rewards/rejected": -3.5124244689941406, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.7511122742737504, |
|
"grad_norm": 11.980389595031738, |
|
"learning_rate": 8.860417271277067e-07, |
|
"logits/chosen": -1.3676173686981201, |
|
"logits/rejected": -1.303006887435913, |
|
"logps/chosen": -500.90008544921875, |
|
"logps/rejected": -591.3121948242188, |
|
"loss": 0.5046, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.3170719146728516, |
|
"rewards/margins": 0.9840759038925171, |
|
"rewards/rejected": -3.3011481761932373, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.7537293902119864, |
|
"grad_norm": 8.44219970703125, |
|
"learning_rate": 8.686631451272029e-07, |
|
"logits/chosen": -1.323055624961853, |
|
"logits/rejected": -1.1673837900161743, |
|
"logps/chosen": -512.1810302734375, |
|
"logps/rejected": -617.2755126953125, |
|
"loss": 0.4958, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.588519811630249, |
|
"rewards/margins": 1.3053323030471802, |
|
"rewards/rejected": -3.8938522338867188, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.7563465061502225, |
|
"grad_norm": 6.950828552246094, |
|
"learning_rate": 8.514207792846168e-07, |
|
"logits/chosen": -1.3387699127197266, |
|
"logits/rejected": -1.2082509994506836, |
|
"logps/chosen": -511.2001953125, |
|
"logps/rejected": -603.291748046875, |
|
"loss": 0.4873, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.6037611961364746, |
|
"rewards/margins": 1.2057433128356934, |
|
"rewards/rejected": -3.809504747390747, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.7589636220884585, |
|
"grad_norm": 7.15659236907959, |
|
"learning_rate": 8.343160693325356e-07, |
|
"logits/chosen": -1.2071359157562256, |
|
"logits/rejected": -1.0866135358810425, |
|
"logps/chosen": -522.898193359375, |
|
"logps/rejected": -642.6585083007812, |
|
"loss": 0.4916, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.6741576194763184, |
|
"rewards/margins": 1.2413873672485352, |
|
"rewards/rejected": -3.9155445098876953, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.7589636220884585, |
|
"eval_logits/chosen": -1.1417629718780518, |
|
"eval_logits/rejected": -1.0031887292861938, |
|
"eval_logps/chosen": -531.7142333984375, |
|
"eval_logps/rejected": -636.2195434570312, |
|
"eval_loss": 0.4799574017524719, |
|
"eval_rewards/accuracies": 0.7455000281333923, |
|
"eval_rewards/chosen": -2.6711227893829346, |
|
"eval_rewards/margins": 1.2453694343566895, |
|
"eval_rewards/rejected": -3.916492223739624, |
|
"eval_runtime": 1597.0819, |
|
"eval_samples_per_second": 1.252, |
|
"eval_steps_per_second": 0.157, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.7615807380266946, |
|
"grad_norm": 8.567606925964355, |
|
"learning_rate": 8.173504435093174e-07, |
|
"logits/chosen": -1.228169560432434, |
|
"logits/rejected": -1.035468339920044, |
|
"logps/chosen": -499.9391174316406, |
|
"logps/rejected": -596.357666015625, |
|
"loss": 0.483, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.584282398223877, |
|
"rewards/margins": 1.2857377529144287, |
|
"rewards/rejected": -3.8700199127197266, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.7641978539649307, |
|
"grad_norm": 8.268507957458496, |
|
"learning_rate": 8.00525318439836e-07, |
|
"logits/chosen": -1.2574290037155151, |
|
"logits/rejected": -1.1231721639633179, |
|
"logps/chosen": -538.8193969726562, |
|
"logps/rejected": -632.8656005859375, |
|
"loss": 0.5454, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.62135648727417, |
|
"rewards/margins": 1.0157769918441772, |
|
"rewards/rejected": -3.637133836746216, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.7668149699031667, |
|
"grad_norm": 6.73305606842041, |
|
"learning_rate": 7.838420990171927e-07, |
|
"logits/chosen": -1.3410282135009766, |
|
"logits/rejected": -1.1771111488342285, |
|
"logps/chosen": -512.6897583007812, |
|
"logps/rejected": -595.9611206054688, |
|
"loss": 0.5007, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.4339559078216553, |
|
"rewards/margins": 1.0879731178283691, |
|
"rewards/rejected": -3.5219292640686035, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.7694320858414028, |
|
"grad_norm": 8.820892333984375, |
|
"learning_rate": 7.673021782854084e-07, |
|
"logits/chosen": -1.195052981376648, |
|
"logits/rejected": -1.0439643859863281, |
|
"logps/chosen": -517.55810546875, |
|
"logps/rejected": -611.39111328125, |
|
"loss": 0.4715, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.5697178840637207, |
|
"rewards/margins": 1.3570950031280518, |
|
"rewards/rejected": -3.9268131256103516, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.7720492017796389, |
|
"grad_norm": 10.797639846801758, |
|
"learning_rate": 7.509069373231039e-07, |
|
"logits/chosen": -1.2065623998641968, |
|
"logits/rejected": -1.0761216878890991, |
|
"logps/chosen": -521.6849365234375, |
|
"logps/rejected": -586.7667236328125, |
|
"loss": 0.5756, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.6908392906188965, |
|
"rewards/margins": 0.898908257484436, |
|
"rewards/rejected": -3.589747667312622, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.7746663177178749, |
|
"grad_norm": 9.844982147216797, |
|
"learning_rate": 7.346577451281822e-07, |
|
"logits/chosen": -1.219063639640808, |
|
"logits/rejected": -1.133622407913208, |
|
"logps/chosen": -516.6390991210938, |
|
"logps/rejected": -624.2025146484375, |
|
"loss": 0.4668, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.544440269470215, |
|
"rewards/margins": 1.317181944847107, |
|
"rewards/rejected": -3.8616223335266113, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.777283433656111, |
|
"grad_norm": 12.88412094116211, |
|
"learning_rate": 7.185559585035138e-07, |
|
"logits/chosen": -1.269226312637329, |
|
"logits/rejected": -1.0988438129425049, |
|
"logps/chosen": -547.8538208007812, |
|
"logps/rejected": -646.4948120117188, |
|
"loss": 0.4824, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.6517109870910645, |
|
"rewards/margins": 1.145399570465088, |
|
"rewards/rejected": -3.7971103191375732, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.7799005495943471, |
|
"grad_norm": 8.924067497253418, |
|
"learning_rate": 7.026029219436504e-07, |
|
"logits/chosen": -1.2982590198516846, |
|
"logits/rejected": -1.1203540563583374, |
|
"logps/chosen": -504.6836853027344, |
|
"logps/rejected": -623.8721313476562, |
|
"loss": 0.4545, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.5236408710479736, |
|
"rewards/margins": 1.3211889266967773, |
|
"rewards/rejected": -3.844829559326172, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.7825176655325831, |
|
"grad_norm": 6.5459513664245605, |
|
"learning_rate": 6.867999675225523e-07, |
|
"logits/chosen": -1.3317945003509521, |
|
"logits/rejected": -1.1817572116851807, |
|
"logps/chosen": -483.4960021972656, |
|
"logps/rejected": -588.3883056640625, |
|
"loss": 0.4648, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.526824712753296, |
|
"rewards/margins": 1.1909806728363037, |
|
"rewards/rejected": -3.7178053855895996, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.7851347814708192, |
|
"grad_norm": 8.477150917053223, |
|
"learning_rate": 6.711484147823663e-07, |
|
"logits/chosen": -1.2142189741134644, |
|
"logits/rejected": -1.1248340606689453, |
|
"logps/chosen": -488.151123046875, |
|
"logps/rejected": -618.1202392578125, |
|
"loss": 0.4708, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.5399701595306396, |
|
"rewards/margins": 1.251422643661499, |
|
"rewards/rejected": -3.7913928031921387, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.7851347814708192, |
|
"eval_logits/chosen": -1.1355363130569458, |
|
"eval_logits/rejected": -0.9962058663368225, |
|
"eval_logps/chosen": -526.2620849609375, |
|
"eval_logps/rejected": -623.4008178710938, |
|
"eval_loss": 0.4796713590621948, |
|
"eval_rewards/accuracies": 0.7475000023841858, |
|
"eval_rewards/chosen": -2.61660099029541, |
|
"eval_rewards/margins": 1.1717036962509155, |
|
"eval_rewards/rejected": -3.788304328918457, |
|
"eval_runtime": 1596.8532, |
|
"eval_samples_per_second": 1.252, |
|
"eval_steps_per_second": 0.157, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.7877518974090553, |
|
"grad_norm": 9.99060344696045, |
|
"learning_rate": 6.556495706232413e-07, |
|
"logits/chosen": -1.2248764038085938, |
|
"logits/rejected": -1.1362669467926025, |
|
"logps/chosen": -536.1866455078125, |
|
"logps/rejected": -627.3114013671875, |
|
"loss": 0.532, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.6758596897125244, |
|
"rewards/margins": 1.0958218574523926, |
|
"rewards/rejected": -3.771681308746338, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.7903690133472913, |
|
"grad_norm": 10.53675365447998, |
|
"learning_rate": 6.403047291942057e-07, |
|
"logits/chosen": -1.1557037830352783, |
|
"logits/rejected": -0.9910370111465454, |
|
"logps/chosen": -491.5957946777344, |
|
"logps/rejected": -585.2208251953125, |
|
"loss": 0.4845, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.6533892154693604, |
|
"rewards/margins": 1.1808980703353882, |
|
"rewards/rejected": -3.834287166595459, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.7929861292855274, |
|
"grad_norm": 12.331343650817871, |
|
"learning_rate": 6.251151717851023e-07, |
|
"logits/chosen": -1.2338765859603882, |
|
"logits/rejected": -1.136103630065918, |
|
"logps/chosen": -477.06243896484375, |
|
"logps/rejected": -573.8302001953125, |
|
"loss": 0.4934, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.4373462200164795, |
|
"rewards/margins": 1.100510835647583, |
|
"rewards/rejected": -3.5378570556640625, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.7956032452237635, |
|
"grad_norm": 6.849348545074463, |
|
"learning_rate": 6.100821667196041e-07, |
|
"logits/chosen": -1.4195866584777832, |
|
"logits/rejected": -1.1471283435821533, |
|
"logps/chosen": -517.2559814453125, |
|
"logps/rejected": -562.2097778320312, |
|
"loss": 0.4849, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.4172825813293457, |
|
"rewards/margins": 1.1046142578125, |
|
"rewards/rejected": -3.5218968391418457, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.7982203611619995, |
|
"grad_norm": 7.553493022918701, |
|
"learning_rate": 5.952069692493062e-07, |
|
"logits/chosen": -1.2353808879852295, |
|
"logits/rejected": -1.117495059967041, |
|
"logps/chosen": -466.36297607421875, |
|
"logps/rejected": -601.0093994140625, |
|
"loss": 0.4162, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.381345272064209, |
|
"rewards/margins": 1.3122522830963135, |
|
"rewards/rejected": -3.6935970783233643, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.8008374771002356, |
|
"grad_norm": 9.807490348815918, |
|
"learning_rate": 5.80490821448918e-07, |
|
"logits/chosen": -1.1880605220794678, |
|
"logits/rejected": -1.173678994178772, |
|
"logps/chosen": -516.3980102539062, |
|
"logps/rejected": -701.3677978515625, |
|
"loss": 0.4261, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.5393073558807373, |
|
"rewards/margins": 1.4251797199249268, |
|
"rewards/rejected": -3.9644875526428223, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.8034545930384716, |
|
"grad_norm": 8.029556274414062, |
|
"learning_rate": 5.659349521125459e-07, |
|
"logits/chosen": -1.3551298379898071, |
|
"logits/rejected": -1.2928274869918823, |
|
"logps/chosen": -548.3309326171875, |
|
"logps/rejected": -636.1687622070312, |
|
"loss": 0.5164, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.670652389526367, |
|
"rewards/margins": 1.083081603050232, |
|
"rewards/rejected": -3.7537341117858887, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.8060717089767077, |
|
"grad_norm": 10.774998664855957, |
|
"learning_rate": 5.5154057665109e-07, |
|
"logits/chosen": -1.3124372959136963, |
|
"logits/rejected": -1.1519577503204346, |
|
"logps/chosen": -552.2019653320312, |
|
"logps/rejected": -666.1383666992188, |
|
"loss": 0.4952, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.9138247966766357, |
|
"rewards/margins": 1.403515338897705, |
|
"rewards/rejected": -4.31734037399292, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.8086888249149438, |
|
"grad_norm": 6.534247398376465, |
|
"learning_rate": 5.373088969907586e-07, |
|
"logits/chosen": -1.3402431011199951, |
|
"logits/rejected": -1.147871732711792, |
|
"logps/chosen": -560.8900756835938, |
|
"logps/rejected": -637.8880004882812, |
|
"loss": 0.4401, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.8420605659484863, |
|
"rewards/margins": 1.2714219093322754, |
|
"rewards/rejected": -4.113482475280762, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.8113059408531798, |
|
"grad_norm": 7.741410732269287, |
|
"learning_rate": 5.23241101472709e-07, |
|
"logits/chosen": -1.2486029863357544, |
|
"logits/rejected": -1.1143968105316162, |
|
"logps/chosen": -550.2741088867188, |
|
"logps/rejected": -650.1898803710938, |
|
"loss": 0.4804, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.740609645843506, |
|
"rewards/margins": 1.2194688320159912, |
|
"rewards/rejected": -3.960078001022339, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.8113059408531798, |
|
"eval_logits/chosen": -1.1341181993484497, |
|
"eval_logits/rejected": -0.9953464865684509, |
|
"eval_logps/chosen": -546.843505859375, |
|
"eval_logps/rejected": -656.7727661132812, |
|
"eval_loss": 0.4807169735431671, |
|
"eval_rewards/accuracies": 0.7475000023841858, |
|
"eval_rewards/chosen": -2.822416067123413, |
|
"eval_rewards/margins": 1.2996082305908203, |
|
"eval_rewards/rejected": -4.122024059295654, |
|
"eval_runtime": 1596.5373, |
|
"eval_samples_per_second": 1.253, |
|
"eval_steps_per_second": 0.157, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.8139230567914159, |
|
"grad_norm": 12.117105484008789, |
|
"learning_rate": 5.09338364753818e-07, |
|
"logits/chosen": -1.3339240550994873, |
|
"logits/rejected": -1.1453027725219727, |
|
"logps/chosen": -560.3875122070312, |
|
"logps/rejected": -672.76513671875, |
|
"loss": 0.5239, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.750267267227173, |
|
"rewards/margins": 1.282500982284546, |
|
"rewards/rejected": -4.032768249511719, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.816540172729652, |
|
"grad_norm": 9.4435396194458, |
|
"learning_rate": 4.956018477086005e-07, |
|
"logits/chosen": -1.2970499992370605, |
|
"logits/rejected": -1.1291049718856812, |
|
"logps/chosen": -554.2510986328125, |
|
"logps/rejected": -649.1993408203125, |
|
"loss": 0.5269, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.851862668991089, |
|
"rewards/margins": 1.2229183912277222, |
|
"rewards/rejected": -4.07478141784668, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.819157288667888, |
|
"grad_norm": 14.918078422546387, |
|
"learning_rate": 4.820326973322764e-07, |
|
"logits/chosen": -1.207421064376831, |
|
"logits/rejected": -1.115781307220459, |
|
"logps/chosen": -536.1851806640625, |
|
"logps/rejected": -655.1953735351562, |
|
"loss": 0.5314, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.8535518646240234, |
|
"rewards/margins": 1.2501590251922607, |
|
"rewards/rejected": -4.103711128234863, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.821774404606124, |
|
"grad_norm": 14.224996566772461, |
|
"learning_rate": 4.686320466449981e-07, |
|
"logits/chosen": -1.1797192096710205, |
|
"logits/rejected": -0.9966877102851868, |
|
"logps/chosen": -504.07745361328125, |
|
"logps/rejected": -650.1423950195312, |
|
"loss": 0.4351, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.6490511894226074, |
|
"rewards/margins": 1.4823657274246216, |
|
"rewards/rejected": -4.131417274475098, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.8243915205443602, |
|
"grad_norm": 7.642600059509277, |
|
"learning_rate": 4.554010145972418e-07, |
|
"logits/chosen": -1.366541862487793, |
|
"logits/rejected": -1.1694148778915405, |
|
"logps/chosen": -543.9080200195312, |
|
"logps/rejected": -658.1217041015625, |
|
"loss": 0.5434, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.8025176525115967, |
|
"rewards/margins": 1.2542707920074463, |
|
"rewards/rejected": -4.056788444519043, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.8270086364825961, |
|
"grad_norm": 10.804971694946289, |
|
"learning_rate": 4.4234070597637455e-07, |
|
"logits/chosen": -1.1909892559051514, |
|
"logits/rejected": -1.0947834253311157, |
|
"logps/chosen": -546.0935668945312, |
|
"logps/rejected": -647.5870361328125, |
|
"loss": 0.5018, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.70656156539917, |
|
"rewards/margins": 1.1345676183700562, |
|
"rewards/rejected": -3.8411293029785156, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.8296257524208323, |
|
"grad_norm": 6.038959980010986, |
|
"learning_rate": 4.2945221131440783e-07, |
|
"logits/chosen": -1.1933271884918213, |
|
"logits/rejected": -0.9797855615615845, |
|
"logps/chosen": -524.9500732421875, |
|
"logps/rejected": -634.7277221679688, |
|
"loss": 0.4177, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.5928843021392822, |
|
"rewards/margins": 1.4146864414215088, |
|
"rewards/rejected": -4.007569789886475, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.8322428683590684, |
|
"grad_norm": 8.643757820129395, |
|
"learning_rate": 4.167366067969381e-07, |
|
"logits/chosen": -1.3083521127700806, |
|
"logits/rejected": -1.2275665998458862, |
|
"logps/chosen": -480.93267822265625, |
|
"logps/rejected": -616.6661376953125, |
|
"loss": 0.4995, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.5809030532836914, |
|
"rewards/margins": 1.1199270486831665, |
|
"rewards/rejected": -3.7008299827575684, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.8348599842973043, |
|
"grad_norm": 5.735963821411133, |
|
"learning_rate": 4.041949541732826e-07, |
|
"logits/chosen": -1.306793451309204, |
|
"logits/rejected": -1.2211034297943115, |
|
"logps/chosen": -528.7879638671875, |
|
"logps/rejected": -633.7890625, |
|
"loss": 0.5016, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.649538040161133, |
|
"rewards/margins": 1.1934170722961426, |
|
"rewards/rejected": -3.8429553508758545, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"grad_norm": 11.755797386169434, |
|
"learning_rate": 3.9182830066782614e-07, |
|
"logits/chosen": -1.2019577026367188, |
|
"logits/rejected": -1.1839076280593872, |
|
"logps/chosen": -523.3551025390625, |
|
"logps/rejected": -663.1622314453125, |
|
"loss": 0.4866, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.657108783721924, |
|
"rewards/margins": 1.273402452468872, |
|
"rewards/rejected": -3.930511474609375, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"eval_logits/chosen": -1.1640751361846924, |
|
"eval_logits/rejected": -1.0276466608047485, |
|
"eval_logps/chosen": -519.5614013671875, |
|
"eval_logps/rejected": -623.51025390625, |
|
"eval_loss": 0.4776689112186432, |
|
"eval_rewards/accuracies": 0.7475000023841858, |
|
"eval_rewards/chosen": -2.5495944023132324, |
|
"eval_rewards/margins": 1.2398039102554321, |
|
"eval_rewards/rejected": -3.789398431777954, |
|
"eval_runtime": 1596.1554, |
|
"eval_samples_per_second": 1.253, |
|
"eval_steps_per_second": 0.157, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.8400942161737766, |
|
"grad_norm": 6.5980448722839355, |
|
"learning_rate": 3.796376788925771e-07, |
|
"logits/chosen": -1.2055485248565674, |
|
"logits/rejected": -1.1355664730072021, |
|
"logps/chosen": -506.8805236816406, |
|
"logps/rejected": -596.1102294921875, |
|
"loss": 0.503, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.4550302028656006, |
|
"rewards/margins": 1.1328961849212646, |
|
"rewards/rejected": -3.5879263877868652, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.8427113321120125, |
|
"grad_norm": 7.025763034820557, |
|
"learning_rate": 3.676241067609465e-07, |
|
"logits/chosen": -1.2890058755874634, |
|
"logits/rejected": -1.1607505083084106, |
|
"logps/chosen": -542.5242919921875, |
|
"logps/rejected": -612.6026611328125, |
|
"loss": 0.5185, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.488643169403076, |
|
"rewards/margins": 1.1322394609451294, |
|
"rewards/rejected": -3.620882749557495, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.8453284480502486, |
|
"grad_norm": 11.517923355102539, |
|
"learning_rate": 3.5578858740274976e-07, |
|
"logits/chosen": -1.2237865924835205, |
|
"logits/rejected": -1.122482419013977, |
|
"logps/chosen": -517.73974609375, |
|
"logps/rejected": -605.0042114257812, |
|
"loss": 0.513, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.5605525970458984, |
|
"rewards/margins": 0.999243438243866, |
|
"rewards/rejected": -3.5597965717315674, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.8479455639884846, |
|
"grad_norm": 10.88183307647705, |
|
"learning_rate": 3.44132109080447e-07, |
|
"logits/chosen": -1.4292676448822021, |
|
"logits/rejected": -1.2410900592803955, |
|
"logps/chosen": -495.89813232421875, |
|
"logps/rejected": -589.0921630859375, |
|
"loss": 0.4351, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.3068690299987793, |
|
"rewards/margins": 1.3076789379119873, |
|
"rewards/rejected": -3.6145482063293457, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.8505626799267207, |
|
"grad_norm": 11.31247615814209, |
|
"learning_rate": 3.3265564510662344e-07, |
|
"logits/chosen": -1.3701411485671997, |
|
"logits/rejected": -1.213030457496643, |
|
"logps/chosen": -523.7352294921875, |
|
"logps/rejected": -627.9140625, |
|
"loss": 0.433, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.305351495742798, |
|
"rewards/margins": 1.2927907705307007, |
|
"rewards/rejected": -3.598142147064209, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.8531797958649568, |
|
"grad_norm": 15.092782974243164, |
|
"learning_rate": 3.213601537627195e-07, |
|
"logits/chosen": -1.2755637168884277, |
|
"logits/rejected": -1.1552997827529907, |
|
"logps/chosen": -517.78369140625, |
|
"logps/rejected": -615.5398559570312, |
|
"loss": 0.5176, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.607300281524658, |
|
"rewards/margins": 1.1482493877410889, |
|
"rewards/rejected": -3.755549669265747, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.8557969118031928, |
|
"grad_norm": 18.888704299926758, |
|
"learning_rate": 3.1024657821901063e-07, |
|
"logits/chosen": -1.3271687030792236, |
|
"logits/rejected": -1.2444849014282227, |
|
"logps/chosen": -477.8196716308594, |
|
"logps/rejected": -587.2836303710938, |
|
"loss": 0.4719, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.301666498184204, |
|
"rewards/margins": 1.2528338432312012, |
|
"rewards/rejected": -3.554500102996826, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.8584140277414289, |
|
"grad_norm": 10.203365325927734, |
|
"learning_rate": 2.9931584645585654e-07, |
|
"logits/chosen": -1.245596170425415, |
|
"logits/rejected": -1.222507357597351, |
|
"logps/chosen": -513.1766357421875, |
|
"logps/rejected": -632.6079711914062, |
|
"loss": 0.4915, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.386977434158325, |
|
"rewards/margins": 1.1650269031524658, |
|
"rewards/rejected": -3.55200457572937, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.861031143679665, |
|
"grad_norm": 4.975100040435791, |
|
"learning_rate": 2.885688711862136e-07, |
|
"logits/chosen": -1.2813748121261597, |
|
"logits/rejected": -1.2710316181182861, |
|
"logps/chosen": -519.4554443359375, |
|
"logps/rejected": -658.6768188476562, |
|
"loss": 0.498, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.585131883621216, |
|
"rewards/margins": 1.4114031791687012, |
|
"rewards/rejected": -3.996534824371338, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.863648259617901, |
|
"grad_norm": 6.192923069000244, |
|
"learning_rate": 2.7800654977942486e-07, |
|
"logits/chosen": -1.268638014793396, |
|
"logits/rejected": -1.1421396732330322, |
|
"logps/chosen": -508.80584716796875, |
|
"logps/rejected": -629.664306640625, |
|
"loss": 0.4967, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.516002893447876, |
|
"rewards/margins": 1.2464697360992432, |
|
"rewards/rejected": -3.762472629547119, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.863648259617901, |
|
"eval_logits/chosen": -1.1608073711395264, |
|
"eval_logits/rejected": -1.0241122245788574, |
|
"eval_logps/chosen": -520.3804321289062, |
|
"eval_logps/rejected": -625.6535034179688, |
|
"eval_loss": 0.47857987880706787, |
|
"eval_rewards/accuracies": 0.7480000257492065, |
|
"eval_rewards/chosen": -2.5577852725982666, |
|
"eval_rewards/margins": 1.2530462741851807, |
|
"eval_rewards/rejected": -3.8108315467834473, |
|
"eval_runtime": 1596.9029, |
|
"eval_samples_per_second": 1.252, |
|
"eval_steps_per_second": 0.157, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.8662653755561371, |
|
"grad_norm": 14.262938499450684, |
|
"learning_rate": 2.6762976418628797e-07, |
|
"logits/chosen": -1.2763694524765015, |
|
"logits/rejected": -1.12501859664917, |
|
"logps/chosen": -479.6934509277344, |
|
"logps/rejected": -549.134765625, |
|
"loss": 0.5232, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.4996352195739746, |
|
"rewards/margins": 1.1307785511016846, |
|
"rewards/rejected": -3.630413770675659, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.8688824914943732, |
|
"grad_norm": 6.411558628082275, |
|
"learning_rate": 2.5743938086541354e-07, |
|
"logits/chosen": -1.277998447418213, |
|
"logits/rejected": -1.1551088094711304, |
|
"logps/chosen": -519.344482421875, |
|
"logps/rejected": -614.8687744140625, |
|
"loss": 0.5019, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.5514492988586426, |
|
"rewards/margins": 1.2041881084442139, |
|
"rewards/rejected": -3.7556369304656982, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.8714996074326092, |
|
"grad_norm": 8.243268966674805, |
|
"learning_rate": 2.4743625071087574e-07, |
|
"logits/chosen": -1.444610834121704, |
|
"logits/rejected": -1.2527801990509033, |
|
"logps/chosen": -515.1294555664062, |
|
"logps/rejected": -632.4005737304688, |
|
"loss": 0.4532, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.376555919647217, |
|
"rewards/margins": 1.4679479598999023, |
|
"rewards/rejected": -3.844503879547119, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.8741167233708453, |
|
"grad_norm": 10.0430908203125, |
|
"learning_rate": 2.3762120898116498e-07, |
|
"logits/chosen": -1.2998971939086914, |
|
"logits/rejected": -1.1857765913009644, |
|
"logps/chosen": -535.6951293945312, |
|
"logps/rejected": -634.7950439453125, |
|
"loss": 0.495, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.6694915294647217, |
|
"rewards/margins": 1.0715022087097168, |
|
"rewards/rejected": -3.7409939765930176, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.8767338393090814, |
|
"grad_norm": 9.759672164916992, |
|
"learning_rate": 2.2799507522944048e-07, |
|
"logits/chosen": -1.2516978979110718, |
|
"logits/rejected": -1.143761396408081, |
|
"logps/chosen": -506.92779541015625, |
|
"logps/rejected": -630.4886474609375, |
|
"loss": 0.4682, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.4124372005462646, |
|
"rewards/margins": 1.3180114030838013, |
|
"rewards/rejected": -3.7304489612579346, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.8793509552473174, |
|
"grad_norm": 10.614029884338379, |
|
"learning_rate": 2.1855865323510056e-07, |
|
"logits/chosen": -1.3051875829696655, |
|
"logits/rejected": -1.0945546627044678, |
|
"logps/chosen": -524.2318115234375, |
|
"logps/rejected": -672.577880859375, |
|
"loss": 0.4244, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -2.515920639038086, |
|
"rewards/margins": 1.5423296689987183, |
|
"rewards/rejected": -4.058249473571777, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.8819680711855535, |
|
"grad_norm": 7.248552322387695, |
|
"learning_rate": 2.0931273093666575e-07, |
|
"logits/chosen": -1.2330162525177002, |
|
"logits/rejected": -1.0763533115386963, |
|
"logps/chosen": -503.97393798828125, |
|
"logps/rejected": -626.6051025390625, |
|
"loss": 0.4239, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.638221025466919, |
|
"rewards/margins": 1.4292933940887451, |
|
"rewards/rejected": -4.067514419555664, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.8845851871237895, |
|
"grad_norm": 15.31811237335205, |
|
"learning_rate": 2.002580803659873e-07, |
|
"logits/chosen": -1.244128942489624, |
|
"logits/rejected": -1.1012144088745117, |
|
"logps/chosen": -522.0372924804688, |
|
"logps/rejected": -629.9783325195312, |
|
"loss": 0.4693, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.676211357116699, |
|
"rewards/margins": 1.2609608173370361, |
|
"rewards/rejected": -3.9371724128723145, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.8872023030620256, |
|
"grad_norm": 5.824941635131836, |
|
"learning_rate": 1.913954575837826e-07, |
|
"logits/chosen": -1.3020130395889282, |
|
"logits/rejected": -1.055781602859497, |
|
"logps/chosen": -542.0621337890625, |
|
"logps/rejected": -616.4013061523438, |
|
"loss": 0.4735, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.7002296447753906, |
|
"rewards/margins": 1.2402583360671997, |
|
"rewards/rejected": -3.9404876232147217, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.8898194190002617, |
|
"grad_norm": 9.155735969543457, |
|
"learning_rate": 1.827256026165028e-07, |
|
"logits/chosen": -1.3028671741485596, |
|
"logits/rejected": -1.1115076541900635, |
|
"logps/chosen": -565.5799560546875, |
|
"logps/rejected": -653.9213256835938, |
|
"loss": 0.4272, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.5059080123901367, |
|
"rewards/margins": 1.455091118812561, |
|
"rewards/rejected": -3.960999011993408, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.8898194190002617, |
|
"eval_logits/chosen": -1.1445426940917969, |
|
"eval_logits/rejected": -1.0071464776992798, |
|
"eval_logps/chosen": -536.8281860351562, |
|
"eval_logps/rejected": -647.4435424804688, |
|
"eval_loss": 0.47965455055236816, |
|
"eval_rewards/accuracies": 0.7459999918937683, |
|
"eval_rewards/chosen": -2.722262382507324, |
|
"eval_rewards/margins": 1.306469440460205, |
|
"eval_rewards/rejected": -4.0287322998046875, |
|
"eval_runtime": 1597.1181, |
|
"eval_samples_per_second": 1.252, |
|
"eval_steps_per_second": 0.157, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.8924365349384977, |
|
"grad_norm": 9.588942527770996, |
|
"learning_rate": 1.7424923939454274e-07, |
|
"logits/chosen": -1.2525078058242798, |
|
"logits/rejected": -1.0694096088409424, |
|
"logps/chosen": -553.6488037109375, |
|
"logps/rejected": -648.3309326171875, |
|
"loss": 0.4282, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.723491668701172, |
|
"rewards/margins": 1.380472183227539, |
|
"rewards/rejected": -4.103963851928711, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.8950536508767338, |
|
"grad_norm": 19.193740844726562, |
|
"learning_rate": 1.6596707569179304e-07, |
|
"logits/chosen": -1.3545329570770264, |
|
"logits/rejected": -1.1915156841278076, |
|
"logps/chosen": -562.5875244140625, |
|
"logps/rejected": -653.5098876953125, |
|
"loss": 0.4928, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.815075635910034, |
|
"rewards/margins": 1.2605375051498413, |
|
"rewards/rejected": -4.075612545013428, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.8976707668149699, |
|
"grad_norm": 14.299762725830078, |
|
"learning_rate": 1.578798030665385e-07, |
|
"logits/chosen": -1.283125638961792, |
|
"logits/rejected": -1.0879403352737427, |
|
"logps/chosen": -551.2545166015625, |
|
"logps/rejected": -682.09228515625, |
|
"loss": 0.4447, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.7663490772247314, |
|
"rewards/margins": 1.4793423414230347, |
|
"rewards/rejected": -4.245691776275635, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.9002878827532059, |
|
"grad_norm": 8.79478645324707, |
|
"learning_rate": 1.499880968037165e-07, |
|
"logits/chosen": -1.2685706615447998, |
|
"logits/rejected": -1.1165311336517334, |
|
"logps/chosen": -530.1332397460938, |
|
"logps/rejected": -616.2289428710938, |
|
"loss": 0.5132, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.7463672161102295, |
|
"rewards/margins": 1.2237727642059326, |
|
"rewards/rejected": -3.970139980316162, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.902904998691442, |
|
"grad_norm": 14.623788833618164, |
|
"learning_rate": 1.4229261585852805e-07, |
|
"logits/chosen": -1.298165202140808, |
|
"logits/rejected": -1.205263614654541, |
|
"logps/chosen": -537.5665283203125, |
|
"logps/rejected": -645.2786865234375, |
|
"loss": 0.4557, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.692805528640747, |
|
"rewards/margins": 1.3347504138946533, |
|
"rewards/rejected": -4.0275559425354, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.9055221146296781, |
|
"grad_norm": 11.80216121673584, |
|
"learning_rate": 1.3479400280141886e-07, |
|
"logits/chosen": -1.2136515378952026, |
|
"logits/rejected": -1.1657497882843018, |
|
"logps/chosen": -525.5947265625, |
|
"logps/rejected": -662.1110229492188, |
|
"loss": 0.4657, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.7938077449798584, |
|
"rewards/margins": 1.385801076889038, |
|
"rewards/rejected": -4.1796088218688965, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.9081392305679141, |
|
"grad_norm": 9.422633171081543, |
|
"learning_rate": 1.2749288376442044e-07, |
|
"logits/chosen": -1.3240694999694824, |
|
"logits/rejected": -1.106979489326477, |
|
"logps/chosen": -565.2491455078125, |
|
"logps/rejected": -643.6472778320312, |
|
"loss": 0.4631, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.6884024143218994, |
|
"rewards/margins": 1.3586628437042236, |
|
"rewards/rejected": -4.047064781188965, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.9107563465061502, |
|
"grad_norm": 9.699939727783203, |
|
"learning_rate": 1.203898683888713e-07, |
|
"logits/chosen": -1.306654691696167, |
|
"logits/rejected": -1.1677879095077515, |
|
"logps/chosen": -532.5252685546875, |
|
"logps/rejected": -648.9597778320312, |
|
"loss": 0.5443, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.886373519897461, |
|
"rewards/margins": 1.1954319477081299, |
|
"rewards/rejected": -4.081805229187012, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.9133734624443863, |
|
"grad_norm": 11.356287002563477, |
|
"learning_rate": 1.1348554977451132e-07, |
|
"logits/chosen": -1.3395811319351196, |
|
"logits/rejected": -1.1923797130584717, |
|
"logps/chosen": -551.8304443359375, |
|
"logps/rejected": -638.4985961914062, |
|
"loss": 0.5142, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.699251651763916, |
|
"rewards/margins": 1.195854902267456, |
|
"rewards/rejected": -3.895106792449951, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.9159905783826223, |
|
"grad_norm": 10.900007247924805, |
|
"learning_rate": 1.0678050442995802e-07, |
|
"logits/chosen": -1.306223750114441, |
|
"logits/rejected": -1.088254690170288, |
|
"logps/chosen": -554.8663330078125, |
|
"logps/rejected": -634.9073486328125, |
|
"loss": 0.5272, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.7420237064361572, |
|
"rewards/margins": 1.2511793375015259, |
|
"rewards/rejected": -3.9932029247283936, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.9159905783826223, |
|
"eval_logits/chosen": -1.160068154335022, |
|
"eval_logits/rejected": -1.023296594619751, |
|
"eval_logps/chosen": -536.0448608398438, |
|
"eval_logps/rejected": -647.7730102539062, |
|
"eval_loss": 0.4797233045101166, |
|
"eval_rewards/accuracies": 0.746999979019165, |
|
"eval_rewards/chosen": -2.7144289016723633, |
|
"eval_rewards/margins": 1.3175978660583496, |
|
"eval_rewards/rejected": -4.032026767730713, |
|
"eval_runtime": 1597.7222, |
|
"eval_samples_per_second": 1.252, |
|
"eval_steps_per_second": 0.156, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.9186076943208584, |
|
"grad_norm": 9.065542221069336, |
|
"learning_rate": 1.0027529222456755e-07, |
|
"logits/chosen": -1.2851347923278809, |
|
"logits/rejected": -1.0965713262557983, |
|
"logps/chosen": -514.8778076171875, |
|
"logps/rejected": -634.3299560546875, |
|
"loss": 0.4078, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.6071436405181885, |
|
"rewards/margins": 1.371249794960022, |
|
"rewards/rejected": -3.978393077850342, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.9212248102590945, |
|
"grad_norm": 12.434161186218262, |
|
"learning_rate": 9.397045634168766e-08, |
|
"logits/chosen": -1.3080555200576782, |
|
"logits/rejected": -1.222429633140564, |
|
"logps/chosen": -531.6901245117188, |
|
"logps/rejected": -675.6932373046875, |
|
"loss": 0.4615, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.632573366165161, |
|
"rewards/margins": 1.43732750415802, |
|
"rewards/rejected": -4.069900989532471, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.9238419261973305, |
|
"grad_norm": 16.84272003173828, |
|
"learning_rate": 8.78665232332998e-08, |
|
"logits/chosen": -1.243849515914917, |
|
"logits/rejected": -1.1529252529144287, |
|
"logps/chosen": -504.6124572753906, |
|
"logps/rejected": -625.7620849609375, |
|
"loss": 0.474, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.7395050525665283, |
|
"rewards/margins": 1.2195281982421875, |
|
"rewards/rejected": -3.959033489227295, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.9264590421355666, |
|
"grad_norm": 8.225701332092285, |
|
"learning_rate": 8.196400257606208e-08, |
|
"logits/chosen": -1.348672866821289, |
|
"logits/rejected": -1.1730903387069702, |
|
"logps/chosen": -544.8364868164062, |
|
"logps/rejected": -697.6771240234375, |
|
"loss": 0.4144, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.6183691024780273, |
|
"rewards/margins": 1.5808098316192627, |
|
"rewards/rejected": -4.199179172515869, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.9290761580738026, |
|
"grad_norm": 11.679130554199219, |
|
"learning_rate": 7.626338722875076e-08, |
|
"logits/chosen": -1.2883799076080322, |
|
"logits/rejected": -1.2092903852462769, |
|
"logps/chosen": -517.6767578125, |
|
"logps/rejected": -647.9732666015625, |
|
"loss": 0.4827, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.6364307403564453, |
|
"rewards/margins": 1.2941230535507202, |
|
"rewards/rejected": -3.930554151535034, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.9316932740120387, |
|
"grad_norm": 5.509209632873535, |
|
"learning_rate": 7.076515319110688e-08, |
|
"logits/chosen": -1.283998727798462, |
|
"logits/rejected": -1.1587374210357666, |
|
"logps/chosen": -517.294677734375, |
|
"logps/rejected": -615.6148071289062, |
|
"loss": 0.5114, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.6106066703796387, |
|
"rewards/margins": 1.3990845680236816, |
|
"rewards/rejected": -4.00969123840332, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.9343103899502748, |
|
"grad_norm": 7.372361660003662, |
|
"learning_rate": 6.54697595640899e-08, |
|
"logits/chosen": -1.3087977170944214, |
|
"logits/rejected": -1.178120493888855, |
|
"logps/chosen": -560.3231201171875, |
|
"logps/rejected": -660.3911743164062, |
|
"loss": 0.4774, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.6826512813568115, |
|
"rewards/margins": 1.2511831521987915, |
|
"rewards/rejected": -3.9338345527648926, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.9369275058885108, |
|
"grad_norm": 9.634334564208984, |
|
"learning_rate": 6.037764851154426e-08, |
|
"logits/chosen": -1.3032505512237549, |
|
"logits/rejected": -1.231890320777893, |
|
"logps/chosen": -522.193603515625, |
|
"logps/rejected": -652.8558349609375, |
|
"loss": 0.4929, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.5825421810150146, |
|
"rewards/margins": 1.2584049701690674, |
|
"rewards/rejected": -3.840946912765503, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.9395446218267469, |
|
"grad_norm": 7.305212497711182, |
|
"learning_rate": 5.548924522327748e-08, |
|
"logits/chosen": -1.2807940244674683, |
|
"logits/rejected": -1.1494871377944946, |
|
"logps/chosen": -517.2708740234375, |
|
"logps/rejected": -632.7682495117188, |
|
"loss": 0.4594, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.5563392639160156, |
|
"rewards/margins": 1.3009912967681885, |
|
"rewards/rejected": -3.857330799102783, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"grad_norm": 13.96353530883789, |
|
"learning_rate": 5.0804957879556915e-08, |
|
"logits/chosen": -1.198561429977417, |
|
"logits/rejected": -1.0997329950332642, |
|
"logps/chosen": -484.185546875, |
|
"logps/rejected": -620.9610595703125, |
|
"loss": 0.4441, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.587231397628784, |
|
"rewards/margins": 1.3218923807144165, |
|
"rewards/rejected": -3.909123659133911, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"eval_logits/chosen": -1.164100170135498, |
|
"eval_logits/rejected": -1.0277760028839111, |
|
"eval_logps/chosen": -529.1943969726562, |
|
"eval_logps/rejected": -639.7042846679688, |
|
"eval_loss": 0.4790266156196594, |
|
"eval_rewards/accuracies": 0.746999979019165, |
|
"eval_rewards/chosen": -2.6459240913391113, |
|
"eval_rewards/margins": 1.3054152727127075, |
|
"eval_rewards/rejected": -3.9513394832611084, |
|
"eval_runtime": 1598.6642, |
|
"eval_samples_per_second": 1.251, |
|
"eval_steps_per_second": 0.156, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.944778853703219, |
|
"grad_norm": 9.906332969665527, |
|
"learning_rate": 4.632517761702815e-08, |
|
"logits/chosen": -1.2350178956985474, |
|
"logits/rejected": -1.0819157361984253, |
|
"logps/chosen": -498.882568359375, |
|
"logps/rejected": -640.2724609375, |
|
"loss": 0.4371, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.635540008544922, |
|
"rewards/margins": 1.536821722984314, |
|
"rewards/rejected": -4.172361850738525, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.9473959696414551, |
|
"grad_norm": 12.568668365478516, |
|
"learning_rate": 4.205027849605359e-08, |
|
"logits/chosen": -1.2538177967071533, |
|
"logits/rejected": -1.1405234336853027, |
|
"logps/chosen": -525.0173950195312, |
|
"logps/rejected": -613.74560546875, |
|
"loss": 0.5461, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.771757125854492, |
|
"rewards/margins": 1.1880303621292114, |
|
"rewards/rejected": -3.959787368774414, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.9500130855796912, |
|
"grad_norm": 8.876080513000488, |
|
"learning_rate": 3.798061746947995e-08, |
|
"logits/chosen": -1.373834252357483, |
|
"logits/rejected": -1.228161334991455, |
|
"logps/chosen": -527.4785766601562, |
|
"logps/rejected": -619.109375, |
|
"loss": 0.4818, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.6347904205322266, |
|
"rewards/margins": 1.3071154356002808, |
|
"rewards/rejected": -3.941905975341797, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.9526302015179272, |
|
"grad_norm": 9.864373207092285, |
|
"learning_rate": 3.411653435283158e-08, |
|
"logits/chosen": -1.29331636428833, |
|
"logits/rejected": -1.077043056488037, |
|
"logps/chosen": -534.7817993164062, |
|
"logps/rejected": -597.4425048828125, |
|
"loss": 0.4952, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.6166446208953857, |
|
"rewards/margins": 1.1832177639007568, |
|
"rewards/rejected": -3.7998623847961426, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.9552473174561633, |
|
"grad_norm": 12.852518081665039, |
|
"learning_rate": 3.04583517959367e-08, |
|
"logits/chosen": -1.339290976524353, |
|
"logits/rejected": -1.1780240535736084, |
|
"logps/chosen": -494.443603515625, |
|
"logps/rejected": -590.2618408203125, |
|
"loss": 0.4506, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.4245123863220215, |
|
"rewards/margins": 1.2823518514633179, |
|
"rewards/rejected": -3.70686411857605, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.9578644333943994, |
|
"grad_norm": 9.657252311706543, |
|
"learning_rate": 2.7006375255985984e-08, |
|
"logits/chosen": -1.281280755996704, |
|
"logits/rejected": -1.2413251399993896, |
|
"logps/chosen": -539.2777099609375, |
|
"logps/rejected": -645.9277954101562, |
|
"loss": 0.5566, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.7454452514648438, |
|
"rewards/margins": 1.0612398386001587, |
|
"rewards/rejected": -3.8066844940185547, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.9604815493326354, |
|
"grad_norm": 12.267367362976074, |
|
"learning_rate": 2.3760892972027328e-08, |
|
"logits/chosen": -1.3983352184295654, |
|
"logits/rejected": -1.225462794303894, |
|
"logps/chosen": -544.6994018554688, |
|
"logps/rejected": -638.8689575195312, |
|
"loss": 0.5249, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.7232279777526855, |
|
"rewards/margins": 1.2808904647827148, |
|
"rewards/rejected": -4.0041184425354, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.9630986652708715, |
|
"grad_norm": 13.94206714630127, |
|
"learning_rate": 2.072217594089765e-08, |
|
"logits/chosen": -1.2447559833526611, |
|
"logits/rejected": -1.2226978540420532, |
|
"logps/chosen": -527.1612548828125, |
|
"logps/rejected": -661.6229248046875, |
|
"loss": 0.3918, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -2.6566879749298096, |
|
"rewards/margins": 1.467563271522522, |
|
"rewards/rejected": -4.124251365661621, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.9657157812091076, |
|
"grad_norm": 8.289405822753906, |
|
"learning_rate": 1.789047789459375e-08, |
|
"logits/chosen": -1.3621820211410522, |
|
"logits/rejected": -1.1523287296295166, |
|
"logps/chosen": -576.5960693359375, |
|
"logps/rejected": -663.8076782226562, |
|
"loss": 0.5052, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.597318172454834, |
|
"rewards/margins": 1.3620169162750244, |
|
"rewards/rejected": -3.9593348503112793, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.9683328971473436, |
|
"grad_norm": 7.745994567871094, |
|
"learning_rate": 1.5266035279088708e-08, |
|
"logits/chosen": -1.1971657276153564, |
|
"logits/rejected": -1.0677430629730225, |
|
"logps/chosen": -573.20361328125, |
|
"logps/rejected": -680.00244140625, |
|
"loss": 0.4823, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.7669663429260254, |
|
"rewards/margins": 1.330165147781372, |
|
"rewards/rejected": -4.097131729125977, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.9683328971473436, |
|
"eval_logits/chosen": -1.1687482595443726, |
|
"eval_logits/rejected": -1.0329276323318481, |
|
"eval_logps/chosen": -527.3952026367188, |
|
"eval_logps/rejected": -637.1880493164062, |
|
"eval_loss": 0.47885680198669434, |
|
"eval_rewards/accuracies": 0.7480000257492065, |
|
"eval_rewards/chosen": -2.627932548522949, |
|
"eval_rewards/margins": 1.298244595527649, |
|
"eval_rewards/rejected": -3.9261767864227295, |
|
"eval_runtime": 1598.696, |
|
"eval_samples_per_second": 1.251, |
|
"eval_steps_per_second": 0.156, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.9709500130855797, |
|
"grad_norm": 18.376056671142578, |
|
"learning_rate": 1.2849067234584623e-08, |
|
"logits/chosen": -1.182948112487793, |
|
"logits/rejected": -1.10740065574646, |
|
"logps/chosen": -496.62152099609375, |
|
"logps/rejected": -624.2833862304688, |
|
"loss": 0.4808, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.603543281555176, |
|
"rewards/margins": 1.3529905080795288, |
|
"rewards/rejected": -3.956533908843994, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.9735671290238157, |
|
"grad_norm": 9.365938186645508, |
|
"learning_rate": 1.0639775577218625e-08, |
|
"logits/chosen": -1.1770192384719849, |
|
"logits/rejected": -1.0035854578018188, |
|
"logps/chosen": -516.5242309570312, |
|
"logps/rejected": -607.8884887695312, |
|
"loss": 0.5282, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.6737780570983887, |
|
"rewards/margins": 1.2736929655075073, |
|
"rewards/rejected": -3.9474711418151855, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.9761842449620518, |
|
"grad_norm": 11.24964427947998, |
|
"learning_rate": 8.638344782207486e-09, |
|
"logits/chosen": -1.1972987651824951, |
|
"logits/rejected": -1.0941554307937622, |
|
"logps/chosen": -499.6304626464844, |
|
"logps/rejected": -600.4740600585938, |
|
"loss": 0.4853, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.5446014404296875, |
|
"rewards/margins": 1.2359497547149658, |
|
"rewards/rejected": -3.7805511951446533, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.9788013609002879, |
|
"grad_norm": 10.543977737426758, |
|
"learning_rate": 6.84494196844715e-09, |
|
"logits/chosen": -1.2653145790100098, |
|
"logits/rejected": -1.1376771926879883, |
|
"logps/chosen": -532.8594970703125, |
|
"logps/rejected": -669.3706665039062, |
|
"loss": 0.4524, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.608578681945801, |
|
"rewards/margins": 1.4806853532791138, |
|
"rewards/rejected": -4.089264392852783, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.9814184768385239, |
|
"grad_norm": 8.994680404663086, |
|
"learning_rate": 5.259716884556121e-09, |
|
"logits/chosen": -1.3201556205749512, |
|
"logits/rejected": -1.1730735301971436, |
|
"logps/chosen": -524.9544677734375, |
|
"logps/rejected": -640.7349853515625, |
|
"loss": 0.451, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.6050658226013184, |
|
"rewards/margins": 1.3061447143554688, |
|
"rewards/rejected": -3.911210536956787, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.98403559277676, |
|
"grad_norm": 9.462129592895508, |
|
"learning_rate": 3.882801896372967e-09, |
|
"logits/chosen": -1.3206380605697632, |
|
"logits/rejected": -1.223382830619812, |
|
"logps/chosen": -523.351318359375, |
|
"logps/rejected": -627.8754272460938, |
|
"loss": 0.4966, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.58086895942688, |
|
"rewards/margins": 1.3636963367462158, |
|
"rewards/rejected": -3.9445652961730957, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.9866527087149961, |
|
"grad_norm": 11.428803443908691, |
|
"learning_rate": 2.7143119759026614e-09, |
|
"logits/chosen": -1.3305742740631104, |
|
"logits/rejected": -1.1541904211044312, |
|
"logps/chosen": -536.9759521484375, |
|
"logps/rejected": -636.7061157226562, |
|
"loss": 0.4246, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.528337001800537, |
|
"rewards/margins": 1.2519800662994385, |
|
"rewards/rejected": -3.7803173065185547, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.9892698246532321, |
|
"grad_norm": 10.81966781616211, |
|
"learning_rate": 1.754344691717591e-09, |
|
"logits/chosen": -1.2256147861480713, |
|
"logits/rejected": -1.1796106100082397, |
|
"logps/chosen": -514.3230590820312, |
|
"logps/rejected": -642.4959716796875, |
|
"loss": 0.5022, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.645564556121826, |
|
"rewards/margins": 1.0614073276519775, |
|
"rewards/rejected": -3.7069716453552246, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.9918869405914682, |
|
"grad_norm": 16.201265335083008, |
|
"learning_rate": 1.0029802008096335e-09, |
|
"logits/chosen": -1.2533369064331055, |
|
"logits/rejected": -1.0869061946868896, |
|
"logps/chosen": -542.9913330078125, |
|
"logps/rejected": -650.4954833984375, |
|
"loss": 0.4961, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.687774181365967, |
|
"rewards/margins": 1.3033173084259033, |
|
"rewards/rejected": -3.99109148979187, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.9945040565297043, |
|
"grad_norm": 7.363870143890381, |
|
"learning_rate": 4.602812418974534e-10, |
|
"logits/chosen": -1.3605427742004395, |
|
"logits/rejected": -1.2153841257095337, |
|
"logps/chosen": -546.0870971679688, |
|
"logps/rejected": -652.19921875, |
|
"loss": 0.4996, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.6433169841766357, |
|
"rewards/margins": 1.3066623210906982, |
|
"rewards/rejected": -3.949979305267334, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.9945040565297043, |
|
"eval_logits/chosen": -1.1657898426055908, |
|
"eval_logits/rejected": -1.0296279191970825, |
|
"eval_logps/chosen": -526.756103515625, |
|
"eval_logps/rejected": -636.4028930664062, |
|
"eval_loss": 0.4788345396518707, |
|
"eval_rewards/accuracies": 0.7475000023841858, |
|
"eval_rewards/chosen": -2.62154221534729, |
|
"eval_rewards/margins": 1.2967824935913086, |
|
"eval_rewards/rejected": -3.9183249473571777, |
|
"eval_runtime": 1598.3049, |
|
"eval_samples_per_second": 1.251, |
|
"eval_steps_per_second": 0.156, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.9971211724679403, |
|
"grad_norm": 12.966708183288574, |
|
"learning_rate": 1.2629313018819312e-10, |
|
"logits/chosen": -1.2771844863891602, |
|
"logits/rejected": -1.147637128829956, |
|
"logps/chosen": -510.83001708984375, |
|
"logps/rejected": -608.63037109375, |
|
"loss": 0.503, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.5848309993743896, |
|
"rewards/margins": 1.134263277053833, |
|
"rewards/rejected": -3.7190945148468018, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.9997382884061764, |
|
"grad_norm": 16.982664108276367, |
|
"learning_rate": 1.0437535929996855e-12, |
|
"logits/chosen": -1.262603998184204, |
|
"logits/rejected": -1.0868072509765625, |
|
"logps/chosen": -551.4014892578125, |
|
"logps/rejected": -662.4216918945312, |
|
"loss": 0.4568, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.6468563079833984, |
|
"rewards/margins": 1.5578194856643677, |
|
"rewards/rejected": -4.204675674438477, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 3821, |
|
"total_flos": 0.0, |
|
"train_loss": 0.517807064771465, |
|
"train_runtime": 164396.369, |
|
"train_samples_per_second": 0.372, |
|
"train_steps_per_second": 0.023 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3821, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|