{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997382884061764, |
|
"eval_steps": 100, |
|
"global_step": 955, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.208333333333333e-09, |
|
"logits/chosen": -2.3438680171966553, |
|
"logits/rejected": -2.200690984725952, |
|
"logps/chosen": -309.19024658203125, |
|
"logps/rejected": -222.5582275390625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.208333333333333e-08, |
|
"logits/chosen": -2.3084700107574463, |
|
"logits/rejected": -2.1888935565948486, |
|
"logps/chosen": -313.1751708984375, |
|
"logps/rejected": -276.1279602050781, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0006199586787261069, |
|
"rewards/margins": 0.0007922492804937065, |
|
"rewards/rejected": -0.00017229063087143004, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -2.069239377975464, |
|
"logits/rejected": -2.046480655670166, |
|
"logps/chosen": -235.33877563476562, |
|
"logps/rejected": -250.28262329101562, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.000233887491049245, |
|
"rewards/margins": -0.0002934989461209625, |
|
"rewards/rejected": 0.0005273864371702075, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5624999999999999e-07, |
|
"logits/chosen": -2.1968424320220947, |
|
"logits/rejected": -2.0520730018615723, |
|
"logps/chosen": -270.2682189941406, |
|
"logps/rejected": -251.5027618408203, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0023205596953630447, |
|
"rewards/margins": 0.0016283988952636719, |
|
"rewards/rejected": 0.000692160683684051, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -2.2101526260375977, |
|
"logits/rejected": -2.1605677604675293, |
|
"logps/chosen": -279.98785400390625, |
|
"logps/rejected": -269.2066650390625, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.00826999731361866, |
|
"rewards/margins": 0.004588194657117128, |
|
"rewards/rejected": 0.003681803122162819, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.604166666666667e-07, |
|
"logits/chosen": -2.202209711074829, |
|
"logits/rejected": -2.1607909202575684, |
|
"logps/chosen": -260.29913330078125, |
|
"logps/rejected": -255.05734252929688, |
|
"loss": 0.6863, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.01528189517557621, |
|
"rewards/margins": 0.01289013959467411, |
|
"rewards/rejected": 0.0023917562793940306, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.155121326446533, |
|
"logits/rejected": -2.055530548095703, |
|
"logps/chosen": -284.8448181152344, |
|
"logps/rejected": -260.4853515625, |
|
"loss": 0.6799, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.023422595113515854, |
|
"rewards/margins": 0.029890310019254684, |
|
"rewards/rejected": -0.006467717699706554, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.645833333333333e-07, |
|
"logits/chosen": -2.1490182876586914, |
|
"logits/rejected": -2.0071780681610107, |
|
"logps/chosen": -291.1217041015625, |
|
"logps/rejected": -274.3982238769531, |
|
"loss": 0.6654, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.006606037728488445, |
|
"rewards/margins": 0.06380081921815872, |
|
"rewards/rejected": -0.07040686160326004, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -2.025543212890625, |
|
"logits/rejected": -1.9235265254974365, |
|
"logps/chosen": -319.5644836425781, |
|
"logps/rejected": -308.890869140625, |
|
"loss": 0.6484, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.13454711437225342, |
|
"rewards/margins": 0.1107935681939125, |
|
"rewards/rejected": -0.24534066021442413, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.6874999999999996e-07, |
|
"logits/chosen": -1.9773271083831787, |
|
"logits/rejected": -1.933540940284729, |
|
"logps/chosen": -332.3374938964844, |
|
"logps/rejected": -305.792724609375, |
|
"loss": 0.6488, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.2452891618013382, |
|
"rewards/margins": 0.1369139850139618, |
|
"rewards/rejected": -0.3822031617164612, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999732492681437e-07, |
|
"logits/chosen": -1.8573644161224365, |
|
"logits/rejected": -1.8359777927398682, |
|
"logps/chosen": -306.6253967285156, |
|
"logps/rejected": -350.30572509765625, |
|
"loss": 0.6339, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.32189178466796875, |
|
"rewards/margins": 0.19727933406829834, |
|
"rewards/rejected": -0.5191711187362671, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_logits/chosen": -1.854963779449463, |
|
"eval_logits/rejected": -1.7265539169311523, |
|
"eval_logps/chosen": -322.45428466796875, |
|
"eval_logps/rejected": -318.7289123535156, |
|
"eval_loss": 0.6366350054740906, |
|
"eval_rewards/accuracies": 0.6765872836112976, |
|
"eval_rewards/chosen": -0.42509153485298157, |
|
"eval_rewards/margins": 0.2029310166835785, |
|
"eval_rewards/rejected": -0.6280225515365601, |
|
"eval_runtime": 245.1911, |
|
"eval_samples_per_second": 8.157, |
|
"eval_steps_per_second": 0.257, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.996723692767926e-07, |
|
"logits/chosen": -1.9280967712402344, |
|
"logits/rejected": -1.7659218311309814, |
|
"logps/chosen": -304.70416259765625, |
|
"logps/rejected": -303.22015380859375, |
|
"loss": 0.6252, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.3900960385799408, |
|
"rewards/margins": 0.21217259764671326, |
|
"rewards/rejected": -0.6022686958312988, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.990375746213598e-07, |
|
"logits/chosen": -1.9455022811889648, |
|
"logits/rejected": -1.77316415309906, |
|
"logps/chosen": -319.1278381347656, |
|
"logps/rejected": -316.08233642578125, |
|
"loss": 0.6289, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.39358627796173096, |
|
"rewards/margins": 0.2644176185131073, |
|
"rewards/rejected": -0.6580039262771606, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.980697142834314e-07, |
|
"logits/chosen": -2.112504005432129, |
|
"logits/rejected": -1.9311736822128296, |
|
"logps/chosen": -345.83428955078125, |
|
"logps/rejected": -329.4554748535156, |
|
"loss": 0.6144, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.4626695513725281, |
|
"rewards/margins": 0.18274100124835968, |
|
"rewards/rejected": -0.6454105973243713, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.967700826904229e-07, |
|
"logits/chosen": -2.019052505493164, |
|
"logits/rejected": -1.936741828918457, |
|
"logps/chosen": -315.3255920410156, |
|
"logps/rejected": -319.93902587890625, |
|
"loss": 0.595, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6618548631668091, |
|
"rewards/margins": 0.29689091444015503, |
|
"rewards/rejected": -0.9587456583976746, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.951404179843962e-07, |
|
"logits/chosen": -1.9618381261825562, |
|
"logits/rejected": -1.9211444854736328, |
|
"logps/chosen": -349.3891906738281, |
|
"logps/rejected": -336.864013671875, |
|
"loss": 0.6143, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.4716704487800598, |
|
"rewards/margins": 0.22611722350120544, |
|
"rewards/rejected": -0.6977876424789429, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.931828996974498e-07, |
|
"logits/chosen": -2.0160911083221436, |
|
"logits/rejected": -1.9039357900619507, |
|
"logps/chosen": -307.0707092285156, |
|
"logps/rejected": -351.3368225097656, |
|
"loss": 0.5853, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.5011919140815735, |
|
"rewards/margins": 0.29305344820022583, |
|
"rewards/rejected": -0.7942453622817993, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.909001458367866e-07, |
|
"logits/chosen": -2.044952869415283, |
|
"logits/rejected": -1.9001652002334595, |
|
"logps/chosen": -353.18634033203125, |
|
"logps/rejected": -322.8375549316406, |
|
"loss": 0.5853, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4935056269168854, |
|
"rewards/margins": 0.3311043679714203, |
|
"rewards/rejected": -0.8246100544929504, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.882952093833627e-07, |
|
"logits/chosen": -1.9955902099609375, |
|
"logits/rejected": -1.8852081298828125, |
|
"logps/chosen": -354.67779541015625, |
|
"logps/rejected": -409.36767578125, |
|
"loss": 0.564, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.9336503744125366, |
|
"rewards/margins": 0.49948063492774963, |
|
"rewards/rejected": -1.4331310987472534, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.853715742087946e-07, |
|
"logits/chosen": -2.175471067428589, |
|
"logits/rejected": -1.9974597692489624, |
|
"logps/chosen": -358.2491760253906, |
|
"logps/rejected": -364.2205505371094, |
|
"loss": 0.5624, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6146177053451538, |
|
"rewards/margins": 0.5364774465560913, |
|
"rewards/rejected": -1.1510951519012451, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.821331504159906e-07, |
|
"logits/chosen": -1.9217967987060547, |
|
"logits/rejected": -1.864315390586853, |
|
"logps/chosen": -347.8321228027344, |
|
"logps/rejected": -396.397216796875, |
|
"loss": 0.5801, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.9297510385513306, |
|
"rewards/margins": 0.4349847435951233, |
|
"rewards/rejected": -1.3647358417510986, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -1.886601448059082, |
|
"eval_logits/rejected": -1.7790945768356323, |
|
"eval_logps/chosen": -373.3334655761719, |
|
"eval_logps/rejected": -405.0862121582031, |
|
"eval_loss": 0.5760898590087891, |
|
"eval_rewards/accuracies": 0.7242063283920288, |
|
"eval_rewards/chosen": -0.9338834285736084, |
|
"eval_rewards/margins": 0.5577120184898376, |
|
"eval_rewards/rejected": -1.4915955066680908, |
|
"eval_runtime": 244.0521, |
|
"eval_samples_per_second": 8.195, |
|
"eval_steps_per_second": 0.258, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.785842691097342e-07, |
|
"logits/chosen": -1.9147882461547852, |
|
"logits/rejected": -1.7938772439956665, |
|
"logps/chosen": -381.3200378417969, |
|
"logps/rejected": -368.9107360839844, |
|
"loss": 0.5853, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8299940228462219, |
|
"rewards/margins": 0.4266335964202881, |
|
"rewards/rejected": -1.2566276788711548, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7472967660421603e-07, |
|
"logits/chosen": -1.8929469585418701, |
|
"logits/rejected": -1.888225793838501, |
|
"logps/chosen": -338.57611083984375, |
|
"logps/rejected": -366.98126220703125, |
|
"loss": 0.57, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.6195345520973206, |
|
"rewards/margins": 0.3649435043334961, |
|
"rewards/rejected": -0.9844779968261719, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.705745280752585e-07, |
|
"logits/chosen": -1.7569427490234375, |
|
"logits/rejected": -1.6396926641464233, |
|
"logps/chosen": -387.4503479003906, |
|
"logps/rejected": -401.4142150878906, |
|
"loss": 0.56, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.8091585040092468, |
|
"rewards/margins": 0.5702639818191528, |
|
"rewards/rejected": -1.379422664642334, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.6612438066572555e-07, |
|
"logits/chosen": -1.488446593284607, |
|
"logits/rejected": -1.3490570783615112, |
|
"logps/chosen": -340.69622802734375, |
|
"logps/rejected": -346.3163757324219, |
|
"loss": 0.5711, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.7310938835144043, |
|
"rewards/margins": 0.6003143191337585, |
|
"rewards/rejected": -1.3314082622528076, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.6138518605333664e-07, |
|
"logits/chosen": -1.5929442644119263, |
|
"logits/rejected": -1.5048617124557495, |
|
"logps/chosen": -386.0514221191406, |
|
"logps/rejected": -422.6902770996094, |
|
"loss": 0.5572, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.9384471774101257, |
|
"rewards/margins": 0.6384426951408386, |
|
"rewards/rejected": -1.5768897533416748, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5636328249082514e-07, |
|
"logits/chosen": -1.654510498046875, |
|
"logits/rejected": -1.6068557500839233, |
|
"logps/chosen": -364.17596435546875, |
|
"logps/rejected": -406.23736572265625, |
|
"loss": 0.5703, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.179985523223877, |
|
"rewards/margins": 0.42138758301734924, |
|
"rewards/rejected": -1.6013730764389038, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.510653863290871e-07, |
|
"logits/chosen": -1.6021859645843506, |
|
"logits/rejected": -1.461978554725647, |
|
"logps/chosen": -407.7553405761719, |
|
"logps/rejected": -452.82501220703125, |
|
"loss": 0.5428, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.1438275575637817, |
|
"rewards/margins": 0.6944350004196167, |
|
"rewards/rejected": -1.8382627964019775, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.4549858303465737e-07, |
|
"logits/chosen": -1.666479468345642, |
|
"logits/rejected": -1.5147731304168701, |
|
"logps/chosen": -397.7020568847656, |
|
"logps/rejected": -426.46966552734375, |
|
"loss": 0.55, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.1143194437026978, |
|
"rewards/margins": 0.6110407114028931, |
|
"rewards/rejected": -1.7253602743148804, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.396703177135261e-07, |
|
"logits/chosen": -1.5770277976989746, |
|
"logits/rejected": -1.3010271787643433, |
|
"logps/chosen": -389.9909362792969, |
|
"logps/rejected": -386.58642578125, |
|
"loss": 0.5299, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.0810632705688477, |
|
"rewards/margins": 0.5923460721969604, |
|
"rewards/rejected": -1.6734092235565186, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.335883851539693e-07, |
|
"logits/chosen": -1.2049105167388916, |
|
"logits/rejected": -0.9145506024360657, |
|
"logps/chosen": -372.0166320800781, |
|
"logps/rejected": -415.6207580566406, |
|
"loss": 0.5298, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.15445876121521, |
|
"rewards/margins": 0.6882535815238953, |
|
"rewards/rejected": -1.8427120447158813, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_logits/chosen": -1.193803071975708, |
|
"eval_logits/rejected": -0.9729028940200806, |
|
"eval_logps/chosen": -375.13653564453125, |
|
"eval_logps/rejected": -417.95367431640625, |
|
"eval_loss": 0.550506591796875, |
|
"eval_rewards/accuracies": 0.7400793433189392, |
|
"eval_rewards/chosen": -0.9519141316413879, |
|
"eval_rewards/margins": 0.6683558821678162, |
|
"eval_rewards/rejected": -1.620270013809204, |
|
"eval_runtime": 244.8113, |
|
"eval_samples_per_second": 8.17, |
|
"eval_steps_per_second": 0.257, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.272609194017105e-07, |
|
"logits/chosen": -1.1300534009933472, |
|
"logits/rejected": -1.0067355632781982, |
|
"logps/chosen": -343.98272705078125, |
|
"logps/rejected": -458.59185791015625, |
|
"loss": 0.5017, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.8752749562263489, |
|
"rewards/margins": 0.8713703155517578, |
|
"rewards/rejected": -1.746645212173462, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.2069638288135547e-07, |
|
"logits/chosen": -0.5048955678939819, |
|
"logits/rejected": -0.22074377536773682, |
|
"logps/chosen": -470.4259338378906, |
|
"logps/rejected": -543.0339965820312, |
|
"loss": 0.5284, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.6318514347076416, |
|
"rewards/margins": 0.9790364503860474, |
|
"rewards/rejected": -2.6108880043029785, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.139035550786494e-07, |
|
"logits/chosen": -0.7544638514518738, |
|
"logits/rejected": -0.46497392654418945, |
|
"logps/chosen": -424.0389099121094, |
|
"logps/rejected": -432.882080078125, |
|
"loss": 0.5646, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.4958339929580688, |
|
"rewards/margins": 0.6142513155937195, |
|
"rewards/rejected": -2.1100850105285645, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.0689152079869306e-07, |
|
"logits/chosen": -1.2732598781585693, |
|
"logits/rejected": -1.1444041728973389, |
|
"logps/chosen": -405.2666015625, |
|
"logps/rejected": -401.07354736328125, |
|
"loss": 0.571, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.1170111894607544, |
|
"rewards/margins": 0.5194820165634155, |
|
"rewards/rejected": -1.6364930868148804, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.99669658015821e-07, |
|
"logits/chosen": -1.1732494831085205, |
|
"logits/rejected": -1.0453077554702759, |
|
"logps/chosen": -429.7718811035156, |
|
"logps/rejected": -471.4634704589844, |
|
"loss": 0.5373, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.258093237876892, |
|
"rewards/margins": 0.7526389956474304, |
|
"rewards/rejected": -2.0107321739196777, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.92247625331392e-07, |
|
"logits/chosen": -0.8629885911941528, |
|
"logits/rejected": -0.6935967803001404, |
|
"logps/chosen": -421.68194580078125, |
|
"logps/rejected": -475.949462890625, |
|
"loss": 0.5317, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.5934243202209473, |
|
"rewards/margins": 0.631182074546814, |
|
"rewards/rejected": -2.224606513977051, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.846353490562664e-07, |
|
"logits/chosen": -0.7160054445266724, |
|
"logits/rejected": -0.41435980796813965, |
|
"logps/chosen": -399.6954345703125, |
|
"logps/rejected": -486.19757080078125, |
|
"loss": 0.5105, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.2889573574066162, |
|
"rewards/margins": 0.8785942792892456, |
|
"rewards/rejected": -2.1675515174865723, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.768430099352445e-07, |
|
"logits/chosen": -1.0206403732299805, |
|
"logits/rejected": -0.7304887771606445, |
|
"logps/chosen": -433.85137939453125, |
|
"logps/rejected": -487.86126708984375, |
|
"loss": 0.5303, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4668599367141724, |
|
"rewards/margins": 0.7815066576004028, |
|
"rewards/rejected": -2.2483668327331543, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.6888102953122304e-07, |
|
"logits/chosen": -0.7131624817848206, |
|
"logits/rejected": -0.43439167737960815, |
|
"logps/chosen": -434.3667907714844, |
|
"logps/rejected": -473.67620849609375, |
|
"loss": 0.5214, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4395004510879517, |
|
"rewards/margins": 0.8123389482498169, |
|
"rewards/rejected": -2.2518393993377686, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.607600562872785e-07, |
|
"logits/chosen": -0.5696905255317688, |
|
"logits/rejected": -0.11699406057596207, |
|
"logps/chosen": -451.731689453125, |
|
"logps/rejected": -470.5762634277344, |
|
"loss": 0.5055, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.5301005840301514, |
|
"rewards/margins": 0.6669865846633911, |
|
"rewards/rejected": -2.197086811065674, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": -0.03764622285962105, |
|
"eval_logits/rejected": 0.29014527797698975, |
|
"eval_logps/chosen": -418.03948974609375, |
|
"eval_logps/rejected": -474.5049743652344, |
|
"eval_loss": 0.5331180691719055, |
|
"eval_rewards/accuracies": 0.7539682388305664, |
|
"eval_rewards/chosen": -1.3809435367584229, |
|
"eval_rewards/margins": 0.8048391938209534, |
|
"eval_rewards/rejected": -2.1857824325561523, |
|
"eval_runtime": 244.4235, |
|
"eval_samples_per_second": 8.183, |
|
"eval_steps_per_second": 0.258, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5249095128531856e-07, |
|
"logits/chosen": -0.3684214949607849, |
|
"logits/rejected": -0.036270398646593094, |
|
"logps/chosen": -420.4697265625, |
|
"logps/rejected": -425.89837646484375, |
|
"loss": 0.5474, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.3041114807128906, |
|
"rewards/margins": 0.469282865524292, |
|
"rewards/rejected": -1.773394227027893, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4408477372034736e-07, |
|
"logits/chosen": -0.5258590579032898, |
|
"logits/rejected": -0.11696865409612656, |
|
"logps/chosen": -331.28082275390625, |
|
"logps/rejected": -395.2735290527344, |
|
"loss": 0.5456, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.0080857276916504, |
|
"rewards/margins": 0.560020387172699, |
|
"rewards/rejected": -1.5681060552597046, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3555276610977276e-07, |
|
"logits/chosen": -0.45506519079208374, |
|
"logits/rejected": -0.2455427199602127, |
|
"logps/chosen": -372.3184509277344, |
|
"logps/rejected": -409.4679870605469, |
|
"loss": 0.5575, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0017720460891724, |
|
"rewards/margins": 0.5574255585670471, |
|
"rewards/rejected": -1.5591974258422852, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.269063392575352e-07, |
|
"logits/chosen": -0.26316189765930176, |
|
"logits/rejected": 0.11150024086236954, |
|
"logps/chosen": -400.49072265625, |
|
"logps/rejected": -426.67657470703125, |
|
"loss": 0.534, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.2625601291656494, |
|
"rewards/margins": 0.5866714119911194, |
|
"rewards/rejected": -1.8492317199707031, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1815705699316964e-07, |
|
"logits/chosen": 0.445736825466156, |
|
"logits/rejected": 0.7627506852149963, |
|
"logps/chosen": -389.2279968261719, |
|
"logps/rejected": -451.03521728515625, |
|
"loss": 0.5543, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5224748849868774, |
|
"rewards/margins": 0.645032525062561, |
|
"rewards/rejected": -2.1675071716308594, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.0931662070620794e-07, |
|
"logits/chosen": 0.20439806580543518, |
|
"logits/rejected": 0.6606889963150024, |
|
"logps/chosen": -455.30035400390625, |
|
"logps/rejected": -485.322021484375, |
|
"loss": 0.5352, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.5770175457000732, |
|
"rewards/margins": 0.6814727783203125, |
|
"rewards/rejected": -2.2584900856018066, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.003968536966078e-07, |
|
"logits/chosen": 0.2584216892719269, |
|
"logits/rejected": 0.6647650003433228, |
|
"logps/chosen": -412.4117736816406, |
|
"logps/rejected": -455.8914489746094, |
|
"loss": 0.5332, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.4638701677322388, |
|
"rewards/margins": 0.5353850722312927, |
|
"rewards/rejected": -1.9992549419403076, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9140968536213693e-07, |
|
"logits/chosen": 0.15033851563930511, |
|
"logits/rejected": 0.30757248401641846, |
|
"logps/chosen": -370.2145690917969, |
|
"logps/rejected": -433.5589904785156, |
|
"loss": 0.5567, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.2743232250213623, |
|
"rewards/margins": 0.5514111518859863, |
|
"rewards/rejected": -1.8257343769073486, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.823671352438608e-07, |
|
"logits/chosen": -0.019240472465753555, |
|
"logits/rejected": 0.541126012802124, |
|
"logps/chosen": -384.51373291015625, |
|
"logps/rejected": -420.33648681640625, |
|
"loss": 0.527, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.236613392829895, |
|
"rewards/margins": 0.6580051183700562, |
|
"rewards/rejected": -1.894618272781372, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.73281296951072e-07, |
|
"logits/chosen": 0.26845166087150574, |
|
"logits/rejected": 0.9440711140632629, |
|
"logps/chosen": -447.5386657714844, |
|
"logps/rejected": -504.5428771972656, |
|
"loss": 0.5243, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.576322317123413, |
|
"rewards/margins": 0.9059449434280396, |
|
"rewards/rejected": -2.482267141342163, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": 0.7244722843170166, |
|
"eval_logits/rejected": 1.116690993309021, |
|
"eval_logps/chosen": -433.9210205078125, |
|
"eval_logps/rejected": -491.7053527832031, |
|
"eval_loss": 0.5239810347557068, |
|
"eval_rewards/accuracies": 0.77182537317276, |
|
"eval_rewards/chosen": -1.5397586822509766, |
|
"eval_rewards/margins": 0.8180281519889832, |
|
"eval_rewards/rejected": -2.3577868938446045, |
|
"eval_runtime": 244.6169, |
|
"eval_samples_per_second": 8.176, |
|
"eval_steps_per_second": 0.258, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.641643219871597e-07, |
|
"logits/chosen": 0.5857471823692322, |
|
"logits/rejected": 1.1579170227050781, |
|
"logps/chosen": -432.2608337402344, |
|
"logps/rejected": -453.0177307128906, |
|
"loss": 0.5076, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.56296706199646, |
|
"rewards/margins": 0.7662748694419861, |
|
"rewards/rejected": -2.329241991043091, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.550284034980507e-07, |
|
"logits/chosen": 0.5627486705780029, |
|
"logits/rejected": 1.0138986110687256, |
|
"logps/chosen": -440.6780700683594, |
|
"logps/rejected": -490.7980041503906, |
|
"loss": 0.5118, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.5351483821868896, |
|
"rewards/margins": 0.7626742124557495, |
|
"rewards/rejected": -2.2978224754333496, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4588575996495794e-07, |
|
"logits/chosen": 0.5618034601211548, |
|
"logits/rejected": 1.1389967203140259, |
|
"logps/chosen": -458.1664123535156, |
|
"logps/rejected": -529.7509765625, |
|
"loss": 0.5147, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.6149412393569946, |
|
"rewards/margins": 0.7950725555419922, |
|
"rewards/rejected": -2.4100139141082764, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.367486188632446e-07, |
|
"logits/chosen": 0.46996626257896423, |
|
"logits/rejected": 1.0377795696258545, |
|
"logps/chosen": -429.85540771484375, |
|
"logps/rejected": -497.40948486328125, |
|
"loss": 0.5133, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.4811718463897705, |
|
"rewards/margins": 0.8862358927726746, |
|
"rewards/rejected": -2.367408037185669, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.276292003092593e-07, |
|
"logits/chosen": 0.3401317596435547, |
|
"logits/rejected": 0.842154324054718, |
|
"logps/chosen": -437.26739501953125, |
|
"logps/rejected": -500.60089111328125, |
|
"loss": 0.5413, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.5497123003005981, |
|
"rewards/margins": 0.8867634534835815, |
|
"rewards/rejected": -2.4364757537841797, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.185397007170141e-07, |
|
"logits/chosen": 0.17786632478237152, |
|
"logits/rejected": 0.7487555742263794, |
|
"logps/chosen": -402.21453857421875, |
|
"logps/rejected": -413.3843688964844, |
|
"loss": 0.5198, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.3434484004974365, |
|
"rewards/margins": 0.6699635982513428, |
|
"rewards/rejected": -2.0134117603302, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.094922764865619e-07, |
|
"logits/chosen": 0.33579394221305847, |
|
"logits/rejected": 0.8120689392089844, |
|
"logps/chosen": -394.34857177734375, |
|
"logps/rejected": -451.68878173828125, |
|
"loss": 0.5383, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.341185450553894, |
|
"rewards/margins": 0.6223596930503845, |
|
"rewards/rejected": -1.9635450839996338, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.0049902774588797e-07, |
|
"logits/chosen": 0.6950188875198364, |
|
"logits/rejected": 1.2489306926727295, |
|
"logps/chosen": -420.9115295410156, |
|
"logps/rejected": -468.81402587890625, |
|
"loss": 0.5353, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.3789879083633423, |
|
"rewards/margins": 0.7200512886047363, |
|
"rewards/rejected": -2.099039316177368, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9157198216806238e-07, |
|
"logits/chosen": 0.6935154795646667, |
|
"logits/rejected": 0.9982309341430664, |
|
"logps/chosen": -409.898193359375, |
|
"logps/rejected": -451.1421813964844, |
|
"loss": 0.5394, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.388021469116211, |
|
"rewards/margins": 0.5696064829826355, |
|
"rewards/rejected": -1.9576278924942017, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8272307888529274e-07, |
|
"logits/chosen": 0.2730625867843628, |
|
"logits/rejected": 0.6284732818603516, |
|
"logps/chosen": -443.95013427734375, |
|
"logps/rejected": -519.79541015625, |
|
"loss": 0.5024, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.3900865316390991, |
|
"rewards/margins": 0.8703911900520325, |
|
"rewards/rejected": -2.2604775428771973, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": 0.8469038605690002, |
|
"eval_logits/rejected": 1.3223692178726196, |
|
"eval_logps/chosen": -446.7126770019531, |
|
"eval_logps/rejected": -509.12152099609375, |
|
"eval_loss": 0.5212409496307373, |
|
"eval_rewards/accuracies": 0.75, |
|
"eval_rewards/chosen": -1.6676758527755737, |
|
"eval_rewards/margins": 0.8642725944519043, |
|
"eval_rewards/rejected": -2.5319488048553467, |
|
"eval_runtime": 243.8512, |
|
"eval_samples_per_second": 8.202, |
|
"eval_steps_per_second": 0.258, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7396415252139288e-07, |
|
"logits/chosen": 0.7108888626098633, |
|
"logits/rejected": 1.4451757669448853, |
|
"logps/chosen": -478.5006408691406, |
|
"logps/rejected": -478.9517517089844, |
|
"loss": 0.5186, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.6936050653457642, |
|
"rewards/margins": 0.7683889865875244, |
|
"rewards/rejected": -2.461993932723999, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6530691736402316e-07, |
|
"logits/chosen": 0.623712420463562, |
|
"logits/rejected": 1.2085224390029907, |
|
"logps/chosen": -436.6463928222656, |
|
"logps/rejected": -483.9029235839844, |
|
"loss": 0.5119, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.5746710300445557, |
|
"rewards/margins": 0.7840819954872131, |
|
"rewards/rejected": -2.358752965927124, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5676295169786864e-07, |
|
"logits/chosen": 1.255614995956421, |
|
"logits/rejected": 1.7882163524627686, |
|
"logps/chosen": -416.83563232421875, |
|
"logps/rejected": -496.5362243652344, |
|
"loss": 0.5017, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4195477962493896, |
|
"rewards/margins": 1.038696527481079, |
|
"rewards/rejected": -2.4582440853118896, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.483436823197092e-07, |
|
"logits/chosen": 1.1485587358474731, |
|
"logits/rejected": 1.5099462270736694, |
|
"logps/chosen": -407.3171691894531, |
|
"logps/rejected": -482.505615234375, |
|
"loss": 0.5076, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.5105384588241577, |
|
"rewards/margins": 0.8905227780342102, |
|
"rewards/rejected": -2.401061534881592, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4006036925609243e-07, |
|
"logits/chosen": 0.8679726719856262, |
|
"logits/rejected": 1.5178191661834717, |
|
"logps/chosen": -471.46868896484375, |
|
"logps/rejected": -529.3899536132812, |
|
"loss": 0.5044, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6612011194229126, |
|
"rewards/margins": 0.8902603983879089, |
|
"rewards/rejected": -2.5514614582061768, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.319240907040458e-07, |
|
"logits/chosen": 0.9074466824531555, |
|
"logits/rejected": 1.4612318277359009, |
|
"logps/chosen": -458.69842529296875, |
|
"logps/rejected": -467.8643493652344, |
|
"loss": 0.5214, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.7074607610702515, |
|
"rewards/margins": 0.5886183977127075, |
|
"rewards/rejected": -2.296079158782959, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.239457282149695e-07, |
|
"logits/chosen": 0.826758086681366, |
|
"logits/rejected": 1.512880563735962, |
|
"logps/chosen": -418.04949951171875, |
|
"logps/rejected": -474.4520568847656, |
|
"loss": 0.5169, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.4526798725128174, |
|
"rewards/margins": 0.9233277440071106, |
|
"rewards/rejected": -2.376007556915283, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1613595214152711e-07, |
|
"logits/chosen": 0.6957104802131653, |
|
"logits/rejected": 1.0907188653945923, |
|
"logps/chosen": -422.57086181640625, |
|
"logps/rejected": -443.7021484375, |
|
"loss": 0.5358, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.4303207397460938, |
|
"rewards/margins": 0.5159063339233398, |
|
"rewards/rejected": -1.9462270736694336, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.0850520736699362e-07, |
|
"logits/chosen": 0.5629902482032776, |
|
"logits/rejected": 1.4120800495147705, |
|
"logps/chosen": -379.07037353515625, |
|
"logps/rejected": -417.822021484375, |
|
"loss": 0.5065, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.299447774887085, |
|
"rewards/margins": 0.9052375555038452, |
|
"rewards/rejected": -2.2046852111816406, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0106369933615042e-07, |
|
"logits/chosen": 0.5293871164321899, |
|
"logits/rejected": 1.561988115310669, |
|
"logps/chosen": -415.15570068359375, |
|
"logps/rejected": -517.3580322265625, |
|
"loss": 0.4855, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.4450310468673706, |
|
"rewards/margins": 1.3134677410125732, |
|
"rewards/rejected": -2.7584986686706543, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_logits/chosen": 1.0176714658737183, |
|
"eval_logits/rejected": 1.5164849758148193, |
|
"eval_logps/chosen": -432.87799072265625, |
|
"eval_logps/rejected": -497.0490417480469, |
|
"eval_loss": 0.5155569911003113, |
|
"eval_rewards/accuracies": 0.7579365372657776, |
|
"eval_rewards/chosen": -1.5293281078338623, |
|
"eval_rewards/margins": 0.8818953633308411, |
|
"eval_rewards/rejected": -2.4112234115600586, |
|
"eval_runtime": 243.7233, |
|
"eval_samples_per_second": 8.206, |
|
"eval_steps_per_second": 0.258, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.382138040640714e-08, |
|
"logits/chosen": 0.7708175182342529, |
|
"logits/rejected": 1.4804003238677979, |
|
"logps/chosen": -430.60675048828125, |
|
"logps/rejected": -460.25506591796875, |
|
"loss": 0.5182, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.51383376121521, |
|
"rewards/margins": 0.781708836555481, |
|
"rewards/rejected": -2.2955427169799805, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.678793653740632e-08, |
|
"logits/chosen": 0.9884228706359863, |
|
"logits/rejected": 1.4830214977264404, |
|
"logps/chosen": -411.5726013183594, |
|
"logps/rejected": -486.2874450683594, |
|
"loss": 0.5148, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.49359929561615, |
|
"rewards/margins": 0.8712177276611328, |
|
"rewards/rejected": -2.3648171424865723, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.997277433690983e-08, |
|
"logits/chosen": 0.7098981738090515, |
|
"logits/rejected": 1.4574624300003052, |
|
"logps/chosen": -431.80694580078125, |
|
"logps/rejected": -440.78448486328125, |
|
"loss": 0.533, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.4108504056930542, |
|
"rewards/margins": 0.6520459651947021, |
|
"rewards/rejected": -2.062896490097046, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.338500848029602e-08, |
|
"logits/chosen": 0.6488819122314453, |
|
"logits/rejected": 1.1337546110153198, |
|
"logps/chosen": -441.52020263671875, |
|
"logps/rejected": -506.21246337890625, |
|
"loss": 0.5276, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.4171245098114014, |
|
"rewards/margins": 0.8641043901443481, |
|
"rewards/rejected": -2.28122878074646, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.70334495204884e-08, |
|
"logits/chosen": 0.6074367761611938, |
|
"logits/rejected": 1.0385777950286865, |
|
"logps/chosen": -400.0601806640625, |
|
"logps/rejected": -490.62701416015625, |
|
"loss": 0.5015, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.4023693799972534, |
|
"rewards/margins": 0.8330108523368835, |
|
"rewards/rejected": -2.2353804111480713, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.092659210462231e-08, |
|
"logits/chosen": 0.4711516499519348, |
|
"logits/rejected": 0.9624277353286743, |
|
"logps/chosen": -422.23480224609375, |
|
"logps/rejected": -472.27325439453125, |
|
"loss": 0.4845, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3671127557754517, |
|
"rewards/margins": 0.813764214515686, |
|
"rewards/rejected": -2.180877208709717, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.507260361320737e-08, |
|
"logits/chosen": 0.6579657793045044, |
|
"logits/rejected": 1.0575228929519653, |
|
"logps/chosen": -438.59564208984375, |
|
"logps/rejected": -502.49407958984375, |
|
"loss": 0.5179, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.4378478527069092, |
|
"rewards/margins": 0.8018784523010254, |
|
"rewards/rejected": -2.2397265434265137, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.947931323697982e-08, |
|
"logits/chosen": 0.7942476868629456, |
|
"logits/rejected": 1.3635971546173096, |
|
"logps/chosen": -420.66888427734375, |
|
"logps/rejected": -464.9019470214844, |
|
"loss": 0.5246, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.438265085220337, |
|
"rewards/margins": 0.8271247148513794, |
|
"rewards/rejected": -2.265389919281006, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.415420150605398e-08, |
|
"logits/chosen": 0.7373208999633789, |
|
"logits/rejected": 0.9085140228271484, |
|
"logps/chosen": -412.26177978515625, |
|
"logps/rejected": -484.4659118652344, |
|
"loss": 0.5129, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5069735050201416, |
|
"rewards/margins": 0.7849918603897095, |
|
"rewards/rejected": -2.2919652462005615, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.9104390285376374e-08, |
|
"logits/chosen": 0.6553566455841064, |
|
"logits/rejected": 1.0975841283798218, |
|
"logps/chosen": -468.61328125, |
|
"logps/rejected": -519.8575439453125, |
|
"loss": 0.5048, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.4834080934524536, |
|
"rewards/margins": 0.8748735189437866, |
|
"rewards/rejected": -2.3582816123962402, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": 0.8796811103820801, |
|
"eval_logits/rejected": 1.3869489431381226, |
|
"eval_logps/chosen": -427.48309326171875, |
|
"eval_logps/rejected": -493.06396484375, |
|
"eval_loss": 0.5121396780014038, |
|
"eval_rewards/accuracies": 0.7698412537574768, |
|
"eval_rewards/chosen": -1.4753795862197876, |
|
"eval_rewards/margins": 0.8959933519363403, |
|
"eval_rewards/rejected": -2.371372699737549, |
|
"eval_runtime": 244.0597, |
|
"eval_samples_per_second": 8.195, |
|
"eval_steps_per_second": 0.258, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.433663324986208e-08, |
|
"logits/chosen": 0.6968400478363037, |
|
"logits/rejected": 1.4569685459136963, |
|
"logps/chosen": -442.61041259765625, |
|
"logps/rejected": -456.3666076660156, |
|
"loss": 0.5114, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.6009132862091064, |
|
"rewards/margins": 0.6439798474311829, |
|
"rewards/rejected": -2.2448930740356445, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.9857306851953897e-08, |
|
"logits/chosen": 0.6378097534179688, |
|
"logits/rejected": 1.441167950630188, |
|
"logps/chosen": -432.9095764160156, |
|
"logps/rejected": -471.34710693359375, |
|
"loss": 0.5392, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.4161417484283447, |
|
"rewards/margins": 0.9558764696121216, |
|
"rewards/rejected": -2.372018337249756, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.567240179368185e-08, |
|
"logits/chosen": 0.8674399256706238, |
|
"logits/rejected": 0.9316266179084778, |
|
"logps/chosen": -408.4041442871094, |
|
"logps/rejected": -497.81317138671875, |
|
"loss": 0.4902, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.5210860967636108, |
|
"rewards/margins": 0.7341635823249817, |
|
"rewards/rejected": -2.2552497386932373, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1787515014630357e-08, |
|
"logits/chosen": 0.8804190754890442, |
|
"logits/rejected": 1.1247572898864746, |
|
"logps/chosen": -414.587646484375, |
|
"logps/rejected": -463.87310791015625, |
|
"loss": 0.5324, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.527111291885376, |
|
"rewards/margins": 0.6108843088150024, |
|
"rewards/rejected": -2.137995481491089, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.820784220652766e-08, |
|
"logits/chosen": 0.5601187944412231, |
|
"logits/rejected": 1.2288376092910767, |
|
"logps/chosen": -427.48760986328125, |
|
"logps/rejected": -458.49951171875, |
|
"loss": 0.5159, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.374366044998169, |
|
"rewards/margins": 0.8709263801574707, |
|
"rewards/rejected": -2.2452924251556396, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.4938170864468636e-08, |
|
"logits/chosen": 0.8801227807998657, |
|
"logits/rejected": 1.2111625671386719, |
|
"logps/chosen": -430.5556640625, |
|
"logps/rejected": -500.516845703125, |
|
"loss": 0.5158, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.518132209777832, |
|
"rewards/margins": 0.838158905506134, |
|
"rewards/rejected": -2.3562910556793213, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1982873884064465e-08, |
|
"logits/chosen": 0.8533649444580078, |
|
"logits/rejected": 1.03411865234375, |
|
"logps/chosen": -372.19580078125, |
|
"logps/rejected": -463.11114501953125, |
|
"loss": 0.5198, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.3652175664901733, |
|
"rewards/margins": 0.9399977922439575, |
|
"rewards/rejected": -2.30521559715271, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.345903713082304e-09, |
|
"logits/chosen": 0.5068883895874023, |
|
"logits/rejected": 1.1840332746505737, |
|
"logps/chosen": -432.7618103027344, |
|
"logps/rejected": -479.28369140625, |
|
"loss": 0.4962, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.4607574939727783, |
|
"rewards/margins": 0.7478801012039185, |
|
"rewards/rejected": -2.2086377143859863, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.030787065396865e-09, |
|
"logits/chosen": 0.3581078350543976, |
|
"logits/rejected": 0.8491541743278503, |
|
"logps/chosen": -418.20379638671875, |
|
"logps/rejected": -493.193115234375, |
|
"loss": 0.5266, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.417425274848938, |
|
"rewards/margins": 0.8820412755012512, |
|
"rewards/rejected": -2.299466609954834, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.04062020432286e-09, |
|
"logits/chosen": 0.7375041246414185, |
|
"logits/rejected": 1.3644684553146362, |
|
"logps/chosen": -425.05047607421875, |
|
"logps/rejected": -497.4638671875, |
|
"loss": 0.5193, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4368913173675537, |
|
"rewards/margins": 0.9173351526260376, |
|
"rewards/rejected": -2.3542263507843018, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_logits/chosen": 0.9411238431930542, |
|
"eval_logits/rejected": 1.4498772621154785, |
|
"eval_logps/chosen": -425.3929748535156, |
|
"eval_logps/rejected": -490.2650146484375, |
|
"eval_loss": 0.5108779072761536, |
|
"eval_rewards/accuracies": 0.773809552192688, |
|
"eval_rewards/chosen": -1.4544785022735596, |
|
"eval_rewards/margins": 0.888904333114624, |
|
"eval_rewards/rejected": -2.3433828353881836, |
|
"eval_runtime": 244.5086, |
|
"eval_samples_per_second": 8.18, |
|
"eval_steps_per_second": 0.258, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.3780648016376866e-09, |
|
"logits/chosen": 0.824137806892395, |
|
"logits/rejected": 1.4844555854797363, |
|
"logps/chosen": -434.61865234375, |
|
"logps/rejected": -485.60528564453125, |
|
"loss": 0.5072, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.451385736465454, |
|
"rewards/margins": 0.8570237159729004, |
|
"rewards/rejected": -2.3084096908569336, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.0453443778310766e-09, |
|
"logits/chosen": 0.5803619623184204, |
|
"logits/rejected": 1.347163200378418, |
|
"logps/chosen": -456.23150634765625, |
|
"logps/rejected": -499.64178466796875, |
|
"loss": 0.5039, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.5105242729187012, |
|
"rewards/margins": 0.9060274362564087, |
|
"rewards/rejected": -2.416551351547241, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0442413283435758e-09, |
|
"logits/chosen": 0.4976336359977722, |
|
"logits/rejected": 1.3075406551361084, |
|
"logps/chosen": -391.784423828125, |
|
"logps/rejected": -485.1461486816406, |
|
"loss": 0.4874, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -1.2704774141311646, |
|
"rewards/margins": 1.2178288698196411, |
|
"rewards/rejected": -2.4883062839508057, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.760945397705828e-10, |
|
"logits/chosen": 0.5646733045578003, |
|
"logits/rejected": 1.1906431913375854, |
|
"logps/chosen": -441.12298583984375, |
|
"logps/rejected": -493.2767639160156, |
|
"loss": 0.4969, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.583254337310791, |
|
"rewards/margins": 0.746857762336731, |
|
"rewards/rejected": -2.3301119804382324, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.17975992204056e-11, |
|
"logits/chosen": 0.8913224935531616, |
|
"logits/rejected": 1.4200079441070557, |
|
"logps/chosen": -431.2433166503906, |
|
"logps/rejected": -494.87744140625, |
|
"loss": 0.5167, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6311872005462646, |
|
"rewards/margins": 0.8244245648384094, |
|
"rewards/rejected": -2.4556117057800293, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 955, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5487770005670517, |
|
"train_runtime": 16595.3532, |
|
"train_samples_per_second": 3.684, |
|
"train_steps_per_second": 0.058 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 955, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |