|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9994767137624281, |
|
"eval_steps": 100, |
|
"global_step": 955, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.208333333333333e-09, |
|
"logits/chosen": -4.156338691711426, |
|
"logits/rejected": -4.146947383880615, |
|
"logps/chosen": -276.527099609375, |
|
"logps/rejected": -253.10324096679688, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.208333333333333e-08, |
|
"logits/chosen": -4.072511672973633, |
|
"logits/rejected": -4.162118911743164, |
|
"logps/chosen": -398.7220764160156, |
|
"logps/rejected": -310.1617736816406, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.4722222089767456, |
|
"rewards/chosen": 0.0006214036839082837, |
|
"rewards/margins": 0.0005763211520388722, |
|
"rewards/rejected": 4.508249185164459e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -4.09468412399292, |
|
"logits/rejected": -4.120311737060547, |
|
"logps/chosen": -304.5065612792969, |
|
"logps/rejected": -286.7430725097656, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.0015097814612090588, |
|
"rewards/margins": -0.0002035536599578336, |
|
"rewards/rejected": 0.001713335164822638, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5624999999999999e-07, |
|
"logits/chosen": -4.106563568115234, |
|
"logits/rejected": -4.1498003005981445, |
|
"logps/chosen": -371.30047607421875, |
|
"logps/rejected": -341.87310791015625, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.012897265143692493, |
|
"rewards/margins": 0.008245532400906086, |
|
"rewards/rejected": 0.0046517327427864075, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -4.051548480987549, |
|
"logits/rejected": -4.108038425445557, |
|
"logps/chosen": -362.06304931640625, |
|
"logps/rejected": -345.527099609375, |
|
"loss": 0.6869, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.03629542142152786, |
|
"rewards/margins": 0.018107738345861435, |
|
"rewards/rejected": 0.018187683075666428, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.604166666666667e-07, |
|
"logits/chosen": -4.083974361419678, |
|
"logits/rejected": -4.089003562927246, |
|
"logps/chosen": -328.7674865722656, |
|
"logps/rejected": -337.3554382324219, |
|
"loss": 0.6775, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.07210709154605865, |
|
"rewards/margins": 0.03594246506690979, |
|
"rewards/rejected": 0.03616461902856827, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -4.070071220397949, |
|
"logits/rejected": -4.125251293182373, |
|
"logps/chosen": -364.65863037109375, |
|
"logps/rejected": -336.72119140625, |
|
"loss": 0.6673, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.14206922054290771, |
|
"rewards/margins": 0.05489688366651535, |
|
"rewards/rejected": 0.08717232942581177, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.645833333333333e-07, |
|
"logits/chosen": -4.12211799621582, |
|
"logits/rejected": -4.217160701751709, |
|
"logps/chosen": -381.5180358886719, |
|
"logps/rejected": -344.3231506347656, |
|
"loss": 0.6475, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.19605371356010437, |
|
"rewards/margins": 0.1255684494972229, |
|
"rewards/rejected": 0.07048525661230087, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -3.8739821910858154, |
|
"logits/rejected": -3.94097900390625, |
|
"logps/chosen": -425.6156311035156, |
|
"logps/rejected": -385.5126037597656, |
|
"loss": 0.6165, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.13462719321250916, |
|
"rewards/margins": 0.24346613883972168, |
|
"rewards/rejected": -0.10883896052837372, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.6874999999999996e-07, |
|
"logits/chosen": -3.8873085975646973, |
|
"logits/rejected": -3.9699835777282715, |
|
"logps/chosen": -428.96026611328125, |
|
"logps/rejected": -417.627197265625, |
|
"loss": 0.617, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.02063266560435295, |
|
"rewards/margins": 0.2929636836051941, |
|
"rewards/rejected": -0.27233099937438965, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999732492681437e-07, |
|
"logits/chosen": -3.9544475078582764, |
|
"logits/rejected": -3.9958584308624268, |
|
"logps/chosen": -388.5050964355469, |
|
"logps/rejected": -426.59765625, |
|
"loss": 0.6051, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.17338475584983826, |
|
"rewards/margins": 0.28527265787124634, |
|
"rewards/rejected": -0.458657443523407, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_logits/chosen": -3.8776931762695312, |
|
"eval_logits/rejected": -3.979984760284424, |
|
"eval_logps/chosen": -407.94769287109375, |
|
"eval_logps/rejected": -389.8706970214844, |
|
"eval_loss": 0.5848276615142822, |
|
"eval_rewards/accuracies": 0.6919999718666077, |
|
"eval_rewards/chosen": -0.22141031920909882, |
|
"eval_rewards/margins": 0.3733499050140381, |
|
"eval_rewards/rejected": -0.5947602391242981, |
|
"eval_runtime": 202.6123, |
|
"eval_samples_per_second": 9.871, |
|
"eval_steps_per_second": 0.617, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.996723692767926e-07, |
|
"logits/chosen": -4.082424640655518, |
|
"logits/rejected": -4.156807899475098, |
|
"logps/chosen": -365.0426025390625, |
|
"logps/rejected": -362.7803649902344, |
|
"loss": 0.5643, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3058136999607086, |
|
"rewards/margins": 0.49873948097229004, |
|
"rewards/rejected": -0.8045531511306763, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.990375746213598e-07, |
|
"logits/chosen": -4.234003067016602, |
|
"logits/rejected": -4.319502830505371, |
|
"logps/chosen": -424.35980224609375, |
|
"logps/rejected": -453.1839904785156, |
|
"loss": 0.5679, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.47503072023391724, |
|
"rewards/margins": 0.55732262134552, |
|
"rewards/rejected": -1.0323532819747925, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.980697142834314e-07, |
|
"logits/chosen": -4.125961780548096, |
|
"logits/rejected": -4.213648319244385, |
|
"logps/chosen": -462.6040954589844, |
|
"logps/rejected": -459.0274963378906, |
|
"loss": 0.5456, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.616773247718811, |
|
"rewards/margins": 0.5351831912994385, |
|
"rewards/rejected": -1.15195631980896, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.967700826904229e-07, |
|
"logits/chosen": -4.07401704788208, |
|
"logits/rejected": -4.130145072937012, |
|
"logps/chosen": -381.79718017578125, |
|
"logps/rejected": -404.578369140625, |
|
"loss": 0.5092, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7604867219924927, |
|
"rewards/margins": 0.5355066657066345, |
|
"rewards/rejected": -1.2959933280944824, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.951404179843962e-07, |
|
"logits/chosen": -3.923344850540161, |
|
"logits/rejected": -3.991914749145508, |
|
"logps/chosen": -435.2518005371094, |
|
"logps/rejected": -472.1197204589844, |
|
"loss": 0.5597, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.4754977226257324, |
|
"rewards/margins": 0.5571349859237671, |
|
"rewards/rejected": -1.032632827758789, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.931828996974498e-07, |
|
"logits/chosen": -3.7589492797851562, |
|
"logits/rejected": -3.853282928466797, |
|
"logps/chosen": -413.491455078125, |
|
"logps/rejected": -455.9912109375, |
|
"loss": 0.5265, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.14779019355773926, |
|
"rewards/margins": 0.467332661151886, |
|
"rewards/rejected": -0.6151228547096252, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.909001458367866e-07, |
|
"logits/chosen": -3.895158290863037, |
|
"logits/rejected": -4.054537773132324, |
|
"logps/chosen": -404.14532470703125, |
|
"logps/rejected": -403.3351135253906, |
|
"loss": 0.5177, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5085878968238831, |
|
"rewards/margins": 0.7046168446540833, |
|
"rewards/rejected": -1.2132047414779663, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.882952093833627e-07, |
|
"logits/chosen": -3.9119632244110107, |
|
"logits/rejected": -3.9340500831604004, |
|
"logps/chosen": -415.5750427246094, |
|
"logps/rejected": -543.4241333007812, |
|
"loss": 0.4752, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0291075706481934, |
|
"rewards/margins": 0.9823731184005737, |
|
"rewards/rejected": -2.0114803314208984, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.853715742087946e-07, |
|
"logits/chosen": -3.9852688312530518, |
|
"logits/rejected": -4.081984519958496, |
|
"logps/chosen": -480.1775817871094, |
|
"logps/rejected": -522.3671264648438, |
|
"loss": 0.4535, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.1266310214996338, |
|
"rewards/margins": 0.9308539628982544, |
|
"rewards/rejected": -2.0574851036071777, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.821331504159906e-07, |
|
"logits/chosen": -4.0369672775268555, |
|
"logits/rejected": -4.075179576873779, |
|
"logps/chosen": -509.94189453125, |
|
"logps/rejected": -613.8836059570312, |
|
"loss": 0.5134, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5363746881484985, |
|
"rewards/margins": 0.8696328401565552, |
|
"rewards/rejected": -2.406007766723633, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -3.8175740242004395, |
|
"eval_logits/rejected": -3.9154281616210938, |
|
"eval_logps/chosen": -545.1560668945312, |
|
"eval_logps/rejected": -580.6380004882812, |
|
"eval_loss": 0.5025292634963989, |
|
"eval_rewards/accuracies": 0.7160000205039978, |
|
"eval_rewards/chosen": -1.5934933423995972, |
|
"eval_rewards/margins": 0.9089404940605164, |
|
"eval_rewards/rejected": -2.5024337768554688, |
|
"eval_runtime": 203.658, |
|
"eval_samples_per_second": 9.82, |
|
"eval_steps_per_second": 0.614, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.785842691097342e-07, |
|
"logits/chosen": -3.9959654808044434, |
|
"logits/rejected": -4.14572811126709, |
|
"logps/chosen": -542.1124267578125, |
|
"logps/rejected": -513.849853515625, |
|
"loss": 0.5114, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1273362636566162, |
|
"rewards/margins": 1.0702550411224365, |
|
"rewards/rejected": -2.1975910663604736, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7472967660421603e-07, |
|
"logits/chosen": -3.90177583694458, |
|
"logits/rejected": -3.9888691902160645, |
|
"logps/chosen": -448.62091064453125, |
|
"logps/rejected": -494.3854064941406, |
|
"loss": 0.4834, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.7570297718048096, |
|
"rewards/margins": 0.9269709587097168, |
|
"rewards/rejected": -1.6840009689331055, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.705745280752585e-07, |
|
"logits/chosen": -3.9159817695617676, |
|
"logits/rejected": -4.030351161956787, |
|
"logps/chosen": -501.67938232421875, |
|
"logps/rejected": -505.40618896484375, |
|
"loss": 0.4834, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.1143962144851685, |
|
"rewards/margins": 0.7790688276290894, |
|
"rewards/rejected": -1.8934648036956787, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.6612438066572555e-07, |
|
"logits/chosen": -3.872812271118164, |
|
"logits/rejected": -3.9638118743896484, |
|
"logps/chosen": -486.70751953125, |
|
"logps/rejected": -500.75518798828125, |
|
"loss": 0.4416, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.2733886241912842, |
|
"rewards/margins": 0.9924365282058716, |
|
"rewards/rejected": -2.2658252716064453, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.6138518605333664e-07, |
|
"logits/chosen": -3.8824219703674316, |
|
"logits/rejected": -3.9245293140411377, |
|
"logps/chosen": -505.47711181640625, |
|
"logps/rejected": -631.7078247070312, |
|
"loss": 0.4569, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.439082145690918, |
|
"rewards/margins": 1.2419004440307617, |
|
"rewards/rejected": -2.680982828140259, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5636328249082514e-07, |
|
"logits/chosen": -4.006925106048584, |
|
"logits/rejected": -4.075568199157715, |
|
"logps/chosen": -515.6424560546875, |
|
"logps/rejected": -610.078125, |
|
"loss": 0.4646, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.567647933959961, |
|
"rewards/margins": 0.9515784382820129, |
|
"rewards/rejected": -2.519226551055908, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.510653863290871e-07, |
|
"logits/chosen": -4.042003154754639, |
|
"logits/rejected": -4.134153842926025, |
|
"logps/chosen": -530.5443115234375, |
|
"logps/rejected": -585.4014892578125, |
|
"loss": 0.4713, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.408792495727539, |
|
"rewards/margins": 1.231185793876648, |
|
"rewards/rejected": -2.6399781703948975, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.4549858303465737e-07, |
|
"logits/chosen": -3.96376371383667, |
|
"logits/rejected": -3.9938507080078125, |
|
"logps/chosen": -508.6741638183594, |
|
"logps/rejected": -597.8483276367188, |
|
"loss": 0.4507, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.4071502685546875, |
|
"rewards/margins": 1.0225669145584106, |
|
"rewards/rejected": -2.4297173023223877, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.396703177135261e-07, |
|
"logits/chosen": -3.989154815673828, |
|
"logits/rejected": -4.069916248321533, |
|
"logps/chosen": -498.7647399902344, |
|
"logps/rejected": -517.9674072265625, |
|
"loss": 0.452, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.4257739782333374, |
|
"rewards/margins": 0.752226710319519, |
|
"rewards/rejected": -2.1780009269714355, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.335883851539693e-07, |
|
"logits/chosen": -3.9300217628479004, |
|
"logits/rejected": -4.022156238555908, |
|
"logps/chosen": -441.078369140625, |
|
"logps/rejected": -534.4451904296875, |
|
"loss": 0.4489, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.3399455547332764, |
|
"rewards/margins": 1.284125566482544, |
|
"rewards/rejected": -2.6240711212158203, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_logits/chosen": -3.670259714126587, |
|
"eval_logits/rejected": -3.7593655586242676, |
|
"eval_logps/chosen": -542.0072021484375, |
|
"eval_logps/rejected": -591.3610229492188, |
|
"eval_loss": 0.46141302585601807, |
|
"eval_rewards/accuracies": 0.7760000228881836, |
|
"eval_rewards/chosen": -1.5620052814483643, |
|
"eval_rewards/margins": 1.0476588010787964, |
|
"eval_rewards/rejected": -2.609663963317871, |
|
"eval_runtime": 204.4704, |
|
"eval_samples_per_second": 9.781, |
|
"eval_steps_per_second": 0.611, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.272609194017105e-07, |
|
"logits/chosen": -3.7294070720672607, |
|
"logits/rejected": -3.789606809616089, |
|
"logps/chosen": -506.30218505859375, |
|
"logps/rejected": -687.495849609375, |
|
"loss": 0.4155, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.452553629875183, |
|
"rewards/margins": 1.6206657886505127, |
|
"rewards/rejected": -3.0732195377349854, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.2069638288135547e-07, |
|
"logits/chosen": -3.735316753387451, |
|
"logits/rejected": -3.802316188812256, |
|
"logps/chosen": -556.4932861328125, |
|
"logps/rejected": -685.213134765625, |
|
"loss": 0.4397, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.5745762586593628, |
|
"rewards/margins": 1.468933343887329, |
|
"rewards/rejected": -3.0435097217559814, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.139035550786494e-07, |
|
"logits/chosen": -3.783989667892456, |
|
"logits/rejected": -3.8774330615997314, |
|
"logps/chosen": -530.0714111328125, |
|
"logps/rejected": -536.76123046875, |
|
"loss": 0.4832, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.6511234045028687, |
|
"rewards/margins": 1.1192381381988525, |
|
"rewards/rejected": -2.7703614234924316, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.0689152079869306e-07, |
|
"logits/chosen": -3.9041965007781982, |
|
"logits/rejected": -3.946739912033081, |
|
"logps/chosen": -430.56365966796875, |
|
"logps/rejected": -504.49786376953125, |
|
"loss": 0.4937, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.466640591621399, |
|
"rewards/margins": 0.9467592239379883, |
|
"rewards/rejected": -2.4133996963500977, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.99669658015821e-07, |
|
"logits/chosen": -3.7613883018493652, |
|
"logits/rejected": -3.790003538131714, |
|
"logps/chosen": -495.4722595214844, |
|
"logps/rejected": -638.2640380859375, |
|
"loss": 0.4831, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.7402899265289307, |
|
"rewards/margins": 1.1566716432571411, |
|
"rewards/rejected": -2.8969614505767822, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.92247625331392e-07, |
|
"logits/chosen": -3.937546968460083, |
|
"logits/rejected": -3.993028163909912, |
|
"logps/chosen": -548.022216796875, |
|
"logps/rejected": -609.3221435546875, |
|
"loss": 0.4675, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.875231385231018, |
|
"rewards/margins": 0.993303656578064, |
|
"rewards/rejected": -2.868535280227661, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.846353490562664e-07, |
|
"logits/chosen": -3.8289928436279297, |
|
"logits/rejected": -3.8607897758483887, |
|
"logps/chosen": -511.55853271484375, |
|
"logps/rejected": -626.59814453125, |
|
"loss": 0.4382, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6782903671264648, |
|
"rewards/margins": 0.9424189329147339, |
|
"rewards/rejected": -2.6207094192504883, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.768430099352445e-07, |
|
"logits/chosen": -3.8665966987609863, |
|
"logits/rejected": -3.9191222190856934, |
|
"logps/chosen": -600.85400390625, |
|
"logps/rejected": -636.7755126953125, |
|
"loss": 0.4414, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.972818374633789, |
|
"rewards/margins": 0.9349247217178345, |
|
"rewards/rejected": -2.907742977142334, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.6888102953122304e-07, |
|
"logits/chosen": -3.8797574043273926, |
|
"logits/rejected": -3.9397029876708984, |
|
"logps/chosen": -541.1205444335938, |
|
"logps/rejected": -635.4256591796875, |
|
"loss": 0.4324, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.8883755207061768, |
|
"rewards/margins": 1.541088581085205, |
|
"rewards/rejected": -3.4294638633728027, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.607600562872785e-07, |
|
"logits/chosen": -3.781221389770508, |
|
"logits/rejected": -3.8847899436950684, |
|
"logps/chosen": -629.6137084960938, |
|
"logps/rejected": -632.5733032226562, |
|
"loss": 0.4359, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.1249642372131348, |
|
"rewards/margins": 1.0126584768295288, |
|
"rewards/rejected": -3.137622833251953, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": -3.6220741271972656, |
|
"eval_logits/rejected": -3.702164888381958, |
|
"eval_logps/chosen": -594.591796875, |
|
"eval_logps/rejected": -651.9946899414062, |
|
"eval_loss": 0.4466642141342163, |
|
"eval_rewards/accuracies": 0.7680000066757202, |
|
"eval_rewards/chosen": -2.087851047515869, |
|
"eval_rewards/margins": 1.1281490325927734, |
|
"eval_rewards/rejected": -3.2160003185272217, |
|
"eval_runtime": 201.9384, |
|
"eval_samples_per_second": 9.904, |
|
"eval_steps_per_second": 0.619, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5249095128531856e-07, |
|
"logits/chosen": -3.7463316917419434, |
|
"logits/rejected": -3.8346214294433594, |
|
"logps/chosen": -645.6383666992188, |
|
"logps/rejected": -699.442626953125, |
|
"loss": 0.4482, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9998807907104492, |
|
"rewards/margins": 1.0837665796279907, |
|
"rewards/rejected": -3.0836472511291504, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4408477372034736e-07, |
|
"logits/chosen": -3.732384204864502, |
|
"logits/rejected": -3.813812255859375, |
|
"logps/chosen": -501.06536865234375, |
|
"logps/rejected": -586.9138793945312, |
|
"loss": 0.4336, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9944934844970703, |
|
"rewards/margins": 1.117249846458435, |
|
"rewards/rejected": -3.111743211746216, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3555276610977276e-07, |
|
"logits/chosen": -3.799959659576416, |
|
"logits/rejected": -3.7980499267578125, |
|
"logps/chosen": -466.7644958496094, |
|
"logps/rejected": -580.1270751953125, |
|
"loss": 0.4684, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.807948350906372, |
|
"rewards/margins": 1.1490823030471802, |
|
"rewards/rejected": -2.9570305347442627, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.269063392575352e-07, |
|
"logits/chosen": -3.8081250190734863, |
|
"logits/rejected": -3.8595759868621826, |
|
"logps/chosen": -476.84326171875, |
|
"logps/rejected": -597.8814697265625, |
|
"loss": 0.4459, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5761287212371826, |
|
"rewards/margins": 1.3166580200195312, |
|
"rewards/rejected": -2.892787218093872, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1815705699316964e-07, |
|
"logits/chosen": -3.71815824508667, |
|
"logits/rejected": -3.7602291107177734, |
|
"logps/chosen": -502.3971252441406, |
|
"logps/rejected": -605.4251708984375, |
|
"loss": 0.4634, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.5930547714233398, |
|
"rewards/margins": 1.314320683479309, |
|
"rewards/rejected": -2.9073758125305176, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.0931662070620794e-07, |
|
"logits/chosen": -3.741687774658203, |
|
"logits/rejected": -3.811591625213623, |
|
"logps/chosen": -549.6846923828125, |
|
"logps/rejected": -649.2257080078125, |
|
"loss": 0.4345, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.719300627708435, |
|
"rewards/margins": 1.1254762411117554, |
|
"rewards/rejected": -2.8447766304016113, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.003968536966078e-07, |
|
"logits/chosen": -3.716046094894409, |
|
"logits/rejected": -3.803307056427002, |
|
"logps/chosen": -572.4447631835938, |
|
"logps/rejected": -625.49755859375, |
|
"loss": 0.4451, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.8884143829345703, |
|
"rewards/margins": 1.2486246824264526, |
|
"rewards/rejected": -3.1370389461517334, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9140968536213693e-07, |
|
"logits/chosen": -3.867417573928833, |
|
"logits/rejected": -3.8436825275421143, |
|
"logps/chosen": -478.40789794921875, |
|
"logps/rejected": -613.2086791992188, |
|
"loss": 0.4803, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.025106906890869, |
|
"rewards/margins": 1.0343272686004639, |
|
"rewards/rejected": -3.059434175491333, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.823671352438608e-07, |
|
"logits/chosen": -3.8223514556884766, |
|
"logits/rejected": -3.864264965057373, |
|
"logps/chosen": -514.0059814453125, |
|
"logps/rejected": -626.1476440429688, |
|
"loss": 0.4292, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.7328517436981201, |
|
"rewards/margins": 1.1822339296340942, |
|
"rewards/rejected": -2.915085554122925, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.73281296951072e-07, |
|
"logits/chosen": -3.8355319499969482, |
|
"logits/rejected": -3.8868179321289062, |
|
"logps/chosen": -587.6881103515625, |
|
"logps/rejected": -719.8489990234375, |
|
"loss": 0.4271, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.061047315597534, |
|
"rewards/margins": 1.5319583415985107, |
|
"rewards/rejected": -3.593005418777466, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -3.7408037185668945, |
|
"eval_logits/rejected": -3.8189327716827393, |
|
"eval_logps/chosen": -591.3005981445312, |
|
"eval_logps/rejected": -652.2026977539062, |
|
"eval_loss": 0.4440613090991974, |
|
"eval_rewards/accuracies": 0.7839999794960022, |
|
"eval_rewards/chosen": -2.054938793182373, |
|
"eval_rewards/margins": 1.1631413698196411, |
|
"eval_rewards/rejected": -3.2180800437927246, |
|
"eval_runtime": 203.8018, |
|
"eval_samples_per_second": 9.813, |
|
"eval_steps_per_second": 0.613, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.641643219871597e-07, |
|
"logits/chosen": -3.8902289867401123, |
|
"logits/rejected": -4.004373550415039, |
|
"logps/chosen": -559.3378295898438, |
|
"logps/rejected": -618.6632690429688, |
|
"loss": 0.3992, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.054337739944458, |
|
"rewards/margins": 1.3023302555084229, |
|
"rewards/rejected": -3.3566677570343018, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.550284034980507e-07, |
|
"logits/chosen": -3.8437469005584717, |
|
"logits/rejected": -3.888261318206787, |
|
"logps/chosen": -559.3350830078125, |
|
"logps/rejected": -661.4388427734375, |
|
"loss": 0.4473, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.9623550176620483, |
|
"rewards/margins": 1.2001652717590332, |
|
"rewards/rejected": -3.162520170211792, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4588575996495794e-07, |
|
"logits/chosen": -3.9187228679656982, |
|
"logits/rejected": -3.973421096801758, |
|
"logps/chosen": -602.2457885742188, |
|
"logps/rejected": -682.3179931640625, |
|
"loss": 0.42, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9900497198104858, |
|
"rewards/margins": 1.2919167280197144, |
|
"rewards/rejected": -3.2819664478302, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.367486188632446e-07, |
|
"logits/chosen": -3.9012649059295654, |
|
"logits/rejected": -3.986586093902588, |
|
"logps/chosen": -567.7922973632812, |
|
"logps/rejected": -682.8541259765625, |
|
"loss": 0.4298, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.8885074853897095, |
|
"rewards/margins": 1.357795000076294, |
|
"rewards/rejected": -3.2463021278381348, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.276292003092593e-07, |
|
"logits/chosen": -3.9662468433380127, |
|
"logits/rejected": -4.023472785949707, |
|
"logps/chosen": -548.5009765625, |
|
"logps/rejected": -603.1859130859375, |
|
"loss": 0.4332, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.9097919464111328, |
|
"rewards/margins": 1.235344648361206, |
|
"rewards/rejected": -3.145136594772339, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.185397007170141e-07, |
|
"logits/chosen": -3.96891713142395, |
|
"logits/rejected": -3.992366075515747, |
|
"logps/chosen": -527.6655883789062, |
|
"logps/rejected": -594.5096435546875, |
|
"loss": 0.4661, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.0592868328094482, |
|
"rewards/margins": 1.0083281993865967, |
|
"rewards/rejected": -3.067615032196045, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.094922764865619e-07, |
|
"logits/chosen": -3.8889777660369873, |
|
"logits/rejected": -3.9203734397888184, |
|
"logps/chosen": -530.451904296875, |
|
"logps/rejected": -649.4442749023438, |
|
"loss": 0.4476, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.085118532180786, |
|
"rewards/margins": 1.273771047592163, |
|
"rewards/rejected": -3.3588898181915283, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.0049902774588797e-07, |
|
"logits/chosen": -3.9504013061523438, |
|
"logits/rejected": -4.046439170837402, |
|
"logps/chosen": -551.9801635742188, |
|
"logps/rejected": -631.9030151367188, |
|
"loss": 0.4411, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.0798585414886475, |
|
"rewards/margins": 1.2492122650146484, |
|
"rewards/rejected": -3.329070568084717, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9157198216806238e-07, |
|
"logits/chosen": -3.846717119216919, |
|
"logits/rejected": -3.8746533393859863, |
|
"logps/chosen": -514.6439208984375, |
|
"logps/rejected": -652.5971069335938, |
|
"loss": 0.4229, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.9687875509262085, |
|
"rewards/margins": 1.090827465057373, |
|
"rewards/rejected": -3.059614896774292, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8272307888529274e-07, |
|
"logits/chosen": -3.835319995880127, |
|
"logits/rejected": -3.88202166557312, |
|
"logps/chosen": -595.4498291015625, |
|
"logps/rejected": -732.9796142578125, |
|
"loss": 0.4181, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.6396028995513916, |
|
"rewards/margins": 1.44252610206604, |
|
"rewards/rejected": -3.0821290016174316, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": -3.717047929763794, |
|
"eval_logits/rejected": -3.7949790954589844, |
|
"eval_logps/chosen": -584.56982421875, |
|
"eval_logps/rejected": -647.177734375, |
|
"eval_loss": 0.43660393357276917, |
|
"eval_rewards/accuracies": 0.7760000228881836, |
|
"eval_rewards/chosen": -1.9876312017440796, |
|
"eval_rewards/margins": 1.1801990270614624, |
|
"eval_rewards/rejected": -3.167830467224121, |
|
"eval_runtime": 203.6736, |
|
"eval_samples_per_second": 9.82, |
|
"eval_steps_per_second": 0.614, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7396415252139288e-07, |
|
"logits/chosen": -3.9154458045959473, |
|
"logits/rejected": -3.974060535430908, |
|
"logps/chosen": -533.2646484375, |
|
"logps/rejected": -585.8008422851562, |
|
"loss": 0.4206, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9508171081542969, |
|
"rewards/margins": 1.1387238502502441, |
|
"rewards/rejected": -3.089540958404541, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6530691736402316e-07, |
|
"logits/chosen": -3.8785858154296875, |
|
"logits/rejected": -3.912694215774536, |
|
"logps/chosen": -563.8421630859375, |
|
"logps/rejected": -652.9039306640625, |
|
"loss": 0.4436, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.181243658065796, |
|
"rewards/margins": 1.337597131729126, |
|
"rewards/rejected": -3.518840789794922, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5676295169786864e-07, |
|
"logits/chosen": -3.9640355110168457, |
|
"logits/rejected": -4.0246195793151855, |
|
"logps/chosen": -543.6168823242188, |
|
"logps/rejected": -720.9603271484375, |
|
"loss": 0.3924, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.9450584650039673, |
|
"rewards/margins": 1.83893620967865, |
|
"rewards/rejected": -3.783994674682617, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.483436823197092e-07, |
|
"logits/chosen": -3.8995678424835205, |
|
"logits/rejected": -3.9963104724884033, |
|
"logps/chosen": -529.6903686523438, |
|
"logps/rejected": -631.6978149414062, |
|
"loss": 0.4512, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.9272207021713257, |
|
"rewards/margins": 1.3727694749832153, |
|
"rewards/rejected": -3.29999041557312, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4006036925609243e-07, |
|
"logits/chosen": -3.972506046295166, |
|
"logits/rejected": -4.015632152557373, |
|
"logps/chosen": -595.8973388671875, |
|
"logps/rejected": -727.6198120117188, |
|
"loss": 0.4132, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.108734369277954, |
|
"rewards/margins": 1.5102851390838623, |
|
"rewards/rejected": -3.6190192699432373, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.319240907040458e-07, |
|
"logits/chosen": -3.9822299480438232, |
|
"logits/rejected": -4.0792555809021, |
|
"logps/chosen": -610.338134765625, |
|
"logps/rejected": -650.5155639648438, |
|
"loss": 0.4142, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.233393669128418, |
|
"rewards/margins": 1.272410273551941, |
|
"rewards/rejected": -3.5058040618896484, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.239457282149695e-07, |
|
"logits/chosen": -4.008191108703613, |
|
"logits/rejected": -4.095564842224121, |
|
"logps/chosen": -588.9998168945312, |
|
"logps/rejected": -686.9320068359375, |
|
"loss": 0.4315, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.217425584793091, |
|
"rewards/margins": 1.2320573329925537, |
|
"rewards/rejected": -3.4494826793670654, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1613595214152711e-07, |
|
"logits/chosen": -3.9681782722473145, |
|
"logits/rejected": -4.009424686431885, |
|
"logps/chosen": -520.8330688476562, |
|
"logps/rejected": -595.3616943359375, |
|
"loss": 0.4556, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8725841045379639, |
|
"rewards/margins": 1.0434370040893555, |
|
"rewards/rejected": -2.9160211086273193, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.0850520736699362e-07, |
|
"logits/chosen": -3.910592555999756, |
|
"logits/rejected": -3.9710609912872314, |
|
"logps/chosen": -484.9183044433594, |
|
"logps/rejected": -601.7095336914062, |
|
"loss": 0.408, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8240716457366943, |
|
"rewards/margins": 1.474691390991211, |
|
"rewards/rejected": -3.2987632751464844, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0106369933615042e-07, |
|
"logits/chosen": -3.870868682861328, |
|
"logits/rejected": -3.974864959716797, |
|
"logps/chosen": -537.2079467773438, |
|
"logps/rejected": -636.1268310546875, |
|
"loss": 0.4, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.947430968284607, |
|
"rewards/margins": 1.452516794204712, |
|
"rewards/rejected": -3.3999481201171875, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_logits/chosen": -3.7969613075256348, |
|
"eval_logits/rejected": -3.87385630607605, |
|
"eval_logps/chosen": -602.2762451171875, |
|
"eval_logps/rejected": -665.6045532226562, |
|
"eval_loss": 0.4316871762275696, |
|
"eval_rewards/accuracies": 0.7639999985694885, |
|
"eval_rewards/chosen": -2.1646955013275146, |
|
"eval_rewards/margins": 1.1874041557312012, |
|
"eval_rewards/rejected": -3.352099657058716, |
|
"eval_runtime": 204.7157, |
|
"eval_samples_per_second": 9.77, |
|
"eval_steps_per_second": 0.611, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.382138040640714e-08, |
|
"logits/chosen": -3.92706298828125, |
|
"logits/rejected": -3.9605202674865723, |
|
"logps/chosen": -567.6884155273438, |
|
"logps/rejected": -623.2738647460938, |
|
"loss": 0.4277, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.1468379497528076, |
|
"rewards/margins": 1.0972353219985962, |
|
"rewards/rejected": -3.2440733909606934, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.678793653740632e-08, |
|
"logits/chosen": -3.9516849517822266, |
|
"logits/rejected": -4.008336544036865, |
|
"logps/chosen": -593.2915649414062, |
|
"logps/rejected": -721.9989013671875, |
|
"loss": 0.4049, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.1563327312469482, |
|
"rewards/margins": 1.5128753185272217, |
|
"rewards/rejected": -3.669208526611328, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.997277433690983e-08, |
|
"logits/chosen": -3.8838019371032715, |
|
"logits/rejected": -3.978668689727783, |
|
"logps/chosen": -610.8298950195312, |
|
"logps/rejected": -639.4989624023438, |
|
"loss": 0.4293, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.255786418914795, |
|
"rewards/margins": 1.1647707223892212, |
|
"rewards/rejected": -3.4205574989318848, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.338500848029602e-08, |
|
"logits/chosen": -3.8697731494903564, |
|
"logits/rejected": -4.00671911239624, |
|
"logps/chosen": -697.6756591796875, |
|
"logps/rejected": -759.2227783203125, |
|
"loss": 0.4093, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.504605293273926, |
|
"rewards/margins": 1.4814784526824951, |
|
"rewards/rejected": -3.986083507537842, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.70334495204884e-08, |
|
"logits/chosen": -3.8761909008026123, |
|
"logits/rejected": -3.915804386138916, |
|
"logps/chosen": -594.4487915039062, |
|
"logps/rejected": -745.0926513671875, |
|
"loss": 0.4379, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.277494430541992, |
|
"rewards/margins": 1.3665436506271362, |
|
"rewards/rejected": -3.644038438796997, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.092659210462231e-08, |
|
"logits/chosen": -3.8564085960388184, |
|
"logits/rejected": -3.92138671875, |
|
"logps/chosen": -573.0128173828125, |
|
"logps/rejected": -675.12060546875, |
|
"loss": 0.394, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.239450693130493, |
|
"rewards/margins": 1.328932285308838, |
|
"rewards/rejected": -3.568382740020752, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.507260361320737e-08, |
|
"logits/chosen": -3.944653034210205, |
|
"logits/rejected": -3.991550922393799, |
|
"logps/chosen": -670.3541870117188, |
|
"logps/rejected": -742.4327392578125, |
|
"loss": 0.4177, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.406642198562622, |
|
"rewards/margins": 1.1013365983963013, |
|
"rewards/rejected": -3.5079784393310547, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.947931323697982e-08, |
|
"logits/chosen": -3.9694862365722656, |
|
"logits/rejected": -3.969531297683716, |
|
"logps/chosen": -480.57232666015625, |
|
"logps/rejected": -576.7296142578125, |
|
"loss": 0.4341, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.20851469039917, |
|
"rewards/margins": 0.9807012677192688, |
|
"rewards/rejected": -3.189215898513794, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.415420150605398e-08, |
|
"logits/chosen": -3.9178504943847656, |
|
"logits/rejected": -3.8871982097625732, |
|
"logps/chosen": -574.5850830078125, |
|
"logps/rejected": -722.9913330078125, |
|
"loss": 0.4176, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.2402052879333496, |
|
"rewards/margins": 1.2347979545593262, |
|
"rewards/rejected": -3.475003480911255, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.9104390285376374e-08, |
|
"logits/chosen": -3.819741725921631, |
|
"logits/rejected": -3.860565185546875, |
|
"logps/chosen": -664.21142578125, |
|
"logps/rejected": -710.8525390625, |
|
"loss": 0.4123, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.2596795558929443, |
|
"rewards/margins": 1.2178363800048828, |
|
"rewards/rejected": -3.4775161743164062, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_logits/chosen": -3.7826802730560303, |
|
"eval_logits/rejected": -3.8605716228485107, |
|
"eval_logps/chosen": -606.1934204101562, |
|
"eval_logps/rejected": -675.3074951171875, |
|
"eval_loss": 0.42910608649253845, |
|
"eval_rewards/accuracies": 0.7680000066757202, |
|
"eval_rewards/chosen": -2.2038679122924805, |
|
"eval_rewards/margins": 1.245260238647461, |
|
"eval_rewards/rejected": -3.4491279125213623, |
|
"eval_runtime": 203.5751, |
|
"eval_samples_per_second": 9.824, |
|
"eval_steps_per_second": 0.614, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.433663324986208e-08, |
|
"logits/chosen": -3.9174346923828125, |
|
"logits/rejected": -3.9697937965393066, |
|
"logps/chosen": -566.511962890625, |
|
"logps/rejected": -639.9072265625, |
|
"loss": 0.4185, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.194934606552124, |
|
"rewards/margins": 1.1658843755722046, |
|
"rewards/rejected": -3.3608193397521973, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.9857306851953897e-08, |
|
"logits/chosen": -3.928525447845459, |
|
"logits/rejected": -4.02262544631958, |
|
"logps/chosen": -588.645751953125, |
|
"logps/rejected": -649.748046875, |
|
"loss": 0.4511, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.0678915977478027, |
|
"rewards/margins": 1.4058529138565063, |
|
"rewards/rejected": -3.4737441539764404, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.567240179368185e-08, |
|
"logits/chosen": -3.9319825172424316, |
|
"logits/rejected": -3.9248032569885254, |
|
"logps/chosen": -541.6334228515625, |
|
"logps/rejected": -707.2698364257812, |
|
"loss": 0.4226, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.2310593128204346, |
|
"rewards/margins": 1.1511319875717163, |
|
"rewards/rejected": -3.3821911811828613, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1787515014630357e-08, |
|
"logits/chosen": -3.929730176925659, |
|
"logits/rejected": -3.983701705932617, |
|
"logps/chosen": -561.4950561523438, |
|
"logps/rejected": -643.8230590820312, |
|
"loss": 0.457, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.112525224685669, |
|
"rewards/margins": 1.0804126262664795, |
|
"rewards/rejected": -3.1929378509521484, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.820784220652766e-08, |
|
"logits/chosen": -3.8999176025390625, |
|
"logits/rejected": -3.999703884124756, |
|
"logps/chosen": -581.9009399414062, |
|
"logps/rejected": -630.4606323242188, |
|
"loss": 0.4246, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.9299805164337158, |
|
"rewards/margins": 1.3858671188354492, |
|
"rewards/rejected": -3.315847873687744, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.4938170864468636e-08, |
|
"logits/chosen": -3.9592201709747314, |
|
"logits/rejected": -3.982034206390381, |
|
"logps/chosen": -560.0491333007812, |
|
"logps/rejected": -656.3305053710938, |
|
"loss": 0.4568, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.186509847640991, |
|
"rewards/margins": 1.1325315237045288, |
|
"rewards/rejected": -3.3190410137176514, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1982873884064465e-08, |
|
"logits/chosen": -3.9274532794952393, |
|
"logits/rejected": -3.9674625396728516, |
|
"logps/chosen": -524.0363159179688, |
|
"logps/rejected": -671.90966796875, |
|
"loss": 0.424, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.886129379272461, |
|
"rewards/margins": 1.563849687576294, |
|
"rewards/rejected": -3.4499785900115967, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.345903713082304e-09, |
|
"logits/chosen": -3.8932175636291504, |
|
"logits/rejected": -3.9537672996520996, |
|
"logps/chosen": -636.6458740234375, |
|
"logps/rejected": -725.6226196289062, |
|
"loss": 0.4073, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.2143332958221436, |
|
"rewards/margins": 1.2432187795639038, |
|
"rewards/rejected": -3.457551956176758, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.030787065396865e-09, |
|
"logits/chosen": -3.920933961868286, |
|
"logits/rejected": -3.956883192062378, |
|
"logps/chosen": -529.4722290039062, |
|
"logps/rejected": -692.9610595703125, |
|
"loss": 0.431, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.8962398767471313, |
|
"rewards/margins": 1.446508765220642, |
|
"rewards/rejected": -3.3427481651306152, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.04062020432286e-09, |
|
"logits/chosen": -3.8574442863464355, |
|
"logits/rejected": -3.9011623859405518, |
|
"logps/chosen": -620.3335571289062, |
|
"logps/rejected": -719.7374267578125, |
|
"loss": 0.4394, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.159029722213745, |
|
"rewards/margins": 1.208085298538208, |
|
"rewards/rejected": -3.367115020751953, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_logits/chosen": -3.8000898361206055, |
|
"eval_logits/rejected": -3.8776535987854004, |
|
"eval_logps/chosen": -599.057373046875, |
|
"eval_logps/rejected": -666.7249755859375, |
|
"eval_loss": 0.42921942472457886, |
|
"eval_rewards/accuracies": 0.7680000066757202, |
|
"eval_rewards/chosen": -2.132506847381592, |
|
"eval_rewards/margins": 1.230795979499817, |
|
"eval_rewards/rejected": -3.3633029460906982, |
|
"eval_runtime": 203.7413, |
|
"eval_samples_per_second": 9.816, |
|
"eval_steps_per_second": 0.614, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.3780648016376866e-09, |
|
"logits/chosen": -3.9453799724578857, |
|
"logits/rejected": -3.986725330352783, |
|
"logps/chosen": -560.10205078125, |
|
"logps/rejected": -671.8740234375, |
|
"loss": 0.4277, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.1619820594787598, |
|
"rewards/margins": 1.315500259399414, |
|
"rewards/rejected": -3.477482557296753, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.0453443778310766e-09, |
|
"logits/chosen": -3.936732530593872, |
|
"logits/rejected": -3.999469041824341, |
|
"logps/chosen": -593.9410400390625, |
|
"logps/rejected": -700.7437744140625, |
|
"loss": 0.3904, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.2095024585723877, |
|
"rewards/margins": 1.3912229537963867, |
|
"rewards/rejected": -3.600724697113037, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0442413283435758e-09, |
|
"logits/chosen": -3.881744384765625, |
|
"logits/rejected": -3.9535934925079346, |
|
"logps/chosen": -482.16058349609375, |
|
"logps/rejected": -652.9393310546875, |
|
"loss": 0.3867, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.774622917175293, |
|
"rewards/margins": 1.7917388677597046, |
|
"rewards/rejected": -3.566361904144287, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.760945397705828e-10, |
|
"logits/chosen": -3.958568572998047, |
|
"logits/rejected": -4.010906219482422, |
|
"logps/chosen": -617.6387939453125, |
|
"logps/rejected": -726.321044921875, |
|
"loss": 0.4054, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.300060749053955, |
|
"rewards/margins": 1.2705519199371338, |
|
"rewards/rejected": -3.570613145828247, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.17975992204056e-11, |
|
"logits/chosen": -3.931025266647339, |
|
"logits/rejected": -3.9499351978302, |
|
"logps/chosen": -544.7789916992188, |
|
"logps/rejected": -728.7254638671875, |
|
"loss": 0.4304, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -2.2649009227752686, |
|
"rewards/margins": 1.7213318347930908, |
|
"rewards/rejected": -3.9862327575683594, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 955, |
|
"total_flos": 0.0, |
|
"train_loss": 0.46914771214829687, |
|
"train_runtime": 18559.523, |
|
"train_samples_per_second": 3.294, |
|
"train_steps_per_second": 0.051 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 955, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|