|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984, |
|
"eval_steps": 500, |
|
"global_step": 312, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.5625e-07, |
|
"logits/chosen": 0.37485986948013306, |
|
"logits/rejected": 0.6487500071525574, |
|
"logps/chosen": -1078.384765625, |
|
"logps/rejected": -1101.77490234375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5625e-06, |
|
"logits/chosen": 0.47351381182670593, |
|
"logits/rejected": 0.5273572206497192, |
|
"logps/chosen": -1056.42822265625, |
|
"logps/rejected": -1169.265625, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.3958333432674408, |
|
"rewards/chosen": -0.00091694132424891, |
|
"rewards/margins": -0.00018613642896525562, |
|
"rewards/rejected": -0.0007308049243874848, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.125e-06, |
|
"logits/chosen": 0.42252635955810547, |
|
"logits/rejected": 0.49473732709884644, |
|
"logps/chosen": -1147.17236328125, |
|
"logps/rejected": -1265.768798828125, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.022750383242964745, |
|
"rewards/margins": 0.0030057504773139954, |
|
"rewards/rejected": -0.02575613185763359, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.6875000000000004e-06, |
|
"logits/chosen": 0.5097017884254456, |
|
"logits/rejected": 0.5685985088348389, |
|
"logps/chosen": -1142.3890380859375, |
|
"logps/rejected": -1274.76171875, |
|
"loss": 0.6878, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.09474565088748932, |
|
"rewards/margins": 0.013504189439117908, |
|
"rewards/rejected": -0.10824984312057495, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.989935734988098e-06, |
|
"logits/chosen": 0.6262896656990051, |
|
"logits/rejected": 0.5369861125946045, |
|
"logps/chosen": -1379.107177734375, |
|
"logps/rejected": -1572.9727783203125, |
|
"loss": 0.679, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.25016888976097107, |
|
"rewards/margins": 0.05141867324709892, |
|
"rewards/rejected": -0.3015875816345215, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.949188496058089e-06, |
|
"logits/chosen": 0.595242977142334, |
|
"logits/rejected": 0.6554633378982544, |
|
"logps/chosen": -1302.7606201171875, |
|
"logps/rejected": -1508.60107421875, |
|
"loss": 0.6637, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.35188037157058716, |
|
"rewards/margins": 0.07706869393587112, |
|
"rewards/rejected": -0.4289490282535553, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.8776412907378845e-06, |
|
"logits/chosen": 0.6328147053718567, |
|
"logits/rejected": 0.695043683052063, |
|
"logps/chosen": -1562.052978515625, |
|
"logps/rejected": -1646.878662109375, |
|
"loss": 0.6789, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.4670296609401703, |
|
"rewards/margins": 0.025544878095388412, |
|
"rewards/rejected": -0.492574542760849, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.7761938666470405e-06, |
|
"logits/chosen": 0.5762341618537903, |
|
"logits/rejected": 0.6952670812606812, |
|
"logps/chosen": -1321.309814453125, |
|
"logps/rejected": -1596.348876953125, |
|
"loss": 0.6667, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.27966535091400146, |
|
"rewards/margins": 0.1273583322763443, |
|
"rewards/rejected": -0.40702366828918457, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.646121984004666e-06, |
|
"logits/chosen": 0.5692261457443237, |
|
"logits/rejected": 0.8441296815872192, |
|
"logps/chosen": -1434.124755859375, |
|
"logps/rejected": -1714.9644775390625, |
|
"loss": 0.6653, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.38832995295524597, |
|
"rewards/margins": 0.12836144864559174, |
|
"rewards/rejected": -0.5166913866996765, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.4890613722044526e-06, |
|
"logits/chosen": 0.5933000445365906, |
|
"logits/rejected": 0.7363389730453491, |
|
"logps/chosen": -1363.330810546875, |
|
"logps/rejected": -1636.5830078125, |
|
"loss": 0.6614, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.36261868476867676, |
|
"rewards/margins": 0.11317511647939682, |
|
"rewards/rejected": -0.47579383850097656, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.3069871595684795e-06, |
|
"logits/chosen": 0.6000683903694153, |
|
"logits/rejected": 0.7250877618789673, |
|
"logps/chosen": -1501.1116943359375, |
|
"logps/rejected": -1688.3638916015625, |
|
"loss": 0.6729, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.42007994651794434, |
|
"rewards/margins": 0.07729745656251907, |
|
"rewards/rejected": -0.49737733602523804, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.102189034962561e-06, |
|
"logits/chosen": 0.6664993166923523, |
|
"logits/rejected": 0.8522599935531616, |
|
"logps/chosen": -1442.220703125, |
|
"logps/rejected": -1732.263671875, |
|
"loss": 0.6579, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.3821481168270111, |
|
"rewards/margins": 0.13758106529712677, |
|
"rewards/rejected": -0.5197292566299438, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.8772424536302565e-06, |
|
"logits/chosen": 0.7149074077606201, |
|
"logits/rejected": 0.7848154902458191, |
|
"logps/chosen": -1421.830322265625, |
|
"logps/rejected": -1694.971923828125, |
|
"loss": 0.6525, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.38273271918296814, |
|
"rewards/margins": 0.12498722970485687, |
|
"rewards/rejected": -0.5077199935913086, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.634976249348867e-06, |
|
"logits/chosen": 0.709136962890625, |
|
"logits/rejected": 0.7118825912475586, |
|
"logps/chosen": -1674.842041015625, |
|
"logps/rejected": -1917.5904541015625, |
|
"loss": 0.6612, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.5186460614204407, |
|
"rewards/margins": 0.10890078544616699, |
|
"rewards/rejected": -0.6275469064712524, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3784370602033572e-06, |
|
"logits/chosen": 0.6097074747085571, |
|
"logits/rejected": 0.8623794317245483, |
|
"logps/chosen": -1559.768798828125, |
|
"logps/rejected": -1758.3902587890625, |
|
"loss": 0.6673, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.446970134973526, |
|
"rewards/margins": 0.08759806305170059, |
|
"rewards/rejected": -0.534568190574646, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1108510153447352e-06, |
|
"logits/chosen": 0.567806601524353, |
|
"logits/rejected": 0.8320453763008118, |
|
"logps/chosen": -1542.0999755859375, |
|
"logps/rejected": -1677.0787353515625, |
|
"loss": 0.6708, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.41464248299598694, |
|
"rewards/margins": 0.05952323600649834, |
|
"rewards/rejected": -0.47416573762893677, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.835583164544139e-06, |
|
"logits/chosen": 0.7906177639961243, |
|
"logits/rejected": 0.7841562628746033, |
|
"logps/chosen": -1429.5552978515625, |
|
"logps/rejected": -1619.2171630859375, |
|
"loss": 0.6578, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.3786751627922058, |
|
"rewards/margins": 0.09149602800607681, |
|
"rewards/rejected": -0.47017115354537964, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.556095160739513e-06, |
|
"logits/chosen": 0.7081605195999146, |
|
"logits/rejected": 0.6871576905250549, |
|
"logps/chosen": -1445.623779296875, |
|
"logps/rejected": -1650.127197265625, |
|
"loss": 0.6577, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.41454702615737915, |
|
"rewards/margins": 0.10776009410619736, |
|
"rewards/rejected": -0.5223071575164795, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2759017277414165e-06, |
|
"logits/chosen": 0.7904581427574158, |
|
"logits/rejected": 0.7038453817367554, |
|
"logps/chosen": -1671.363525390625, |
|
"logps/rejected": -1868.4300537109375, |
|
"loss": 0.6579, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.5421901941299438, |
|
"rewards/margins": 0.10026909410953522, |
|
"rewards/rejected": -0.6424592733383179, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9985264605418185e-06, |
|
"logits/chosen": 0.6540366411209106, |
|
"logits/rejected": 0.7285584807395935, |
|
"logps/chosen": -1518.58935546875, |
|
"logps/rejected": -1726.029052734375, |
|
"loss": 0.652, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.4512515962123871, |
|
"rewards/margins": 0.09267839789390564, |
|
"rewards/rejected": -0.5439299941062927, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7274575140626318e-06, |
|
"logits/chosen": 0.879226565361023, |
|
"logits/rejected": 0.8555147051811218, |
|
"logps/chosen": -1627.2894287109375, |
|
"logps/rejected": -1928.3841552734375, |
|
"loss": 0.6602, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.5301727056503296, |
|
"rewards/margins": 0.161948561668396, |
|
"rewards/rejected": -0.6921212673187256, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.466103737583699e-06, |
|
"logits/chosen": 0.6861797571182251, |
|
"logits/rejected": 0.9016023874282837, |
|
"logps/chosen": -1539.524658203125, |
|
"logps/rejected": -1874.627685546875, |
|
"loss": 0.659, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.4866175651550293, |
|
"rewards/margins": 0.17361479997634888, |
|
"rewards/rejected": -0.6602323651313782, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.217751806485235e-06, |
|
"logits/chosen": 0.7002454996109009, |
|
"logits/rejected": 0.8032233119010925, |
|
"logps/chosen": -1692.8636474609375, |
|
"logps/rejected": -1974.6669921875, |
|
"loss": 0.6663, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.5267156958580017, |
|
"rewards/margins": 0.14205826818943024, |
|
"rewards/rejected": -0.6687740087509155, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.855248903979505e-07, |
|
"logits/chosen": 0.7965744733810425, |
|
"logits/rejected": 0.7885487079620361, |
|
"logps/chosen": -1604.53466796875, |
|
"logps/rejected": -1794.8245849609375, |
|
"loss": 0.6672, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.48480549454689026, |
|
"rewards/margins": 0.0899248868227005, |
|
"rewards/rejected": -0.574730396270752, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.723433775328385e-07, |
|
"logits/chosen": 0.7397829294204712, |
|
"logits/rejected": 0.8248909711837769, |
|
"logps/chosen": -1530.4615478515625, |
|
"logps/rejected": -1826.6988525390625, |
|
"loss": 0.654, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.41139334440231323, |
|
"rewards/margins": 0.1389993578195572, |
|
"rewards/rejected": -0.550392746925354, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.808881491049723e-07, |
|
"logits/chosen": 0.6129003763198853, |
|
"logits/rejected": 0.8366864919662476, |
|
"logps/chosen": -1451.37890625, |
|
"logps/rejected": -1729.635986328125, |
|
"loss": 0.6728, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.40951260924339294, |
|
"rewards/margins": 0.1274668127298355, |
|
"rewards/rejected": -0.5369793772697449, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1356686569674344e-07, |
|
"logits/chosen": 0.7178203463554382, |
|
"logits/rejected": 0.8371836543083191, |
|
"logps/chosen": -1524.3497314453125, |
|
"logps/rejected": -1778.346923828125, |
|
"loss": 0.6495, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.4218766689300537, |
|
"rewards/margins": 0.12465916574001312, |
|
"rewards/rejected": -0.546535849571228, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.7248368952908055e-07, |
|
"logits/chosen": 0.6586390733718872, |
|
"logits/rejected": 0.8500372767448425, |
|
"logps/chosen": -1523.057373046875, |
|
"logps/rejected": -1753.3544921875, |
|
"loss": 0.6641, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.43091854453086853, |
|
"rewards/margins": 0.11140650510787964, |
|
"rewards/rejected": -0.5423250198364258, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.59412823400657e-07, |
|
"logits/chosen": 0.7076243162155151, |
|
"logits/rejected": 0.7846710681915283, |
|
"logps/chosen": -1433.0615234375, |
|
"logps/rejected": -1673.491455078125, |
|
"loss": 0.6713, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.39409512281417847, |
|
"rewards/margins": 0.11671394109725952, |
|
"rewards/rejected": -0.510809063911438, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.577619905828281e-08, |
|
"logits/chosen": 0.6640155911445618, |
|
"logits/rejected": 0.7537108659744263, |
|
"logps/chosen": -1413.724853515625, |
|
"logps/rejected": -1650.0406494140625, |
|
"loss": 0.6651, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.38546374440193176, |
|
"rewards/margins": 0.11493394523859024, |
|
"rewards/rejected": -0.5003976821899414, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.262559558016325e-08, |
|
"logits/chosen": 0.635712742805481, |
|
"logits/rejected": 0.8228713274002075, |
|
"logps/chosen": -1463.5419921875, |
|
"logps/rejected": -1637.050048828125, |
|
"loss": 0.6578, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.41111892461776733, |
|
"rewards/margins": 0.08340780436992645, |
|
"rewards/rejected": -0.4945267140865326, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.294126437336734e-10, |
|
"logits/chosen": 0.6655277013778687, |
|
"logits/rejected": 0.759280800819397, |
|
"logps/chosen": -1619.28857421875, |
|
"logps/rejected": -1727.4703369140625, |
|
"loss": 0.6663, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.4628829061985016, |
|
"rewards/margins": 0.05492577701807022, |
|
"rewards/rejected": -0.517808735370636, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 312, |
|
"total_flos": 0.0, |
|
"train_loss": 0.666122229435505, |
|
"train_runtime": 4198.8516, |
|
"train_samples_per_second": 4.763, |
|
"train_steps_per_second": 0.074 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 312, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|