|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 6.259780907668231, |
|
"eval_steps": 50, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.40062597809076683, |
|
"grad_norm": 54.871246337890625, |
|
"learning_rate": 3.2e-07, |
|
"log_odds_chosen": -0.2863271236419678, |
|
"log_odds_ratio": -0.8682101964950562, |
|
"logits/chosen": -2.625612735748291, |
|
"logits/rejected": -2.950411558151245, |
|
"logps/chosen": -1.0862526893615723, |
|
"logps/rejected": -0.9258921146392822, |
|
"loss": 1.5757, |
|
"nll_loss": 1.4893277883529663, |
|
"rewards/accuracies": 0.26953125, |
|
"rewards/chosen": -0.10862527787685394, |
|
"rewards/margins": -0.01603606529533863, |
|
"rewards/rejected": -0.09258921444416046, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.6259780907668232, |
|
"eval_log_odds_chosen": -0.008341665379703045, |
|
"eval_log_odds_ratio": -0.7013140320777893, |
|
"eval_logits/chosen": -2.989178419113159, |
|
"eval_logits/rejected": -3.091775417327881, |
|
"eval_logps/chosen": -0.919834554195404, |
|
"eval_logps/rejected": -0.9251189827919006, |
|
"eval_loss": 1.1759616136550903, |
|
"eval_nll_loss": 1.0507723093032837, |
|
"eval_rewards/accuracies": 0.5714285969734192, |
|
"eval_rewards/chosen": -0.09198347479104996, |
|
"eval_rewards/margins": 0.0005284372018650174, |
|
"eval_rewards/rejected": -0.09251189976930618, |
|
"eval_runtime": 3.582, |
|
"eval_samples_per_second": 14.517, |
|
"eval_steps_per_second": 1.954, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.8012519561815337, |
|
"grad_norm": 14.854985237121582, |
|
"learning_rate": 4.988068499954577e-07, |
|
"log_odds_chosen": -0.08584073185920715, |
|
"log_odds_ratio": -0.7622759342193604, |
|
"logits/chosen": -2.6125504970550537, |
|
"logits/rejected": -2.8110339641571045, |
|
"logps/chosen": -0.7727512121200562, |
|
"logps/rejected": -0.7502321004867554, |
|
"loss": 0.9889, |
|
"nll_loss": 0.9098491668701172, |
|
"rewards/accuracies": 0.43359375, |
|
"rewards/chosen": -0.0772751197218895, |
|
"rewards/margins": -0.0022519100457429886, |
|
"rewards/rejected": -0.07502321898937225, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 1.2018779342723005, |
|
"grad_norm": 12.362813949584961, |
|
"learning_rate": 4.872190029111241e-07, |
|
"log_odds_chosen": 0.6338525414466858, |
|
"log_odds_ratio": -0.46056824922561646, |
|
"logits/chosen": -2.487048387527466, |
|
"logits/rejected": -2.679857015609741, |
|
"logps/chosen": -0.6807280778884888, |
|
"logps/rejected": -1.0647395849227905, |
|
"loss": 0.7611, |
|
"nll_loss": 0.7052887082099915, |
|
"rewards/accuracies": 0.8984375, |
|
"rewards/chosen": -0.06807281076908112, |
|
"rewards/margins": 0.038401152938604355, |
|
"rewards/rejected": -0.10647396743297577, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 1.2519561815336462, |
|
"eval_log_odds_chosen": 1.3032406568527222, |
|
"eval_log_odds_ratio": -0.27346786856651306, |
|
"eval_logits/chosen": -2.8948192596435547, |
|
"eval_logits/rejected": -2.813701868057251, |
|
"eval_logps/chosen": -0.8319589495658875, |
|
"eval_logps/rejected": -1.7709095478057861, |
|
"eval_loss": 0.874918520450592, |
|
"eval_nll_loss": 0.8324368596076965, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.08319590240716934, |
|
"eval_rewards/margins": 0.0938950628042221, |
|
"eval_rewards/rejected": -0.17709095776081085, |
|
"eval_runtime": 3.5651, |
|
"eval_samples_per_second": 14.586, |
|
"eval_steps_per_second": 1.963, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.6025039123630673, |
|
"grad_norm": 11.88025188446045, |
|
"learning_rate": 4.6384106504012665e-07, |
|
"log_odds_chosen": 1.6872429847717285, |
|
"log_odds_ratio": -0.21468885242938995, |
|
"logits/chosen": -2.4869632720947266, |
|
"logits/rejected": -2.536886215209961, |
|
"logps/chosen": -0.634860098361969, |
|
"logps/rejected": -1.7767176628112793, |
|
"loss": 0.6843, |
|
"nll_loss": 0.6487288475036621, |
|
"rewards/accuracies": 0.99609375, |
|
"rewards/chosen": -0.0634860098361969, |
|
"rewards/margins": 0.11418575048446655, |
|
"rewards/rejected": -0.17767177522182465, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 1.8779342723004695, |
|
"eval_log_odds_chosen": 2.711949586868286, |
|
"eval_log_odds_ratio": -0.10135732591152191, |
|
"eval_logits/chosen": -2.8942391872406006, |
|
"eval_logits/rejected": -2.879112482070923, |
|
"eval_logps/chosen": -0.8020210862159729, |
|
"eval_logps/rejected": -2.9775755405426025, |
|
"eval_loss": 0.8163785338401794, |
|
"eval_nll_loss": 0.795821487903595, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.08020210266113281, |
|
"eval_rewards/margins": 0.217555433511734, |
|
"eval_rewards/rejected": -0.2977575361728668, |
|
"eval_runtime": 3.5856, |
|
"eval_samples_per_second": 14.502, |
|
"eval_steps_per_second": 1.952, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.003129890453834, |
|
"grad_norm": 11.101346969604492, |
|
"learning_rate": 4.2983495008466273e-07, |
|
"log_odds_chosen": 2.479166030883789, |
|
"log_odds_ratio": -0.11427275836467743, |
|
"logits/chosen": -2.462007522583008, |
|
"logits/rejected": -2.5404274463653564, |
|
"logps/chosen": -0.6245267391204834, |
|
"logps/rejected": -2.437384843826294, |
|
"loss": 0.6521, |
|
"nll_loss": 0.6418842077255249, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.06245267391204834, |
|
"rewards/margins": 0.18128584325313568, |
|
"rewards/rejected": -0.24373850226402283, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.403755868544601, |
|
"grad_norm": 9.568058013916016, |
|
"learning_rate": 3.8689080587313755e-07, |
|
"log_odds_chosen": 2.8940343856811523, |
|
"log_odds_ratio": -0.08038710057735443, |
|
"logits/chosen": -2.4281036853790283, |
|
"logits/rejected": -2.5184569358825684, |
|
"logps/chosen": -0.587062418460846, |
|
"logps/rejected": -2.7329537868499756, |
|
"loss": 0.6314, |
|
"nll_loss": 0.5980546474456787, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.05870624631643295, |
|
"rewards/margins": 0.21458914875984192, |
|
"rewards/rejected": -0.27329540252685547, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 2.5039123630672924, |
|
"eval_log_odds_chosen": 3.3576512336730957, |
|
"eval_log_odds_ratio": -0.06285899877548218, |
|
"eval_logits/chosen": -2.901575803756714, |
|
"eval_logits/rejected": -2.8905088901519775, |
|
"eval_logps/chosen": -0.7869912385940552, |
|
"eval_logps/rejected": -3.557527780532837, |
|
"eval_loss": 0.7937864065170288, |
|
"eval_nll_loss": 0.7795100808143616, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.07869912683963776, |
|
"eval_rewards/margins": 0.2770536541938782, |
|
"eval_rewards/rejected": -0.3557527959346771, |
|
"eval_runtime": 3.5965, |
|
"eval_samples_per_second": 14.458, |
|
"eval_steps_per_second": 1.946, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.804381846635368, |
|
"grad_norm": 10.476876258850098, |
|
"learning_rate": 3.371430118304538e-07, |
|
"log_odds_chosen": 3.5498757362365723, |
|
"log_odds_ratio": -0.05812463164329529, |
|
"logits/chosen": -2.4844484329223633, |
|
"logits/rejected": -2.5605552196502686, |
|
"logps/chosen": -0.6014833450317383, |
|
"logps/rejected": -3.397700786590576, |
|
"loss": 0.6194, |
|
"nll_loss": 0.6138021945953369, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.06014833599328995, |
|
"rewards/margins": 0.27962177991867065, |
|
"rewards/rejected": -0.3397701382637024, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 3.1298904538341157, |
|
"eval_log_odds_chosen": 4.2982916831970215, |
|
"eval_log_odds_ratio": -0.03043905831873417, |
|
"eval_logits/chosen": -2.9097611904144287, |
|
"eval_logits/rejected": -2.854037046432495, |
|
"eval_logps/chosen": -0.7769914865493774, |
|
"eval_logps/rejected": -4.445803165435791, |
|
"eval_loss": 0.7800766825675964, |
|
"eval_nll_loss": 0.7686944603919983, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.07769914716482162, |
|
"eval_rewards/margins": 0.3668811619281769, |
|
"eval_rewards/rejected": -0.4445803463459015, |
|
"eval_runtime": 3.5656, |
|
"eval_samples_per_second": 14.584, |
|
"eval_steps_per_second": 1.963, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.2050078247261347, |
|
"grad_norm": 10.105437278747559, |
|
"learning_rate": 2.830640975642806e-07, |
|
"log_odds_chosen": 4.234708786010742, |
|
"log_odds_ratio": -0.03429976850748062, |
|
"logits/chosen": -2.4910290241241455, |
|
"logits/rejected": -2.5494701862335205, |
|
"logps/chosen": -0.6145447492599487, |
|
"logps/rejected": -4.0717267990112305, |
|
"loss": 0.6159, |
|
"nll_loss": 0.6162381768226624, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.06145448237657547, |
|
"rewards/margins": 0.3457182049751282, |
|
"rewards/rejected": -0.40717267990112305, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 3.6056338028169015, |
|
"grad_norm": 9.233214378356934, |
|
"learning_rate": 2.2734185495055498e-07, |
|
"log_odds_chosen": 4.952095031738281, |
|
"log_odds_ratio": -0.01972360536456108, |
|
"logits/chosen": -2.4912912845611572, |
|
"logits/rejected": -2.502811908721924, |
|
"logps/chosen": -0.594947874546051, |
|
"logps/rejected": -4.724546432495117, |
|
"loss": 0.6043, |
|
"nll_loss": 0.6036252379417419, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.05949478596448898, |
|
"rewards/margins": 0.41295987367630005, |
|
"rewards/rejected": -0.47245466709136963, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 3.755868544600939, |
|
"eval_log_odds_chosen": 5.4648847579956055, |
|
"eval_log_odds_ratio": -0.010219605639576912, |
|
"eval_logits/chosen": -2.894416093826294, |
|
"eval_logits/rejected": -2.8168509006500244, |
|
"eval_logps/chosen": -0.772580623626709, |
|
"eval_logps/rejected": -5.580881595611572, |
|
"eval_loss": 0.7731113433837891, |
|
"eval_nll_loss": 0.7631542086601257, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.07725805789232254, |
|
"eval_rewards/margins": 0.48083004355430603, |
|
"eval_rewards/rejected": -0.5580881237983704, |
|
"eval_runtime": 3.5679, |
|
"eval_samples_per_second": 14.574, |
|
"eval_steps_per_second": 1.962, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.006259780907668, |
|
"grad_norm": 10.460640907287598, |
|
"learning_rate": 1.7274575140626315e-07, |
|
"log_odds_chosen": 5.956634998321533, |
|
"log_odds_ratio": -0.01081022247672081, |
|
"logits/chosen": -2.474257707595825, |
|
"logits/rejected": -2.4944746494293213, |
|
"logps/chosen": -0.584967315196991, |
|
"logps/rejected": -5.693123817443848, |
|
"loss": 0.5961, |
|
"nll_loss": 0.591893196105957, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.058496732264757156, |
|
"rewards/margins": 0.5108156800270081, |
|
"rewards/rejected": -0.5693123936653137, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 4.381846635367762, |
|
"eval_log_odds_chosen": 6.661163330078125, |
|
"eval_log_odds_ratio": -0.003369454061612487, |
|
"eval_logits/chosen": -2.8870294094085693, |
|
"eval_logits/rejected": -2.8113913536071777, |
|
"eval_logps/chosen": -0.7696248888969421, |
|
"eval_logps/rejected": -6.763743877410889, |
|
"eval_loss": 0.769009530544281, |
|
"eval_nll_loss": 0.7600502967834473, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.07696248590946198, |
|
"eval_rewards/margins": 0.5994119644165039, |
|
"eval_rewards/rejected": -0.6763744354248047, |
|
"eval_runtime": 3.5818, |
|
"eval_samples_per_second": 14.518, |
|
"eval_steps_per_second": 1.954, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 4.406885758998435, |
|
"grad_norm": 10.492298126220703, |
|
"learning_rate": 1.2198928378235715e-07, |
|
"log_odds_chosen": 6.660679817199707, |
|
"log_odds_ratio": -0.00656685046851635, |
|
"logits/chosen": -2.4854841232299805, |
|
"logits/rejected": -2.487821102142334, |
|
"logps/chosen": -0.5842890739440918, |
|
"logps/rejected": -6.385722637176514, |
|
"loss": 0.5976, |
|
"nll_loss": 0.5909620523452759, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.05842890590429306, |
|
"rewards/margins": 0.5801433324813843, |
|
"rewards/rejected": -0.6385722160339355, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 4.807511737089202, |
|
"grad_norm": 10.00313949584961, |
|
"learning_rate": 7.759511406608255e-08, |
|
"log_odds_chosen": 6.706086158752441, |
|
"log_odds_ratio": -0.0058883922174572945, |
|
"logits/chosen": -2.4211370944976807, |
|
"logits/rejected": -2.4340803623199463, |
|
"logps/chosen": -0.570120632648468, |
|
"logps/rejected": -6.413407325744629, |
|
"loss": 0.584, |
|
"nll_loss": 0.5762451887130737, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.05701206251978874, |
|
"rewards/margins": 0.5843286514282227, |
|
"rewards/rejected": -0.6413407921791077, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 5.007824726134586, |
|
"eval_log_odds_chosen": 7.109870910644531, |
|
"eval_log_odds_ratio": -0.002329548355191946, |
|
"eval_logits/chosen": -2.9075374603271484, |
|
"eval_logits/rejected": -2.810256242752075, |
|
"eval_logps/chosen": -0.7671002149581909, |
|
"eval_logps/rejected": -7.206047534942627, |
|
"eval_loss": 0.7668870091438293, |
|
"eval_nll_loss": 0.7577933073043823, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.07671000808477402, |
|
"eval_rewards/margins": 0.6438947319984436, |
|
"eval_rewards/rejected": -0.7206048369407654, |
|
"eval_runtime": 3.5587, |
|
"eval_samples_per_second": 14.612, |
|
"eval_steps_per_second": 1.967, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 5.208137715179968, |
|
"grad_norm": 12.630816459655762, |
|
"learning_rate": 4.176968982247514e-08, |
|
"log_odds_chosen": 7.1072587966918945, |
|
"log_odds_ratio": -0.0049699898809194565, |
|
"logits/chosen": -2.4647462368011475, |
|
"logits/rejected": -2.4573941230773926, |
|
"logps/chosen": -0.5844926834106445, |
|
"logps/rejected": -6.838533401489258, |
|
"loss": 0.5949, |
|
"nll_loss": 0.5898107290267944, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.05844927579164505, |
|
"rewards/margins": 0.6254041194915771, |
|
"rewards/rejected": -0.6838533878326416, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 5.608763693270736, |
|
"grad_norm": 50.02872848510742, |
|
"learning_rate": 1.629358090099639e-08, |
|
"log_odds_chosen": 7.274372577667236, |
|
"log_odds_ratio": -0.004320599138736725, |
|
"logits/chosen": -2.405644178390503, |
|
"logits/rejected": -2.42146635055542, |
|
"logps/chosen": -0.5744296908378601, |
|
"logps/rejected": -6.983857154846191, |
|
"loss": 0.5954, |
|
"nll_loss": 0.5800217986106873, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.05744296684861183, |
|
"rewards/margins": 0.6409427523612976, |
|
"rewards/rejected": -0.6983856558799744, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 5.633802816901408, |
|
"eval_log_odds_chosen": 7.28384256362915, |
|
"eval_log_odds_ratio": -0.0020152912475168705, |
|
"eval_logits/chosen": -2.903193235397339, |
|
"eval_logits/rejected": -2.808192014694214, |
|
"eval_logps/chosen": -0.767649233341217, |
|
"eval_logps/rejected": -7.380805969238281, |
|
"eval_loss": 0.7667036652565002, |
|
"eval_nll_loss": 0.7581475377082825, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.07676493376493454, |
|
"eval_rewards/margins": 0.6613157391548157, |
|
"eval_rewards/rejected": -0.738080620765686, |
|
"eval_runtime": 3.589, |
|
"eval_samples_per_second": 14.489, |
|
"eval_steps_per_second": 1.95, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 6.009389671361502, |
|
"grad_norm": 19.15870475769043, |
|
"learning_rate": 2.4329828146074096e-09, |
|
"log_odds_chosen": 7.12039852142334, |
|
"log_odds_ratio": -0.0058927275240421295, |
|
"logits/chosen": -2.4525253772735596, |
|
"logits/rejected": -2.446058988571167, |
|
"logps/chosen": -0.5847591757774353, |
|
"logps/rejected": -6.854161739349365, |
|
"loss": 0.589, |
|
"nll_loss": 0.589131236076355, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.05847591161727905, |
|
"rewards/margins": 0.6269403100013733, |
|
"rewards/rejected": -0.6854162216186523, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 6.259780907668231, |
|
"grad_norm": 9.93666934967041, |
|
"learning_rate": 0.0, |
|
"log_odds_chosen": 7.366458892822266, |
|
"log_odds_ratio": -0.00489471573382616, |
|
"logits/chosen": -2.495069980621338, |
|
"logits/rejected": -2.4710330963134766, |
|
"logps/chosen": -0.5860047936439514, |
|
"logps/rejected": -7.1015305519104, |
|
"loss": 0.589, |
|
"nll_loss": 0.593083381652832, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.058600474148988724, |
|
"rewards/margins": 0.6515525579452515, |
|
"rewards/rejected": -0.7101531028747559, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 6.259780907668231, |
|
"eval_log_odds_chosen": 7.277224063873291, |
|
"eval_log_odds_ratio": -0.0020051717292517424, |
|
"eval_logits/chosen": -2.901961088180542, |
|
"eval_logits/rejected": -2.8077356815338135, |
|
"eval_logps/chosen": -0.7674554586410522, |
|
"eval_logps/rejected": -7.3736891746521, |
|
"eval_loss": 0.7665765285491943, |
|
"eval_nll_loss": 0.7579033970832825, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.07674554735422134, |
|
"eval_rewards/margins": 0.6606234312057495, |
|
"eval_rewards/rejected": -0.7373689413070679, |
|
"eval_runtime": 3.5704, |
|
"eval_samples_per_second": 14.564, |
|
"eval_steps_per_second": 1.961, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 32, |
|
"max_steps": 500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 7, |
|
"save_steps": 1, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|