avemio-digital's picture
Add files using upload-large-folder tool
23f3a4b verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 6.259780907668231,
"eval_steps": 50,
"global_step": 500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.40062597809076683,
"grad_norm": 54.871246337890625,
"learning_rate": 3.2e-07,
"log_odds_chosen": -0.2863271236419678,
"log_odds_ratio": -0.8682101964950562,
"logits/chosen": -2.625612735748291,
"logits/rejected": -2.950411558151245,
"logps/chosen": -1.0862526893615723,
"logps/rejected": -0.9258921146392822,
"loss": 1.5757,
"nll_loss": 1.4893277883529663,
"rewards/accuracies": 0.26953125,
"rewards/chosen": -0.10862527787685394,
"rewards/margins": -0.01603606529533863,
"rewards/rejected": -0.09258921444416046,
"step": 32
},
{
"epoch": 0.6259780907668232,
"eval_log_odds_chosen": -0.008341665379703045,
"eval_log_odds_ratio": -0.7013140320777893,
"eval_logits/chosen": -2.989178419113159,
"eval_logits/rejected": -3.091775417327881,
"eval_logps/chosen": -0.919834554195404,
"eval_logps/rejected": -0.9251189827919006,
"eval_loss": 1.1759616136550903,
"eval_nll_loss": 1.0507723093032837,
"eval_rewards/accuracies": 0.5714285969734192,
"eval_rewards/chosen": -0.09198347479104996,
"eval_rewards/margins": 0.0005284372018650174,
"eval_rewards/rejected": -0.09251189976930618,
"eval_runtime": 3.582,
"eval_samples_per_second": 14.517,
"eval_steps_per_second": 1.954,
"step": 50
},
{
"epoch": 0.8012519561815337,
"grad_norm": 14.854985237121582,
"learning_rate": 4.988068499954577e-07,
"log_odds_chosen": -0.08584073185920715,
"log_odds_ratio": -0.7622759342193604,
"logits/chosen": -2.6125504970550537,
"logits/rejected": -2.8110339641571045,
"logps/chosen": -0.7727512121200562,
"logps/rejected": -0.7502321004867554,
"loss": 0.9889,
"nll_loss": 0.9098491668701172,
"rewards/accuracies": 0.43359375,
"rewards/chosen": -0.0772751197218895,
"rewards/margins": -0.0022519100457429886,
"rewards/rejected": -0.07502321898937225,
"step": 64
},
{
"epoch": 1.2018779342723005,
"grad_norm": 12.362813949584961,
"learning_rate": 4.872190029111241e-07,
"log_odds_chosen": 0.6338525414466858,
"log_odds_ratio": -0.46056824922561646,
"logits/chosen": -2.487048387527466,
"logits/rejected": -2.679857015609741,
"logps/chosen": -0.6807280778884888,
"logps/rejected": -1.0647395849227905,
"loss": 0.7611,
"nll_loss": 0.7052887082099915,
"rewards/accuracies": 0.8984375,
"rewards/chosen": -0.06807281076908112,
"rewards/margins": 0.038401152938604355,
"rewards/rejected": -0.10647396743297577,
"step": 96
},
{
"epoch": 1.2519561815336462,
"eval_log_odds_chosen": 1.3032406568527222,
"eval_log_odds_ratio": -0.27346786856651306,
"eval_logits/chosen": -2.8948192596435547,
"eval_logits/rejected": -2.813701868057251,
"eval_logps/chosen": -0.8319589495658875,
"eval_logps/rejected": -1.7709095478057861,
"eval_loss": 0.874918520450592,
"eval_nll_loss": 0.8324368596076965,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.08319590240716934,
"eval_rewards/margins": 0.0938950628042221,
"eval_rewards/rejected": -0.17709095776081085,
"eval_runtime": 3.5651,
"eval_samples_per_second": 14.586,
"eval_steps_per_second": 1.963,
"step": 100
},
{
"epoch": 1.6025039123630673,
"grad_norm": 11.88025188446045,
"learning_rate": 4.6384106504012665e-07,
"log_odds_chosen": 1.6872429847717285,
"log_odds_ratio": -0.21468885242938995,
"logits/chosen": -2.4869632720947266,
"logits/rejected": -2.536886215209961,
"logps/chosen": -0.634860098361969,
"logps/rejected": -1.7767176628112793,
"loss": 0.6843,
"nll_loss": 0.6487288475036621,
"rewards/accuracies": 0.99609375,
"rewards/chosen": -0.0634860098361969,
"rewards/margins": 0.11418575048446655,
"rewards/rejected": -0.17767177522182465,
"step": 128
},
{
"epoch": 1.8779342723004695,
"eval_log_odds_chosen": 2.711949586868286,
"eval_log_odds_ratio": -0.10135732591152191,
"eval_logits/chosen": -2.8942391872406006,
"eval_logits/rejected": -2.879112482070923,
"eval_logps/chosen": -0.8020210862159729,
"eval_logps/rejected": -2.9775755405426025,
"eval_loss": 0.8163785338401794,
"eval_nll_loss": 0.795821487903595,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.08020210266113281,
"eval_rewards/margins": 0.217555433511734,
"eval_rewards/rejected": -0.2977575361728668,
"eval_runtime": 3.5856,
"eval_samples_per_second": 14.502,
"eval_steps_per_second": 1.952,
"step": 150
},
{
"epoch": 2.003129890453834,
"grad_norm": 11.101346969604492,
"learning_rate": 4.2983495008466273e-07,
"log_odds_chosen": 2.479166030883789,
"log_odds_ratio": -0.11427275836467743,
"logits/chosen": -2.462007522583008,
"logits/rejected": -2.5404274463653564,
"logps/chosen": -0.6245267391204834,
"logps/rejected": -2.437384843826294,
"loss": 0.6521,
"nll_loss": 0.6418842077255249,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.06245267391204834,
"rewards/margins": 0.18128584325313568,
"rewards/rejected": -0.24373850226402283,
"step": 160
},
{
"epoch": 2.403755868544601,
"grad_norm": 9.568058013916016,
"learning_rate": 3.8689080587313755e-07,
"log_odds_chosen": 2.8940343856811523,
"log_odds_ratio": -0.08038710057735443,
"logits/chosen": -2.4281036853790283,
"logits/rejected": -2.5184569358825684,
"logps/chosen": -0.587062418460846,
"logps/rejected": -2.7329537868499756,
"loss": 0.6314,
"nll_loss": 0.5980546474456787,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.05870624631643295,
"rewards/margins": 0.21458914875984192,
"rewards/rejected": -0.27329540252685547,
"step": 192
},
{
"epoch": 2.5039123630672924,
"eval_log_odds_chosen": 3.3576512336730957,
"eval_log_odds_ratio": -0.06285899877548218,
"eval_logits/chosen": -2.901575803756714,
"eval_logits/rejected": -2.8905088901519775,
"eval_logps/chosen": -0.7869912385940552,
"eval_logps/rejected": -3.557527780532837,
"eval_loss": 0.7937864065170288,
"eval_nll_loss": 0.7795100808143616,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.07869912683963776,
"eval_rewards/margins": 0.2770536541938782,
"eval_rewards/rejected": -0.3557527959346771,
"eval_runtime": 3.5965,
"eval_samples_per_second": 14.458,
"eval_steps_per_second": 1.946,
"step": 200
},
{
"epoch": 2.804381846635368,
"grad_norm": 10.476876258850098,
"learning_rate": 3.371430118304538e-07,
"log_odds_chosen": 3.5498757362365723,
"log_odds_ratio": -0.05812463164329529,
"logits/chosen": -2.4844484329223633,
"logits/rejected": -2.5605552196502686,
"logps/chosen": -0.6014833450317383,
"logps/rejected": -3.397700786590576,
"loss": 0.6194,
"nll_loss": 0.6138021945953369,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.06014833599328995,
"rewards/margins": 0.27962177991867065,
"rewards/rejected": -0.3397701382637024,
"step": 224
},
{
"epoch": 3.1298904538341157,
"eval_log_odds_chosen": 4.2982916831970215,
"eval_log_odds_ratio": -0.03043905831873417,
"eval_logits/chosen": -2.9097611904144287,
"eval_logits/rejected": -2.854037046432495,
"eval_logps/chosen": -0.7769914865493774,
"eval_logps/rejected": -4.445803165435791,
"eval_loss": 0.7800766825675964,
"eval_nll_loss": 0.7686944603919983,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.07769914716482162,
"eval_rewards/margins": 0.3668811619281769,
"eval_rewards/rejected": -0.4445803463459015,
"eval_runtime": 3.5656,
"eval_samples_per_second": 14.584,
"eval_steps_per_second": 1.963,
"step": 250
},
{
"epoch": 3.2050078247261347,
"grad_norm": 10.105437278747559,
"learning_rate": 2.830640975642806e-07,
"log_odds_chosen": 4.234708786010742,
"log_odds_ratio": -0.03429976850748062,
"logits/chosen": -2.4910290241241455,
"logits/rejected": -2.5494701862335205,
"logps/chosen": -0.6145447492599487,
"logps/rejected": -4.0717267990112305,
"loss": 0.6159,
"nll_loss": 0.6162381768226624,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.06145448237657547,
"rewards/margins": 0.3457182049751282,
"rewards/rejected": -0.40717267990112305,
"step": 256
},
{
"epoch": 3.6056338028169015,
"grad_norm": 9.233214378356934,
"learning_rate": 2.2734185495055498e-07,
"log_odds_chosen": 4.952095031738281,
"log_odds_ratio": -0.01972360536456108,
"logits/chosen": -2.4912912845611572,
"logits/rejected": -2.502811908721924,
"logps/chosen": -0.594947874546051,
"logps/rejected": -4.724546432495117,
"loss": 0.6043,
"nll_loss": 0.6036252379417419,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.05949478596448898,
"rewards/margins": 0.41295987367630005,
"rewards/rejected": -0.47245466709136963,
"step": 288
},
{
"epoch": 3.755868544600939,
"eval_log_odds_chosen": 5.4648847579956055,
"eval_log_odds_ratio": -0.010219605639576912,
"eval_logits/chosen": -2.894416093826294,
"eval_logits/rejected": -2.8168509006500244,
"eval_logps/chosen": -0.772580623626709,
"eval_logps/rejected": -5.580881595611572,
"eval_loss": 0.7731113433837891,
"eval_nll_loss": 0.7631542086601257,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.07725805789232254,
"eval_rewards/margins": 0.48083004355430603,
"eval_rewards/rejected": -0.5580881237983704,
"eval_runtime": 3.5679,
"eval_samples_per_second": 14.574,
"eval_steps_per_second": 1.962,
"step": 300
},
{
"epoch": 4.006259780907668,
"grad_norm": 10.460640907287598,
"learning_rate": 1.7274575140626315e-07,
"log_odds_chosen": 5.956634998321533,
"log_odds_ratio": -0.01081022247672081,
"logits/chosen": -2.474257707595825,
"logits/rejected": -2.4944746494293213,
"logps/chosen": -0.584967315196991,
"logps/rejected": -5.693123817443848,
"loss": 0.5961,
"nll_loss": 0.591893196105957,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.058496732264757156,
"rewards/margins": 0.5108156800270081,
"rewards/rejected": -0.5693123936653137,
"step": 320
},
{
"epoch": 4.381846635367762,
"eval_log_odds_chosen": 6.661163330078125,
"eval_log_odds_ratio": -0.003369454061612487,
"eval_logits/chosen": -2.8870294094085693,
"eval_logits/rejected": -2.8113913536071777,
"eval_logps/chosen": -0.7696248888969421,
"eval_logps/rejected": -6.763743877410889,
"eval_loss": 0.769009530544281,
"eval_nll_loss": 0.7600502967834473,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.07696248590946198,
"eval_rewards/margins": 0.5994119644165039,
"eval_rewards/rejected": -0.6763744354248047,
"eval_runtime": 3.5818,
"eval_samples_per_second": 14.518,
"eval_steps_per_second": 1.954,
"step": 350
},
{
"epoch": 4.406885758998435,
"grad_norm": 10.492298126220703,
"learning_rate": 1.2198928378235715e-07,
"log_odds_chosen": 6.660679817199707,
"log_odds_ratio": -0.00656685046851635,
"logits/chosen": -2.4854841232299805,
"logits/rejected": -2.487821102142334,
"logps/chosen": -0.5842890739440918,
"logps/rejected": -6.385722637176514,
"loss": 0.5976,
"nll_loss": 0.5909620523452759,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.05842890590429306,
"rewards/margins": 0.5801433324813843,
"rewards/rejected": -0.6385722160339355,
"step": 352
},
{
"epoch": 4.807511737089202,
"grad_norm": 10.00313949584961,
"learning_rate": 7.759511406608255e-08,
"log_odds_chosen": 6.706086158752441,
"log_odds_ratio": -0.0058883922174572945,
"logits/chosen": -2.4211370944976807,
"logits/rejected": -2.4340803623199463,
"logps/chosen": -0.570120632648468,
"logps/rejected": -6.413407325744629,
"loss": 0.584,
"nll_loss": 0.5762451887130737,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.05701206251978874,
"rewards/margins": 0.5843286514282227,
"rewards/rejected": -0.6413407921791077,
"step": 384
},
{
"epoch": 5.007824726134586,
"eval_log_odds_chosen": 7.109870910644531,
"eval_log_odds_ratio": -0.002329548355191946,
"eval_logits/chosen": -2.9075374603271484,
"eval_logits/rejected": -2.810256242752075,
"eval_logps/chosen": -0.7671002149581909,
"eval_logps/rejected": -7.206047534942627,
"eval_loss": 0.7668870091438293,
"eval_nll_loss": 0.7577933073043823,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.07671000808477402,
"eval_rewards/margins": 0.6438947319984436,
"eval_rewards/rejected": -0.7206048369407654,
"eval_runtime": 3.5587,
"eval_samples_per_second": 14.612,
"eval_steps_per_second": 1.967,
"step": 400
},
{
"epoch": 5.208137715179968,
"grad_norm": 12.630816459655762,
"learning_rate": 4.176968982247514e-08,
"log_odds_chosen": 7.1072587966918945,
"log_odds_ratio": -0.0049699898809194565,
"logits/chosen": -2.4647462368011475,
"logits/rejected": -2.4573941230773926,
"logps/chosen": -0.5844926834106445,
"logps/rejected": -6.838533401489258,
"loss": 0.5949,
"nll_loss": 0.5898107290267944,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.05844927579164505,
"rewards/margins": 0.6254041194915771,
"rewards/rejected": -0.6838533878326416,
"step": 416
},
{
"epoch": 5.608763693270736,
"grad_norm": 50.02872848510742,
"learning_rate": 1.629358090099639e-08,
"log_odds_chosen": 7.274372577667236,
"log_odds_ratio": -0.004320599138736725,
"logits/chosen": -2.405644178390503,
"logits/rejected": -2.42146635055542,
"logps/chosen": -0.5744296908378601,
"logps/rejected": -6.983857154846191,
"loss": 0.5954,
"nll_loss": 0.5800217986106873,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.05744296684861183,
"rewards/margins": 0.6409427523612976,
"rewards/rejected": -0.6983856558799744,
"step": 448
},
{
"epoch": 5.633802816901408,
"eval_log_odds_chosen": 7.28384256362915,
"eval_log_odds_ratio": -0.0020152912475168705,
"eval_logits/chosen": -2.903193235397339,
"eval_logits/rejected": -2.808192014694214,
"eval_logps/chosen": -0.767649233341217,
"eval_logps/rejected": -7.380805969238281,
"eval_loss": 0.7667036652565002,
"eval_nll_loss": 0.7581475377082825,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.07676493376493454,
"eval_rewards/margins": 0.6613157391548157,
"eval_rewards/rejected": -0.738080620765686,
"eval_runtime": 3.589,
"eval_samples_per_second": 14.489,
"eval_steps_per_second": 1.95,
"step": 450
},
{
"epoch": 6.009389671361502,
"grad_norm": 19.15870475769043,
"learning_rate": 2.4329828146074096e-09,
"log_odds_chosen": 7.12039852142334,
"log_odds_ratio": -0.0058927275240421295,
"logits/chosen": -2.4525253772735596,
"logits/rejected": -2.446058988571167,
"logps/chosen": -0.5847591757774353,
"logps/rejected": -6.854161739349365,
"loss": 0.589,
"nll_loss": 0.589131236076355,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.05847591161727905,
"rewards/margins": 0.6269403100013733,
"rewards/rejected": -0.6854162216186523,
"step": 480
},
{
"epoch": 6.259780907668231,
"grad_norm": 9.93666934967041,
"learning_rate": 0.0,
"log_odds_chosen": 7.366458892822266,
"log_odds_ratio": -0.00489471573382616,
"logits/chosen": -2.495069980621338,
"logits/rejected": -2.4710330963134766,
"logps/chosen": -0.5860047936439514,
"logps/rejected": -7.1015305519104,
"loss": 0.589,
"nll_loss": 0.593083381652832,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.058600474148988724,
"rewards/margins": 0.6515525579452515,
"rewards/rejected": -0.7101531028747559,
"step": 500
},
{
"epoch": 6.259780907668231,
"eval_log_odds_chosen": 7.277224063873291,
"eval_log_odds_ratio": -0.0020051717292517424,
"eval_logits/chosen": -2.901961088180542,
"eval_logits/rejected": -2.8077356815338135,
"eval_logps/chosen": -0.7674554586410522,
"eval_logps/rejected": -7.3736891746521,
"eval_loss": 0.7665765285491943,
"eval_nll_loss": 0.7579033970832825,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.07674554735422134,
"eval_rewards/margins": 0.6606234312057495,
"eval_rewards/rejected": -0.7373689413070679,
"eval_runtime": 3.5704,
"eval_samples_per_second": 14.564,
"eval_steps_per_second": 1.961,
"step": 500
}
],
"logging_steps": 32,
"max_steps": 500,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 1,
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}