uf-l31-orpo-base-armo-iter1 / trainer_state.json
nlee-208's picture
Model save
7d71062 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.999080761654629,
"eval_steps": 500,
"global_step": 951,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.005252790544977019,
"grad_norm": 33.25,
"learning_rate": 1.0416666666666667e-07,
"log_odds_chosen": -0.12333051860332489,
"log_odds_ratio": -0.8621311187744141,
"logits/chosen": -2.540858030319214,
"logits/rejected": -2.1144332885742188,
"logps/chosen": -1.1002752780914307,
"logps/rejected": -1.0134268999099731,
"loss": 2.3046,
"nll_loss": 1.4424240589141846,
"rewards/accuracies": 0.45625001192092896,
"rewards/chosen": -1.1002752780914307,
"rewards/margins": -0.08684836328029633,
"rewards/rejected": -1.0134268999099731,
"step": 5
},
{
"epoch": 0.010505581089954037,
"grad_norm": 30.125,
"learning_rate": 2.0833333333333333e-07,
"log_odds_chosen": -0.1254591941833496,
"log_odds_ratio": -0.8488509058952332,
"logits/chosen": -2.521646022796631,
"logits/rejected": -2.12934947013855,
"logps/chosen": -1.0548789501190186,
"logps/rejected": -0.9548781514167786,
"loss": 2.2337,
"nll_loss": 1.3848837614059448,
"rewards/accuracies": 0.453125,
"rewards/chosen": -1.0548789501190186,
"rewards/margins": -0.10000075399875641,
"rewards/rejected": -0.9548781514167786,
"step": 10
},
{
"epoch": 0.015758371634931056,
"grad_norm": 28.25,
"learning_rate": 3.1249999999999997e-07,
"log_odds_chosen": -0.092379130423069,
"log_odds_ratio": -0.839794933795929,
"logits/chosen": -2.496335744857788,
"logits/rejected": -2.134352445602417,
"logps/chosen": -1.0547659397125244,
"logps/rejected": -0.9888293147087097,
"loss": 2.2323,
"nll_loss": 1.3924893140792847,
"rewards/accuracies": 0.4625000059604645,
"rewards/chosen": -1.0547659397125244,
"rewards/margins": -0.06593648344278336,
"rewards/rejected": -0.9888293147087097,
"step": 15
},
{
"epoch": 0.021011162179908074,
"grad_norm": 27.75,
"learning_rate": 4.1666666666666667e-07,
"log_odds_chosen": -0.08341892063617706,
"log_odds_ratio": -0.845537006855011,
"logits/chosen": -2.502532720565796,
"logits/rejected": -2.0534327030181885,
"logps/chosen": -1.0713450908660889,
"logps/rejected": -1.0228570699691772,
"loss": 2.2615,
"nll_loss": 1.415948748588562,
"rewards/accuracies": 0.46875,
"rewards/chosen": -1.0713450908660889,
"rewards/margins": -0.04848797246813774,
"rewards/rejected": -1.0228570699691772,
"step": 20
},
{
"epoch": 0.026263952724885097,
"grad_norm": 31.625,
"learning_rate": 5.208333333333334e-07,
"log_odds_chosen": -0.05041329935193062,
"log_odds_ratio": -0.8150845766067505,
"logits/chosen": -2.3506855964660645,
"logits/rejected": -2.041471481323242,
"logps/chosen": -1.0723893642425537,
"logps/rejected": -1.0427805185317993,
"loss": 2.2269,
"nll_loss": 1.411767601966858,
"rewards/accuracies": 0.49687498807907104,
"rewards/chosen": -1.0723893642425537,
"rewards/margins": -0.029608914628624916,
"rewards/rejected": -1.0427805185317993,
"step": 25
},
{
"epoch": 0.03151674326986211,
"grad_norm": 34.75,
"learning_rate": 6.249999999999999e-07,
"log_odds_chosen": -0.16907325387001038,
"log_odds_ratio": -0.8892423510551453,
"logits/chosen": -2.4877123832702637,
"logits/rejected": -2.091643810272217,
"logps/chosen": -1.0780900716781616,
"logps/rejected": -0.960413932800293,
"loss": 2.2862,
"nll_loss": 1.3969789743423462,
"rewards/accuracies": 0.45625001192092896,
"rewards/chosen": -1.0780900716781616,
"rewards/margins": -0.1176760345697403,
"rewards/rejected": -0.960413932800293,
"step": 30
},
{
"epoch": 0.036769533814839134,
"grad_norm": 24.625,
"learning_rate": 7.291666666666666e-07,
"log_odds_chosen": -0.12296156585216522,
"log_odds_ratio": -0.8445537686347961,
"logits/chosen": -2.460153579711914,
"logits/rejected": -2.100581169128418,
"logps/chosen": -0.9918639063835144,
"logps/rejected": -0.8978347778320312,
"loss": 2.1014,
"nll_loss": 1.256840467453003,
"rewards/accuracies": 0.43437498807907104,
"rewards/chosen": -0.9918639063835144,
"rewards/margins": -0.09402903914451599,
"rewards/rejected": -0.8978347778320312,
"step": 35
},
{
"epoch": 0.04202232435981615,
"grad_norm": 20.375,
"learning_rate": 8.333333333333333e-07,
"log_odds_chosen": -0.10392768681049347,
"log_odds_ratio": -0.8281729817390442,
"logits/chosen": -2.4672484397888184,
"logits/rejected": -2.1189260482788086,
"logps/chosen": -0.9796692132949829,
"logps/rejected": -0.8947553634643555,
"loss": 2.0709,
"nll_loss": 1.2427122592926025,
"rewards/accuracies": 0.4781250059604645,
"rewards/chosen": -0.9796692132949829,
"rewards/margins": -0.08491390943527222,
"rewards/rejected": -0.8947553634643555,
"step": 40
},
{
"epoch": 0.04727511490479317,
"grad_norm": 25.75,
"learning_rate": 9.374999999999999e-07,
"log_odds_chosen": -0.07403279840946198,
"log_odds_ratio": -0.8119841814041138,
"logits/chosen": -2.5748581886291504,
"logits/rejected": -2.2311367988586426,
"logps/chosen": -0.9425970911979675,
"logps/rejected": -0.8925843238830566,
"loss": 1.966,
"nll_loss": 1.1540277004241943,
"rewards/accuracies": 0.4781250059604645,
"rewards/chosen": -0.9425970911979675,
"rewards/margins": -0.05001285672187805,
"rewards/rejected": -0.8925843238830566,
"step": 45
},
{
"epoch": 0.05252790544977019,
"grad_norm": 15.5625,
"learning_rate": 1.0416666666666667e-06,
"log_odds_chosen": -0.015203160233795643,
"log_odds_ratio": -0.7965196371078491,
"logits/chosen": -2.517662763595581,
"logits/rejected": -2.291977882385254,
"logps/chosen": -1.0069010257720947,
"logps/rejected": -0.9928563833236694,
"loss": 2.001,
"nll_loss": 1.2044353485107422,
"rewards/accuracies": 0.49687498807907104,
"rewards/chosen": -1.0069010257720947,
"rewards/margins": -0.014044714160263538,
"rewards/rejected": -0.9928563833236694,
"step": 50
},
{
"epoch": 0.05778069599474721,
"grad_norm": 19.125,
"learning_rate": 1.1458333333333333e-06,
"log_odds_chosen": -0.06918958574533463,
"log_odds_ratio": -0.8064200282096863,
"logits/chosen": -2.7286930084228516,
"logits/rejected": -2.3158278465270996,
"logps/chosen": -0.9621369242668152,
"logps/rejected": -0.9042080044746399,
"loss": 1.9673,
"nll_loss": 1.1608707904815674,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.9621369242668152,
"rewards/margins": -0.05792900174856186,
"rewards/rejected": -0.9042080044746399,
"step": 55
},
{
"epoch": 0.06303348653972422,
"grad_norm": 20.375,
"learning_rate": 1.2499999999999999e-06,
"log_odds_chosen": -0.055296190083026886,
"log_odds_ratio": -0.795842170715332,
"logits/chosen": -2.733304500579834,
"logits/rejected": -2.257201671600342,
"logps/chosen": -0.9258626699447632,
"logps/rejected": -0.8971433639526367,
"loss": 1.9557,
"nll_loss": 1.159847378730774,
"rewards/accuracies": 0.45625001192092896,
"rewards/chosen": -0.9258626699447632,
"rewards/margins": -0.028719374909996986,
"rewards/rejected": -0.8971433639526367,
"step": 60
},
{
"epoch": 0.06828627708470125,
"grad_norm": 18.25,
"learning_rate": 1.3541666666666667e-06,
"log_odds_chosen": -0.05717029422521591,
"log_odds_ratio": -0.7737418413162231,
"logits/chosen": -2.6654744148254395,
"logits/rejected": -2.187049627304077,
"logps/chosen": -0.8003360033035278,
"logps/rejected": -0.7723677754402161,
"loss": 1.8696,
"nll_loss": 1.0958433151245117,
"rewards/accuracies": 0.4437499940395355,
"rewards/chosen": -0.8003360033035278,
"rewards/margins": -0.02796824648976326,
"rewards/rejected": -0.7723677754402161,
"step": 65
},
{
"epoch": 0.07353906762967827,
"grad_norm": 19.375,
"learning_rate": 1.4583333333333333e-06,
"log_odds_chosen": 0.002531373407691717,
"log_odds_ratio": -0.7339381575584412,
"logits/chosen": -2.5733718872070312,
"logits/rejected": -2.1028685569763184,
"logps/chosen": -0.7143228054046631,
"logps/rejected": -0.718761146068573,
"loss": 1.7947,
"nll_loss": 1.0607960224151611,
"rewards/accuracies": 0.5218750238418579,
"rewards/chosen": -0.7143228054046631,
"rewards/margins": 0.004438319243490696,
"rewards/rejected": -0.718761146068573,
"step": 70
},
{
"epoch": 0.07879185817465528,
"grad_norm": 15.9375,
"learning_rate": 1.5624999999999999e-06,
"log_odds_chosen": 0.06011660769581795,
"log_odds_ratio": -0.7009418606758118,
"logits/chosen": -2.5496840476989746,
"logits/rejected": -2.0580315589904785,
"logps/chosen": -0.6317678689956665,
"logps/rejected": -0.6753242611885071,
"loss": 1.6452,
"nll_loss": 0.9442570805549622,
"rewards/accuracies": 0.528124988079071,
"rewards/chosen": -0.6317678689956665,
"rewards/margins": 0.04355642572045326,
"rewards/rejected": -0.6753242611885071,
"step": 75
},
{
"epoch": 0.0840446487196323,
"grad_norm": 14.875,
"learning_rate": 1.6666666666666667e-06,
"log_odds_chosen": 0.10804717242717743,
"log_odds_ratio": -0.6780250072479248,
"logits/chosen": -2.371317148208618,
"logits/rejected": -1.9558740854263306,
"logps/chosen": -0.5971282124519348,
"logps/rejected": -0.6553691029548645,
"loss": 1.6518,
"nll_loss": 0.9737834930419922,
"rewards/accuracies": 0.6031249761581421,
"rewards/chosen": -0.5971282124519348,
"rewards/margins": 0.05824087932705879,
"rewards/rejected": -0.6553691029548645,
"step": 80
},
{
"epoch": 0.08929743926460933,
"grad_norm": 15.375,
"learning_rate": 1.7708333333333332e-06,
"log_odds_chosen": 0.13051114976406097,
"log_odds_ratio": -0.6608899235725403,
"logits/chosen": -2.441239833831787,
"logits/rejected": -2.080503225326538,
"logps/chosen": -0.5396751165390015,
"logps/rejected": -0.6057919263839722,
"loss": 1.6033,
"nll_loss": 0.9424022436141968,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.5396751165390015,
"rewards/margins": 0.06611678004264832,
"rewards/rejected": -0.6057919263839722,
"step": 85
},
{
"epoch": 0.09455022980958634,
"grad_norm": 15.5625,
"learning_rate": 1.8749999999999998e-06,
"log_odds_chosen": 0.19523096084594727,
"log_odds_ratio": -0.6398605108261108,
"logits/chosen": -2.388965606689453,
"logits/rejected": -2.051954507827759,
"logps/chosen": -0.514168381690979,
"logps/rejected": -0.6006937623023987,
"loss": 1.5701,
"nll_loss": 0.9302393794059753,
"rewards/accuracies": 0.6468750238418579,
"rewards/chosen": -0.514168381690979,
"rewards/margins": 0.08652535825967789,
"rewards/rejected": -0.6006937623023987,
"step": 90
},
{
"epoch": 0.09980302035456336,
"grad_norm": 10.625,
"learning_rate": 1.9791666666666666e-06,
"log_odds_chosen": 0.12450599670410156,
"log_odds_ratio": -0.6654147505760193,
"logits/chosen": -2.3805699348449707,
"logits/rejected": -2.010688304901123,
"logps/chosen": -0.49114733934402466,
"logps/rejected": -0.5494757890701294,
"loss": 1.5446,
"nll_loss": 0.8791642189025879,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.49114733934402466,
"rewards/margins": 0.05832843855023384,
"rewards/rejected": -0.5494757890701294,
"step": 95
},
{
"epoch": 0.10505581089954039,
"grad_norm": 7.8125,
"learning_rate": 1.9998919935516766e-06,
"log_odds_chosen": 0.17239874601364136,
"log_odds_ratio": -0.6507178544998169,
"logits/chosen": -2.2754485607147217,
"logits/rejected": -2.040553569793701,
"logps/chosen": -0.485573947429657,
"logps/rejected": -0.5674648284912109,
"loss": 1.4726,
"nll_loss": 0.8218661546707153,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.485573947429657,
"rewards/margins": 0.08189092576503754,
"rewards/rejected": -0.5674648284912109,
"step": 100
},
{
"epoch": 0.1103086014445174,
"grad_norm": 8.3125,
"learning_rate": 1.999453257340926e-06,
"log_odds_chosen": 0.2180129736661911,
"log_odds_ratio": -0.6303091049194336,
"logits/chosen": -2.4427425861358643,
"logits/rejected": -2.181597948074341,
"logps/chosen": -0.4835621416568756,
"logps/rejected": -0.5780085325241089,
"loss": 1.4945,
"nll_loss": 0.8642352223396301,
"rewards/accuracies": 0.6343749761581421,
"rewards/chosen": -0.4835621416568756,
"rewards/margins": 0.09444637596607208,
"rewards/rejected": -0.5780085325241089,
"step": 105
},
{
"epoch": 0.11556139198949442,
"grad_norm": 8.0,
"learning_rate": 1.998677188931617e-06,
"log_odds_chosen": 0.27974802255630493,
"log_odds_ratio": -0.6000305414199829,
"logits/chosen": -2.4073500633239746,
"logits/rejected": -2.158104419708252,
"logps/chosen": -0.4692881107330322,
"logps/rejected": -0.5915614366531372,
"loss": 1.5236,
"nll_loss": 0.9235590100288391,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -0.4692881107330322,
"rewards/margins": 0.12227334082126617,
"rewards/rejected": -0.5915614366531372,
"step": 110
},
{
"epoch": 0.12081418253447143,
"grad_norm": 7.9375,
"learning_rate": 1.997564050259824e-06,
"log_odds_chosen": 0.28100112080574036,
"log_odds_ratio": -0.601650595664978,
"logits/chosen": -2.3918166160583496,
"logits/rejected": -2.029897689819336,
"logps/chosen": -0.4723443388938904,
"logps/rejected": -0.5918693542480469,
"loss": 1.5166,
"nll_loss": 0.9149250984191895,
"rewards/accuracies": 0.671875,
"rewards/chosen": -0.4723443388938904,
"rewards/margins": 0.11952495574951172,
"rewards/rejected": -0.5918693542480469,
"step": 115
},
{
"epoch": 0.12606697307944845,
"grad_norm": 8.8125,
"learning_rate": 1.996114217028476e-06,
"log_odds_chosen": 0.25655943155288696,
"log_odds_ratio": -0.6146520376205444,
"logits/chosen": -2.470524311065674,
"logits/rejected": -2.134540557861328,
"logps/chosen": -0.477255642414093,
"logps/rejected": -0.5925866961479187,
"loss": 1.5111,
"nll_loss": 0.8964211344718933,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.477255642414093,
"rewards/margins": 0.11533106863498688,
"rewards/rejected": -0.5925866961479187,
"step": 120
},
{
"epoch": 0.1313197636244255,
"grad_norm": 8.75,
"learning_rate": 1.994328178580548e-06,
"log_odds_chosen": 0.2803216576576233,
"log_odds_ratio": -0.601326584815979,
"logits/chosen": -2.367903232574463,
"logits/rejected": -2.018990993499756,
"logps/chosen": -0.46639877557754517,
"logps/rejected": -0.5851758718490601,
"loss": 1.481,
"nll_loss": 0.8796539306640625,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -0.46639877557754517,
"rewards/margins": 0.11877720057964325,
"rewards/rejected": -0.5851758718490601,
"step": 125
},
{
"epoch": 0.1365725541694025,
"grad_norm": 8.3125,
"learning_rate": 1.9922065377339033e-06,
"log_odds_chosen": 0.2894327640533447,
"log_odds_ratio": -0.6087297201156616,
"logits/chosen": -2.5040173530578613,
"logits/rejected": -2.2061374187469482,
"logps/chosen": -0.4694454073905945,
"logps/rejected": -0.5906943678855896,
"loss": 1.4968,
"nll_loss": 0.888100266456604,
"rewards/accuracies": 0.6656249761581421,
"rewards/chosen": -0.4694454073905945,
"rewards/margins": 0.12124893814325333,
"rewards/rejected": -0.5906943678855896,
"step": 130
},
{
"epoch": 0.14182534471437952,
"grad_norm": 7.375,
"learning_rate": 1.98975001057783e-06,
"log_odds_chosen": 0.30140143632888794,
"log_odds_ratio": -0.5964145660400391,
"logits/chosen": -2.4213032722473145,
"logits/rejected": -2.004279375076294,
"logps/chosen": -0.44823235273361206,
"logps/rejected": -0.583377480506897,
"loss": 1.4442,
"nll_loss": 0.8478012084960938,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.44823235273361206,
"rewards/margins": 0.13514509797096252,
"rewards/rejected": -0.583377480506897,
"step": 135
},
{
"epoch": 0.14707813525935653,
"grad_norm": 9.4375,
"learning_rate": 1.986959426231349e-06,
"log_odds_chosen": 0.33596453070640564,
"log_odds_ratio": -0.5885840654373169,
"logits/chosen": -2.471541166305542,
"logits/rejected": -2.1307930946350098,
"logps/chosen": -0.47856172919273376,
"logps/rejected": -0.6193875074386597,
"loss": 1.4974,
"nll_loss": 0.9087700843811035,
"rewards/accuracies": 0.684374988079071,
"rewards/chosen": -0.47856172919273376,
"rewards/margins": 0.14082582294940948,
"rewards/rejected": -0.6193875074386597,
"step": 140
},
{
"epoch": 0.15233092580433355,
"grad_norm": 8.0625,
"learning_rate": 1.9838357265633724e-06,
"log_odds_chosen": 0.35230931639671326,
"log_odds_ratio": -0.5799855589866638,
"logits/chosen": -2.4745469093322754,
"logits/rejected": -2.0399346351623535,
"logps/chosen": -0.45584583282470703,
"logps/rejected": -0.6081861257553101,
"loss": 1.4708,
"nll_loss": 0.8907746076583862,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -0.45584583282470703,
"rewards/margins": 0.15234029293060303,
"rewards/rejected": -0.6081861257553101,
"step": 145
},
{
"epoch": 0.15758371634931057,
"grad_norm": 7.59375,
"learning_rate": 1.9803799658748095e-06,
"log_odds_chosen": 0.32377585768699646,
"log_odds_ratio": -0.5951502919197083,
"logits/chosen": -2.3601431846618652,
"logits/rejected": -2.0099222660064697,
"logps/chosen": -0.46314555406570435,
"logps/rejected": -0.6009119153022766,
"loss": 1.4988,
"nll_loss": 0.9036917686462402,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.46314555406570435,
"rewards/margins": 0.13776634633541107,
"rewards/rejected": -0.6009119153022766,
"step": 150
},
{
"epoch": 0.16283650689428758,
"grad_norm": 9.75,
"learning_rate": 1.9765933105427177e-06,
"log_odds_chosen": 0.29054537415504456,
"log_odds_ratio": -0.6080166101455688,
"logits/chosen": -2.429213762283325,
"logits/rejected": -2.1127424240112305,
"logps/chosen": -0.48361191153526306,
"logps/rejected": -0.6187745928764343,
"loss": 1.5015,
"nll_loss": 0.8934603929519653,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": -0.48361191153526306,
"rewards/margins": 0.13516271114349365,
"rewards/rejected": -0.6187745928764343,
"step": 155
},
{
"epoch": 0.1680892974392646,
"grad_norm": 14.0625,
"learning_rate": 1.972477038626636e-06,
"log_odds_chosen": 0.27817827463150024,
"log_odds_ratio": -0.6112152338027954,
"logits/chosen": -2.4246554374694824,
"logits/rejected": -2.0224289894104004,
"logps/chosen": -0.49589210748672485,
"logps/rejected": -0.6248718500137329,
"loss": 1.4978,
"nll_loss": 0.886622428894043,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.49589210748672485,
"rewards/margins": 0.12897971272468567,
"rewards/rejected": -0.6248718500137329,
"step": 160
},
{
"epoch": 0.17334208798424164,
"grad_norm": 10.875,
"learning_rate": 1.9680325394372147e-06,
"log_odds_chosen": 0.35008612275123596,
"log_odds_ratio": -0.5786347389221191,
"logits/chosen": -2.506772756576538,
"logits/rejected": -2.057096004486084,
"logps/chosen": -0.46079978346824646,
"logps/rejected": -0.6112517714500427,
"loss": 1.4896,
"nll_loss": 0.9109176397323608,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.46079978346824646,
"rewards/margins": 0.15045206248760223,
"rewards/rejected": -0.6112517714500427,
"step": 165
},
{
"epoch": 0.17859487852921865,
"grad_norm": 15.1875,
"learning_rate": 1.9632613130673015e-06,
"log_odds_chosen": 0.33634597063064575,
"log_odds_ratio": -0.589142918586731,
"logits/chosen": -2.467883348464966,
"logits/rejected": -1.9834989309310913,
"logps/chosen": -0.4864015579223633,
"logps/rejected": -0.6304683089256287,
"loss": 1.4988,
"nll_loss": 0.9096533060073853,
"rewards/accuracies": 0.690625011920929,
"rewards/chosen": -0.4864015579223633,
"rewards/margins": 0.144066721200943,
"rewards/rejected": -0.6304683089256287,
"step": 170
},
{
"epoch": 0.18384766907419567,
"grad_norm": 26.75,
"learning_rate": 1.9581649698856357e-06,
"log_odds_chosen": 0.351374089717865,
"log_odds_ratio": -0.5786073207855225,
"logits/chosen": -2.3902525901794434,
"logits/rejected": -2.0138325691223145,
"logps/chosen": -0.45923271775245667,
"logps/rejected": -0.6129686236381531,
"loss": 1.477,
"nll_loss": 0.8983781933784485,
"rewards/accuracies": 0.715624988079071,
"rewards/chosen": -0.45923271775245667,
"rewards/margins": 0.1537359207868576,
"rewards/rejected": -0.6129686236381531,
"step": 175
},
{
"epoch": 0.18910045961917268,
"grad_norm": 8.5625,
"learning_rate": 1.952745229993319e-06,
"log_odds_chosen": 0.3817608952522278,
"log_odds_ratio": -0.5729137659072876,
"logits/chosen": -2.52931547164917,
"logits/rejected": -2.1916394233703613,
"logps/chosen": -0.48729705810546875,
"logps/rejected": -0.6591955423355103,
"loss": 1.4891,
"nll_loss": 0.9161707758903503,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.48729705810546875,
"rewards/margins": 0.1718985140323639,
"rewards/rejected": -0.6591955423355103,
"step": 180
},
{
"epoch": 0.1943532501641497,
"grad_norm": 8.5,
"learning_rate": 1.947003922643256e-06,
"log_odds_chosen": 0.379459023475647,
"log_odds_ratio": -0.5737109184265137,
"logits/chosen": -2.282898426055908,
"logits/rejected": -1.9805419445037842,
"logps/chosen": -0.47503146529197693,
"logps/rejected": -0.6488234400749207,
"loss": 1.4454,
"nll_loss": 0.8717378377914429,
"rewards/accuracies": 0.721875011920929,
"rewards/chosen": -0.47503146529197693,
"rewards/margins": 0.1737920045852661,
"rewards/rejected": -0.6488234400749207,
"step": 185
},
{
"epoch": 0.19960604070912671,
"grad_norm": 10.125,
"learning_rate": 1.9409429856227482e-06,
"log_odds_chosen": 0.4121369421482086,
"log_odds_ratio": -0.5561366081237793,
"logits/chosen": -2.488356113433838,
"logits/rejected": -2.0776686668395996,
"logps/chosen": -0.4683772921562195,
"logps/rejected": -0.647982656955719,
"loss": 1.4436,
"nll_loss": 0.8874515295028687,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.4683772921562195,
"rewards/margins": 0.17960533499717712,
"rewards/rejected": -0.647982656955719,
"step": 190
},
{
"epoch": 0.20485883125410373,
"grad_norm": 11.5,
"learning_rate": 1.934564464599461e-06,
"log_odds_chosen": 0.32919231057167053,
"log_odds_ratio": -0.5908551812171936,
"logits/chosen": -2.501392364501953,
"logits/rejected": -2.0592591762542725,
"logps/chosen": -0.49434512853622437,
"logps/rejected": -0.6509113311767578,
"loss": 1.4187,
"nll_loss": 0.8278582692146301,
"rewards/accuracies": 0.6968749761581421,
"rewards/chosen": -0.49434512853622437,
"rewards/margins": 0.15656621754169464,
"rewards/rejected": -0.6509113311767578,
"step": 195
},
{
"epoch": 0.21011162179908077,
"grad_norm": 12.8125,
"learning_rate": 1.927870512430972e-06,
"log_odds_chosen": 0.42371082305908203,
"log_odds_ratio": -0.5525480508804321,
"logits/chosen": -2.4069533348083496,
"logits/rejected": -2.019406795501709,
"logps/chosen": -0.4768436551094055,
"logps/rejected": -0.6629732251167297,
"loss": 1.4572,
"nll_loss": 0.9046151041984558,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -0.4768436551094055,
"rewards/margins": 0.18612954020500183,
"rewards/rejected": -0.6629732251167297,
"step": 200
},
{
"epoch": 0.2153644123440578,
"grad_norm": 9.0,
"learning_rate": 1.9208633884381526e-06,
"log_odds_chosen": 0.42966872453689575,
"log_odds_ratio": -0.5522044897079468,
"logits/chosen": -2.430342197418213,
"logits/rejected": -2.0743634700775146,
"logps/chosen": -0.4722970426082611,
"logps/rejected": -0.6621736884117126,
"loss": 1.4295,
"nll_loss": 0.8772872090339661,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.4722970426082611,
"rewards/margins": 0.18987664580345154,
"rewards/rejected": -0.6621736884117126,
"step": 205
},
{
"epoch": 0.2206172028890348,
"grad_norm": 9.1875,
"learning_rate": 1.9135454576426007e-06,
"log_odds_chosen": 0.40302562713623047,
"log_odds_ratio": -0.5604028105735779,
"logits/chosen": -2.412562847137451,
"logits/rejected": -2.0246427059173584,
"logps/chosen": -0.4761424660682678,
"logps/rejected": -0.661251425743103,
"loss": 1.3993,
"nll_loss": 0.8388580083847046,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -0.4761424660682678,
"rewards/margins": 0.18510892987251282,
"rewards/rejected": -0.661251425743103,
"step": 210
},
{
"epoch": 0.22586999343401182,
"grad_norm": 7.875,
"learning_rate": 1.905919189968415e-06,
"log_odds_chosen": 0.4606761932373047,
"log_odds_ratio": -0.5445691347122192,
"logits/chosen": -2.4419312477111816,
"logits/rejected": -2.030771493911743,
"logps/chosen": -0.4771277904510498,
"logps/rejected": -0.6932464838027954,
"loss": 1.4377,
"nll_loss": 0.8931263089179993,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.4771277904510498,
"rewards/margins": 0.21611860394477844,
"rewards/rejected": -0.6932464838027954,
"step": 215
},
{
"epoch": 0.23112278397898883,
"grad_norm": 7.78125,
"learning_rate": 1.897987159408548e-06,
"log_odds_chosen": 0.4278109073638916,
"log_odds_ratio": -0.5563892722129822,
"logits/chosen": -2.4070868492126465,
"logits/rejected": -2.033133029937744,
"logps/chosen": -0.4777792990207672,
"logps/rejected": -0.6746242642402649,
"loss": 1.3836,
"nll_loss": 0.827177882194519,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.4777792990207672,
"rewards/margins": 0.19684496521949768,
"rewards/rejected": -0.6746242642402649,
"step": 220
},
{
"epoch": 0.23637557452396585,
"grad_norm": 9.4375,
"learning_rate": 1.8897520431560433e-06,
"log_odds_chosen": 0.39412638545036316,
"log_odds_ratio": -0.5616167187690735,
"logits/chosen": -2.437281608581543,
"logits/rejected": -2.0233240127563477,
"logps/chosen": -0.49209141731262207,
"logps/rejected": -0.670540988445282,
"loss": 1.3984,
"nll_loss": 0.8367835879325867,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.49209141731262207,
"rewards/margins": 0.17844951152801514,
"rewards/rejected": -0.670540988445282,
"step": 225
},
{
"epoch": 0.24162836506894286,
"grad_norm": 9.4375,
"learning_rate": 1.8812166207004366e-06,
"log_odds_chosen": 0.45934948325157166,
"log_odds_ratio": -0.5536540746688843,
"logits/chosen": -2.4575705528259277,
"logits/rejected": -2.0787205696105957,
"logps/chosen": -0.4777277112007141,
"logps/rejected": -0.6928449869155884,
"loss": 1.3871,
"nll_loss": 0.8334070444107056,
"rewards/accuracies": 0.7593749761581421,
"rewards/chosen": -0.4777277112007141,
"rewards/margins": 0.21511724591255188,
"rewards/rejected": -0.6928449869155884,
"step": 230
},
{
"epoch": 0.2468811556139199,
"grad_norm": 7.71875,
"learning_rate": 1.8723837728896337e-06,
"log_odds_chosen": 0.45329445600509644,
"log_odds_ratio": -0.5616171360015869,
"logits/chosen": -2.522167682647705,
"logits/rejected": -2.1475300788879395,
"logps/chosen": -0.4945332407951355,
"logps/rejected": -0.7140644788742065,
"loss": 1.4402,
"nll_loss": 0.878614068031311,
"rewards/accuracies": 0.7093750238418579,
"rewards/chosen": -0.4945332407951355,
"rewards/margins": 0.21953122317790985,
"rewards/rejected": -0.7140644788742065,
"step": 235
},
{
"epoch": 0.2521339461588969,
"grad_norm": 7.75,
"learning_rate": 1.8632564809575738e-06,
"log_odds_chosen": 0.4688095152378082,
"log_odds_ratio": -0.5438790917396545,
"logits/chosen": -2.512554168701172,
"logits/rejected": -2.105734348297119,
"logps/chosen": -0.48634210228919983,
"logps/rejected": -0.7048304677009583,
"loss": 1.4387,
"nll_loss": 0.8948429226875305,
"rewards/accuracies": 0.753125011920929,
"rewards/chosen": -0.48634210228919983,
"rewards/margins": 0.21848826110363007,
"rewards/rejected": -0.7048304677009583,
"step": 240
},
{
"epoch": 0.2573867367038739,
"grad_norm": 9.9375,
"learning_rate": 1.8538378255180138e-06,
"log_odds_chosen": 0.488097608089447,
"log_odds_ratio": -0.5403500199317932,
"logits/chosen": -2.3577160835266113,
"logits/rejected": -2.0601189136505127,
"logps/chosen": -0.5090717077255249,
"logps/rejected": -0.7453780174255371,
"loss": 1.4193,
"nll_loss": 0.878923773765564,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.5090717077255249,
"rewards/margins": 0.23630623519420624,
"rewards/rejected": -0.7453780174255371,
"step": 245
},
{
"epoch": 0.262639527248851,
"grad_norm": 8.4375,
"learning_rate": 1.8441309855247707e-06,
"log_odds_chosen": 0.6032781004905701,
"log_odds_ratio": -0.5000559091567993,
"logits/chosen": -2.403979539871216,
"logits/rejected": -2.1050338745117188,
"logps/chosen": -0.5098007917404175,
"logps/rejected": -0.8097056150436401,
"loss": 1.4018,
"nll_loss": 0.9017453193664551,
"rewards/accuracies": 0.796875,
"rewards/chosen": -0.5098007917404175,
"rewards/margins": 0.29990485310554504,
"rewards/rejected": -0.8097056150436401,
"step": 250
},
{
"epoch": 0.267892317793828,
"grad_norm": 7.90625,
"learning_rate": 1.83413923719877e-06,
"log_odds_chosen": 0.5410558581352234,
"log_odds_ratio": -0.5238425135612488,
"logits/chosen": -2.42203688621521,
"logits/rejected": -2.095054864883423,
"logps/chosen": -0.49079209566116333,
"logps/rejected": -0.763100266456604,
"loss": 1.3797,
"nll_loss": 0.8558791875839233,
"rewards/accuracies": 0.778124988079071,
"rewards/chosen": -0.49079209566116333,
"rewards/margins": 0.2723081707954407,
"rewards/rejected": -0.763100266456604,
"step": 255
},
{
"epoch": 0.273145108338805,
"grad_norm": 10.1875,
"learning_rate": 1.8238659529222668e-06,
"log_odds_chosen": 0.5387502908706665,
"log_odds_ratio": -0.5273549556732178,
"logits/chosen": -2.458590269088745,
"logits/rejected": -2.1467177867889404,
"logps/chosen": -0.5123028755187988,
"logps/rejected": -0.781539797782898,
"loss": 1.4312,
"nll_loss": 0.9038845300674438,
"rewards/accuracies": 0.765625,
"rewards/chosen": -0.5123028755187988,
"rewards/margins": 0.2692369818687439,
"rewards/rejected": -0.781539797782898,
"step": 260
},
{
"epoch": 0.278397898883782,
"grad_norm": 12.3125,
"learning_rate": 1.8133146001006117e-06,
"log_odds_chosen": 0.585041880607605,
"log_odds_ratio": -0.5241442322731018,
"logits/chosen": -2.434957504272461,
"logits/rejected": -2.08172345161438,
"logps/chosen": -0.5419186353683472,
"logps/rejected": -0.8563257455825806,
"loss": 1.4995,
"nll_loss": 0.9753583669662476,
"rewards/accuracies": 0.7593749761581421,
"rewards/chosen": -0.5419186353683472,
"rewards/margins": 0.314407080411911,
"rewards/rejected": -0.8563257455825806,
"step": 265
},
{
"epoch": 0.28365068942875904,
"grad_norm": 12.8125,
"learning_rate": 1.8024887399919408e-06,
"log_odds_chosen": 0.686429500579834,
"log_odds_ratio": -0.49835652112960815,
"logits/chosen": -2.493675947189331,
"logits/rejected": -2.192899465560913,
"logps/chosen": -0.533765435218811,
"logps/rejected": -0.9061405062675476,
"loss": 1.4053,
"nll_loss": 0.9069935083389282,
"rewards/accuracies": 0.765625,
"rewards/chosen": -0.533765435218811,
"rewards/margins": 0.3723750710487366,
"rewards/rejected": -0.9061405062675476,
"step": 270
},
{
"epoch": 0.28890347997373605,
"grad_norm": 11.5,
"learning_rate": 1.7913920265051946e-06,
"log_odds_chosen": 0.7045778036117554,
"log_odds_ratio": -0.49370041489601135,
"logits/chosen": -2.4899590015411377,
"logits/rejected": -2.1618402004241943,
"logps/chosen": -0.5214771032333374,
"logps/rejected": -0.9220815896987915,
"loss": 1.4176,
"nll_loss": 0.92388916015625,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -0.5214771032333374,
"rewards/margins": 0.4006044268608093,
"rewards/rejected": -0.9220815896987915,
"step": 275
},
{
"epoch": 0.29415627051871307,
"grad_norm": 9.5625,
"learning_rate": 1.780028204966859e-06,
"log_odds_chosen": 0.6810405254364014,
"log_odds_ratio": -0.4989449381828308,
"logits/chosen": -2.3327696323394775,
"logits/rejected": -2.0119078159332275,
"logps/chosen": -0.5228633880615234,
"logps/rejected": -0.8827990293502808,
"loss": 1.405,
"nll_loss": 0.9060786962509155,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.5228633880615234,
"rewards/margins": 0.3599356710910797,
"rewards/rejected": -0.8827990293502808,
"step": 280
},
{
"epoch": 0.2994090610636901,
"grad_norm": 10.8125,
"learning_rate": 1.768401110856859e-06,
"log_odds_chosen": 0.7910138964653015,
"log_odds_ratio": -0.47219276428222656,
"logits/chosen": -2.465003252029419,
"logits/rejected": -2.085939407348633,
"logps/chosen": -0.5146728754043579,
"logps/rejected": -0.9470351934432983,
"loss": 1.3015,
"nll_loss": 0.8292847871780396,
"rewards/accuracies": 0.8062499761581421,
"rewards/chosen": -0.5146728754043579,
"rewards/margins": 0.43236231803894043,
"rewards/rejected": -0.9470351934432983,
"step": 285
},
{
"epoch": 0.3046618516086671,
"grad_norm": 13.6875,
"learning_rate": 1.7565146685140167e-06,
"log_odds_chosen": 0.771044135093689,
"log_odds_ratio": -0.4853692948818207,
"logits/chosen": -2.4471678733825684,
"logits/rejected": -2.1012349128723145,
"logps/chosen": -0.5462040901184082,
"logps/rejected": -0.9886453747749329,
"loss": 1.3976,
"nll_loss": 0.9122269749641418,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": -0.5462040901184082,
"rewards/margins": 0.4424411654472351,
"rewards/rejected": -0.9886453747749329,
"step": 290
},
{
"epoch": 0.3099146421536441,
"grad_norm": 12.0,
"learning_rate": 1.7443728898115224e-06,
"log_odds_chosen": 0.6316434144973755,
"log_odds_ratio": -0.5107887983322144,
"logits/chosen": -2.432225465774536,
"logits/rejected": -2.0828986167907715,
"logps/chosen": -0.5212147235870361,
"logps/rejected": -0.8626314997673035,
"loss": 1.3532,
"nll_loss": 0.8424150347709656,
"rewards/accuracies": 0.7406250238418579,
"rewards/chosen": -0.5212147235870361,
"rewards/margins": 0.34141671657562256,
"rewards/rejected": -0.8626314997673035,
"step": 295
},
{
"epoch": 0.31516743269862113,
"grad_norm": 19.875,
"learning_rate": 1.7319798728028616e-06,
"log_odds_chosen": 0.8003711700439453,
"log_odds_ratio": -0.4749313294887543,
"logits/chosen": -2.4634110927581787,
"logits/rejected": -2.111607313156128,
"logps/chosen": -0.5615866780281067,
"logps/rejected": -1.0098183155059814,
"loss": 1.4088,
"nll_loss": 0.9338866472244263,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.5615866780281067,
"rewards/margins": 0.44823163747787476,
"rewards/rejected": -1.0098183155059814,
"step": 300
},
{
"epoch": 0.32042022324359815,
"grad_norm": 17.25,
"learning_rate": 1.719339800338651e-06,
"log_odds_chosen": 0.8279815912246704,
"log_odds_ratio": -0.4675443172454834,
"logits/chosen": -2.5601465702056885,
"logits/rejected": -2.2116811275482178,
"logps/chosen": -0.5433454513549805,
"logps/rejected": -1.0226011276245117,
"loss": 1.3768,
"nll_loss": 0.9092954397201538,
"rewards/accuracies": 0.8031250238418579,
"rewards/chosen": -0.5433454513549805,
"rewards/margins": 0.479255735874176,
"rewards/rejected": -1.0226011276245117,
"step": 305
},
{
"epoch": 0.32567301378857516,
"grad_norm": 15.3125,
"learning_rate": 1.7064569386548585e-06,
"log_odds_chosen": 0.859075665473938,
"log_odds_ratio": -0.4543831944465637,
"logits/chosen": -2.531367301940918,
"logits/rejected": -2.2318122386932373,
"logps/chosen": -0.5256025195121765,
"logps/rejected": -1.0284937620162964,
"loss": 1.3533,
"nll_loss": 0.8989534378051758,
"rewards/accuracies": 0.8031250238418579,
"rewards/chosen": -0.5256025195121765,
"rewards/margins": 0.5028911828994751,
"rewards/rejected": -1.0284937620162964,
"step": 310
},
{
"epoch": 0.3309258043335522,
"grad_norm": 16.625,
"learning_rate": 1.6933356359328754e-06,
"log_odds_chosen": 0.7117995619773865,
"log_odds_ratio": -0.4905334413051605,
"logits/chosen": -2.5292108058929443,
"logits/rejected": -2.1856768131256104,
"logps/chosen": -0.5284509658813477,
"logps/rejected": -0.915407657623291,
"loss": 1.371,
"nll_loss": 0.8804505467414856,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.5284509658813477,
"rewards/margins": 0.3869567811489105,
"rewards/rejected": -0.915407657623291,
"step": 315
},
{
"epoch": 0.3361785948785292,
"grad_norm": 17.875,
"learning_rate": 1.679980320831934e-06,
"log_odds_chosen": 0.7291110754013062,
"log_odds_ratio": -0.4787971079349518,
"logits/chosen": -2.4682400226593018,
"logits/rejected": -2.2220332622528076,
"logps/chosen": -0.5479062795639038,
"logps/rejected": -0.9491809606552124,
"loss": 1.3781,
"nll_loss": 0.8992602229118347,
"rewards/accuracies": 0.8062499761581421,
"rewards/chosen": -0.5479062795639038,
"rewards/margins": 0.40127477049827576,
"rewards/rejected": -0.9491809606552124,
"step": 320
},
{
"epoch": 0.34143138542350626,
"grad_norm": 32.0,
"learning_rate": 1.6663955009943602e-06,
"log_odds_chosen": 0.9077841639518738,
"log_odds_ratio": -0.4515516757965088,
"logits/chosen": -2.4324584007263184,
"logits/rejected": -2.178394317626953,
"logps/chosen": -0.5766757726669312,
"logps/rejected": -1.1069071292877197,
"loss": 1.374,
"nll_loss": 0.9224408268928528,
"rewards/accuracies": 0.828125,
"rewards/chosen": -0.5766757726669312,
"rewards/margins": 0.530231237411499,
"rewards/rejected": -1.1069071292877197,
"step": 325
},
{
"epoch": 0.3466841759684833,
"grad_norm": 23.0,
"learning_rate": 1.6525857615241685e-06,
"log_odds_chosen": 0.733812689781189,
"log_odds_ratio": -0.4906436800956726,
"logits/chosen": -2.523135185241699,
"logits/rejected": -2.1835999488830566,
"logps/chosen": -0.5466452836990356,
"logps/rejected": -0.9662971496582031,
"loss": 1.4195,
"nll_loss": 0.9288629293441772,
"rewards/accuracies": 0.8031250238418579,
"rewards/chosen": -0.5466452836990356,
"rewards/margins": 0.4196518361568451,
"rewards/rejected": -0.9662971496582031,
"step": 330
},
{
"epoch": 0.3519369665134603,
"grad_norm": 21.875,
"learning_rate": 1.6385557634395136e-06,
"log_odds_chosen": 0.7822979688644409,
"log_odds_ratio": -0.47422999143600464,
"logits/chosen": -2.4535679817199707,
"logits/rejected": -2.2028393745422363,
"logps/chosen": -0.5340802669525146,
"logps/rejected": -0.9806568026542664,
"loss": 1.3555,
"nll_loss": 0.881304144859314,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.5340802669525146,
"rewards/margins": 0.44657665491104126,
"rewards/rejected": -0.9806568026542664,
"step": 335
},
{
"epoch": 0.3571897570584373,
"grad_norm": 21.5,
"learning_rate": 1.624310242099518e-06,
"log_odds_chosen": 0.7664231061935425,
"log_odds_ratio": -0.48080235719680786,
"logits/chosen": -2.453505039215088,
"logits/rejected": -2.18292498588562,
"logps/chosen": -0.5327800512313843,
"logps/rejected": -0.9523041844367981,
"loss": 1.4089,
"nll_loss": 0.9281209111213684,
"rewards/accuracies": 0.796875,
"rewards/chosen": -0.5327800512313843,
"rewards/margins": 0.4195241332054138,
"rewards/rejected": -0.9523041844367981,
"step": 340
},
{
"epoch": 0.3624425476034143,
"grad_norm": 24.5,
"learning_rate": 1.609854005606009e-06,
"log_odds_chosen": 0.9470375776290894,
"log_odds_ratio": -0.4272763729095459,
"logits/chosen": -2.5423166751861572,
"logits/rejected": -2.210846424102783,
"logps/chosen": -0.5365777015686035,
"logps/rejected": -1.076774001121521,
"loss": 1.3329,
"nll_loss": 0.9056490063667297,
"rewards/accuracies": 0.815625011920929,
"rewards/chosen": -0.5365777015686035,
"rewards/margins": 0.5401962995529175,
"rewards/rejected": -1.076774001121521,
"step": 345
},
{
"epoch": 0.36769533814839134,
"grad_norm": 15.125,
"learning_rate": 1.5951919331807048e-06,
"log_odds_chosen": 0.9901137351989746,
"log_odds_ratio": -0.43201208114624023,
"logits/chosen": -2.3910915851593018,
"logits/rejected": -2.085310935974121,
"logps/chosen": -0.5491678714752197,
"logps/rejected": -1.141390085220337,
"loss": 1.3711,
"nll_loss": 0.9390678405761719,
"rewards/accuracies": 0.796875,
"rewards/chosen": -0.5491678714752197,
"rewards/margins": 0.5922220945358276,
"rewards/rejected": -1.141390085220337,
"step": 350
},
{
"epoch": 0.37294812869336835,
"grad_norm": 19.25,
"learning_rate": 1.5803289735183949e-06,
"log_odds_chosen": 0.9613128900527954,
"log_odds_ratio": -0.43703293800354004,
"logits/chosen": -2.404744863510132,
"logits/rejected": -2.0907814502716064,
"logps/chosen": -0.5635210871696472,
"logps/rejected": -1.1492810249328613,
"loss": 1.3534,
"nll_loss": 0.9164144396781921,
"rewards/accuracies": 0.831250011920929,
"rewards/chosen": -0.5635210871696472,
"rewards/margins": 0.5857599377632141,
"rewards/rejected": -1.1492810249328613,
"step": 355
},
{
"epoch": 0.37820091923834537,
"grad_norm": 32.25,
"learning_rate": 1.5652701431166717e-06,
"log_odds_chosen": 0.9359542727470398,
"log_odds_ratio": -0.4396037459373474,
"logits/chosen": -2.4650635719299316,
"logits/rejected": -2.122915267944336,
"logps/chosen": -0.5267240405082703,
"logps/rejected": -1.0681325197219849,
"loss": 1.3381,
"nll_loss": 0.8984518051147461,
"rewards/accuracies": 0.831250011920929,
"rewards/chosen": -0.5267240405082703,
"rewards/margins": 0.5414084792137146,
"rewards/rejected": -1.0681325197219849,
"step": 360
},
{
"epoch": 0.3834537097833224,
"grad_norm": 22.5,
"learning_rate": 1.550020524582781e-06,
"log_odds_chosen": 0.9607855677604675,
"log_odds_ratio": -0.4296341836452484,
"logits/chosen": -2.556321620941162,
"logits/rejected": -2.233931064605713,
"logps/chosen": -0.5581452250480652,
"logps/rejected": -1.131134033203125,
"loss": 1.2919,
"nll_loss": 0.8622277975082397,
"rewards/accuracies": 0.84375,
"rewards/chosen": -0.5581452250480652,
"rewards/margins": 0.5729888677597046,
"rewards/rejected": -1.131134033203125,
"step": 365
},
{
"epoch": 0.3887065003282994,
"grad_norm": 20.375,
"learning_rate": 1.5345852649181553e-06,
"log_odds_chosen": 0.9939554333686829,
"log_odds_ratio": -0.4331156313419342,
"logits/chosen": -2.4889018535614014,
"logits/rejected": -2.2245144844055176,
"logps/chosen": -0.5625091791152954,
"logps/rejected": -1.159073829650879,
"loss": 1.3688,
"nll_loss": 0.9356663823127747,
"rewards/accuracies": 0.840624988079071,
"rewards/chosen": -0.5625091791152954,
"rewards/margins": 0.5965645909309387,
"rewards/rejected": -1.159073829650879,
"step": 370
},
{
"epoch": 0.3939592908732764,
"grad_norm": 23.625,
"learning_rate": 1.5189695737812151e-06,
"log_odds_chosen": 1.057094931602478,
"log_odds_ratio": -0.4173505902290344,
"logits/chosen": -2.63775634765625,
"logits/rejected": -2.2736358642578125,
"logps/chosen": -0.5382205843925476,
"logps/rejected": -1.1550116539001465,
"loss": 1.3662,
"nll_loss": 0.9488565325737,
"rewards/accuracies": 0.831250011920929,
"rewards/chosen": -0.5382205843925476,
"rewards/margins": 0.6167910099029541,
"rewards/rejected": -1.1550116539001465,
"step": 375
},
{
"epoch": 0.39921208141825343,
"grad_norm": 20.375,
"learning_rate": 1.5031787217290216e-06,
"log_odds_chosen": 1.2109272480010986,
"log_odds_ratio": -0.40476536750793457,
"logits/chosen": -2.441784143447876,
"logits/rejected": -2.141080856323242,
"logps/chosen": -0.5574549436569214,
"logps/rejected": -1.3256219625473022,
"loss": 1.3395,
"nll_loss": 0.9347711801528931,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.5574549436569214,
"rewards/margins": 0.7681670188903809,
"rewards/rejected": -1.3256219625473022,
"step": 380
},
{
"epoch": 0.40446487196323044,
"grad_norm": 22.125,
"learning_rate": 1.487218038438377e-06,
"log_odds_chosen": 1.0492345094680786,
"log_odds_ratio": -0.41920414566993713,
"logits/chosen": -2.4877960681915283,
"logits/rejected": -2.2220120429992676,
"logps/chosen": -0.5476792454719543,
"logps/rejected": -1.1791220903396606,
"loss": 1.3255,
"nll_loss": 0.9063073992729187,
"rewards/accuracies": 0.8218749761581421,
"rewards/chosen": -0.5476792454719543,
"rewards/margins": 0.6314427256584167,
"rewards/rejected": -1.1791220903396606,
"step": 385
},
{
"epoch": 0.40971766250820746,
"grad_norm": 52.5,
"learning_rate": 1.4710929109069672e-06,
"log_odds_chosen": 1.1698648929595947,
"log_odds_ratio": -0.4003461003303528,
"logits/chosen": -2.450030565261841,
"logits/rejected": -2.1449716091156006,
"logps/chosen": -0.5605112314224243,
"logps/rejected": -1.2878248691558838,
"loss": 1.3523,
"nll_loss": 0.9519191980361938,
"rewards/accuracies": 0.84375,
"rewards/chosen": -0.5605112314224243,
"rewards/margins": 0.7273136377334595,
"rewards/rejected": -1.2878248691558838,
"step": 390
},
{
"epoch": 0.41497045305318453,
"grad_norm": 23.75,
"learning_rate": 1.4548087816351614e-06,
"log_odds_chosen": 1.1297777891159058,
"log_odds_ratio": -0.41146859526634216,
"logits/chosen": -2.514195442199707,
"logits/rejected": -2.1877148151397705,
"logps/chosen": -0.5281041860580444,
"logps/rejected": -1.2085294723510742,
"loss": 1.2817,
"nll_loss": 0.8702155947685242,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": -0.5281041860580444,
"rewards/margins": 0.6804252862930298,
"rewards/rejected": -1.2085294723510742,
"step": 395
},
{
"epoch": 0.42022324359816154,
"grad_norm": 68.0,
"learning_rate": 1.4383711467890773e-06,
"log_odds_chosen": 1.1593742370605469,
"log_odds_ratio": -0.4072793424129486,
"logits/chosen": -2.410384178161621,
"logits/rejected": -2.1880173683166504,
"logps/chosen": -0.5577239990234375,
"logps/rejected": -1.2925007343292236,
"loss": 1.2823,
"nll_loss": 0.8749955892562866,
"rewards/accuracies": 0.856249988079071,
"rewards/chosen": -0.5577239990234375,
"rewards/margins": 0.7347767353057861,
"rewards/rejected": -1.2925007343292236,
"step": 400
},
{
"epoch": 0.42547603414313856,
"grad_norm": 26.75,
"learning_rate": 1.4217855543455323e-06,
"log_odds_chosen": 1.0840833187103271,
"log_odds_ratio": -0.4106718599796295,
"logits/chosen": -2.384483575820923,
"logits/rejected": -2.11120343208313,
"logps/chosen": -0.5574430227279663,
"logps/rejected": -1.2079960107803345,
"loss": 1.3143,
"nll_loss": 0.9036461710929871,
"rewards/accuracies": 0.8343750238418579,
"rewards/chosen": -0.5574430227279663,
"rewards/margins": 0.6505529880523682,
"rewards/rejected": -1.2079960107803345,
"step": 405
},
{
"epoch": 0.4307288246881156,
"grad_norm": 22.625,
"learning_rate": 1.4050576022195082e-06,
"log_odds_chosen": 0.8836471438407898,
"log_odds_ratio": -0.4627167582511902,
"logits/chosen": -2.4845831394195557,
"logits/rejected": -2.3066840171813965,
"logps/chosen": -0.5467715263366699,
"logps/rejected": -1.0581128597259521,
"loss": 1.3641,
"nll_loss": 0.9014018774032593,
"rewards/accuracies": 0.796875,
"rewards/chosen": -0.5467715263366699,
"rewards/margins": 0.5113412141799927,
"rewards/rejected": -1.0581128597259521,
"step": 410
},
{
"epoch": 0.4359816152330926,
"grad_norm": 34.0,
"learning_rate": 1.3881929363747626e-06,
"log_odds_chosen": 1.0594258308410645,
"log_odds_ratio": -0.4148578643798828,
"logits/chosen": -2.3405816555023193,
"logits/rejected": -2.115149974822998,
"logps/chosen": -0.5290128588676453,
"logps/rejected": -1.1592894792556763,
"loss": 1.3394,
"nll_loss": 0.9245734214782715,
"rewards/accuracies": 0.831250011920929,
"rewards/chosen": -0.5290128588676453,
"rewards/margins": 0.6302765607833862,
"rewards/rejected": -1.1592894792556763,
"step": 415
},
{
"epoch": 0.4412344057780696,
"grad_norm": 47.0,
"learning_rate": 1.3711972489182206e-06,
"log_odds_chosen": 1.4167802333831787,
"log_odds_ratio": -0.3603227734565735,
"logits/chosen": -2.4658875465393066,
"logits/rejected": -2.18940806388855,
"logps/chosen": -0.5862340331077576,
"logps/rejected": -1.5004864931106567,
"loss": 1.305,
"nll_loss": 0.9447038769721985,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.5862340331077576,
"rewards/margins": 0.9142524003982544,
"rewards/rejected": -1.5004864931106567,
"step": 420
},
{
"epoch": 0.4464871963230466,
"grad_norm": 19.75,
"learning_rate": 1.3540762761787936e-06,
"log_odds_chosen": 1.2667293548583984,
"log_odds_ratio": -0.3922019898891449,
"logits/chosen": -2.449897289276123,
"logits/rejected": -2.1496291160583496,
"logps/chosen": -0.5754435658454895,
"logps/rejected": -1.3866373300552368,
"loss": 1.2676,
"nll_loss": 0.8754428625106812,
"rewards/accuracies": 0.859375,
"rewards/chosen": -0.5754435658454895,
"rewards/margins": 0.8111938238143921,
"rewards/rejected": -1.3866373300552368,
"step": 425
},
{
"epoch": 0.45173998686802364,
"grad_norm": 39.0,
"learning_rate": 1.3368357967712725e-06,
"log_odds_chosen": 1.156019687652588,
"log_odds_ratio": -0.395340234041214,
"logits/chosen": -2.553677797317505,
"logits/rejected": -2.2673325538635254,
"logps/chosen": -0.5371165871620178,
"logps/rejected": -1.2541286945343018,
"loss": 1.3117,
"nll_loss": 0.9164005517959595,
"rewards/accuracies": 0.840624988079071,
"rewards/chosen": -0.5371165871620178,
"rewards/margins": 0.7170120477676392,
"rewards/rejected": -1.2541286945343018,
"step": 430
},
{
"epoch": 0.45699277741300065,
"grad_norm": 44.25,
"learning_rate": 1.3194816296459482e-06,
"log_odds_chosen": 1.1215965747833252,
"log_odds_ratio": -0.40178972482681274,
"logits/chosen": -2.4841268062591553,
"logits/rejected": -2.2464358806610107,
"logps/chosen": -0.6227961182594299,
"logps/rejected": -1.3194401264190674,
"loss": 1.3687,
"nll_loss": 0.9668703079223633,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": -0.6227961182594299,
"rewards/margins": 0.6966440081596375,
"rewards/rejected": -1.3194401264190674,
"step": 435
},
{
"epoch": 0.46224556795797767,
"grad_norm": 30.625,
"learning_rate": 1.302019632124619e-06,
"log_odds_chosen": 1.4459072351455688,
"log_odds_ratio": -0.3312341868877411,
"logits/chosen": -2.497469902038574,
"logits/rejected": -2.215177297592163,
"logps/chosen": -0.5155361294746399,
"logps/rejected": -1.4294028282165527,
"loss": 1.227,
"nll_loss": 0.8957819938659668,
"rewards/accuracies": 0.8968750238418579,
"rewards/chosen": -0.5155361294746399,
"rewards/margins": 0.9138666391372681,
"rewards/rejected": -1.4294028282165527,
"step": 440
},
{
"epoch": 0.4674983585029547,
"grad_norm": 27.5,
"learning_rate": 1.284455697923646e-06,
"log_odds_chosen": 1.5342215299606323,
"log_odds_ratio": -0.3261391222476959,
"logits/chosen": -2.5461294651031494,
"logits/rejected": -2.2099266052246094,
"logps/chosen": -0.5843450427055359,
"logps/rejected": -1.5760066509246826,
"loss": 1.312,
"nll_loss": 0.9858700037002563,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": -0.5843450427055359,
"rewards/margins": 0.991661548614502,
"rewards/rejected": -1.5760066509246826,
"step": 445
},
{
"epoch": 0.4727511490479317,
"grad_norm": 43.0,
"learning_rate": 1.2667957551647261e-06,
"log_odds_chosen": 1.2222964763641357,
"log_odds_ratio": -0.3712048828601837,
"logits/chosen": -2.5557785034179688,
"logits/rejected": -2.261915922164917,
"logps/chosen": -0.5360510945320129,
"logps/rejected": -1.2696157693862915,
"loss": 1.233,
"nll_loss": 0.8618295788764954,
"rewards/accuracies": 0.8656250238418579,
"rewards/chosen": -0.5360510945320129,
"rewards/margins": 0.7335647344589233,
"rewards/rejected": -1.2696157693862915,
"step": 450
},
{
"epoch": 0.4780039395929087,
"grad_norm": 50.5,
"learning_rate": 1.24904576437405e-06,
"log_odds_chosen": 1.1964861154556274,
"log_odds_ratio": -0.380424439907074,
"logits/chosen": -2.387500762939453,
"logits/rejected": -2.2171878814697266,
"logps/chosen": -0.5144879221916199,
"logps/rejected": -1.2391068935394287,
"loss": 1.182,
"nll_loss": 0.801527202129364,
"rewards/accuracies": 0.8843749761581421,
"rewards/chosen": -0.5144879221916199,
"rewards/margins": 0.7246190309524536,
"rewards/rejected": -1.2391068935394287,
"step": 455
},
{
"epoch": 0.4832567301378857,
"grad_norm": 34.5,
"learning_rate": 1.2312117164705265e-06,
"log_odds_chosen": 1.319461703300476,
"log_odds_ratio": -0.37714654207229614,
"logits/chosen": -2.5138354301452637,
"logits/rejected": -2.2482171058654785,
"logps/chosen": -0.5467159748077393,
"logps/rejected": -1.3964442014694214,
"loss": 1.2877,
"nll_loss": 0.9105404019355774,
"rewards/accuracies": 0.840624988079071,
"rewards/chosen": -0.5467159748077393,
"rewards/margins": 0.8497281074523926,
"rewards/rejected": -1.3964442014694214,
"step": 460
},
{
"epoch": 0.4885095206828628,
"grad_norm": 43.5,
"learning_rate": 1.2132996307437468e-06,
"log_odds_chosen": 1.3355519771575928,
"log_odds_ratio": -0.3902519941329956,
"logits/chosen": -2.482901096343994,
"logits/rejected": -2.2286696434020996,
"logps/chosen": -0.566125750541687,
"logps/rejected": -1.4363183975219727,
"loss": 1.3035,
"nll_loss": 0.9132728576660156,
"rewards/accuracies": 0.815625011920929,
"rewards/chosen": -0.566125750541687,
"rewards/margins": 0.8701925277709961,
"rewards/rejected": -1.4363183975219727,
"step": 465
},
{
"epoch": 0.4937623112278398,
"grad_norm": 83.0,
"learning_rate": 1.1953155528223725e-06,
"log_odds_chosen": 1.1865278482437134,
"log_odds_ratio": -0.392407089471817,
"logits/chosen": -2.425886869430542,
"logits/rejected": -2.155287265777588,
"logps/chosen": -0.5029312968254089,
"logps/rejected": -1.2368618249893188,
"loss": 1.2357,
"nll_loss": 0.8432880640029907,
"rewards/accuracies": 0.840624988079071,
"rewards/chosen": -0.5029312968254089,
"rewards/margins": 0.7339304089546204,
"rewards/rejected": -1.2368618249893188,
"step": 470
},
{
"epoch": 0.4990151017728168,
"grad_norm": 40.5,
"learning_rate": 1.1772655526336367e-06,
"log_odds_chosen": 1.4356929063796997,
"log_odds_ratio": -0.3839671313762665,
"logits/chosen": -2.398430585861206,
"logits/rejected": -2.104560136795044,
"logps/chosen": -0.5578696131706238,
"logps/rejected": -1.5088526010513306,
"loss": 1.2412,
"nll_loss": 0.8572656512260437,
"rewards/accuracies": 0.84375,
"rewards/chosen": -0.5578696131706238,
"rewards/margins": 0.9509830474853516,
"rewards/rejected": -1.5088526010513306,
"step": 475
},
{
"epoch": 0.5042678923177938,
"grad_norm": 28.25,
"learning_rate": 1.1591557223546393e-06,
"log_odds_chosen": 1.148279070854187,
"log_odds_ratio": -0.3996050953865051,
"logits/chosen": -2.365521192550659,
"logits/rejected": -2.152665615081787,
"logps/chosen": -0.566467821598053,
"logps/rejected": -1.2856696844100952,
"loss": 1.3237,
"nll_loss": 0.9241225123405457,
"rewards/accuracies": 0.8531249761581421,
"rewards/chosen": -0.566467821598053,
"rewards/margins": 0.719201922416687,
"rewards/rejected": -1.2856696844100952,
"step": 480
},
{
"epoch": 0.5095206828627709,
"grad_norm": 36.25,
"learning_rate": 1.1409921743561381e-06,
"log_odds_chosen": 1.1759016513824463,
"log_odds_ratio": -0.41472458839416504,
"logits/chosen": -2.404526472091675,
"logits/rejected": -2.2163596153259277,
"logps/chosen": -0.5324310064315796,
"logps/rejected": -1.2714060544967651,
"loss": 1.293,
"nll_loss": 0.8782441020011902,
"rewards/accuracies": 0.84375,
"rewards/chosen": -0.5324310064315796,
"rewards/margins": 0.7389749884605408,
"rewards/rejected": -1.2714060544967651,
"step": 485
},
{
"epoch": 0.5147734734077478,
"grad_norm": 223.0,
"learning_rate": 1.1227810391395199e-06,
"log_odds_chosen": 1.385846734046936,
"log_odds_ratio": -0.3814238905906677,
"logits/chosen": -2.4934306144714355,
"logits/rejected": -2.2085797786712646,
"logps/chosen": -0.5657092928886414,
"logps/rejected": -1.4650784730911255,
"loss": 1.2852,
"nll_loss": 0.9037421345710754,
"rewards/accuracies": 0.856249988079071,
"rewards/chosen": -0.5657092928886414,
"rewards/margins": 0.8993691205978394,
"rewards/rejected": -1.4650784730911255,
"step": 490
},
{
"epoch": 0.5200262639527249,
"grad_norm": 27.625,
"learning_rate": 1.1045284632676535e-06,
"log_odds_chosen": 1.637117624282837,
"log_odds_ratio": -0.36074963212013245,
"logits/chosen": -2.505157947540283,
"logits/rejected": -2.18147611618042,
"logps/chosen": -0.5794259905815125,
"logps/rejected": -1.7134405374526978,
"loss": 1.2555,
"nll_loss": 0.8947887420654297,
"rewards/accuracies": 0.84375,
"rewards/chosen": -0.5794259905815125,
"rewards/margins": 1.1340144872665405,
"rewards/rejected": -1.7134405374526978,
"step": 495
},
{
"epoch": 0.525279054497702,
"grad_norm": 25.375,
"learning_rate": 1.0862406072903223e-06,
"log_odds_chosen": 1.4640438556671143,
"log_odds_ratio": -0.36846035718917847,
"logits/chosen": -2.5681748390197754,
"logits/rejected": -2.232964038848877,
"logps/chosen": -0.5701361298561096,
"logps/rejected": -1.5233440399169922,
"loss": 1.2435,
"nll_loss": 0.8750120997428894,
"rewards/accuracies": 0.84375,
"rewards/chosen": -0.5701361298561096,
"rewards/margins": 0.9532078504562378,
"rewards/rejected": -1.5233440399169922,
"step": 500
},
{
"epoch": 0.5305318450426789,
"grad_norm": 23.75,
"learning_rate": 1.067923643664936e-06,
"log_odds_chosen": 1.4654853343963623,
"log_odds_ratio": -0.35504215955734253,
"logits/chosen": -2.502295970916748,
"logits/rejected": -2.181178569793701,
"logps/chosen": -0.5419307947158813,
"logps/rejected": -1.5056110620498657,
"loss": 1.2431,
"nll_loss": 0.8880621790885925,
"rewards/accuracies": 0.8812500238418579,
"rewards/chosen": -0.5419307947158813,
"rewards/margins": 0.9636803865432739,
"rewards/rejected": -1.5056110620498657,
"step": 505
},
{
"epoch": 0.535784635587656,
"grad_norm": 35.0,
"learning_rate": 1.0495837546732222e-06,
"log_odds_chosen": 1.5194576978683472,
"log_odds_ratio": -0.37253108620643616,
"logits/chosen": -2.413229465484619,
"logits/rejected": -2.184525728225708,
"logps/chosen": -0.5820909738540649,
"logps/rejected": -1.6039245128631592,
"loss": 1.3383,
"nll_loss": 0.9657222032546997,
"rewards/accuracies": 0.8531249761581421,
"rewards/chosen": -0.5820909738540649,
"rewards/margins": 1.0218335390090942,
"rewards/rejected": -1.6039245128631592,
"step": 510
},
{
"epoch": 0.541037426132633,
"grad_norm": 32.25,
"learning_rate": 1.0312271303346038e-06,
"log_odds_chosen": 1.314542531967163,
"log_odds_ratio": -0.396615594625473,
"logits/chosen": -2.545009136199951,
"logits/rejected": -2.301347017288208,
"logps/chosen": -0.562983512878418,
"logps/rejected": -1.4147989749908447,
"loss": 1.3396,
"nll_loss": 0.9429594278335571,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": -0.562983512878418,
"rewards/margins": 0.851815402507782,
"rewards/rejected": -1.4147989749908447,
"step": 515
},
{
"epoch": 0.54629021667761,
"grad_norm": 47.75,
"learning_rate": 1.0128599663169628e-06,
"log_odds_chosen": 1.084162950515747,
"log_odds_ratio": -0.4125159680843353,
"logits/chosen": -2.4878952503204346,
"logits/rejected": -2.245314359664917,
"logps/chosen": -0.5130459666252136,
"logps/rejected": -1.1407145261764526,
"loss": 1.3142,
"nll_loss": 0.901726245880127,
"rewards/accuracies": 0.8343750238418579,
"rewards/chosen": -0.5130459666252136,
"rewards/margins": 0.6276686191558838,
"rewards/rejected": -1.1407145261764526,
"step": 520
},
{
"epoch": 0.551543007222587,
"grad_norm": 74.0,
"learning_rate": 9.944884618454995e-07,
"log_odds_chosen": 1.5892114639282227,
"log_odds_ratio": -0.3318895697593689,
"logits/chosen": -2.5057709217071533,
"logits/rejected": -2.110414505004883,
"logps/chosen": -0.5387485027313232,
"logps/rejected": -1.5842351913452148,
"loss": 1.2507,
"nll_loss": 0.9187744855880737,
"rewards/accuracies": 0.8812500238418579,
"rewards/chosen": -0.5387485027313232,
"rewards/margins": 1.0454866886138916,
"rewards/rejected": -1.5842351913452148,
"step": 525
},
{
"epoch": 0.556795797767564,
"grad_norm": 73.0,
"learning_rate": 9.7611881761039e-07,
"log_odds_chosen": 1.6785354614257812,
"log_odds_ratio": -0.3325541019439697,
"logits/chosen": -2.462970733642578,
"logits/rejected": -2.220999240875244,
"logps/chosen": -0.6112784147262573,
"logps/rejected": -1.7486165761947632,
"loss": 1.3345,
"nll_loss": 1.0019125938415527,
"rewards/accuracies": 0.887499988079071,
"rewards/chosen": -0.6112784147262573,
"rewards/margins": 1.1373381614685059,
"rewards/rejected": -1.7486165761947632,
"step": 530
},
{
"epoch": 0.562048588312541,
"grad_norm": 36.0,
"learning_rate": 9.57757233673949e-07,
"log_odds_chosen": 1.4563804864883423,
"log_odds_ratio": -0.36100301146507263,
"logits/chosen": -2.4625449180603027,
"logits/rejected": -2.1974194049835205,
"logps/chosen": -0.5516290664672852,
"logps/rejected": -1.515852928161621,
"loss": 1.2346,
"nll_loss": 0.8735913038253784,
"rewards/accuracies": 0.84375,
"rewards/chosen": -0.5516290664672852,
"rewards/margins": 0.9642238616943359,
"rewards/rejected": -1.515852928161621,
"step": 535
},
{
"epoch": 0.5673013788575181,
"grad_norm": 36.0,
"learning_rate": 9.394099073780066e-07,
"log_odds_chosen": 1.4258034229278564,
"log_odds_ratio": -0.36102384328842163,
"logits/chosen": -2.5518240928649902,
"logits/rejected": -2.2731943130493164,
"logps/chosen": -0.5590797662734985,
"logps/rejected": -1.4916408061981201,
"loss": 1.2649,
"nll_loss": 0.9038828015327454,
"rewards/accuracies": 0.8343750238418579,
"rewards/chosen": -0.5590797662734985,
"rewards/margins": 0.9325610399246216,
"rewards/rejected": -1.4916408061981201,
"step": 540
},
{
"epoch": 0.572554169402495,
"grad_norm": 64.0,
"learning_rate": 9.210830312521991e-07,
"log_odds_chosen": 1.605653166770935,
"log_odds_ratio": -0.338408887386322,
"logits/chosen": -2.5818705558776855,
"logits/rejected": -2.311086416244507,
"logps/chosen": -0.5466338992118835,
"logps/rejected": -1.6157076358795166,
"loss": 1.3041,
"nll_loss": 0.9657169580459595,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.5466338992118835,
"rewards/margins": 1.0690736770629883,
"rewards/rejected": -1.6157076358795166,
"step": 545
},
{
"epoch": 0.5778069599474721,
"grad_norm": 57.25,
"learning_rate": 9.027827909238901e-07,
"log_odds_chosen": 1.8266319036483765,
"log_odds_ratio": -0.3148033320903778,
"logits/chosen": -2.48435115814209,
"logits/rejected": -2.166586399078369,
"logps/chosen": -0.5606757402420044,
"logps/rejected": -1.8262403011322021,
"loss": 1.2896,
"nll_loss": 0.97479248046875,
"rewards/accuracies": 0.8687499761581421,
"rewards/chosen": -0.5606757402420044,
"rewards/margins": 1.2655649185180664,
"rewards/rejected": -1.8262403011322021,
"step": 550
},
{
"epoch": 0.5830597504924491,
"grad_norm": 47.5,
"learning_rate": 8.845153630304139e-07,
"log_odds_chosen": 1.663627028465271,
"log_odds_ratio": -0.3311775028705597,
"logits/chosen": -2.4467196464538574,
"logits/rejected": -2.2170791625976562,
"logps/chosen": -0.5954256057739258,
"logps/rejected": -1.7486213445663452,
"loss": 1.2756,
"nll_loss": 0.9444006085395813,
"rewards/accuracies": 0.878125011920929,
"rewards/chosen": -0.5954256057739258,
"rewards/margins": 1.1531956195831299,
"rewards/rejected": -1.7486213445663452,
"step": 555
},
{
"epoch": 0.5883125410374261,
"grad_norm": 45.5,
"learning_rate": 8.662869131343606e-07,
"log_odds_chosen": 1.4104127883911133,
"log_odds_ratio": -0.39170485734939575,
"logits/chosen": -2.5256340503692627,
"logits/rejected": -2.213099241256714,
"logps/chosen": -0.5794434547424316,
"logps/rejected": -1.5348830223083496,
"loss": 1.3375,
"nll_loss": 0.9457686543464661,
"rewards/accuracies": 0.84375,
"rewards/chosen": -0.5794434547424316,
"rewards/margins": 0.955439567565918,
"rewards/rejected": -1.5348830223083496,
"step": 560
},
{
"epoch": 0.5935653315824031,
"grad_norm": 72.0,
"learning_rate": 8.481035936425926e-07,
"log_odds_chosen": 1.1931443214416504,
"log_odds_ratio": -0.3968736529350281,
"logits/chosen": -2.56657338142395,
"logits/rejected": -2.191765785217285,
"logps/chosen": -0.5020140409469604,
"logps/rejected": -1.227325201034546,
"loss": 1.2792,
"nll_loss": 0.8822978138923645,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": -0.5020140409469604,
"rewards/margins": 0.7253111600875854,
"rewards/rejected": -1.227325201034546,
"step": 565
},
{
"epoch": 0.5988181221273802,
"grad_norm": 37.25,
"learning_rate": 8.29971541729707e-07,
"log_odds_chosen": 1.549736738204956,
"log_odds_ratio": -0.3515177369117737,
"logits/chosen": -2.526639461517334,
"logits/rejected": -2.2129909992218018,
"logps/chosen": -0.5579209923744202,
"logps/rejected": -1.5522905588150024,
"loss": 1.2671,
"nll_loss": 0.9156067967414856,
"rewards/accuracies": 0.856249988079071,
"rewards/chosen": -0.5579209923744202,
"rewards/margins": 0.9943695068359375,
"rewards/rejected": -1.5522905588150024,
"step": 570
},
{
"epoch": 0.6040709126723572,
"grad_norm": 78.5,
"learning_rate": 8.118968772666338e-07,
"log_odds_chosen": 1.9918029308319092,
"log_odds_ratio": -0.33105817437171936,
"logits/chosen": -2.5553669929504395,
"logits/rejected": -2.255253791809082,
"logps/chosen": -0.6138916015625,
"logps/rejected": -2.058006763458252,
"loss": 1.261,
"nll_loss": 0.9299631118774414,
"rewards/accuracies": 0.856249988079071,
"rewards/chosen": -0.6138916015625,
"rewards/margins": 1.4441156387329102,
"rewards/rejected": -2.058006763458252,
"step": 575
},
{
"epoch": 0.6093237032173342,
"grad_norm": 32.25,
"learning_rate": 7.938857007550796e-07,
"log_odds_chosen": 1.5095994472503662,
"log_odds_ratio": -0.36659660935401917,
"logits/chosen": -2.4949142932891846,
"logits/rejected": -2.217616558074951,
"logps/chosen": -0.5693143606185913,
"logps/rejected": -1.5770564079284668,
"loss": 1.2795,
"nll_loss": 0.9128750562667847,
"rewards/accuracies": 0.8531249761581421,
"rewards/chosen": -0.5693143606185913,
"rewards/margins": 1.007741928100586,
"rewards/rejected": -1.5770564079284668,
"step": 580
},
{
"epoch": 0.6145764937623113,
"grad_norm": 44.5,
"learning_rate": 7.759440912685042e-07,
"log_odds_chosen": 1.313231348991394,
"log_odds_ratio": -0.39206627011299133,
"logits/chosen": -2.4366495609283447,
"logits/rejected": -2.1927928924560547,
"logps/chosen": -0.5398006439208984,
"logps/rejected": -1.4002869129180908,
"loss": 1.2987,
"nll_loss": 0.9065971374511719,
"rewards/accuracies": 0.8218749761581421,
"rewards/chosen": -0.5398006439208984,
"rewards/margins": 0.8604865074157715,
"rewards/rejected": -1.4002869129180908,
"step": 585
},
{
"epoch": 0.6198292843072882,
"grad_norm": 41.75,
"learning_rate": 7.580781044003324e-07,
"log_odds_chosen": 1.5099523067474365,
"log_odds_ratio": -0.37858808040618896,
"logits/chosen": -2.5282700061798096,
"logits/rejected": -2.1985023021698,
"logps/chosen": -0.554128110408783,
"logps/rejected": -1.5762214660644531,
"loss": 1.2642,
"nll_loss": 0.885593593120575,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": -0.554128110408783,
"rewards/margins": 1.022093415260315,
"rewards/rejected": -1.5762214660644531,
"step": 590
},
{
"epoch": 0.6250820748522653,
"grad_norm": 94.0,
"learning_rate": 7.402937702200904e-07,
"log_odds_chosen": 1.7455905675888062,
"log_odds_ratio": -0.3350276052951813,
"logits/chosen": -2.5306236743927,
"logits/rejected": -2.249689817428589,
"logps/chosen": -0.5238341093063354,
"logps/rejected": -1.7180259227752686,
"loss": 1.2212,
"nll_loss": 0.8861449956893921,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": -0.5238341093063354,
"rewards/margins": 1.1941916942596436,
"rewards/rejected": -1.7180259227752686,
"step": 595
},
{
"epoch": 0.6303348653972423,
"grad_norm": 57.0,
"learning_rate": 7.225970912381556e-07,
"log_odds_chosen": 1.5003291368484497,
"log_odds_ratio": -0.391081303358078,
"logits/chosen": -2.381641387939453,
"logits/rejected": -2.1322736740112305,
"logps/chosen": -0.5944348573684692,
"logps/rejected": -1.6424591541290283,
"loss": 1.3066,
"nll_loss": 0.9154736399650574,
"rewards/accuracies": 0.8218749761581421,
"rewards/chosen": -0.5944348573684692,
"rewards/margins": 1.048024296760559,
"rewards/rejected": -1.6424591541290283,
"step": 600
},
{
"epoch": 0.6355876559422193,
"grad_norm": 41.0,
"learning_rate": 7.049940403798089e-07,
"log_odds_chosen": 1.531709909439087,
"log_odds_ratio": -0.3830433487892151,
"logits/chosen": -2.4697697162628174,
"logits/rejected": -2.217533826828003,
"logps/chosen": -0.5523134469985962,
"logps/rejected": -1.5712653398513794,
"loss": 1.314,
"nll_loss": 0.9309525489807129,
"rewards/accuracies": 0.840624988079071,
"rewards/chosen": -0.5523134469985962,
"rewards/margins": 1.0189517736434937,
"rewards/rejected": -1.5712653398513794,
"step": 605
},
{
"epoch": 0.6408404464871963,
"grad_norm": 46.5,
"learning_rate": 6.874905589692733e-07,
"log_odds_chosen": 1.6414533853530884,
"log_odds_ratio": -0.34355098009109497,
"logits/chosen": -2.509610176086426,
"logits/rejected": -2.1736972332000732,
"logps/chosen": -0.5539788007736206,
"logps/rejected": -1.6842210292816162,
"loss": 1.2389,
"nll_loss": 0.8953197598457336,
"rewards/accuracies": 0.8687499761581421,
"rewards/chosen": -0.5539788007736206,
"rewards/margins": 1.1302422285079956,
"rewards/rejected": -1.6842210292816162,
"step": 610
},
{
"epoch": 0.6460932370321734,
"grad_norm": 32.25,
"learning_rate": 6.700925547244171e-07,
"log_odds_chosen": 1.9415044784545898,
"log_odds_ratio": -0.31946122646331787,
"logits/chosen": -2.4332690238952637,
"logits/rejected": -2.26471209526062,
"logps/chosen": -0.6300308704376221,
"logps/rejected": -2.049290180206299,
"loss": 1.2482,
"nll_loss": 0.9287741780281067,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": -0.6300308704376221,
"rewards/margins": 1.4192593097686768,
"rewards/rejected": -2.049290180206299,
"step": 615
},
{
"epoch": 0.6513460275771503,
"grad_norm": 32.5,
"learning_rate": 6.528058997627995e-07,
"log_odds_chosen": 1.9388889074325562,
"log_odds_ratio": -0.3166273534297943,
"logits/chosen": -2.5412425994873047,
"logits/rejected": -2.1768264770507812,
"logps/chosen": -0.5474293828010559,
"logps/rejected": -1.9378162622451782,
"loss": 1.2866,
"nll_loss": 0.9699424505233765,
"rewards/accuracies": 0.878125011920929,
"rewards/chosen": -0.5474293828010559,
"rewards/margins": 1.3903871774673462,
"rewards/rejected": -1.9378162622451782,
"step": 620
},
{
"epoch": 0.6565988181221274,
"grad_norm": 40.0,
"learning_rate": 6.35636428619734e-07,
"log_odds_chosen": 1.7123737335205078,
"log_odds_ratio": -0.34193840622901917,
"logits/chosen": -2.5048129558563232,
"logits/rejected": -2.1842281818389893,
"logps/chosen": -0.5440694093704224,
"logps/rejected": -1.7357890605926514,
"loss": 1.2903,
"nll_loss": 0.9483565092086792,
"rewards/accuracies": 0.859375,
"rewards/chosen": -0.5440694093704224,
"rewards/margins": 1.1917197704315186,
"rewards/rejected": -1.7357890605926514,
"step": 625
},
{
"epoch": 0.6618516086671044,
"grad_norm": 46.25,
"learning_rate": 6.185899362790338e-07,
"log_odds_chosen": 1.6516172885894775,
"log_odds_ratio": -0.3549567461013794,
"logits/chosen": -2.4393770694732666,
"logits/rejected": -2.138049602508545,
"logps/chosen": -0.5555499196052551,
"logps/rejected": -1.7016226053237915,
"loss": 1.2573,
"nll_loss": 0.9023006558418274,
"rewards/accuracies": 0.840624988079071,
"rewards/chosen": -0.5555499196052551,
"rewards/margins": 1.1460726261138916,
"rewards/rejected": -1.7016226053237915,
"step": 630
},
{
"epoch": 0.6671043992120814,
"grad_norm": 101.5,
"learning_rate": 6.016721762171098e-07,
"log_odds_chosen": 1.636366605758667,
"log_odds_ratio": -0.3687242567539215,
"logits/chosen": -2.469954252243042,
"logits/rejected": -2.2552268505096436,
"logps/chosen": -0.6394462585449219,
"logps/rejected": -1.7851154804229736,
"loss": 1.3697,
"nll_loss": 1.000967025756836,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.6394462585449219,
"rewards/margins": 1.1456692218780518,
"rewards/rejected": -1.7851154804229736,
"step": 635
},
{
"epoch": 0.6723571897570584,
"grad_norm": 64.5,
"learning_rate": 5.848888584610726e-07,
"log_odds_chosen": 1.693683385848999,
"log_odds_ratio": -0.34921011328697205,
"logits/chosen": -2.486765146255493,
"logits/rejected": -2.2645862102508545,
"logps/chosen": -0.5731798410415649,
"logps/rejected": -1.7742217779159546,
"loss": 1.261,
"nll_loss": 0.9118081331253052,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.5731798410415649,
"rewards/margins": 1.2010419368743896,
"rewards/rejected": -1.7742217779159546,
"step": 640
},
{
"epoch": 0.6776099803020355,
"grad_norm": 58.5,
"learning_rate": 5.682456476615072e-07,
"log_odds_chosen": 1.4461402893066406,
"log_odds_ratio": -0.3787740170955658,
"logits/chosen": -2.355269432067871,
"logits/rejected": -2.16302490234375,
"logps/chosen": -0.5690776705741882,
"logps/rejected": -1.5551892518997192,
"loss": 1.2771,
"nll_loss": 0.8983281254768372,
"rewards/accuracies": 0.846875011920929,
"rewards/chosen": -0.5690776705741882,
"rewards/margins": 0.9861115217208862,
"rewards/rejected": -1.5551892518997192,
"step": 645
},
{
"epoch": 0.6828627708470125,
"grad_norm": 36.75,
"learning_rate": 5.517481611805539e-07,
"log_odds_chosen": 1.5578912496566772,
"log_odds_ratio": -0.35105592012405396,
"logits/chosen": -2.3847219944000244,
"logits/rejected": -2.130415439605713,
"logps/chosen": -0.537613570690155,
"logps/rejected": -1.5832931995391846,
"loss": 1.246,
"nll_loss": 0.8949264287948608,
"rewards/accuracies": 0.84375,
"rewards/chosen": -0.537613570690155,
"rewards/margins": 1.0456795692443848,
"rewards/rejected": -1.5832931995391846,
"step": 650
},
{
"epoch": 0.6881155613919895,
"grad_norm": 28.25,
"learning_rate": 5.354019671959599e-07,
"log_odds_chosen": 1.4725126028060913,
"log_odds_ratio": -0.38070547580718994,
"logits/chosen": -2.3801114559173584,
"logits/rejected": -2.134171724319458,
"logps/chosen": -0.5319515466690063,
"logps/rejected": -1.5217872858047485,
"loss": 1.3054,
"nll_loss": 0.9246999621391296,
"rewards/accuracies": 0.84375,
"rewards/chosen": -0.5319515466690063,
"rewards/margins": 0.9898357391357422,
"rewards/rejected": -1.5217872858047485,
"step": 655
},
{
"epoch": 0.6933683519369666,
"grad_norm": 35.5,
"learning_rate": 5.192125828217202e-07,
"log_odds_chosen": 1.628064513206482,
"log_odds_ratio": -0.370327889919281,
"logits/chosen": -2.5233168601989746,
"logits/rejected": -2.1562933921813965,
"logps/chosen": -0.5629066824913025,
"logps/rejected": -1.6909490823745728,
"loss": 1.2606,
"nll_loss": 0.8903215527534485,
"rewards/accuracies": 0.846875011920929,
"rewards/chosen": -0.5629066824913025,
"rewards/margins": 1.128042459487915,
"rewards/rejected": -1.6909490823745728,
"step": 660
},
{
"epoch": 0.6986211424819435,
"grad_norm": 51.5,
"learning_rate": 5.031854722459652e-07,
"log_odds_chosen": 1.8480112552642822,
"log_odds_ratio": -0.3127003610134125,
"logits/chosen": -2.4370510578155518,
"logits/rejected": -2.0890867710113525,
"logps/chosen": -0.5302228927612305,
"logps/rejected": -1.8121706247329712,
"loss": 1.2074,
"nll_loss": 0.8947356939315796,
"rewards/accuracies": 0.878125011920929,
"rewards/chosen": -0.5302228927612305,
"rewards/margins": 1.2819478511810303,
"rewards/rejected": -1.8121706247329712,
"step": 665
},
{
"epoch": 0.7038739330269206,
"grad_norm": 31.5,
"learning_rate": 4.873260448867004e-07,
"log_odds_chosen": 2.02109956741333,
"log_odds_ratio": -0.31728652119636536,
"logits/chosen": -2.470301628112793,
"logits/rejected": -2.2189319133758545,
"logps/chosen": -0.6230054497718811,
"logps/rejected": -2.0598232746124268,
"loss": 1.3239,
"nll_loss": 1.0066121816635132,
"rewards/accuracies": 0.8812500238418579,
"rewards/chosen": -0.6230054497718811,
"rewards/margins": 1.4368176460266113,
"rewards/rejected": -2.0598232746124268,
"step": 670
},
{
"epoch": 0.7091267235718975,
"grad_norm": 68.0,
"learning_rate": 4.7163965356604117e-07,
"log_odds_chosen": 1.897443413734436,
"log_odds_ratio": -0.3486331105232239,
"logits/chosen": -2.554206132888794,
"logits/rejected": -2.1669750213623047,
"logps/chosen": -0.64203941822052,
"logps/rejected": -2.0166876316070557,
"loss": 1.3553,
"nll_loss": 1.0066633224487305,
"rewards/accuracies": 0.84375,
"rewards/chosen": -0.64203941822052,
"rewards/margins": 1.3746483325958252,
"rewards/rejected": -2.0166876316070557,
"step": 675
},
{
"epoch": 0.7143795141168746,
"grad_norm": 34.5,
"learning_rate": 4.561315927035445e-07,
"log_odds_chosen": 1.707550048828125,
"log_odds_ratio": -0.34410637617111206,
"logits/chosen": -2.440441846847534,
"logits/rejected": -2.1145124435424805,
"logps/chosen": -0.5574239492416382,
"logps/rejected": -1.7339591979980469,
"loss": 1.2025,
"nll_loss": 0.8583625555038452,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.5574239492416382,
"rewards/margins": 1.1765353679656982,
"rewards/rejected": -1.7339591979980469,
"step": 680
},
{
"epoch": 0.7196323046618516,
"grad_norm": 64.5,
"learning_rate": 4.408070965292533e-07,
"log_odds_chosen": 1.7007535696029663,
"log_odds_ratio": -0.35346347093582153,
"logits/chosen": -2.456326961517334,
"logits/rejected": -2.1892619132995605,
"logps/chosen": -0.5550821423530579,
"logps/rejected": -1.7205698490142822,
"loss": 1.2778,
"nll_loss": 0.9243642091751099,
"rewards/accuracies": 0.84375,
"rewards/chosen": -0.5550821423530579,
"rewards/margins": 1.1654876470565796,
"rewards/rejected": -1.7205698490142822,
"step": 685
},
{
"epoch": 0.7248850952068286,
"grad_norm": 42.25,
"learning_rate": 4.256713373170564e-07,
"log_odds_chosen": 1.5547049045562744,
"log_odds_ratio": -0.36127448081970215,
"logits/chosen": -2.46553373336792,
"logits/rejected": -2.2510862350463867,
"logps/chosen": -0.603643536567688,
"logps/rejected": -1.6664206981658936,
"loss": 1.3272,
"nll_loss": 0.9659638404846191,
"rewards/accuracies": 0.856249988079071,
"rewards/chosen": -0.603643536567688,
"rewards/margins": 1.0627771615982056,
"rewards/rejected": -1.6664206981658936,
"step": 690
},
{
"epoch": 0.7301378857518056,
"grad_norm": 38.5,
"learning_rate": 4.1072942363896025e-07,
"log_odds_chosen": 1.6411514282226562,
"log_odds_ratio": -0.3377731442451477,
"logits/chosen": -2.5552942752838135,
"logits/rejected": -2.229196071624756,
"logps/chosen": -0.5576506853103638,
"logps/rejected": -1.6714286804199219,
"loss": 1.2942,
"nll_loss": 0.9564154744148254,
"rewards/accuracies": 0.878125011920929,
"rewards/chosen": -0.5576506853103638,
"rewards/margins": 1.1137781143188477,
"rewards/rejected": -1.6714286804199219,
"step": 695
},
{
"epoch": 0.7353906762967827,
"grad_norm": 26.125,
"learning_rate": 3.9598639864085925e-07,
"log_odds_chosen": 1.2980868816375732,
"log_odds_ratio": -0.3892515301704407,
"logits/chosen": -2.417532444000244,
"logits/rejected": -2.2620291709899902,
"logps/chosen": -0.5406171083450317,
"logps/rejected": -1.3745439052581787,
"loss": 1.283,
"nll_loss": 0.893776535987854,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": -0.5406171083450317,
"rewards/margins": 0.833926796913147,
"rewards/rejected": -1.3745439052581787,
"step": 700
},
{
"epoch": 0.7406434668417596,
"grad_norm": 57.75,
"learning_rate": 3.8144723834039073e-07,
"log_odds_chosen": 1.3730871677398682,
"log_odds_ratio": -0.38403210043907166,
"logits/chosen": -2.492102861404419,
"logits/rejected": -2.1305251121520996,
"logps/chosen": -0.5153442621231079,
"logps/rejected": -1.4106855392456055,
"loss": 1.2797,
"nll_loss": 0.8956896662712097,
"rewards/accuracies": 0.8343750238418579,
"rewards/chosen": -0.5153442621231079,
"rewards/margins": 0.8953412175178528,
"rewards/rejected": -1.4106855392456055,
"step": 705
},
{
"epoch": 0.7458962573867367,
"grad_norm": 28.25,
"learning_rate": 3.6711684994744486e-07,
"log_odds_chosen": 1.7186520099639893,
"log_odds_ratio": -0.33004146814346313,
"logits/chosen": -2.537470817565918,
"logits/rejected": -2.23635196685791,
"logps/chosen": -0.4957657754421234,
"logps/rejected": -1.6590726375579834,
"loss": 1.2277,
"nll_loss": 0.8976136445999146,
"rewards/accuracies": 0.859375,
"rewards/chosen": -0.4957657754421234,
"rewards/margins": 1.1633068323135376,
"rewards/rejected": -1.6590726375579834,
"step": 710
},
{
"epoch": 0.7511490479317138,
"grad_norm": 50.25,
"learning_rate": 3.530000702078999e-07,
"log_odds_chosen": 1.9104875326156616,
"log_odds_ratio": -0.30225199460983276,
"logits/chosen": -2.41103196144104,
"logits/rejected": -2.163609743118286,
"logps/chosen": -0.535643458366394,
"logps/rejected": -1.8592544794082642,
"loss": 1.2363,
"nll_loss": 0.9340142011642456,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.535643458366394,
"rewards/margins": 1.3236110210418701,
"rewards/rejected": -1.8592544794082642,
"step": 715
},
{
"epoch": 0.7564018384766907,
"grad_norm": 215.0,
"learning_rate": 3.391016637711389e-07,
"log_odds_chosen": 1.9387279748916626,
"log_odds_ratio": -0.32732483744621277,
"logits/chosen": -2.503373861312866,
"logits/rejected": -2.184051990509033,
"logps/chosen": -0.6013236045837402,
"logps/rejected": -1.979087233543396,
"loss": 1.2995,
"nll_loss": 0.9722166061401367,
"rewards/accuracies": 0.8656250238418579,
"rewards/chosen": -0.6013236045837402,
"rewards/margins": 1.3777637481689453,
"rewards/rejected": -1.979087233543396,
"step": 720
},
{
"epoch": 0.7616546290216678,
"grad_norm": 64.5,
"learning_rate": 3.2542632158190133e-07,
"log_odds_chosen": 1.8217693567276,
"log_odds_ratio": -0.3460733890533447,
"logits/chosen": -2.4695355892181396,
"logits/rejected": -2.266535758972168,
"logps/chosen": -0.5930324792861938,
"logps/rejected": -1.8648335933685303,
"loss": 1.2692,
"nll_loss": 0.9231220483779907,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.5930324792861938,
"rewards/margins": 1.271801233291626,
"rewards/rejected": -1.8648335933685303,
"step": 725
},
{
"epoch": 0.7669074195666448,
"grad_norm": 60.0,
"learning_rate": 3.1197865929701017e-07,
"log_odds_chosen": 1.9611870050430298,
"log_odds_ratio": -0.3502156138420105,
"logits/chosen": -2.595439910888672,
"logits/rejected": -2.2361018657684326,
"logps/chosen": -0.5836862921714783,
"logps/rejected": -2.0304791927337646,
"loss": 1.3186,
"nll_loss": 0.9683855175971985,
"rewards/accuracies": 0.840624988079071,
"rewards/chosen": -0.5836862921714783,
"rewards/margins": 1.4467928409576416,
"rewards/rejected": -2.0304791927337646,
"step": 730
},
{
"epoch": 0.7721602101116218,
"grad_norm": 63.75,
"learning_rate": 2.987632157275114e-07,
"log_odds_chosen": 1.6977773904800415,
"log_odds_ratio": -0.3493327796459198,
"logits/chosen": -2.5089340209960938,
"logits/rejected": -2.2651724815368652,
"logps/chosen": -0.5790574550628662,
"logps/rejected": -1.748196005821228,
"loss": 1.2328,
"nll_loss": 0.883512020111084,
"rewards/accuracies": 0.856249988079071,
"rewards/chosen": -0.5790574550628662,
"rewards/margins": 1.1691386699676514,
"rewards/rejected": -1.748196005821228,
"step": 735
},
{
"epoch": 0.7774130006565988,
"grad_norm": 47.0,
"learning_rate": 2.8578445130674833e-07,
"log_odds_chosen": 1.5758211612701416,
"log_odds_ratio": -0.3468172550201416,
"logits/chosen": -2.4574217796325684,
"logits/rejected": -2.2445011138916016,
"logps/chosen": -0.5336965322494507,
"logps/rejected": -1.6166375875473022,
"loss": 1.2211,
"nll_loss": 0.8742717504501343,
"rewards/accuracies": 0.8531249761581421,
"rewards/chosen": -0.5336965322494507,
"rewards/margins": 1.082940936088562,
"rewards/rejected": -1.6166375875473022,
"step": 740
},
{
"epoch": 0.7826657912015759,
"grad_norm": 33.25,
"learning_rate": 2.73046746584891e-07,
"log_odds_chosen": 1.6906464099884033,
"log_odds_ratio": -0.3406273126602173,
"logits/chosen": -2.5112786293029785,
"logits/rejected": -2.2304630279541016,
"logps/chosen": -0.5315414667129517,
"logps/rejected": -1.6976295709609985,
"loss": 1.2098,
"nll_loss": 0.8692021369934082,
"rewards/accuracies": 0.859375,
"rewards/chosen": -0.5315414667129517,
"rewards/margins": 1.1660881042480469,
"rewards/rejected": -1.6976295709609985,
"step": 745
},
{
"epoch": 0.7879185817465528,
"grad_norm": 40.25,
"learning_rate": 2.605544007504279e-07,
"log_odds_chosen": 1.7450376749038696,
"log_odds_ratio": -0.32459336519241333,
"logits/chosen": -2.553576946258545,
"logits/rejected": -2.259354591369629,
"logps/chosen": -0.5844911336898804,
"logps/rejected": -1.801825761795044,
"loss": 1.2855,
"nll_loss": 0.9608856439590454,
"rewards/accuracies": 0.8656250238418579,
"rewards/chosen": -0.5844911336898804,
"rewards/margins": 1.217334508895874,
"rewards/rejected": -1.801825761795044,
"step": 750
},
{
"epoch": 0.7931713722915299,
"grad_norm": 36.5,
"learning_rate": 2.4831163017911683e-07,
"log_odds_chosen": 1.651958703994751,
"log_odds_ratio": -0.34634822607040405,
"logits/chosen": -2.405233144760132,
"logits/rejected": -2.138745069503784,
"logps/chosen": -0.5561404228210449,
"logps/rejected": -1.6944749355316162,
"loss": 1.2428,
"nll_loss": 0.8964967727661133,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": -0.5561404228210449,
"rewards/margins": 1.1383345127105713,
"rewards/rejected": -1.6944749355316162,
"step": 755
},
{
"epoch": 0.7984241628365069,
"grad_norm": 46.75,
"learning_rate": 2.3632256701088814e-07,
"log_odds_chosen": 1.698676347732544,
"log_odds_ratio": -0.3407271206378937,
"logits/chosen": -2.5164520740509033,
"logits/rejected": -2.169098377227783,
"logps/chosen": -0.546515166759491,
"logps/rejected": -1.726548433303833,
"loss": 1.2007,
"nll_loss": 0.8599587678909302,
"rewards/accuracies": 0.84375,
"rewards/chosen": -0.546515166759491,
"rewards/margins": 1.1800330877304077,
"rewards/rejected": -1.726548433303833,
"step": 760
},
{
"epoch": 0.8036769533814839,
"grad_norm": 31.625,
"learning_rate": 2.245912577551785e-07,
"log_odds_chosen": 1.7021366357803345,
"log_odds_ratio": -0.36240798234939575,
"logits/chosen": -2.583963632583618,
"logits/rejected": -2.3067448139190674,
"logps/chosen": -0.610865592956543,
"logps/rejected": -1.795292854309082,
"loss": 1.3449,
"nll_loss": 0.9824475049972534,
"rewards/accuracies": 0.840624988079071,
"rewards/chosen": -0.610865592956543,
"rewards/margins": 1.184427261352539,
"rewards/rejected": -1.795292854309082,
"step": 765
},
{
"epoch": 0.8089297439264609,
"grad_norm": 97.5,
"learning_rate": 2.131216619251659e-07,
"log_odds_chosen": 1.825273871421814,
"log_odds_ratio": -0.3238641917705536,
"logits/chosen": -2.533202648162842,
"logits/rejected": -2.3293657302856445,
"logps/chosen": -0.6178978681564331,
"logps/rejected": -1.9215917587280273,
"loss": 1.3183,
"nll_loss": 0.9943979978561401,
"rewards/accuracies": 0.8656250238418579,
"rewards/chosen": -0.6178978681564331,
"rewards/margins": 1.3036938905715942,
"rewards/rejected": -1.9215917587280273,
"step": 770
},
{
"epoch": 0.814182534471438,
"grad_norm": 63.0,
"learning_rate": 2.0191765070136768e-07,
"log_odds_chosen": 1.8990042209625244,
"log_odds_ratio": -0.3358913064002991,
"logits/chosen": -2.4345898628234863,
"logits/rejected": -2.134831190109253,
"logps/chosen": -0.5476903915405273,
"logps/rejected": -1.865012526512146,
"loss": 1.2756,
"nll_loss": 0.9396783709526062,
"rewards/accuracies": 0.878125011920929,
"rewards/chosen": -0.5476903915405273,
"rewards/margins": 1.317322015762329,
"rewards/rejected": -1.865012526512146,
"step": 775
},
{
"epoch": 0.8194353250164149,
"grad_norm": 102.5,
"learning_rate": 1.9098300562505264e-07,
"log_odds_chosen": 1.6969549655914307,
"log_odds_ratio": -0.3712518811225891,
"logits/chosen": -2.4698281288146973,
"logits/rejected": -2.181797981262207,
"logps/chosen": -0.5788697004318237,
"logps/rejected": -1.7834043502807617,
"loss": 1.2457,
"nll_loss": 0.8744741678237915,
"rewards/accuracies": 0.815625011920929,
"rewards/chosen": -0.5788697004318237,
"rewards/margins": 1.2045344114303589,
"rewards/rejected": -1.7834043502807617,
"step": 780
},
{
"epoch": 0.824688115561392,
"grad_norm": 28.625,
"learning_rate": 1.803214173219072e-07,
"log_odds_chosen": 1.9696476459503174,
"log_odds_ratio": -0.30190950632095337,
"logits/chosen": -2.483811616897583,
"logits/rejected": -2.173767328262329,
"logps/chosen": -0.535027265548706,
"logps/rejected": -1.9312782287597656,
"loss": 1.2045,
"nll_loss": 0.902554988861084,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.535027265548706,
"rewards/margins": 1.3962510824203491,
"rewards/rejected": -1.9312782287597656,
"step": 785
},
{
"epoch": 0.8299409061063691,
"grad_norm": 31.125,
"learning_rate": 1.6993648425638796e-07,
"log_odds_chosen": 1.6274923086166382,
"log_odds_ratio": -0.3982171416282654,
"logits/chosen": -2.5815181732177734,
"logits/rejected": -2.206310987472534,
"logps/chosen": -0.5905428528785706,
"logps/rejected": -1.7506492137908936,
"loss": 1.3427,
"nll_loss": 0.9444986581802368,
"rewards/accuracies": 0.8343750238418579,
"rewards/chosen": -0.5905428528785706,
"rewards/margins": 1.1601064205169678,
"rewards/rejected": -1.7506492137908936,
"step": 790
},
{
"epoch": 0.835193696651346,
"grad_norm": 61.0,
"learning_rate": 1.5983171151717921e-07,
"log_odds_chosen": 1.5922825336456299,
"log_odds_ratio": -0.3533628284931183,
"logits/chosen": -2.4570369720458984,
"logits/rejected": -2.210930824279785,
"logps/chosen": -0.581910252571106,
"logps/rejected": -1.6624376773834229,
"loss": 1.2185,
"nll_loss": 0.8651579022407532,
"rewards/accuracies": 0.8656250238418579,
"rewards/chosen": -0.581910252571106,
"rewards/margins": 1.080527424812317,
"rewards/rejected": -1.6624376773834229,
"step": 795
},
{
"epoch": 0.8404464871963231,
"grad_norm": 56.25,
"learning_rate": 1.5001050963416716e-07,
"log_odds_chosen": 1.7499481439590454,
"log_odds_ratio": -0.3268365263938904,
"logits/chosen": -2.4593491554260254,
"logits/rejected": -2.1416468620300293,
"logps/chosen": -0.5591254234313965,
"logps/rejected": -1.7770287990570068,
"loss": 1.1877,
"nll_loss": 0.8609007596969604,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": -0.5591254234313965,
"rewards/margins": 1.2179033756256104,
"rewards/rejected": -1.7770287990570068,
"step": 800
},
{
"epoch": 0.8456992777413,
"grad_norm": 59.25,
"learning_rate": 1.4047619342732908e-07,
"log_odds_chosen": 1.5950630903244019,
"log_odds_ratio": -0.3615456819534302,
"logits/chosen": -2.5065274238586426,
"logits/rejected": -2.24869441986084,
"logps/chosen": -0.6077946424484253,
"logps/rejected": -1.7057603597640991,
"loss": 1.273,
"nll_loss": 0.911415696144104,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.6077946424484253,
"rewards/margins": 1.0979657173156738,
"rewards/rejected": -1.7057603597640991,
"step": 805
},
{
"epoch": 0.8509520682862771,
"grad_norm": 30.0,
"learning_rate": 1.3123198088792577e-07,
"log_odds_chosen": 1.6475883722305298,
"log_odds_ratio": -0.37195760011672974,
"logits/chosen": -2.4656014442443848,
"logits/rejected": -2.1296868324279785,
"logps/chosen": -0.5928062200546265,
"logps/rejected": -1.764866828918457,
"loss": 1.2998,
"nll_loss": 0.9278379678726196,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.5928062200546265,
"rewards/margins": 1.172060489654541,
"rewards/rejected": -1.764866828918457,
"step": 810
},
{
"epoch": 0.8562048588312541,
"grad_norm": 48.25,
"learning_rate": 1.2228099209237607e-07,
"log_odds_chosen": 1.6707931756973267,
"log_odds_ratio": -0.35219767689704895,
"logits/chosen": -2.416558027267456,
"logits/rejected": -2.1250758171081543,
"logps/chosen": -0.577375054359436,
"logps/rejected": -1.7357622385025024,
"loss": 1.3212,
"nll_loss": 0.969050407409668,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.577375054359436,
"rewards/margins": 1.1583871841430664,
"rewards/rejected": -1.7357622385025024,
"step": 815
},
{
"epoch": 0.8614576493762311,
"grad_norm": 48.5,
"learning_rate": 1.1362624814917842e-07,
"log_odds_chosen": 1.469254732131958,
"log_odds_ratio": -0.3808806836605072,
"logits/chosen": -2.428011178970337,
"logits/rejected": -2.1460485458374023,
"logps/chosen": -0.549521803855896,
"logps/rejected": -1.5279179811477661,
"loss": 1.2772,
"nll_loss": 0.8962807655334473,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.549521803855896,
"rewards/margins": 0.9783961176872253,
"rewards/rejected": -1.5279179811477661,
"step": 820
},
{
"epoch": 0.8667104399212081,
"grad_norm": 34.5,
"learning_rate": 1.0527067017923652e-07,
"log_odds_chosen": 1.5520200729370117,
"log_odds_ratio": -0.3591814637184143,
"logits/chosen": -2.5619750022888184,
"logits/rejected": -2.3039004802703857,
"logps/chosen": -0.5574966073036194,
"logps/rejected": -1.6048591136932373,
"loss": 1.2779,
"nll_loss": 0.918703556060791,
"rewards/accuracies": 0.840624988079071,
"rewards/chosen": -0.5574966073036194,
"rewards/margins": 1.0473625659942627,
"rewards/rejected": -1.6048591136932373,
"step": 825
},
{
"epoch": 0.8719632304661852,
"grad_norm": 39.0,
"learning_rate": 9.721707832993231e-08,
"log_odds_chosen": 1.7053543329238892,
"log_odds_ratio": -0.33547329902648926,
"logits/chosen": -2.483564615249634,
"logits/rejected": -2.2165513038635254,
"logps/chosen": -0.5104734301567078,
"logps/rejected": -1.664214849472046,
"loss": 1.1955,
"nll_loss": 0.8600661158561707,
"rewards/accuracies": 0.8531249761581421,
"rewards/chosen": -0.5104734301567078,
"rewards/margins": 1.1537415981292725,
"rewards/rejected": -1.664214849472046,
"step": 830
},
{
"epoch": 0.8772160210111621,
"grad_norm": 34.25,
"learning_rate": 8.946819082327828e-08,
"log_odds_chosen": 1.5886516571044922,
"log_odds_ratio": -0.3529045283794403,
"logits/chosen": -2.3829362392425537,
"logits/rejected": -2.1005430221557617,
"logps/chosen": -0.5660222172737122,
"logps/rejected": -1.645013451576233,
"loss": 1.2596,
"nll_loss": 0.9066807627677917,
"rewards/accuracies": 0.8531249761581421,
"rewards/chosen": -0.5660222172737122,
"rewards/margins": 1.078991174697876,
"rewards/rejected": -1.645013451576233,
"step": 835
},
{
"epoch": 0.8824688115561392,
"grad_norm": 36.75,
"learning_rate": 8.202662303847297e-08,
"log_odds_chosen": 1.7980045080184937,
"log_odds_ratio": -0.3362274765968323,
"logits/chosen": -2.490861654281616,
"logits/rejected": -2.1576590538024902,
"logps/chosen": -0.5558806657791138,
"logps/rejected": -1.7892097234725952,
"loss": 1.2791,
"nll_loss": 0.9428805112838745,
"rewards/accuracies": 0.856249988079071,
"rewards/chosen": -0.5558806657791138,
"rewards/margins": 1.2333290576934814,
"rewards/rejected": -1.7892097234725952,
"step": 840
},
{
"epoch": 0.8877216021011162,
"grad_norm": 55.5,
"learning_rate": 7.48948866291661e-08,
"log_odds_chosen": 1.7913442850112915,
"log_odds_ratio": -0.32501915097236633,
"logits/chosen": -2.5119128227233887,
"logits/rejected": -2.193650960922241,
"logps/chosen": -0.5597657561302185,
"logps/rejected": -1.8090870380401611,
"loss": 1.234,
"nll_loss": 0.9089807271957397,
"rewards/accuracies": 0.890625,
"rewards/chosen": -0.5597657561302185,
"rewards/margins": 1.2493212223052979,
"rewards/rejected": -1.8090870380401611,
"step": 845
},
{
"epoch": 0.8929743926460932,
"grad_norm": 39.5,
"learning_rate": 6.80753886757336e-08,
"log_odds_chosen": 1.5741755962371826,
"log_odds_ratio": -0.34667596220970154,
"logits/chosen": -2.4587669372558594,
"logits/rejected": -2.187401056289673,
"logps/chosen": -0.5418094396591187,
"logps/rejected": -1.594808578491211,
"loss": 1.2259,
"nll_loss": 0.8791839480400085,
"rewards/accuracies": 0.8687499761581421,
"rewards/chosen": -0.5418094396591187,
"rewards/margins": 1.0529991388320923,
"rewards/rejected": -1.594808578491211,
"step": 850
},
{
"epoch": 0.8982271831910703,
"grad_norm": 25.625,
"learning_rate": 6.157043087284797e-08,
"log_odds_chosen": 1.708722710609436,
"log_odds_ratio": -0.34805282950401306,
"logits/chosen": -2.472571849822998,
"logits/rejected": -2.1671009063720703,
"logps/chosen": -0.5452659130096436,
"logps/rejected": -1.7160043716430664,
"loss": 1.2583,
"nll_loss": 0.9102743268013,
"rewards/accuracies": 0.8343750238418579,
"rewards/chosen": -0.5452659130096436,
"rewards/margins": 1.1707384586334229,
"rewards/rejected": -1.7160043716430664,
"step": 855
},
{
"epoch": 0.9034799737360473,
"grad_norm": 30.75,
"learning_rate": 5.538220875261734e-08,
"log_odds_chosen": 1.7142833471298218,
"log_odds_ratio": -0.31549376249313354,
"logits/chosen": -2.5251572132110596,
"logits/rejected": -2.228562593460083,
"logps/chosen": -0.5315389633178711,
"logps/rejected": -1.699853539466858,
"loss": 1.2153,
"nll_loss": 0.8998427391052246,
"rewards/accuracies": 0.8812500238418579,
"rewards/chosen": -0.5315389633178711,
"rewards/margins": 1.1683146953582764,
"rewards/rejected": -1.699853539466858,
"step": 860
},
{
"epoch": 0.9087327642810243,
"grad_norm": 50.75,
"learning_rate": 4.9512810943557083e-08,
"log_odds_chosen": 1.7466316223144531,
"log_odds_ratio": -0.3088250756263733,
"logits/chosen": -2.492593288421631,
"logits/rejected": -2.1745035648345947,
"logps/chosen": -0.5664678812026978,
"logps/rejected": -1.7472212314605713,
"loss": 1.2514,
"nll_loss": 0.9425439834594727,
"rewards/accuracies": 0.8968750238418579,
"rewards/chosen": -0.5664678812026978,
"rewards/margins": 1.1807533502578735,
"rewards/rejected": -1.7472212314605713,
"step": 865
},
{
"epoch": 0.9139855548260013,
"grad_norm": 42.25,
"learning_rate": 4.396421846564235e-08,
"log_odds_chosen": 1.420175313949585,
"log_odds_ratio": -0.39961543679237366,
"logits/chosen": -2.5364463329315186,
"logits/rejected": -2.272904634475708,
"logps/chosen": -0.5728206038475037,
"logps/rejected": -1.547858715057373,
"loss": 1.3665,
"nll_loss": 0.9668663144111633,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.5728206038475037,
"rewards/margins": 0.9750380516052246,
"rewards/rejected": -1.547858715057373,
"step": 870
},
{
"epoch": 0.9192383453709784,
"grad_norm": 88.0,
"learning_rate": 3.87383040616811e-08,
"log_odds_chosen": 1.8361127376556396,
"log_odds_ratio": -0.3314815163612366,
"logits/chosen": -2.5305237770080566,
"logits/rejected": -2.205706834793091,
"logps/chosen": -0.5290949940681458,
"logps/rejected": -1.7841472625732422,
"loss": 1.2038,
"nll_loss": 0.8723037838935852,
"rewards/accuracies": 0.871874988079071,
"rewards/chosen": -0.5290949940681458,
"rewards/margins": 1.2550525665283203,
"rewards/rejected": -1.7841472625732422,
"step": 875
},
{
"epoch": 0.9244911359159553,
"grad_norm": 59.25,
"learning_rate": 3.383683156523187e-08,
"log_odds_chosen": 1.5235865116119385,
"log_odds_ratio": -0.3648485541343689,
"logits/chosen": -2.4326975345611572,
"logits/rejected": -2.0849132537841797,
"logps/chosen": -0.5309010744094849,
"logps/rejected": -1.563246726989746,
"loss": 1.2608,
"nll_loss": 0.8959411382675171,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.5309010744094849,
"rewards/margins": 1.0323456525802612,
"rewards/rejected": -1.563246726989746,
"step": 880
},
{
"epoch": 0.9297439264609324,
"grad_norm": 33.25,
"learning_rate": 2.9261455305280014e-08,
"log_odds_chosen": 1.715073585510254,
"log_odds_ratio": -0.3189467787742615,
"logits/chosen": -2.4626471996307373,
"logits/rejected": -2.1226587295532227,
"logps/chosen": -0.5422563552856445,
"logps/rejected": -1.709451675415039,
"loss": 1.2541,
"nll_loss": 0.9351384043693542,
"rewards/accuracies": 0.8843749761581421,
"rewards/chosen": -0.5422563552856445,
"rewards/margins": 1.1671955585479736,
"rewards/rejected": -1.709451675415039,
"step": 885
},
{
"epoch": 0.9349967170059094,
"grad_norm": 60.0,
"learning_rate": 2.5013719547874788e-08,
"log_odds_chosen": 1.6406991481781006,
"log_odds_ratio": -0.37183278799057007,
"logits/chosen": -2.503505229949951,
"logits/rejected": -2.177072525024414,
"logps/chosen": -0.5782598257064819,
"logps/rejected": -1.7351022958755493,
"loss": 1.2879,
"nll_loss": 0.9161151051521301,
"rewards/accuracies": 0.8531249761581421,
"rewards/chosen": -0.5782598257064819,
"rewards/margins": 1.1568424701690674,
"rewards/rejected": -1.7351022958755493,
"step": 890
},
{
"epoch": 0.9402495075508864,
"grad_norm": 30.5,
"learning_rate": 2.1095057974913177e-08,
"log_odds_chosen": 1.5425198078155518,
"log_odds_ratio": -0.3476109504699707,
"logits/chosen": -2.463806390762329,
"logits/rejected": -2.2360615730285645,
"logps/chosen": -0.5494548082351685,
"logps/rejected": -1.5607731342315674,
"loss": 1.2287,
"nll_loss": 0.8811271786689758,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": -0.5494548082351685,
"rewards/margins": 1.011318325996399,
"rewards/rejected": -1.5607731342315674,
"step": 895
},
{
"epoch": 0.9455022980958634,
"grad_norm": 40.25,
"learning_rate": 1.7506793200248504e-08,
"log_odds_chosen": 1.79372239112854,
"log_odds_ratio": -0.34891271591186523,
"logits/chosen": -2.4137704372406006,
"logits/rejected": -2.1525025367736816,
"logps/chosen": -0.5806652307510376,
"logps/rejected": -1.8389291763305664,
"loss": 1.2788,
"nll_loss": 0.9298731684684753,
"rewards/accuracies": 0.8531249761581421,
"rewards/chosen": -0.5806652307510376,
"rewards/margins": 1.2582640647888184,
"rewards/rejected": -1.8389291763305664,
"step": 900
},
{
"epoch": 0.9507550886408405,
"grad_norm": 33.75,
"learning_rate": 1.4250136323285866e-08,
"log_odds_chosen": 1.7694854736328125,
"log_odds_ratio": -0.339056134223938,
"logits/chosen": -2.458627223968506,
"logits/rejected": -2.133309841156006,
"logps/chosen": -0.5246182680130005,
"logps/rejected": -1.748004674911499,
"loss": 1.2399,
"nll_loss": 0.9008275866508484,
"rewards/accuracies": 0.840624988079071,
"rewards/chosen": -0.5246182680130005,
"rewards/margins": 1.2233861684799194,
"rewards/rejected": -1.748004674911499,
"step": 905
},
{
"epoch": 0.9560078791858174,
"grad_norm": 42.25,
"learning_rate": 1.1326186520215885e-08,
"log_odds_chosen": 1.4994810819625854,
"log_odds_ratio": -0.3889666199684143,
"logits/chosen": -2.42987322807312,
"logits/rejected": -2.2474777698516846,
"logps/chosen": -0.5686417818069458,
"logps/rejected": -1.6017091274261475,
"loss": 1.3525,
"nll_loss": 0.9635759592056274,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.5686417818069458,
"rewards/margins": 1.0330675840377808,
"rewards/rejected": -1.6017091274261475,
"step": 910
},
{
"epoch": 0.9612606697307945,
"grad_norm": 77.0,
"learning_rate": 8.735930673024805e-09,
"log_odds_chosen": 1.6517369747161865,
"log_odds_ratio": -0.34624212980270386,
"logits/chosen": -2.3800384998321533,
"logits/rejected": -2.0897443294525146,
"logps/chosen": -0.5255088806152344,
"logps/rejected": -1.6485977172851562,
"loss": 1.3009,
"nll_loss": 0.9546435475349426,
"rewards/accuracies": 0.8656250238418579,
"rewards/chosen": -0.5255088806152344,
"rewards/margins": 1.1230888366699219,
"rewards/rejected": -1.6485977172851562,
"step": 915
},
{
"epoch": 0.9665134602757715,
"grad_norm": 30.875,
"learning_rate": 6.480243036404598e-09,
"log_odds_chosen": 1.8001991510391235,
"log_odds_ratio": -0.3332251012325287,
"logits/chosen": -2.499809980392456,
"logits/rejected": -2.291926860809326,
"logps/chosen": -0.5624955892562866,
"logps/rejected": -1.824375867843628,
"loss": 1.2743,
"nll_loss": 0.941113293170929,
"rewards/accuracies": 0.84375,
"rewards/chosen": -0.5624955892562866,
"rewards/margins": 1.2618802785873413,
"rewards/rejected": -1.824375867843628,
"step": 920
},
{
"epoch": 0.9717662508207485,
"grad_norm": 44.75,
"learning_rate": 4.559884942677783e-09,
"log_odds_chosen": 1.4665955305099487,
"log_odds_ratio": -0.38308554887771606,
"logits/chosen": -2.397916078567505,
"logits/rejected": -2.142017126083374,
"logps/chosen": -0.5283843874931335,
"logps/rejected": -1.4880872964859009,
"loss": 1.2288,
"nll_loss": 0.8457143902778625,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.5283843874931335,
"rewards/margins": 0.9597029685974121,
"rewards/rejected": -1.4880872964859009,
"step": 925
},
{
"epoch": 0.9770190413657256,
"grad_norm": 47.75,
"learning_rate": 2.9755045448351944e-09,
"log_odds_chosen": 1.4579670429229736,
"log_odds_ratio": -0.3762872815132141,
"logits/chosen": -2.4584195613861084,
"logits/rejected": -2.198525905609131,
"logps/chosen": -0.5691961050033569,
"logps/rejected": -1.5534415245056152,
"loss": 1.2789,
"nll_loss": 0.9026187062263489,
"rewards/accuracies": 0.8187500238418579,
"rewards/chosen": -0.5691961050033569,
"rewards/margins": 0.9842453002929688,
"rewards/rejected": -1.5534415245056152,
"step": 930
},
{
"epoch": 0.9822718319107026,
"grad_norm": 49.0,
"learning_rate": 1.7276365977730856e-09,
"log_odds_chosen": 1.5441417694091797,
"log_odds_ratio": -0.3624028265476227,
"logits/chosen": -2.535742998123169,
"logits/rejected": -2.1748859882354736,
"logps/chosen": -0.5510035753250122,
"logps/rejected": -1.6074680089950562,
"loss": 1.2792,
"nll_loss": 0.9167704582214355,
"rewards/accuracies": 0.8531249761581421,
"rewards/chosen": -0.5510035753250122,
"rewards/margins": 1.056464433670044,
"rewards/rejected": -1.6074680089950562,
"step": 935
},
{
"epoch": 0.9875246224556796,
"grad_norm": 50.0,
"learning_rate": 8.16702277804504e-10,
"log_odds_chosen": 1.6150617599487305,
"log_odds_ratio": -0.3433099687099457,
"logits/chosen": -2.4907350540161133,
"logits/rejected": -2.166508674621582,
"logps/chosen": -0.5353943109512329,
"logps/rejected": -1.6448442935943604,
"loss": 1.2318,
"nll_loss": 0.888446033000946,
"rewards/accuracies": 0.8656250238418579,
"rewards/chosen": -0.5353943109512329,
"rewards/margins": 1.1094499826431274,
"rewards/rejected": -1.6448442935943604,
"step": 940
},
{
"epoch": 0.9927774130006566,
"grad_norm": 44.75,
"learning_rate": 2.430090405054486e-10,
"log_odds_chosen": 1.457880973815918,
"log_odds_ratio": -0.36118173599243164,
"logits/chosen": -2.4720263481140137,
"logits/rejected": -2.178345203399658,
"logps/chosen": -0.5418224334716797,
"logps/rejected": -1.4925849437713623,
"loss": 1.309,
"nll_loss": 0.9478532671928406,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": -0.5418224334716797,
"rewards/margins": 0.9507624506950378,
"rewards/rejected": -1.4925849437713623,
"step": 945
},
{
"epoch": 0.9980302035456337,
"grad_norm": 33.0,
"learning_rate": 6.750516943321294e-12,
"log_odds_chosen": 1.7491207122802734,
"log_odds_ratio": -0.319837361574173,
"logits/chosen": -2.4439542293548584,
"logits/rejected": -2.1569535732269287,
"logps/chosen": -0.5160128474235535,
"logps/rejected": -1.7111313343048096,
"loss": 1.2106,
"nll_loss": 0.8908060193061829,
"rewards/accuracies": 0.871874988079071,
"rewards/chosen": -0.5160128474235535,
"rewards/margins": 1.1951183080673218,
"rewards/rejected": -1.7111313343048096,
"step": 950
},
{
"epoch": 0.999080761654629,
"step": 951,
"total_flos": 0.0,
"train_loss": 1.3879666121600178,
"train_runtime": 22584.718,
"train_samples_per_second": 2.697,
"train_steps_per_second": 0.042
}
],
"logging_steps": 5,
"max_steps": 951,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}