uf-mistral-it-orpo-iopo-iter1 / trainer_state.json
nlee-208's picture
Model save
ac5cf27 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9997038791827065,
"eval_steps": 500,
"global_step": 1688,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.005922416345869114,
"grad_norm": 26.875,
"learning_rate": 2.9585798816568044e-08,
"log_odds_chosen": -0.4994420111179352,
"log_odds_ratio": -1.0620524883270264,
"logits/chosen": -2.227687358856201,
"logits/rejected": -2.213762044906616,
"logps/chosen": -0.7160366773605347,
"logps/rejected": -0.47193747758865356,
"loss": 1.3693,
"nll_loss": 1.2856990098953247,
"rewards/accuracies": 0.30000001192092896,
"rewards/chosen": -0.0716036707162857,
"rewards/margins": -0.02440992370247841,
"rewards/rejected": -0.047193750739097595,
"step": 10
},
{
"epoch": 0.011844832691738229,
"grad_norm": 26.5,
"learning_rate": 5.917159763313609e-08,
"log_odds_chosen": -0.6077697277069092,
"log_odds_ratio": -1.154677152633667,
"logits/chosen": -2.1866495609283447,
"logits/rejected": -2.1631338596343994,
"logps/chosen": -0.8245598077774048,
"logps/rejected": -0.4715619683265686,
"loss": 1.3378,
"nll_loss": 1.228305459022522,
"rewards/accuracies": 0.30000001192092896,
"rewards/chosen": -0.08245597779750824,
"rewards/margins": -0.03529978543519974,
"rewards/rejected": -0.0471561960875988,
"step": 20
},
{
"epoch": 0.017767249037607343,
"grad_norm": 29.125,
"learning_rate": 8.875739644970414e-08,
"log_odds_chosen": -0.5950562357902527,
"log_odds_ratio": -1.171638011932373,
"logits/chosen": -2.152902126312256,
"logits/rejected": -2.1443581581115723,
"logps/chosen": -0.854525089263916,
"logps/rejected": -0.49298763275146484,
"loss": 1.3488,
"nll_loss": 1.3134263753890991,
"rewards/accuracies": 0.3062500059604645,
"rewards/chosen": -0.08545249700546265,
"rewards/margins": -0.03615374490618706,
"rewards/rejected": -0.049298763275146484,
"step": 30
},
{
"epoch": 0.023689665383476458,
"grad_norm": 31.25,
"learning_rate": 1.1834319526627217e-07,
"log_odds_chosen": -0.5344940423965454,
"log_odds_ratio": -1.0923480987548828,
"logits/chosen": -2.219038486480713,
"logits/rejected": -2.2063724994659424,
"logps/chosen": -0.7574710845947266,
"logps/rejected": -0.4638025760650635,
"loss": 1.3817,
"nll_loss": 1.2359822988510132,
"rewards/accuracies": 0.33125001192092896,
"rewards/chosen": -0.07574710994958878,
"rewards/margins": -0.02936685085296631,
"rewards/rejected": -0.04638025909662247,
"step": 40
},
{
"epoch": 0.029612081729345572,
"grad_norm": 28.0,
"learning_rate": 1.4792899408284022e-07,
"log_odds_chosen": -0.4542032778263092,
"log_odds_ratio": -1.0256363153457642,
"logits/chosen": -2.1617987155914307,
"logits/rejected": -2.146223545074463,
"logps/chosen": -0.7006078958511353,
"logps/rejected": -0.47175368666648865,
"loss": 1.3127,
"nll_loss": 1.2409818172454834,
"rewards/accuracies": 0.3499999940395355,
"rewards/chosen": -0.07006079703569412,
"rewards/margins": -0.022885426878929138,
"rewards/rejected": -0.047175366431474686,
"step": 50
},
{
"epoch": 0.035534498075214686,
"grad_norm": 23.125,
"learning_rate": 1.7751479289940827e-07,
"log_odds_chosen": -0.6588231921195984,
"log_odds_ratio": -1.230991005897522,
"logits/chosen": -2.2183756828308105,
"logits/rejected": -2.187129259109497,
"logps/chosen": -0.8897625207901001,
"logps/rejected": -0.4612082540988922,
"loss": 1.3568,
"nll_loss": 1.2310936450958252,
"rewards/accuracies": 0.36250001192092896,
"rewards/chosen": -0.08897626399993896,
"rewards/margins": -0.04285542666912079,
"rewards/rejected": -0.04612082242965698,
"step": 60
},
{
"epoch": 0.041456914421083804,
"grad_norm": 30.75,
"learning_rate": 2.0710059171597633e-07,
"log_odds_chosen": -0.5367478132247925,
"log_odds_ratio": -1.1100060939788818,
"logits/chosen": -2.232348918914795,
"logits/rejected": -2.1998302936553955,
"logps/chosen": -0.7972711324691772,
"logps/rejected": -0.4634431302547455,
"loss": 1.3614,
"nll_loss": 1.2567493915557861,
"rewards/accuracies": 0.375,
"rewards/chosen": -0.07972709834575653,
"rewards/margins": -0.03338279575109482,
"rewards/rejected": -0.04634431377053261,
"step": 70
},
{
"epoch": 0.047379330766952915,
"grad_norm": 28.875,
"learning_rate": 2.3668639053254435e-07,
"log_odds_chosen": -0.5755403637886047,
"log_odds_ratio": -1.156178593635559,
"logits/chosen": -2.197105884552002,
"logits/rejected": -2.186234474182129,
"logps/chosen": -0.7956789135932922,
"logps/rejected": -0.4599471688270569,
"loss": 1.3327,
"nll_loss": 1.219543695449829,
"rewards/accuracies": 0.3687500059604645,
"rewards/chosen": -0.07956788688898087,
"rewards/margins": -0.033573172986507416,
"rewards/rejected": -0.04599471390247345,
"step": 80
},
{
"epoch": 0.05330174711282203,
"grad_norm": 25.25,
"learning_rate": 2.662721893491124e-07,
"log_odds_chosen": -0.5014861226081848,
"log_odds_ratio": -1.0729024410247803,
"logits/chosen": -2.1807546615600586,
"logits/rejected": -2.1571853160858154,
"logps/chosen": -0.7198914289474487,
"logps/rejected": -0.46573418378829956,
"loss": 1.3113,
"nll_loss": 1.224487066268921,
"rewards/accuracies": 0.3125,
"rewards/chosen": -0.07198914140462875,
"rewards/margins": -0.025415724143385887,
"rewards/rejected": -0.046573419123888016,
"step": 90
},
{
"epoch": 0.059224163458691144,
"grad_norm": 25.125,
"learning_rate": 2.9585798816568045e-07,
"log_odds_chosen": -0.4174951910972595,
"log_odds_ratio": -0.9966305494308472,
"logits/chosen": -2.2450003623962402,
"logits/rejected": -2.199430465698242,
"logps/chosen": -0.6903594732284546,
"logps/rejected": -0.4920008182525635,
"loss": 1.2864,
"nll_loss": 1.2207610607147217,
"rewards/accuracies": 0.33125001192092896,
"rewards/chosen": -0.06903595477342606,
"rewards/margins": -0.01983586512506008,
"rewards/rejected": -0.04920008033514023,
"step": 100
},
{
"epoch": 0.06514657980456026,
"grad_norm": 19.375,
"learning_rate": 3.254437869822485e-07,
"log_odds_chosen": -0.4817837178707123,
"log_odds_ratio": -1.0484408140182495,
"logits/chosen": -2.195328950881958,
"logits/rejected": -2.172029972076416,
"logps/chosen": -0.7407166361808777,
"logps/rejected": -0.4809334874153137,
"loss": 1.2292,
"nll_loss": 1.1185578107833862,
"rewards/accuracies": 0.3125,
"rewards/chosen": -0.07407166808843613,
"rewards/margins": -0.025978317484259605,
"rewards/rejected": -0.04809335619211197,
"step": 110
},
{
"epoch": 0.07106899615042937,
"grad_norm": 26.25,
"learning_rate": 3.5502958579881655e-07,
"log_odds_chosen": -0.5086492300033569,
"log_odds_ratio": -1.073943018913269,
"logits/chosen": -2.2213022708892822,
"logits/rejected": -2.210648536682129,
"logps/chosen": -0.7544690370559692,
"logps/rejected": -0.47401171922683716,
"loss": 1.2703,
"nll_loss": 1.1549344062805176,
"rewards/accuracies": 0.3375000059604645,
"rewards/chosen": -0.07544689625501633,
"rewards/margins": -0.02804572507739067,
"rewards/rejected": -0.04740116745233536,
"step": 120
},
{
"epoch": 0.07699141249629848,
"grad_norm": 121.5,
"learning_rate": 3.8461538461538463e-07,
"log_odds_chosen": -0.6346783638000488,
"log_odds_ratio": -1.208389401435852,
"logits/chosen": -2.205939292907715,
"logits/rejected": -2.1982388496398926,
"logps/chosen": -0.8659466505050659,
"logps/rejected": -0.4501543939113617,
"loss": 1.3049,
"nll_loss": 1.1621254682540894,
"rewards/accuracies": 0.34375,
"rewards/chosen": -0.08659467846155167,
"rewards/margins": -0.0415792390704155,
"rewards/rejected": -0.04501544311642647,
"step": 130
},
{
"epoch": 0.08291382884216761,
"grad_norm": 18.625,
"learning_rate": 4.1420118343195265e-07,
"log_odds_chosen": -0.4532869756221771,
"log_odds_ratio": -1.0120022296905518,
"logits/chosen": -2.2359938621520996,
"logits/rejected": -2.2115871906280518,
"logps/chosen": -0.6659095287322998,
"logps/rejected": -0.4467584490776062,
"loss": 1.2027,
"nll_loss": 1.0797432661056519,
"rewards/accuracies": 0.33125001192092896,
"rewards/chosen": -0.06659095734357834,
"rewards/margins": -0.02191510982811451,
"rewards/rejected": -0.04467584565281868,
"step": 140
},
{
"epoch": 0.08883624518803672,
"grad_norm": 29.75,
"learning_rate": 4.437869822485207e-07,
"log_odds_chosen": -0.46737051010131836,
"log_odds_ratio": -1.0146253108978271,
"logits/chosen": -2.16318941116333,
"logits/rejected": -2.1556496620178223,
"logps/chosen": -0.7067540287971497,
"logps/rejected": -0.47525158524513245,
"loss": 1.217,
"nll_loss": 1.1824976205825806,
"rewards/accuracies": 0.29374998807907104,
"rewards/chosen": -0.07067539542913437,
"rewards/margins": -0.023150241002440453,
"rewards/rejected": -0.047525160014629364,
"step": 150
},
{
"epoch": 0.09475866153390583,
"grad_norm": 20.75,
"learning_rate": 4.733727810650887e-07,
"log_odds_chosen": -0.31778836250305176,
"log_odds_ratio": -0.9325827360153198,
"logits/chosen": -2.2458879947662354,
"logits/rejected": -2.2277491092681885,
"logps/chosen": -0.6050869822502136,
"logps/rejected": -0.4580734372138977,
"loss": 1.2157,
"nll_loss": 1.0979220867156982,
"rewards/accuracies": 0.4000000059604645,
"rewards/chosen": -0.06050870940089226,
"rewards/margins": -0.014701364561915398,
"rewards/rejected": -0.04580734297633171,
"step": 160
},
{
"epoch": 0.10068107787977496,
"grad_norm": 14.3125,
"learning_rate": 4.999994653198566e-07,
"log_odds_chosen": -0.44623684883117676,
"log_odds_ratio": -1.0507714748382568,
"logits/chosen": -2.273740530014038,
"logits/rejected": -2.248004198074341,
"logps/chosen": -0.744641900062561,
"logps/rejected": -0.4939740300178528,
"loss": 1.2442,
"nll_loss": 1.0892422199249268,
"rewards/accuracies": 0.3499999940395355,
"rewards/chosen": -0.07446418702602386,
"rewards/margins": -0.0250667966902256,
"rewards/rejected": -0.04939739406108856,
"step": 170
},
{
"epoch": 0.10660349422564407,
"grad_norm": 12.5625,
"learning_rate": 4.999353064699471e-07,
"log_odds_chosen": -0.5144436955451965,
"log_odds_ratio": -1.1169707775115967,
"logits/chosen": -2.2361178398132324,
"logits/rejected": -2.2026758193969727,
"logps/chosen": -0.8099610209465027,
"logps/rejected": -0.49819788336753845,
"loss": 1.1022,
"nll_loss": 1.0261476039886475,
"rewards/accuracies": 0.33125001192092896,
"rewards/chosen": -0.08099609613418579,
"rewards/margins": -0.031176313757896423,
"rewards/rejected": -0.049819789826869965,
"step": 180
},
{
"epoch": 0.11252591057151318,
"grad_norm": 12.875,
"learning_rate": 4.99764243036258e-07,
"log_odds_chosen": -0.4125841557979584,
"log_odds_ratio": -0.991108775138855,
"logits/chosen": -2.268022298812866,
"logits/rejected": -2.240299701690674,
"logps/chosen": -0.6463659405708313,
"logps/rejected": -0.4517286717891693,
"loss": 1.1318,
"nll_loss": 1.0371661186218262,
"rewards/accuracies": 0.35624998807907104,
"rewards/chosen": -0.06463660299777985,
"rewards/margins": -0.01946372725069523,
"rewards/rejected": -0.04517286270856857,
"step": 190
},
{
"epoch": 0.11844832691738229,
"grad_norm": 12.0,
"learning_rate": 4.994863481875841e-07,
"log_odds_chosen": -0.38528627157211304,
"log_odds_ratio": -0.9595619440078735,
"logits/chosen": -2.217349052429199,
"logits/rejected": -2.1852166652679443,
"logps/chosen": -0.6334083676338196,
"logps/rejected": -0.4435149133205414,
"loss": 1.1246,
"nll_loss": 0.9835959672927856,
"rewards/accuracies": 0.30000001192092896,
"rewards/chosen": -0.06334083527326584,
"rewards/margins": -0.01898934319615364,
"rewards/rejected": -0.044351495802402496,
"step": 200
},
{
"epoch": 0.12437074326325141,
"grad_norm": 11.5,
"learning_rate": 4.991017407876165e-07,
"log_odds_chosen": -0.429326593875885,
"log_odds_ratio": -1.002436876296997,
"logits/chosen": -2.224944591522217,
"logits/rejected": -2.1807491779327393,
"logps/chosen": -0.7087312936782837,
"logps/rejected": -0.49742716550827026,
"loss": 1.0953,
"nll_loss": 1.0195242166519165,
"rewards/accuracies": 0.3375000059604645,
"rewards/chosen": -0.07087312638759613,
"rewards/margins": -0.021130409091711044,
"rewards/rejected": -0.049742721021175385,
"step": 210
},
{
"epoch": 0.13029315960912052,
"grad_norm": 10.6875,
"learning_rate": 4.98610585344102e-07,
"log_odds_chosen": -0.2424849271774292,
"log_odds_ratio": -0.9048135876655579,
"logits/chosen": -2.2507550716400146,
"logits/rejected": -2.217257499694824,
"logps/chosen": -0.6068475246429443,
"logps/rejected": -0.4904823899269104,
"loss": 1.1278,
"nll_loss": 1.0603684186935425,
"rewards/accuracies": 0.34375,
"rewards/chosen": -0.06068475916981697,
"rewards/margins": -0.011636516079306602,
"rewards/rejected": -0.04904823377728462,
"step": 220
},
{
"epoch": 0.13621557595498965,
"grad_norm": 9.875,
"learning_rate": 4.980130919384768e-07,
"log_odds_chosen": -0.5562174916267395,
"log_odds_ratio": -1.0973405838012695,
"logits/chosen": -2.246185779571533,
"logits/rejected": -2.2379026412963867,
"logps/chosen": -0.7477759122848511,
"logps/rejected": -0.4505345821380615,
"loss": 1.1333,
"nll_loss": 1.0181388854980469,
"rewards/accuracies": 0.28125,
"rewards/chosen": -0.07477758824825287,
"rewards/margins": -0.029724130406975746,
"rewards/rejected": -0.04505345970392227,
"step": 230
},
{
"epoch": 0.14213799230085875,
"grad_norm": 11.5625,
"learning_rate": 4.973095161360105e-07,
"log_odds_chosen": -0.425253689289093,
"log_odds_ratio": -1.0029823780059814,
"logits/chosen": -2.242088794708252,
"logits/rejected": -2.2122817039489746,
"logps/chosen": -0.68077552318573,
"logps/rejected": -0.48119717836380005,
"loss": 1.1443,
"nll_loss": 1.063909649848938,
"rewards/accuracies": 0.29374998807907104,
"rewards/chosen": -0.06807754933834076,
"rewards/margins": -0.01995784044265747,
"rewards/rejected": -0.048119716346263885,
"step": 240
},
{
"epoch": 0.14806040864672787,
"grad_norm": 10.3125,
"learning_rate": 4.965001588764913e-07,
"log_odds_chosen": -0.4351120889186859,
"log_odds_ratio": -1.013584852218628,
"logits/chosen": -2.2702879905700684,
"logits/rejected": -2.2400031089782715,
"logps/chosen": -0.6880632638931274,
"logps/rejected": -0.4528827667236328,
"loss": 1.1299,
"nll_loss": 1.0191699266433716,
"rewards/accuracies": 0.34375,
"rewards/chosen": -0.06880633533000946,
"rewards/margins": -0.023518051952123642,
"rewards/rejected": -0.04528827592730522,
"step": 250
},
{
"epoch": 0.15398282499259697,
"grad_norm": 11.1875,
"learning_rate": 4.955853663455072e-07,
"log_odds_chosen": -0.30220693349838257,
"log_odds_ratio": -0.9368545413017273,
"logits/chosen": -2.257448673248291,
"logits/rejected": -2.227647542953491,
"logps/chosen": -0.6458665728569031,
"logps/rejected": -0.4764745235443115,
"loss": 1.0645,
"nll_loss": 0.9644678235054016,
"rewards/accuracies": 0.375,
"rewards/chosen": -0.06458665430545807,
"rewards/margins": -0.016939211636781693,
"rewards/rejected": -0.04764745384454727,
"step": 260
},
{
"epoch": 0.1599052413384661,
"grad_norm": 10.875,
"learning_rate": 4.945655298263713e-07,
"log_odds_chosen": -0.41390785574913025,
"log_odds_ratio": -0.9837135076522827,
"logits/chosen": -2.20629620552063,
"logits/rejected": -2.1831986904144287,
"logps/chosen": -0.6674059629440308,
"logps/rejected": -0.46569353342056274,
"loss": 1.1528,
"nll_loss": 1.0888841152191162,
"rewards/accuracies": 0.3062500059604645,
"rewards/chosen": -0.06674060225486755,
"rewards/margins": -0.02017124928534031,
"rewards/rejected": -0.046569354832172394,
"step": 270
},
{
"epoch": 0.16582765768433522,
"grad_norm": 9.5625,
"learning_rate": 4.934410855327585e-07,
"log_odds_chosen": -0.3461267352104187,
"log_odds_ratio": -0.9425566792488098,
"logits/chosen": -2.2884914875030518,
"logits/rejected": -2.27152943611145,
"logps/chosen": -0.6492639780044556,
"logps/rejected": -0.46900925040245056,
"loss": 1.0682,
"nll_loss": 1.0291364192962646,
"rewards/accuracies": 0.34375,
"rewards/chosen": -0.0649264007806778,
"rewards/margins": -0.018025478348135948,
"rewards/rejected": -0.0469009205698967,
"step": 280
},
{
"epoch": 0.1717500740302043,
"grad_norm": 11.0,
"learning_rate": 4.922125144221252e-07,
"log_odds_chosen": -0.38331133127212524,
"log_odds_ratio": -0.9734469652175903,
"logits/chosen": -2.2513084411621094,
"logits/rejected": -2.199239492416382,
"logps/chosen": -0.6518736481666565,
"logps/rejected": -0.4689255356788635,
"loss": 1.1269,
"nll_loss": 1.0506547689437866,
"rewards/accuracies": 0.35624998807907104,
"rewards/chosen": -0.06518735736608505,
"rewards/margins": -0.018294811248779297,
"rewards/rejected": -0.04689255356788635,
"step": 290
},
{
"epoch": 0.17767249037607344,
"grad_norm": 10.625,
"learning_rate": 4.90880341989989e-07,
"log_odds_chosen": -0.295235276222229,
"log_odds_ratio": -0.9132793545722961,
"logits/chosen": -2.255086660385132,
"logits/rejected": -2.2318952083587646,
"logps/chosen": -0.6402678489685059,
"logps/rejected": -0.48136910796165466,
"loss": 1.0909,
"nll_loss": 1.0022283792495728,
"rewards/accuracies": 0.34375,
"rewards/chosen": -0.06402678042650223,
"rewards/margins": -0.015889868140220642,
"rewards/rejected": -0.048136912286281586,
"step": 300
},
{
"epoch": 0.18359490672194256,
"grad_norm": 9.875,
"learning_rate": 4.894451380451589e-07,
"log_odds_chosen": -0.4930775761604309,
"log_odds_ratio": -1.0459508895874023,
"logits/chosen": -2.2340633869171143,
"logits/rejected": -2.2229130268096924,
"logps/chosen": -0.7189785242080688,
"logps/rejected": -0.46092820167541504,
"loss": 1.116,
"nll_loss": 1.0077855587005615,
"rewards/accuracies": 0.3187499940395355,
"rewards/chosen": -0.07189784944057465,
"rewards/margins": -0.02580503560602665,
"rewards/rejected": -0.046092819422483444,
"step": 310
},
{
"epoch": 0.18951732306781166,
"grad_norm": 11.5625,
"learning_rate": 4.879075164660124e-07,
"log_odds_chosen": -0.29097312688827515,
"log_odds_ratio": -0.9061079025268555,
"logits/chosen": -2.238163471221924,
"logits/rejected": -2.2072105407714844,
"logps/chosen": -0.6175664067268372,
"logps/rejected": -0.47239384055137634,
"loss": 1.0495,
"nll_loss": 0.9289931058883667,
"rewards/accuracies": 0.33125001192092896,
"rewards/chosen": -0.06175662949681282,
"rewards/margins": -0.014517253264784813,
"rewards/rejected": -0.047239381819963455,
"step": 320
},
{
"epoch": 0.19543973941368079,
"grad_norm": 12.25,
"learning_rate": 4.862681349379212e-07,
"log_odds_chosen": -0.33382827043533325,
"log_odds_ratio": -0.939583420753479,
"logits/chosen": -2.244995594024658,
"logits/rejected": -2.1931443214416504,
"logps/chosen": -0.6333972811698914,
"logps/rejected": -0.4775928556919098,
"loss": 1.1124,
"nll_loss": 1.0409491062164307,
"rewards/accuracies": 0.35624998807907104,
"rewards/chosen": -0.0633397176861763,
"rewards/margins": -0.01558043621480465,
"rewards/rejected": -0.0477592833340168,
"step": 330
},
{
"epoch": 0.2013621557595499,
"grad_norm": 9.6875,
"learning_rate": 4.8452769467194e-07,
"log_odds_chosen": -0.3502793610095978,
"log_odds_ratio": -0.9458521604537964,
"logits/chosen": -2.2533793449401855,
"logits/rejected": -2.231985092163086,
"logps/chosen": -0.6348416209220886,
"logps/rejected": -0.46244215965270996,
"loss": 1.0872,
"nll_loss": 0.965823769569397,
"rewards/accuracies": 0.35624998807907104,
"rewards/chosen": -0.06348416954278946,
"rewards/margins": -0.017239956185221672,
"rewards/rejected": -0.04624421149492264,
"step": 340
},
{
"epoch": 0.207284572105419,
"grad_norm": 9.625,
"learning_rate": 4.82686940104879e-07,
"log_odds_chosen": -0.37014713883399963,
"log_odds_ratio": -0.9843534231185913,
"logits/chosen": -2.296128511428833,
"logits/rejected": -2.267141103744507,
"logps/chosen": -0.6616524457931519,
"logps/rejected": -0.4461567997932434,
"loss": 1.0383,
"nll_loss": 0.9294153451919556,
"rewards/accuracies": 0.34375,
"rewards/chosen": -0.0661652460694313,
"rewards/margins": -0.02154957316815853,
"rewards/rejected": -0.04461567848920822,
"step": 350
},
{
"epoch": 0.21320698845128813,
"grad_norm": 9.6875,
"learning_rate": 4.807466585808856e-07,
"log_odds_chosen": -0.2995724380016327,
"log_odds_ratio": -0.9168221354484558,
"logits/chosen": -2.274096727371216,
"logits/rejected": -2.2658305168151855,
"logps/chosen": -0.5940972566604614,
"logps/rejected": -0.46015462279319763,
"loss": 1.0942,
"nll_loss": 0.9911165237426758,
"rewards/accuracies": 0.3687500059604645,
"rewards/chosen": -0.0594097301363945,
"rewards/margins": -0.013394266366958618,
"rewards/rejected": -0.04601546376943588,
"step": 360
},
{
"epoch": 0.21912940479715723,
"grad_norm": 13.5625,
"learning_rate": 4.787076800146752e-07,
"log_odds_chosen": -0.27963608503341675,
"log_odds_ratio": -0.9352908134460449,
"logits/chosen": -2.2542636394500732,
"logits/rejected": -2.2058660984039307,
"logps/chosen": -0.6458699107170105,
"logps/rejected": -0.468344509601593,
"loss": 1.0125,
"nll_loss": 0.9038776159286499,
"rewards/accuracies": 0.4375,
"rewards/chosen": -0.06458699703216553,
"rewards/margins": -0.017752548679709435,
"rewards/rejected": -0.046834446489810944,
"step": 370
},
{
"epoch": 0.22505182114302635,
"grad_norm": 10.0,
"learning_rate": 4.765708765365526e-07,
"log_odds_chosen": -0.2566812038421631,
"log_odds_ratio": -0.9025079011917114,
"logits/chosen": -2.2573628425598145,
"logits/rejected": -2.2479588985443115,
"logps/chosen": -0.5893818140029907,
"logps/rejected": -0.4597233235836029,
"loss": 1.1093,
"nll_loss": 0.9725319147109985,
"rewards/accuracies": 0.40625,
"rewards/chosen": -0.05893818661570549,
"rewards/margins": -0.012965850532054901,
"rewards/rejected": -0.04597233235836029,
"step": 380
},
{
"epoch": 0.23097423748889548,
"grad_norm": 12.25,
"learning_rate": 4.7433716211937587e-07,
"log_odds_chosen": -0.4499928057193756,
"log_odds_ratio": -1.0134861469268799,
"logits/chosen": -2.3190252780914307,
"logits/rejected": -2.297466516494751,
"logps/chosen": -0.655422568321228,
"logps/rejected": -0.43357038497924805,
"loss": 1.0471,
"nll_loss": 1.008756399154663,
"rewards/accuracies": 0.34375,
"rewards/chosen": -0.06554224342107773,
"rewards/margins": -0.02218521013855934,
"rewards/rejected": -0.043357037007808685,
"step": 390
},
{
"epoch": 0.23689665383476458,
"grad_norm": 9.3125,
"learning_rate": 4.720074921876245e-07,
"log_odds_chosen": -0.3851686120033264,
"log_odds_ratio": -0.9778718948364258,
"logits/chosen": -2.325918674468994,
"logits/rejected": -2.2813212871551514,
"logps/chosen": -0.6249781847000122,
"logps/rejected": -0.45036381483078003,
"loss": 1.0507,
"nll_loss": 0.9533747434616089,
"rewards/accuracies": 0.3812499940395355,
"rewards/chosen": -0.0624978169798851,
"rewards/margins": -0.01746143028140068,
"rewards/rejected": -0.04503639414906502,
"step": 400
},
{
"epoch": 0.2428190701806337,
"grad_norm": 10.5625,
"learning_rate": 4.6958286320873593e-07,
"log_odds_chosen": -0.38822251558303833,
"log_odds_ratio": -0.9542675018310547,
"logits/chosen": -2.2724270820617676,
"logits/rejected": -2.27009916305542,
"logps/chosen": -0.6122742891311646,
"logps/rejected": -0.4287818372249603,
"loss": 1.0679,
"nll_loss": 1.0051120519638062,
"rewards/accuracies": 0.3125,
"rewards/chosen": -0.06122744083404541,
"rewards/margins": -0.018349256366491318,
"rewards/rejected": -0.04287818819284439,
"step": 410
},
{
"epoch": 0.24874148652650283,
"grad_norm": 10.25,
"learning_rate": 4.6706431226688804e-07,
"log_odds_chosen": -0.30081695318222046,
"log_odds_ratio": -0.921572208404541,
"logits/chosen": -2.2560360431671143,
"logits/rejected": -2.2262086868286133,
"logps/chosen": -0.6127408742904663,
"logps/rejected": -0.4595797061920166,
"loss": 1.0784,
"nll_loss": 0.9788911938667297,
"rewards/accuracies": 0.375,
"rewards/chosen": -0.06127409264445305,
"rewards/margins": -0.015316121280193329,
"rewards/rejected": -0.04595796763896942,
"step": 420
},
{
"epoch": 0.25466390287237195,
"grad_norm": 9.375,
"learning_rate": 4.6445291661940777e-07,
"log_odds_chosen": -0.2526037096977234,
"log_odds_ratio": -0.8853398561477661,
"logits/chosen": -2.274932861328125,
"logits/rejected": -2.2737860679626465,
"logps/chosen": -0.5831697583198547,
"logps/rejected": -0.4629867672920227,
"loss": 1.0351,
"nll_loss": 0.9002013206481934,
"rewards/accuracies": 0.4000000059604645,
"rewards/chosen": -0.05831696838140488,
"rewards/margins": -0.012018295004963875,
"rewards/rejected": -0.04629867523908615,
"step": 430
},
{
"epoch": 0.26058631921824105,
"grad_norm": 13.0,
"learning_rate": 4.6174979323599715e-07,
"log_odds_chosen": -0.4437042772769928,
"log_odds_ratio": -1.0250940322875977,
"logits/chosen": -2.2592310905456543,
"logits/rejected": -2.2114596366882324,
"logps/chosen": -0.7022743821144104,
"logps/rejected": -0.4603559374809265,
"loss": 1.0967,
"nll_loss": 1.0961658954620361,
"rewards/accuracies": 0.3187499940395355,
"rewards/chosen": -0.07022743672132492,
"rewards/margins": -0.02419184148311615,
"rewards/rejected": -0.04603559896349907,
"step": 440
},
{
"epoch": 0.26650873556411014,
"grad_norm": 9.5,
"learning_rate": 4.5895609832097277e-07,
"log_odds_chosen": -0.3050179183483124,
"log_odds_ratio": -0.9421980977058411,
"logits/chosen": -2.2684884071350098,
"logits/rejected": -2.2559661865234375,
"logps/chosen": -0.6401418447494507,
"logps/rejected": -0.46939319372177124,
"loss": 1.0745,
"nll_loss": 0.96502685546875,
"rewards/accuracies": 0.38749998807907104,
"rewards/chosen": -0.06401418894529343,
"rewards/margins": -0.01707487180829048,
"rewards/rejected": -0.04693932086229324,
"step": 450
},
{
"epoch": 0.2724311519099793,
"grad_norm": 10.1875,
"learning_rate": 4.560730268187236e-07,
"log_odds_chosen": -0.26763516664505005,
"log_odds_ratio": -0.8960734605789185,
"logits/chosen": -2.266759157180786,
"logits/rejected": -2.230344533920288,
"logps/chosen": -0.57380610704422,
"logps/rejected": -0.45090922713279724,
"loss": 1.0544,
"nll_loss": 0.9469722509384155,
"rewards/accuracies": 0.4437499940395355,
"rewards/chosen": -0.05738060921430588,
"rewards/margins": -0.012289688922464848,
"rewards/rejected": -0.0450909249484539,
"step": 460
},
{
"epoch": 0.2783535682558484,
"grad_norm": 11.1875,
"learning_rate": 4.531018119025989e-07,
"log_odds_chosen": -0.19471798837184906,
"log_odds_ratio": -0.8877772092819214,
"logits/chosen": -2.325700283050537,
"logits/rejected": -2.3014023303985596,
"logps/chosen": -0.5948117971420288,
"logps/rejected": -0.5260331630706787,
"loss": 1.0872,
"nll_loss": 1.042905569076538,
"rewards/accuracies": 0.4000000059604645,
"rewards/chosen": -0.059481192380189896,
"rewards/margins": -0.006877871695905924,
"rewards/rejected": -0.05260331556200981,
"step": 470
},
{
"epoch": 0.2842759846017175,
"grad_norm": 10.1875,
"learning_rate": 4.5004372444744376e-07,
"log_odds_chosen": -0.20854365825653076,
"log_odds_ratio": -0.8700854182243347,
"logits/chosen": -2.267329454421997,
"logits/rejected": -2.2475056648254395,
"logps/chosen": -0.610100269317627,
"logps/rejected": -0.49854737520217896,
"loss": 1.0582,
"nll_loss": 0.982585608959198,
"rewards/accuracies": 0.4000000059604645,
"rewards/chosen": -0.061010025441646576,
"rewards/margins": -0.011155293323099613,
"rewards/rejected": -0.04985473304986954,
"step": 480
},
{
"epoch": 0.2901984009475866,
"grad_norm": 11.125,
"learning_rate": 4.4690007248600967e-07,
"log_odds_chosen": -0.30316418409347534,
"log_odds_ratio": -0.9258543848991394,
"logits/chosen": -2.260499954223633,
"logits/rejected": -2.2460737228393555,
"logps/chosen": -0.6183134913444519,
"logps/rejected": -0.4603392481803894,
"loss": 1.0569,
"nll_loss": 0.9751143455505371,
"rewards/accuracies": 0.38749998807907104,
"rewards/chosen": -0.06183135509490967,
"rewards/margins": -0.01579742692410946,
"rewards/rejected": -0.04603392630815506,
"step": 490
},
{
"epoch": 0.29612081729345574,
"grad_norm": 9.25,
"learning_rate": 4.436722006494701e-07,
"log_odds_chosen": -0.4622948169708252,
"log_odds_ratio": -1.0724523067474365,
"logits/chosen": -2.2528557777404785,
"logits/rejected": -2.2317535877227783,
"logps/chosen": -0.7585560083389282,
"logps/rejected": -0.4601530134677887,
"loss": 1.0779,
"nll_loss": 1.0056917667388916,
"rewards/accuracies": 0.39375001192092896,
"rewards/chosen": -0.07585560530424118,
"rewards/margins": -0.02984030917286873,
"rewards/rejected": -0.04601530730724335,
"step": 500
},
{
"epoch": 0.30204323363932484,
"grad_norm": 10.125,
"learning_rate": 4.4036148959228356e-07,
"log_odds_chosen": -0.37729692459106445,
"log_odds_ratio": -0.9907791018486023,
"logits/chosen": -2.285222291946411,
"logits/rejected": -2.2465076446533203,
"logps/chosen": -0.6608995199203491,
"logps/rejected": -0.44408687949180603,
"loss": 1.0854,
"nll_loss": 0.9470478892326355,
"rewards/accuracies": 0.375,
"rewards/chosen": -0.0660899430513382,
"rewards/margins": -0.02168126031756401,
"rewards/rejected": -0.04440869390964508,
"step": 510
},
{
"epoch": 0.30796564998519393,
"grad_norm": 12.9375,
"learning_rate": 4.3696935540164705e-07,
"log_odds_chosen": -0.3114868998527527,
"log_odds_ratio": -0.9284585118293762,
"logits/chosen": -2.2520318031311035,
"logits/rejected": -2.2336666584014893,
"logps/chosen": -0.6092923879623413,
"logps/rejected": -0.4560086727142334,
"loss": 1.0234,
"nll_loss": 0.954501748085022,
"rewards/accuracies": 0.4437499940395355,
"rewards/chosen": -0.06092924624681473,
"rewards/margins": -0.01532837562263012,
"rewards/rejected": -0.04560086503624916,
"step": 520
},
{
"epoch": 0.3138880663310631,
"grad_norm": 9.1875,
"learning_rate": 4.334972489917947e-07,
"log_odds_chosen": -0.22460684180259705,
"log_odds_ratio": -0.88166743516922,
"logits/chosen": -2.313957691192627,
"logits/rejected": -2.2588186264038086,
"logps/chosen": -0.6013073325157166,
"logps/rejected": -0.47843700647354126,
"loss": 1.0456,
"nll_loss": 0.9358353614807129,
"rewards/accuracies": 0.4375,
"rewards/chosen": -0.060130733996629715,
"rewards/margins": -0.012287032790482044,
"rewards/rejected": -0.047843702137470245,
"step": 530
},
{
"epoch": 0.3198104826769322,
"grad_norm": 11.25,
"learning_rate": 4.299466554833997e-07,
"log_odds_chosen": -0.33192509412765503,
"log_odds_ratio": -0.94036465883255,
"logits/chosen": -2.2912707328796387,
"logits/rejected": -2.2435359954833984,
"logps/chosen": -0.5902704000473022,
"logps/rejected": -0.44104498624801636,
"loss": 1.0515,
"nll_loss": 0.9315252304077148,
"rewards/accuracies": 0.3812499940395355,
"rewards/chosen": -0.059027038514614105,
"rewards/margins": -0.014922534115612507,
"rewards/rejected": -0.044104501605033875,
"step": 540
},
{
"epoch": 0.3257328990228013,
"grad_norm": 8.9375,
"learning_rate": 4.263190935683449e-07,
"log_odds_chosen": -0.25842440128326416,
"log_odds_ratio": -0.893360435962677,
"logits/chosen": -2.2691588401794434,
"logits/rejected": -2.2356011867523193,
"logps/chosen": -0.5605894327163696,
"logps/rejected": -0.43656760454177856,
"loss": 0.9862,
"nll_loss": 0.8704695701599121,
"rewards/accuracies": 0.4437499940395355,
"rewards/chosen": -0.05605894327163696,
"rewards/margins": -0.012402191758155823,
"rewards/rejected": -0.04365675523877144,
"step": 550
},
{
"epoch": 0.33165531536867043,
"grad_norm": 10.875,
"learning_rate": 4.2261611486013437e-07,
"log_odds_chosen": -0.3279554545879364,
"log_odds_ratio": -0.9397815465927124,
"logits/chosen": -2.3104796409606934,
"logits/rejected": -2.275190830230713,
"logps/chosen": -0.6270398497581482,
"logps/rejected": -0.4670359194278717,
"loss": 1.0697,
"nll_loss": 0.977874755859375,
"rewards/accuracies": 0.3812499940395355,
"rewards/chosen": -0.06270398944616318,
"rewards/margins": -0.01600039377808571,
"rewards/rejected": -0.04670359194278717,
"step": 560
},
{
"epoch": 0.33757773171453953,
"grad_norm": 11.125,
"learning_rate": 4.188393032302233e-07,
"log_odds_chosen": -0.14010918140411377,
"log_odds_ratio": -0.8429776430130005,
"logits/chosen": -2.2512803077697754,
"logits/rejected": -2.1937472820281982,
"logps/chosen": -0.5634902715682983,
"logps/rejected": -0.5150736570358276,
"loss": 1.0249,
"nll_loss": 0.931064248085022,
"rewards/accuracies": 0.41874998807907104,
"rewards/chosen": -0.056349027901887894,
"rewards/margins": -0.0048416657373309135,
"rewards/rejected": -0.051507361233234406,
"step": 570
},
{
"epoch": 0.3435001480604086,
"grad_norm": 12.75,
"learning_rate": 4.1499027413055e-07,
"log_odds_chosen": -0.33234935998916626,
"log_odds_ratio": -0.9407118558883667,
"logits/chosen": -2.258405923843384,
"logits/rejected": -2.232956647872925,
"logps/chosen": -0.6220130920410156,
"logps/rejected": -0.4592718482017517,
"loss": 1.0413,
"nll_loss": 0.9290376901626587,
"rewards/accuracies": 0.3687500059604645,
"rewards/chosen": -0.06220130994915962,
"rewards/margins": -0.016274118795990944,
"rewards/rejected": -0.04592718556523323,
"step": 580
},
{
"epoch": 0.3494225644062778,
"grad_norm": 13.875,
"learning_rate": 4.1107067390256056e-07,
"log_odds_chosen": -0.35427385568618774,
"log_odds_ratio": -0.9841470718383789,
"logits/chosen": -2.305126428604126,
"logits/rejected": -2.280172824859619,
"logps/chosen": -0.696389377117157,
"logps/rejected": -0.4881146550178528,
"loss": 1.0718,
"nll_loss": 1.0334848165512085,
"rewards/accuracies": 0.375,
"rewards/chosen": -0.0696389377117157,
"rewards/margins": -0.02082747593522072,
"rewards/rejected": -0.04881146177649498,
"step": 590
},
{
"epoch": 0.3553449807521469,
"grad_norm": 11.6875,
"learning_rate": 4.0708217907302047e-07,
"log_odds_chosen": -0.3386622369289398,
"log_odds_ratio": -0.9444282650947571,
"logits/chosen": -2.2589573860168457,
"logits/rejected": -2.2278530597686768,
"logps/chosen": -0.6211683750152588,
"logps/rejected": -0.46438631415367126,
"loss": 1.0621,
"nll_loss": 0.9823211431503296,
"rewards/accuracies": 0.35624998807907104,
"rewards/chosen": -0.062116838991642,
"rewards/margins": -0.01567821204662323,
"rewards/rejected": -0.04643862694501877,
"step": 600
},
{
"epoch": 0.361267397098016,
"grad_norm": 14.4375,
"learning_rate": 4.030264956369157e-07,
"log_odds_chosen": -0.32127273082733154,
"log_odds_ratio": -0.929902195930481,
"logits/chosen": -2.297096014022827,
"logits/rejected": -2.259603977203369,
"logps/chosen": -0.591595470905304,
"logps/rejected": -0.4399223327636719,
"loss": 1.0497,
"nll_loss": 0.9886807203292847,
"rewards/accuracies": 0.3375000059604645,
"rewards/chosen": -0.0591595396399498,
"rewards/margins": -0.01516731083393097,
"rewards/rejected": -0.04399223253130913,
"step": 610
},
{
"epoch": 0.3671898134438851,
"grad_norm": 11.1875,
"learning_rate": 3.989053583277492e-07,
"log_odds_chosen": -0.42405062913894653,
"log_odds_ratio": -1.0016412734985352,
"logits/chosen": -2.3095479011535645,
"logits/rejected": -2.2935452461242676,
"logps/chosen": -0.6750982403755188,
"logps/rejected": -0.45489102602005005,
"loss": 1.0537,
"nll_loss": 0.9710051417350769,
"rewards/accuracies": 0.3187499940395355,
"rewards/chosen": -0.06750981509685516,
"rewards/margins": -0.022020723670721054,
"rewards/rejected": -0.04548909515142441,
"step": 620
},
{
"epoch": 0.3731122297897542,
"grad_norm": 13.4375,
"learning_rate": 3.947205298755447e-07,
"log_odds_chosen": -0.25669050216674805,
"log_odds_ratio": -0.9015368223190308,
"logits/chosen": -2.2679405212402344,
"logits/rejected": -2.2386162281036377,
"logps/chosen": -0.6160240173339844,
"logps/rejected": -0.48336100578308105,
"loss": 1.0648,
"nll_loss": 0.9532335996627808,
"rewards/accuracies": 0.41874998807907104,
"rewards/chosen": -0.061602406203746796,
"rewards/margins": -0.013266305439174175,
"rewards/rejected": -0.04833609610795975,
"step": 630
},
{
"epoch": 0.3790346461356233,
"grad_norm": 10.4375,
"learning_rate": 3.9047380025287634e-07,
"log_odds_chosen": -0.24768850207328796,
"log_odds_ratio": -0.891069769859314,
"logits/chosen": -2.275651216506958,
"logits/rejected": -2.247177839279175,
"logps/chosen": -0.5877569913864136,
"logps/rejected": -0.4681660532951355,
"loss": 1.0549,
"nll_loss": 0.9463118314743042,
"rewards/accuracies": 0.4375,
"rewards/chosen": -0.058775704354047775,
"rewards/margins": -0.011959095485508442,
"rewards/rejected": -0.04681660607457161,
"step": 640
},
{
"epoch": 0.3849570624814925,
"grad_norm": 12.1875,
"learning_rate": 3.8616698590924523e-07,
"log_odds_chosen": -0.2891980707645416,
"log_odds_ratio": -0.9127435684204102,
"logits/chosen": -2.296032428741455,
"logits/rejected": -2.2514827251434326,
"logps/chosen": -0.6284441351890564,
"logps/rejected": -0.4775362014770508,
"loss": 1.0297,
"nll_loss": 0.9506929516792297,
"rewards/accuracies": 0.4124999940395355,
"rewards/chosen": -0.062844417989254,
"rewards/margins": -0.01509079895913601,
"rewards/rejected": -0.04775362089276314,
"step": 650
},
{
"epoch": 0.39087947882736157,
"grad_norm": 9.9375,
"learning_rate": 3.8180192899413123e-07,
"log_odds_chosen": -0.3009001314640045,
"log_odds_ratio": -0.9173041582107544,
"logits/chosen": -2.292931079864502,
"logits/rejected": -2.2850821018218994,
"logps/chosen": -0.5977297425270081,
"logps/rejected": -0.4498085081577301,
"loss": 1.066,
"nll_loss": 0.9441615343093872,
"rewards/accuracies": 0.375,
"rewards/chosen": -0.059772975742816925,
"rewards/margins": -0.014792119152843952,
"rewards/rejected": -0.04498085752129555,
"step": 660
},
{
"epoch": 0.39680189517323067,
"grad_norm": 9.6875,
"learning_rate": 3.7738049656905225e-07,
"log_odds_chosen": -0.2274588793516159,
"log_odds_ratio": -0.871192455291748,
"logits/chosen": -2.2281768321990967,
"logits/rejected": -2.1852290630340576,
"logps/chosen": -0.5783167481422424,
"logps/rejected": -0.472917377948761,
"loss": 1.0607,
"nll_loss": 0.9557689428329468,
"rewards/accuracies": 0.4375,
"rewards/chosen": -0.05783168226480484,
"rewards/margins": -0.010539938695728779,
"rewards/rejected": -0.04729173332452774,
"step": 670
},
{
"epoch": 0.4027243115190998,
"grad_norm": 10.375,
"learning_rate": 3.7290457980896787e-07,
"log_odds_chosen": -0.1645122915506363,
"log_odds_ratio": -0.8458727598190308,
"logits/chosen": -2.2992634773254395,
"logits/rejected": -2.270430564880371,
"logps/chosen": -0.5671563148498535,
"logps/rejected": -0.4864569306373596,
"loss": 1.0284,
"nll_loss": 0.9164050817489624,
"rewards/accuracies": 0.41874998807907104,
"rewards/chosen": -0.05671562999486923,
"rewards/margins": -0.00806993618607521,
"rewards/rejected": -0.04864569753408432,
"step": 680
},
{
"epoch": 0.4086467278649689,
"grad_norm": 10.125,
"learning_rate": 3.68376093193369e-07,
"log_odds_chosen": -0.2814542353153229,
"log_odds_ratio": -0.9015814661979675,
"logits/chosen": -2.3065972328186035,
"logits/rejected": -2.2681093215942383,
"logps/chosen": -0.5637949109077454,
"logps/rejected": -0.4352457523345947,
"loss": 1.0214,
"nll_loss": 0.91374272108078,
"rewards/accuracies": 0.39375001192092896,
"rewards/chosen": -0.05637948960065842,
"rewards/margins": -0.012854918837547302,
"rewards/rejected": -0.043524570763111115,
"step": 690
},
{
"epoch": 0.414569144210838,
"grad_norm": 58.5,
"learning_rate": 3.637969736873992e-07,
"log_odds_chosen": -0.21553269028663635,
"log_odds_ratio": -0.8870409727096558,
"logits/chosen": -2.2836763858795166,
"logits/rejected": -2.252403736114502,
"logps/chosen": -0.5681829452514648,
"logps/rejected": -0.465969979763031,
"loss": 1.0701,
"nll_loss": 0.9871380925178528,
"rewards/accuracies": 0.45625001192092896,
"rewards/chosen": -0.056818295270204544,
"rewards/margins": -0.010221302509307861,
"rewards/rejected": -0.04659699648618698,
"step": 700
},
{
"epoch": 0.4204915605567071,
"grad_norm": 10.5,
"learning_rate": 3.591691799133587e-07,
"log_odds_chosen": -0.19581297039985657,
"log_odds_ratio": -0.8488709330558777,
"logits/chosen": -2.3274245262145996,
"logits/rejected": -2.2992606163024902,
"logps/chosen": -0.5645796060562134,
"logps/rejected": -0.4613499641418457,
"loss": 1.0495,
"nll_loss": 0.9565572738647461,
"rewards/accuracies": 0.41874998807907104,
"rewards/chosen": -0.05645795539021492,
"rewards/margins": -0.010322963818907738,
"rewards/rejected": -0.04613499343395233,
"step": 710
},
{
"epoch": 0.42641397690257626,
"grad_norm": 9.75,
"learning_rate": 3.5449469131294476e-07,
"log_odds_chosen": -0.22600612044334412,
"log_odds_ratio": -0.8781830668449402,
"logits/chosen": -2.2927708625793457,
"logits/rejected": -2.2485132217407227,
"logps/chosen": -0.5577629804611206,
"logps/rejected": -0.44653376936912537,
"loss": 1.0248,
"nll_loss": 0.9297264814376831,
"rewards/accuracies": 0.4375,
"rewards/chosen": -0.055776309221982956,
"rewards/margins": -0.0111229307949543,
"rewards/rejected": -0.044653378427028656,
"step": 720
},
{
"epoch": 0.43233639324844536,
"grad_norm": 9.3125,
"learning_rate": 3.497755073005868e-07,
"log_odds_chosen": -0.09444288164377213,
"log_odds_ratio": -0.8072474598884583,
"logits/chosen": -2.290067672729492,
"logits/rejected": -2.257514238357544,
"logps/chosen": -0.5471974611282349,
"logps/rejected": -0.47366800904273987,
"loss": 1.0112,
"nll_loss": 0.8891817927360535,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.054719746112823486,
"rewards/margins": -0.007352945860475302,
"rewards/rejected": -0.04736679792404175,
"step": 730
},
{
"epoch": 0.43825880959431446,
"grad_norm": 16.75,
"learning_rate": 3.4501364640823926e-07,
"log_odds_chosen": -0.3251793384552002,
"log_odds_ratio": -0.9317482709884644,
"logits/chosen": -2.2995355129241943,
"logits/rejected": -2.2732508182525635,
"logps/chosen": -0.6547442674636841,
"logps/rejected": -0.4866989254951477,
"loss": 1.0482,
"nll_loss": 0.9714682698249817,
"rewards/accuracies": 0.38749998807907104,
"rewards/chosen": -0.06547442078590393,
"rewards/margins": -0.016804538667201996,
"rewards/rejected": -0.04866989329457283,
"step": 740
},
{
"epoch": 0.4441812259401836,
"grad_norm": 10.4375,
"learning_rate": 3.402111454219966e-07,
"log_odds_chosen": -0.17538635432720184,
"log_odds_ratio": -0.8506783246994019,
"logits/chosen": -2.3090875148773193,
"logits/rejected": -2.26053786277771,
"logps/chosen": -0.5713698863983154,
"logps/rejected": -0.47184181213378906,
"loss": 1.0275,
"nll_loss": 0.954795241355896,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.05713699012994766,
"rewards/margins": -0.009952803142368793,
"rewards/rejected": -0.047184187918901443,
"step": 750
},
{
"epoch": 0.4501036422860527,
"grad_norm": 9.3125,
"learning_rate": 3.353700585109005e-07,
"log_odds_chosen": -0.19826039671897888,
"log_odds_ratio": -0.8637887835502625,
"logits/chosen": -2.302405834197998,
"logits/rejected": -2.27463698387146,
"logps/chosen": -0.5740953683853149,
"logps/rejected": -0.4722967743873596,
"loss": 1.0239,
"nll_loss": 0.963403582572937,
"rewards/accuracies": 0.42500001192092896,
"rewards/chosen": -0.057409536093473434,
"rewards/margins": -0.010179854929447174,
"rewards/rejected": -0.04722967743873596,
"step": 760
},
{
"epoch": 0.4560260586319218,
"grad_norm": 9.875,
"learning_rate": 3.304924563483129e-07,
"log_odds_chosen": -0.22836697101593018,
"log_odds_ratio": -0.895135760307312,
"logits/chosen": -2.315516948699951,
"logits/rejected": -2.3024649620056152,
"logps/chosen": -0.6285193562507629,
"logps/rejected": -0.48862919211387634,
"loss": 1.073,
"nll_loss": 1.0095432996749878,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": -0.0628519356250763,
"rewards/margins": -0.013989018276333809,
"rewards/rejected": -0.04886292293667793,
"step": 770
},
{
"epoch": 0.46194847497779096,
"grad_norm": 11.125,
"learning_rate": 3.255804252262283e-07,
"log_odds_chosen": -0.19756431877613068,
"log_odds_ratio": -0.856968104839325,
"logits/chosen": -2.255115032196045,
"logits/rejected": -2.226313352584839,
"logps/chosen": -0.551701545715332,
"logps/rejected": -0.45012766122817993,
"loss": 1.0499,
"nll_loss": 0.9961403608322144,
"rewards/accuracies": 0.4437499940395355,
"rewards/chosen": -0.05517015606164932,
"rewards/margins": -0.010157393291592598,
"rewards/rejected": -0.045012760907411575,
"step": 780
},
{
"epoch": 0.46787089132366005,
"grad_norm": 8.375,
"learning_rate": 3.2063606616290626e-07,
"log_odds_chosen": -0.3132410943508148,
"log_odds_ratio": -0.9298326373100281,
"logits/chosen": -2.2360429763793945,
"logits/rejected": -2.1973369121551514,
"logps/chosen": -0.5941890478134155,
"logps/rejected": -0.44506731629371643,
"loss": 0.9654,
"nll_loss": 0.8383496999740601,
"rewards/accuracies": 0.4124999940395355,
"rewards/chosen": -0.05941891670227051,
"rewards/margins": -0.01491218339651823,
"rewards/rejected": -0.044506728649139404,
"step": 790
},
{
"epoch": 0.47379330766952915,
"grad_norm": 15.0625,
"learning_rate": 3.1566149400420523e-07,
"log_odds_chosen": -0.26251059770584106,
"log_odds_ratio": -0.8918318748474121,
"logits/chosen": -2.2902214527130127,
"logits/rejected": -2.2795047760009766,
"logps/chosen": -0.6117950081825256,
"logps/rejected": -0.4801320433616638,
"loss": 1.0567,
"nll_loss": 0.9525865316390991,
"rewards/accuracies": 0.4312500059604645,
"rewards/chosen": -0.0611795075237751,
"rewards/margins": -0.013166295364499092,
"rewards/rejected": -0.04801321029663086,
"step": 800
},
{
"epoch": 0.4797157240153983,
"grad_norm": 10.3125,
"learning_rate": 3.1065883651900087e-07,
"log_odds_chosen": -0.2203420102596283,
"log_odds_ratio": -0.8829119801521301,
"logits/chosen": -2.2788829803466797,
"logits/rejected": -2.2381834983825684,
"logps/chosen": -0.5892807841300964,
"logps/rejected": -0.48378220200538635,
"loss": 1.0678,
"nll_loss": 0.9220091104507446,
"rewards/accuracies": 0.41874998807907104,
"rewards/chosen": -0.05892808362841606,
"rewards/margins": -0.01054986473172903,
"rewards/rejected": -0.048378217965364456,
"step": 810
},
{
"epoch": 0.4856381403612674,
"grad_norm": 9.875,
"learning_rate": 3.056302334890786e-07,
"log_odds_chosen": -0.30824679136276245,
"log_odds_ratio": -0.9259847402572632,
"logits/chosen": -2.288405179977417,
"logits/rejected": -2.2682487964630127,
"logps/chosen": -0.6053352355957031,
"logps/rejected": -0.4507838189601898,
"loss": 1.0098,
"nll_loss": 0.9126564860343933,
"rewards/accuracies": 0.38749998807907104,
"rewards/chosen": -0.06053352355957031,
"rewards/margins": -0.01545514166355133,
"rewards/rejected": -0.04507838934659958,
"step": 820
},
{
"epoch": 0.4915605567071365,
"grad_norm": 12.6875,
"learning_rate": 3.0057783579388586e-07,
"log_odds_chosen": -0.15970291197299957,
"log_odds_ratio": -0.8330586552619934,
"logits/chosen": -2.2909493446350098,
"logits/rejected": -2.2521986961364746,
"logps/chosen": -0.5571908950805664,
"logps/rejected": -0.4815686345100403,
"loss": 1.0258,
"nll_loss": 0.9384473562240601,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -0.05571908876299858,
"rewards/margins": -0.007562229875475168,
"rewards/rejected": -0.04815686494112015,
"step": 830
},
{
"epoch": 0.49748297305300565,
"grad_norm": 11.75,
"learning_rate": 2.9550380449053907e-07,
"log_odds_chosen": -0.18619410693645477,
"log_odds_ratio": -0.8525155782699585,
"logits/chosen": -2.2423572540283203,
"logits/rejected": -2.221928596496582,
"logps/chosen": -0.5615742206573486,
"logps/rejected": -0.4591636657714844,
"loss": 1.0133,
"nll_loss": 0.8223134279251099,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.056157421320676804,
"rewards/margins": -0.010241055861115456,
"rewards/rejected": -0.0459163673222065,
"step": 840
},
{
"epoch": 0.5034053893988747,
"grad_norm": 8.625,
"learning_rate": 2.904103098894767e-07,
"log_odds_chosen": -0.22144293785095215,
"log_odds_ratio": -0.8922742009162903,
"logits/chosen": -2.280796527862549,
"logits/rejected": -2.2380261421203613,
"logps/chosen": -0.5996569991111755,
"logps/rejected": -0.4632148742675781,
"loss": 1.0102,
"nll_loss": 0.9282135963439941,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -0.059965699911117554,
"rewards/margins": -0.013644215650856495,
"rewards/rejected": -0.04632148891687393,
"step": 850
},
{
"epoch": 0.5093278057447439,
"grad_norm": 12.625,
"learning_rate": 2.852995306261545e-07,
"log_odds_chosen": -0.1986076533794403,
"log_odds_ratio": -0.8607484698295593,
"logits/chosen": -2.306536912918091,
"logits/rejected": -2.2707247734069824,
"logps/chosen": -0.575395405292511,
"logps/rejected": -0.4835848808288574,
"loss": 1.074,
"nll_loss": 1.0040955543518066,
"rewards/accuracies": 0.4437499940395355,
"rewards/chosen": -0.05753953382372856,
"rewards/margins": -0.009181044064462185,
"rewards/rejected": -0.0483584925532341,
"step": 860
},
{
"epoch": 0.515250222090613,
"grad_norm": 10.6875,
"learning_rate": 2.801736527291797e-07,
"log_odds_chosen": -0.26449286937713623,
"log_odds_ratio": -0.9028227925300598,
"logits/chosen": -2.275608777999878,
"logits/rejected": -2.233181953430176,
"logps/chosen": -0.61722731590271,
"logps/rejected": -0.4729304313659668,
"loss": 1.042,
"nll_loss": 0.908827006816864,
"rewards/accuracies": 0.39375001192092896,
"rewards/chosen": -0.061722736805677414,
"rewards/margins": -0.014429694041609764,
"rewards/rejected": -0.0472930371761322,
"step": 870
},
{
"epoch": 0.5211726384364821,
"grad_norm": 10.875,
"learning_rate": 2.750348686852836e-07,
"log_odds_chosen": -0.31994161009788513,
"log_odds_ratio": -0.9219182729721069,
"logits/chosen": -2.329312324523926,
"logits/rejected": -2.2651875019073486,
"logps/chosen": -0.6155102252960205,
"logps/rejected": -0.4632096290588379,
"loss": 1.0724,
"nll_loss": 1.0065295696258545,
"rewards/accuracies": 0.375,
"rewards/chosen": -0.06155102327466011,
"rewards/margins": -0.015230064280331135,
"rewards/rejected": -0.04632095992565155,
"step": 880
},
{
"epoch": 0.5270950547823512,
"grad_norm": 11.875,
"learning_rate": 2.69885376501531e-07,
"log_odds_chosen": -0.23163005709648132,
"log_odds_ratio": -0.8846963047981262,
"logits/chosen": -2.261355400085449,
"logits/rejected": -2.2470784187316895,
"logps/chosen": -0.6110343933105469,
"logps/rejected": -0.4852830767631531,
"loss": 1.0546,
"nll_loss": 0.9538838267326355,
"rewards/accuracies": 0.4625000059604645,
"rewards/chosen": -0.06110344082117081,
"rewards/margins": -0.012575129978358746,
"rewards/rejected": -0.04852830991148949,
"step": 890
},
{
"epoch": 0.5330174711282203,
"grad_norm": 11.9375,
"learning_rate": 2.647273787651687e-07,
"log_odds_chosen": -0.18702737987041473,
"log_odds_ratio": -0.8396440744400024,
"logits/chosen": -2.2948384284973145,
"logits/rejected": -2.2751121520996094,
"logps/chosen": -0.5671176910400391,
"logps/rejected": -0.47453179955482483,
"loss": 1.0288,
"nll_loss": 0.9608666300773621,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -0.056711774319410324,
"rewards/margins": -0.009258597157895565,
"rewards/rejected": -0.047453176230192184,
"step": 900
},
{
"epoch": 0.5389398874740894,
"grad_norm": 16.75,
"learning_rate": 2.5956308170151526e-07,
"log_odds_chosen": -0.40357428789138794,
"log_odds_ratio": -1.0180401802062988,
"logits/chosen": -2.260730504989624,
"logits/rejected": -2.2328133583068848,
"logps/chosen": -0.7037028670310974,
"logps/rejected": -0.4578544497489929,
"loss": 1.1183,
"nll_loss": 0.9839082956314087,
"rewards/accuracies": 0.4124999940395355,
"rewards/chosen": -0.07037027925252914,
"rewards/margins": -0.02458484098315239,
"rewards/rejected": -0.04578544571995735,
"step": 910
},
{
"epoch": 0.5448623038199586,
"grad_norm": 10.3125,
"learning_rate": 2.543946942302944e-07,
"log_odds_chosen": -0.21979165077209473,
"log_odds_ratio": -0.8726961016654968,
"logits/chosen": -2.2551956176757812,
"logits/rejected": -2.2191715240478516,
"logps/chosen": -0.5772194862365723,
"logps/rejected": -0.45551061630249023,
"loss": 1.0123,
"nll_loss": 0.9414900541305542,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.057721953839063644,
"rewards/margins": -0.012170888483524323,
"rewards/rejected": -0.04555106535553932,
"step": 920
},
{
"epoch": 0.5507847201658277,
"grad_norm": 11.1875,
"learning_rate": 2.492244270208158e-07,
"log_odds_chosen": -0.1632816195487976,
"log_odds_ratio": -0.8366379737854004,
"logits/chosen": -2.2645580768585205,
"logits/rejected": -2.2385404109954834,
"logps/chosen": -0.5705746412277222,
"logps/rejected": -0.48298463225364685,
"loss": 0.9953,
"nll_loss": 0.9456483721733093,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.05705747753381729,
"rewards/margins": -0.008759009651839733,
"rewards/rejected": -0.048298463225364685,
"step": 930
},
{
"epoch": 0.5567071365116968,
"grad_norm": 9.5,
"learning_rate": 2.440544915464078e-07,
"log_odds_chosen": -0.2142259180545807,
"log_odds_ratio": -0.8674869537353516,
"logits/chosen": -2.294877290725708,
"logits/rejected": -2.2555816173553467,
"logps/chosen": -0.5593573451042175,
"logps/rejected": -0.45421138405799866,
"loss": 1.0237,
"nll_loss": 0.9162901043891907,
"rewards/accuracies": 0.42500001192092896,
"rewards/chosen": -0.055935733020305634,
"rewards/margins": -0.01051459088921547,
"rewards/rejected": -0.045421142131090164,
"step": 940
},
{
"epoch": 0.5626295528575659,
"grad_norm": 10.0625,
"learning_rate": 2.3888709913850593e-07,
"log_odds_chosen": -0.21557164192199707,
"log_odds_ratio": -0.8706417083740234,
"logits/chosen": -2.3428778648376465,
"logits/rejected": -2.3050456047058105,
"logps/chosen": -0.5851597785949707,
"logps/rejected": -0.4760478436946869,
"loss": 1.0825,
"nll_loss": 0.9482911825180054,
"rewards/accuracies": 0.4000000059604645,
"rewards/chosen": -0.05851597711443901,
"rewards/margins": -0.010911193676292896,
"rewards/rejected": -0.04760478436946869,
"step": 950
},
{
"epoch": 0.568551969203435,
"grad_norm": 9.75,
"learning_rate": 2.337244600408025e-07,
"log_odds_chosen": -0.30868110060691833,
"log_odds_ratio": -0.9379078149795532,
"logits/chosen": -2.3101601600646973,
"logits/rejected": -2.2805612087249756,
"logps/chosen": -0.6376503109931946,
"logps/rejected": -0.4714363217353821,
"loss": 1.051,
"nll_loss": 0.983268141746521,
"rewards/accuracies": 0.38749998807907104,
"rewards/chosen": -0.0637650191783905,
"rewards/margins": -0.016621392220258713,
"rewards/rejected": -0.04714363440871239,
"step": 960
},
{
"epoch": 0.5744743855493041,
"grad_norm": 11.9375,
"learning_rate": 2.2856878246386085e-07,
"log_odds_chosen": -0.20517487823963165,
"log_odds_ratio": -0.8652151226997375,
"logits/chosen": -2.306201457977295,
"logits/rejected": -2.283665180206299,
"logps/chosen": -0.5846830606460571,
"logps/rejected": -0.4740404486656189,
"loss": 1.0953,
"nll_loss": 1.0276809930801392,
"rewards/accuracies": 0.40625,
"rewards/chosen": -0.058468304574489594,
"rewards/margins": -0.011064260266721249,
"rewards/rejected": -0.04740404710173607,
"step": 970
},
{
"epoch": 0.5803968018951732,
"grad_norm": 13.0,
"learning_rate": 2.2342227164060035e-07,
"log_odds_chosen": -0.2963787019252777,
"log_odds_ratio": -0.9264262318611145,
"logits/chosen": -2.2660953998565674,
"logits/rejected": -2.211947441101074,
"logps/chosen": -0.6310227513313293,
"logps/rejected": -0.4772140085697174,
"loss": 1.0355,
"nll_loss": 0.916420578956604,
"rewards/accuracies": 0.4000000059604645,
"rewards/chosen": -0.06310227513313293,
"rewards/margins": -0.015380874276161194,
"rewards/rejected": -0.04772140458226204,
"step": 980
},
{
"epoch": 0.5863192182410424,
"grad_norm": 9.5625,
"learning_rate": 2.182871288830533e-07,
"log_odds_chosen": -0.3251541554927826,
"log_odds_ratio": -0.941790759563446,
"logits/chosen": -2.293196439743042,
"logits/rejected": -2.232034206390381,
"logps/chosen": -0.6307833790779114,
"logps/rejected": -0.4696255624294281,
"loss": 1.0677,
"nll_loss": 0.967657208442688,
"rewards/accuracies": 0.35624998807907104,
"rewards/chosen": -0.06307834386825562,
"rewards/margins": -0.016115780919790268,
"rewards/rejected": -0.04696255922317505,
"step": 990
},
{
"epoch": 0.5922416345869115,
"grad_norm": 9.6875,
"learning_rate": 2.131655506408007e-07,
"log_odds_chosen": -0.22425034642219543,
"log_odds_ratio": -0.8798470497131348,
"logits/chosen": -2.2940893173217773,
"logits/rejected": -2.254329204559326,
"logps/chosen": -0.5970818400382996,
"logps/rejected": -0.48467540740966797,
"loss": 1.0208,
"nll_loss": 0.9316588640213013,
"rewards/accuracies": 0.4375,
"rewards/chosen": -0.059708189219236374,
"rewards/margins": -0.011240655556321144,
"rewards/rejected": -0.04846753552556038,
"step": 1000
},
{
"epoch": 0.5981640509327806,
"grad_norm": 9.5,
"learning_rate": 2.0805972756148643e-07,
"log_odds_chosen": -0.3093208074569702,
"log_odds_ratio": -0.9420243501663208,
"logits/chosen": -2.2883636951446533,
"logits/rejected": -2.275327682495117,
"logps/chosen": -0.6675941348075867,
"logps/rejected": -0.47907954454421997,
"loss": 1.0708,
"nll_loss": 1.0012794733047485,
"rewards/accuracies": 0.36250001192092896,
"rewards/chosen": -0.06675940752029419,
"rewards/margins": -0.01885146275162697,
"rewards/rejected": -0.047907955944538116,
"step": 1010
},
{
"epoch": 0.6040864672786497,
"grad_norm": 9.1875,
"learning_rate": 2.0297184355381432e-07,
"log_odds_chosen": -0.2639048993587494,
"log_odds_ratio": -0.89494389295578,
"logits/chosen": -2.304008722305298,
"logits/rejected": -2.265723705291748,
"logps/chosen": -0.5768560767173767,
"logps/rejected": -0.4624248445034027,
"loss": 1.0328,
"nll_loss": 0.9577334523200989,
"rewards/accuracies": 0.41874998807907104,
"rewards/chosen": -0.05768561363220215,
"rewards/margins": -0.011443129740655422,
"rewards/rejected": -0.04624248296022415,
"step": 1020
},
{
"epoch": 0.6100088836245188,
"grad_norm": 9.125,
"learning_rate": 1.9790407485342638e-07,
"log_odds_chosen": -0.3557616174221039,
"log_odds_ratio": -0.9650157690048218,
"logits/chosen": -2.327831268310547,
"logits/rejected": -2.2884087562561035,
"logps/chosen": -0.6429619193077087,
"logps/rejected": -0.4408210217952728,
"loss": 1.0091,
"nll_loss": 0.9397379755973816,
"rewards/accuracies": 0.42500001192092896,
"rewards/chosen": -0.06429620087146759,
"rewards/margins": -0.02021409198641777,
"rewards/rejected": -0.04408210515975952,
"step": 1030
},
{
"epoch": 0.6159312999703879,
"grad_norm": 10.75,
"learning_rate": 1.928585890920641e-07,
"log_odds_chosen": -0.1900234967470169,
"log_odds_ratio": -0.8621436953544617,
"logits/chosen": -2.2921512126922607,
"logits/rejected": -2.2576987743377686,
"logps/chosen": -0.5736020803451538,
"logps/rejected": -0.46828731894493103,
"loss": 1.0474,
"nll_loss": 0.9162224531173706,
"rewards/accuracies": 0.41874998807907104,
"rewards/chosen": -0.0573602095246315,
"rewards/margins": -0.010531473904848099,
"rewards/rejected": -0.046828728169202805,
"step": 1040
},
{
"epoch": 0.6218537163162571,
"grad_norm": 11.875,
"learning_rate": 1.8783754437040902e-07,
"log_odds_chosen": -0.26852238178253174,
"log_odds_ratio": -0.9126049280166626,
"logits/chosen": -2.275580883026123,
"logits/rejected": -2.2431647777557373,
"logps/chosen": -0.5689065456390381,
"logps/rejected": -0.44645556807518005,
"loss": 1.0095,
"nll_loss": 0.9046837091445923,
"rewards/accuracies": 0.39375001192092896,
"rewards/chosen": -0.05689065903425217,
"rewards/margins": -0.012245100922882557,
"rewards/rejected": -0.044645555317401886,
"step": 1050
},
{
"epoch": 0.6277761326621262,
"grad_norm": 9.25,
"learning_rate": 1.8284308833500118e-07,
"log_odds_chosen": -0.2125154435634613,
"log_odds_ratio": -0.8751262426376343,
"logits/chosen": -2.277667760848999,
"logits/rejected": -2.253131866455078,
"logps/chosen": -0.5812402963638306,
"logps/rejected": -0.47419658303260803,
"loss": 1.0476,
"nll_loss": 0.93915194272995,
"rewards/accuracies": 0.46875,
"rewards/chosen": -0.05812402814626694,
"rewards/margins": -0.010704366490244865,
"rewards/rejected": -0.04741965979337692,
"step": 1060
},
{
"epoch": 0.6336985490079953,
"grad_norm": 11.0625,
"learning_rate": 1.7787735725962756e-07,
"log_odds_chosen": -0.27183157205581665,
"log_odds_ratio": -0.9005556106567383,
"logits/chosen": -2.2851767539978027,
"logits/rejected": -2.2494091987609863,
"logps/chosen": -0.613685667514801,
"logps/rejected": -0.47953805327415466,
"loss": 1.0919,
"nll_loss": 0.9954058527946472,
"rewards/accuracies": 0.4000000059604645,
"rewards/chosen": -0.06136856600642204,
"rewards/margins": -0.013414761051535606,
"rewards/rejected": -0.047953806817531586,
"step": 1070
},
{
"epoch": 0.6396209653538644,
"grad_norm": 9.375,
"learning_rate": 1.7294247513157616e-07,
"log_odds_chosen": -0.22400331497192383,
"log_odds_ratio": -0.8672366142272949,
"logits/chosen": -2.3089351654052734,
"logits/rejected": -2.2596447467803955,
"logps/chosen": -0.5711158514022827,
"logps/rejected": -0.46820420026779175,
"loss": 1.0251,
"nll_loss": 0.960826575756073,
"rewards/accuracies": 0.4000000059604645,
"rewards/chosen": -0.05711158365011215,
"rewards/margins": -0.010291163809597492,
"rewards/rejected": -0.046820417046546936,
"step": 1080
},
{
"epoch": 0.6455433816997335,
"grad_norm": 10.0625,
"learning_rate": 1.6804055274314494e-07,
"log_odds_chosen": -0.19274529814720154,
"log_odds_ratio": -0.8532935380935669,
"logits/chosen": -2.270355224609375,
"logits/rejected": -2.248356342315674,
"logps/chosen": -0.5621662735939026,
"logps/rejected": -0.47271862626075745,
"loss": 1.0217,
"nll_loss": 0.9073405265808105,
"rewards/accuracies": 0.4124999940395355,
"rewards/chosen": -0.056216634809970856,
"rewards/margins": -0.008944764733314514,
"rewards/rejected": -0.04727186635136604,
"step": 1090
},
{
"epoch": 0.6514657980456026,
"grad_norm": 10.25,
"learning_rate": 1.6317368678879496e-07,
"log_odds_chosen": -0.20030847191810608,
"log_odds_ratio": -0.8486258387565613,
"logits/chosen": -2.3088138103485107,
"logits/rejected": -2.27048659324646,
"logps/chosen": -0.5797799825668335,
"logps/rejected": -0.4854944348335266,
"loss": 1.0725,
"nll_loss": 0.9621385335922241,
"rewards/accuracies": 0.40625,
"rewards/chosen": -0.05797800421714783,
"rewards/margins": -0.009428557008504868,
"rewards/rejected": -0.04854945093393326,
"step": 1100
},
{
"epoch": 0.6573882143914718,
"grad_norm": 12.125,
"learning_rate": 1.5834395896833281e-07,
"log_odds_chosen": -0.3109692335128784,
"log_odds_ratio": -0.9263485670089722,
"logits/chosen": -2.3202879428863525,
"logits/rejected": -2.265725612640381,
"logps/chosen": -0.6155823469161987,
"logps/rejected": -0.4582076966762543,
"loss": 1.0499,
"nll_loss": 0.9659247398376465,
"rewards/accuracies": 0.4124999940395355,
"rewards/chosen": -0.06155823543667793,
"rewards/margins": -0.015737462788820267,
"rewards/rejected": -0.045820772647857666,
"step": 1110
},
{
"epoch": 0.6633106307373409,
"grad_norm": 9.9375,
"learning_rate": 1.535534350965075e-07,
"log_odds_chosen": -0.25020501017570496,
"log_odds_ratio": -0.8859984278678894,
"logits/chosen": -2.3179831504821777,
"logits/rejected": -2.3054070472717285,
"logps/chosen": -0.5626355409622192,
"logps/rejected": -0.434339702129364,
"loss": 1.0081,
"nll_loss": 0.9209376573562622,
"rewards/accuracies": 0.41874998807907104,
"rewards/chosen": -0.056263554841279984,
"rewards/margins": -0.012829584069550037,
"rewards/rejected": -0.04343396797776222,
"step": 1120
},
{
"epoch": 0.66923304708321,
"grad_norm": 15.625,
"learning_rate": 1.4880416421940154e-07,
"log_odds_chosen": -0.23923833668231964,
"log_odds_ratio": -0.8853415250778198,
"logits/chosen": -2.26355242729187,
"logits/rejected": -2.240990161895752,
"logps/chosen": -0.6214331388473511,
"logps/rejected": -0.4842914938926697,
"loss": 1.1113,
"nll_loss": 1.0326354503631592,
"rewards/accuracies": 0.4124999940395355,
"rewards/chosen": -0.062143318355083466,
"rewards/margins": -0.01371416263282299,
"rewards/rejected": -0.048429153859615326,
"step": 1130
},
{
"epoch": 0.6751554634290791,
"grad_norm": 12.75,
"learning_rate": 1.4409817773799459e-07,
"log_odds_chosen": -0.23250596225261688,
"log_odds_ratio": -0.8853020668029785,
"logits/chosen": -2.288491725921631,
"logits/rejected": -2.24708890914917,
"logps/chosen": -0.6100078225135803,
"logps/rejected": -0.4815722405910492,
"loss": 1.0552,
"nll_loss": 0.9337055087089539,
"rewards/accuracies": 0.4124999940395355,
"rewards/chosen": -0.061000775545835495,
"rewards/margins": -0.01284355204552412,
"rewards/rejected": -0.0481572225689888,
"step": 1140
},
{
"epoch": 0.6810778797749482,
"grad_norm": 10.4375,
"learning_rate": 1.3943748853927385e-07,
"log_odds_chosen": -0.3103570342063904,
"log_odds_ratio": -0.9324914216995239,
"logits/chosen": -2.28434419631958,
"logits/rejected": -2.277893543243408,
"logps/chosen": -0.64482182264328,
"logps/rejected": -0.46989989280700684,
"loss": 1.035,
"nll_loss": 0.934810996055603,
"rewards/accuracies": 0.3687500059604645,
"rewards/chosen": -0.064482182264328,
"rewards/margins": -0.017492195591330528,
"rewards/rejected": -0.046989988535642624,
"step": 1150
},
{
"epoch": 0.6870002961208173,
"grad_norm": 11.375,
"learning_rate": 1.3482409013526436e-07,
"log_odds_chosen": -0.3323788642883301,
"log_odds_ratio": -0.9415693283081055,
"logits/chosen": -2.272247791290283,
"logits/rejected": -2.2672269344329834,
"logps/chosen": -0.6134747862815857,
"logps/rejected": -0.4571937918663025,
"loss": 1.0638,
"nll_loss": 0.9829813241958618,
"rewards/accuracies": 0.3812499940395355,
"rewards/chosen": -0.06134747713804245,
"rewards/margins": -0.01562810130417347,
"rewards/rejected": -0.04571938142180443,
"step": 1160
},
{
"epoch": 0.6929227124666865,
"grad_norm": 9.4375,
"learning_rate": 1.302599558103456e-07,
"log_odds_chosen": -0.23517660796642303,
"log_odds_ratio": -0.8992069363594055,
"logits/chosen": -2.3287193775177,
"logits/rejected": -2.293454885482788,
"logps/chosen": -0.6200941801071167,
"logps/rejected": -0.4878036081790924,
"loss": 1.0413,
"nll_loss": 0.9660770297050476,
"rewards/accuracies": 0.4312500059604645,
"rewards/chosen": -0.06200941652059555,
"rewards/margins": -0.013229051604866982,
"rewards/rejected": -0.04878035932779312,
"step": 1170
},
{
"epoch": 0.6988451288125556,
"grad_norm": 13.0625,
"learning_rate": 1.257470377772214e-07,
"log_odds_chosen": -0.27837398648262024,
"log_odds_ratio": -0.9113019704818726,
"logits/chosen": -2.3072619438171387,
"logits/rejected": -2.282047748565674,
"logps/chosen": -0.5952633023262024,
"logps/rejected": -0.4496152400970459,
"loss": 1.0661,
"nll_loss": 0.9518778920173645,
"rewards/accuracies": 0.39375001192092896,
"rewards/chosen": -0.05952633172273636,
"rewards/margins": -0.014564801938831806,
"rewards/rejected": -0.04496152698993683,
"step": 1180
},
{
"epoch": 0.7047675451584247,
"grad_norm": 13.5,
"learning_rate": 1.2128726634190046e-07,
"log_odds_chosen": -0.26337355375289917,
"log_odds_ratio": -0.8862990140914917,
"logits/chosen": -2.3180294036865234,
"logits/rejected": -2.274146556854248,
"logps/chosen": -0.5859608054161072,
"logps/rejected": -0.44980812072753906,
"loss": 1.0204,
"nll_loss": 0.91375333070755,
"rewards/accuracies": 0.375,
"rewards/chosen": -0.05859608203172684,
"rewards/margins": -0.01361527293920517,
"rewards/rejected": -0.044980812817811966,
"step": 1190
},
{
"epoch": 0.7106899615042938,
"grad_norm": 13.3125,
"learning_rate": 1.1688254907804992e-07,
"log_odds_chosen": -0.2645830512046814,
"log_odds_ratio": -0.9049927592277527,
"logits/chosen": -2.2710115909576416,
"logits/rejected": -2.2327637672424316,
"logps/chosen": -0.6210035085678101,
"logps/rejected": -0.48435431718826294,
"loss": 1.0683,
"nll_loss": 0.9852622747421265,
"rewards/accuracies": 0.38749998807907104,
"rewards/chosen": -0.06210034340620041,
"rewards/margins": -0.01366492174565792,
"rewards/rejected": -0.048435427248477936,
"step": 1200
},
{
"epoch": 0.7166123778501629,
"grad_norm": 9.1875,
"learning_rate": 1.1253477001106956e-07,
"log_odds_chosen": -0.18010739982128143,
"log_odds_ratio": -0.848807156085968,
"logits/chosen": -2.2503340244293213,
"logits/rejected": -2.214433431625366,
"logps/chosen": -0.5777139663696289,
"logps/rejected": -0.48649734258651733,
"loss": 1.0408,
"nll_loss": 0.9145431518554688,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -0.05777139216661453,
"rewards/margins": -0.009121658280491829,
"rewards/rejected": -0.04864973947405815,
"step": 1210
},
{
"epoch": 0.722534794196032,
"grad_norm": 14.1875,
"learning_rate": 1.0824578881224065e-07,
"log_odds_chosen": -0.14203877747058868,
"log_odds_ratio": -0.8198834657669067,
"logits/chosen": -2.323948621749878,
"logits/rejected": -2.3119778633117676,
"logps/chosen": -0.5389841794967651,
"logps/rejected": -0.4636968672275543,
"loss": 0.9852,
"nll_loss": 0.871512770652771,
"rewards/accuracies": 0.42500001192092896,
"rewards/chosen": -0.053898416459560394,
"rewards/margins": -0.007528733462095261,
"rewards/rejected": -0.046369682997465134,
"step": 1220
},
{
"epoch": 0.728457210541901,
"grad_norm": 10.25,
"learning_rate": 1.0401744000328918e-07,
"log_odds_chosen": -0.19983641803264618,
"log_odds_ratio": -0.8725547790527344,
"logits/chosen": -2.268932342529297,
"logits/rejected": -2.2664635181427,
"logps/chosen": -0.5955653786659241,
"logps/rejected": -0.4910568296909332,
"loss": 1.0167,
"nll_loss": 0.9245740175247192,
"rewards/accuracies": 0.4124999940395355,
"rewards/chosen": -0.059556543827056885,
"rewards/margins": -0.010450851172208786,
"rewards/rejected": -0.0491056926548481,
"step": 1230
},
{
"epoch": 0.7343796268877703,
"grad_norm": 11.25,
"learning_rate": 9.985153217170902e-08,
"log_odds_chosen": -0.27591392397880554,
"log_odds_ratio": -0.9048240780830383,
"logits/chosen": -2.3324825763702393,
"logits/rejected": -2.3199105262756348,
"logps/chosen": -0.613168478012085,
"logps/rejected": -0.47140389680862427,
"loss": 1.1081,
"nll_loss": 1.0194193124771118,
"rewards/accuracies": 0.40625,
"rewards/chosen": -0.061316847801208496,
"rewards/margins": -0.01417645812034607,
"rewards/rejected": -0.047140393406152725,
"step": 1240
},
{
"epoch": 0.7403020432336394,
"grad_norm": 12.25,
"learning_rate": 9.574984719717553e-08,
"log_odds_chosen": -0.24321213364601135,
"log_odds_ratio": -0.89483243227005,
"logits/chosen": -2.3112952709198,
"logits/rejected": -2.2951555252075195,
"logps/chosen": -0.5895348191261292,
"logps/rejected": -0.4669637680053711,
"loss": 1.0306,
"nll_loss": 0.9830119013786316,
"rewards/accuracies": 0.42500001192092896,
"rewards/chosen": -0.05895348638296127,
"rewards/margins": -0.012257112190127373,
"rewards/rejected": -0.04669637233018875,
"step": 1250
},
{
"epoch": 0.7462244595795084,
"grad_norm": 9.9375,
"learning_rate": 9.171413948938459e-08,
"log_odds_chosen": -0.2236686646938324,
"log_odds_ratio": -0.879412829875946,
"logits/chosen": -2.3061726093292236,
"logits/rejected": -2.254133701324463,
"logps/chosen": -0.6122428178787231,
"logps/rejected": -0.49692878127098083,
"loss": 1.0596,
"nll_loss": 0.9902396202087402,
"rewards/accuracies": 0.4375,
"rewards/chosen": -0.061224281787872314,
"rewards/margins": -0.011531401425600052,
"rewards/rejected": -0.04969288408756256,
"step": 1260
},
{
"epoch": 0.7521468759253775,
"grad_norm": 9.4375,
"learning_rate": 8.774613523764049e-08,
"log_odds_chosen": -0.26704955101013184,
"log_odds_ratio": -0.8915314674377441,
"logits/chosen": -2.2866809368133545,
"logits/rejected": -2.239720582962036,
"logps/chosen": -0.5904482007026672,
"logps/rejected": -0.4593755304813385,
"loss": 1.0287,
"nll_loss": 0.9099699854850769,
"rewards/accuracies": 0.39375001192092896,
"rewards/chosen": -0.059044819325208664,
"rewards/margins": -0.013107270002365112,
"rewards/rejected": -0.04593754559755325,
"step": 1270
},
{
"epoch": 0.7580692922712466,
"grad_norm": 11.125,
"learning_rate": 8.384753167251412e-08,
"log_odds_chosen": -0.2359321415424347,
"log_odds_ratio": -0.8834274411201477,
"logits/chosen": -2.241650104522705,
"logits/rejected": -2.2175180912017822,
"logps/chosen": -0.5696910619735718,
"logps/rejected": -0.4511106610298157,
"loss": 0.9877,
"nll_loss": 0.8763992190361023,
"rewards/accuracies": 0.4000000059604645,
"rewards/chosen": -0.05696910619735718,
"rewards/margins": -0.01185804232954979,
"rewards/rejected": -0.045111071318387985,
"step": 1280
},
{
"epoch": 0.7639917086171157,
"grad_norm": 10.1875,
"learning_rate": 8.001999633988942e-08,
"log_odds_chosen": -0.26344627141952515,
"log_odds_ratio": -0.8965330123901367,
"logits/chosen": -2.317347764968872,
"logits/rejected": -2.2693257331848145,
"logps/chosen": -0.5864616632461548,
"logps/rejected": -0.45855003595352173,
"loss": 0.9993,
"nll_loss": 0.9034452438354492,
"rewards/accuracies": 0.40625,
"rewards/chosen": -0.058646153658628464,
"rewards/margins": -0.012791156768798828,
"rewards/rejected": -0.04585500434041023,
"step": 1290
},
{
"epoch": 0.769914124962985,
"grad_norm": 10.4375,
"learning_rate": 7.62651663877042e-08,
"log_odds_chosen": -0.17867620289325714,
"log_odds_ratio": -0.8561042547225952,
"logits/chosen": -2.2582385540008545,
"logits/rejected": -2.232391357421875,
"logps/chosen": -0.5790480971336365,
"logps/rejected": -0.4850679337978363,
"loss": 1.0993,
"nll_loss": 0.9781789779663086,
"rewards/accuracies": 0.4625000059604645,
"rewards/chosen": -0.057904817163944244,
"rewards/margins": -0.009398018009960651,
"rewards/rejected": -0.04850679263472557,
"step": 1300
},
{
"epoch": 0.775836541308854,
"grad_norm": 9.0,
"learning_rate": 7.258464786569549e-08,
"log_odds_chosen": -0.2144562005996704,
"log_odds_ratio": -0.8685463070869446,
"logits/chosen": -2.322035551071167,
"logits/rejected": -2.2717068195343018,
"logps/chosen": -0.5770824551582336,
"logps/rejected": -0.47164034843444824,
"loss": 1.0633,
"nll_loss": 0.9638098478317261,
"rewards/accuracies": 0.4437499940395355,
"rewards/chosen": -0.057708241045475006,
"rewards/margins": -0.010544205084443092,
"rewards/rejected": -0.04716403782367706,
"step": 1310
},
{
"epoch": 0.7817589576547231,
"grad_norm": 12.375,
"learning_rate": 6.898001503844483e-08,
"log_odds_chosen": -0.3992167115211487,
"log_odds_ratio": -1.0115876197814941,
"logits/chosen": -2.3506951332092285,
"logits/rejected": -2.3084568977355957,
"logps/chosen": -0.7245315313339233,
"logps/rejected": -0.4761766493320465,
"loss": 1.0509,
"nll_loss": 1.0009998083114624,
"rewards/accuracies": 0.40625,
"rewards/chosen": -0.07245315611362457,
"rewards/margins": -0.0248354934155941,
"rewards/rejected": -0.04761766642332077,
"step": 1320
},
{
"epoch": 0.7876813740005922,
"grad_norm": 11.3125,
"learning_rate": 6.545280971202014e-08,
"log_odds_chosen": -0.17274455726146698,
"log_odds_ratio": -0.8463727831840515,
"logits/chosen": -2.310338020324707,
"logits/rejected": -2.2806801795959473,
"logps/chosen": -0.5623282194137573,
"logps/rejected": -0.46932634711265564,
"loss": 1.0128,
"nll_loss": 0.9555832147598267,
"rewards/accuracies": 0.4437499940395355,
"rewards/chosen": -0.05623283237218857,
"rewards/margins": -0.009300192818045616,
"rewards/rejected": -0.046932633966207504,
"step": 1330
},
{
"epoch": 0.7936037903464613,
"grad_norm": 11.0625,
"learning_rate": 6.200454057450022e-08,
"log_odds_chosen": -0.2566189169883728,
"log_odds_ratio": -0.8830870389938354,
"logits/chosen": -2.2640976905822754,
"logits/rejected": -2.2190680503845215,
"logps/chosen": -0.6031737327575684,
"logps/rejected": -0.4697316586971283,
"loss": 1.0756,
"nll_loss": 0.9159650802612305,
"rewards/accuracies": 0.42500001192092896,
"rewards/chosen": -0.060317374765872955,
"rewards/margins": -0.01334420870989561,
"rewards/rejected": -0.04697316139936447,
"step": 1340
},
{
"epoch": 0.7995262066923304,
"grad_norm": 12.0,
"learning_rate": 5.863668255066492e-08,
"log_odds_chosen": -0.2177290916442871,
"log_odds_ratio": -0.8585535287857056,
"logits/chosen": -2.262441396713257,
"logits/rejected": -2.231968402862549,
"logps/chosen": -0.5860260128974915,
"logps/rejected": -0.47981762886047363,
"loss": 1.0081,
"nll_loss": 0.9461213946342468,
"rewards/accuracies": 0.3687500059604645,
"rewards/chosen": -0.05860259383916855,
"rewards/margins": -0.010620838031172752,
"rewards/rejected": -0.047981761395931244,
"step": 1350
},
{
"epoch": 0.8054486230381996,
"grad_norm": 9.625,
"learning_rate": 5.53506761711274e-08,
"log_odds_chosen": -0.21258850395679474,
"log_odds_ratio": -0.8654868006706238,
"logits/chosen": -2.2940022945404053,
"logits/rejected": -2.264361619949341,
"logps/chosen": -0.5948741436004639,
"logps/rejected": -0.48127132654190063,
"loss": 1.0435,
"nll_loss": 1.0004308223724365,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -0.059487421065568924,
"rewards/margins": -0.0113602876663208,
"rewards/rejected": -0.04812713339924812,
"step": 1360
},
{
"epoch": 0.8113710393840687,
"grad_norm": 14.375,
"learning_rate": 5.2147926956177174e-08,
"log_odds_chosen": -0.3361436724662781,
"log_odds_ratio": -0.9543386697769165,
"logits/chosen": -2.2842912673950195,
"logits/rejected": -2.2753098011016846,
"logps/chosen": -0.6304486989974976,
"logps/rejected": -0.4559609293937683,
"loss": 1.0422,
"nll_loss": 0.9697739481925964,
"rewards/accuracies": 0.35624998807907104,
"rewards/chosen": -0.06304488331079483,
"rewards/margins": -0.0174487866461277,
"rewards/rejected": -0.04559609293937683,
"step": 1370
},
{
"epoch": 0.8172934557299378,
"grad_norm": 22.625,
"learning_rate": 4.902980481459834e-08,
"log_odds_chosen": -0.18400034308433533,
"log_odds_ratio": -0.8533352017402649,
"logits/chosen": -2.267984390258789,
"logits/rejected": -2.240002155303955,
"logps/chosen": -0.5833351016044617,
"logps/rejected": -0.4882822632789612,
"loss": 1.0013,
"nll_loss": 0.9279516935348511,
"rewards/accuracies": 0.40625,
"rewards/chosen": -0.058333516120910645,
"rewards/margins": -0.009505288675427437,
"rewards/rejected": -0.04882822558283806,
"step": 1380
},
{
"epoch": 0.8232158720758069,
"grad_norm": 11.8125,
"learning_rate": 4.5997643457719646e-08,
"log_odds_chosen": -0.2714422643184662,
"log_odds_ratio": -0.8982048034667969,
"logits/chosen": -2.2855401039123535,
"logits/rejected": -2.2796995639801025,
"logps/chosen": -0.5933629274368286,
"logps/rejected": -0.45899391174316406,
"loss": 0.9938,
"nll_loss": 0.9157652854919434,
"rewards/accuracies": 0.4124999940395355,
"rewards/chosen": -0.05933629721403122,
"rewards/margins": -0.01343690324574709,
"rewards/rejected": -0.045899391174316406,
"step": 1390
},
{
"epoch": 0.829138288421676,
"grad_norm": 11.125,
"learning_rate": 4.305273982894772e-08,
"log_odds_chosen": -0.24461349844932556,
"log_odds_ratio": -0.8896273374557495,
"logits/chosen": -2.3211405277252197,
"logits/rejected": -2.279554843902588,
"logps/chosen": -0.6189180612564087,
"logps/rejected": -0.4841720461845398,
"loss": 1.041,
"nll_loss": 0.9456349611282349,
"rewards/accuracies": 0.4375,
"rewards/chosen": -0.06189180538058281,
"rewards/margins": -0.013474604114890099,
"rewards/rejected": -0.04841720312833786,
"step": 1400
},
{
"epoch": 0.8350607047675451,
"grad_norm": 11.8125,
"learning_rate": 4.0196353549026786e-08,
"log_odds_chosen": -0.1991504579782486,
"log_odds_ratio": -0.8548718690872192,
"logits/chosen": -2.288534641265869,
"logits/rejected": -2.2532122135162354,
"logps/chosen": -0.5849851965904236,
"logps/rejected": -0.48299694061279297,
"loss": 1.0681,
"nll_loss": 1.0149555206298828,
"rewards/accuracies": 0.4375,
"rewards/chosen": -0.05849852040410042,
"rewards/margins": -0.010198831558227539,
"rewards/rejected": -0.04829969257116318,
"step": 1410
},
{
"epoch": 0.8409831211134142,
"grad_norm": 9.625,
"learning_rate": 3.742970637726181e-08,
"log_odds_chosen": -0.09389691054821014,
"log_odds_ratio": -0.8085994720458984,
"logits/chosen": -2.3118512630462646,
"logits/rejected": -2.2662172317504883,
"logps/chosen": -0.5374116897583008,
"logps/rejected": -0.4831947386264801,
"loss": 1.0166,
"nll_loss": 0.9142959713935852,
"rewards/accuracies": 0.46875,
"rewards/chosen": -0.05374116823077202,
"rewards/margins": -0.005421696230769157,
"rewards/rejected": -0.04831947013735771,
"step": 1420
},
{
"epoch": 0.8469055374592834,
"grad_norm": 13.0,
"learning_rate": 3.4753981688937284e-08,
"log_odds_chosen": -0.23033122718334198,
"log_odds_ratio": -0.8797691464424133,
"logits/chosen": -2.2840065956115723,
"logits/rejected": -2.2577414512634277,
"logps/chosen": -0.5791336297988892,
"logps/rejected": -0.46595969796180725,
"loss": 1.0562,
"nll_loss": 0.9663812518119812,
"rewards/accuracies": 0.40625,
"rewards/chosen": -0.05791335552930832,
"rewards/margins": -0.011317392811179161,
"rewards/rejected": -0.046595968306064606,
"step": 1430
},
{
"epoch": 0.8528279538051525,
"grad_norm": 16.5,
"learning_rate": 3.217032396915265e-08,
"log_odds_chosen": -0.28934675455093384,
"log_odds_ratio": -0.925268292427063,
"logits/chosen": -2.294243335723877,
"logits/rejected": -2.26255464553833,
"logps/chosen": -0.6588538885116577,
"logps/rejected": -0.4810880720615387,
"loss": 1.0625,
"nll_loss": 0.9974772334098816,
"rewards/accuracies": 0.4312500059604645,
"rewards/chosen": -0.06588538736104965,
"rewards/margins": -0.01777658611536026,
"rewards/rejected": -0.04810880497097969,
"step": 1440
},
{
"epoch": 0.8587503701510216,
"grad_norm": 13.5625,
"learning_rate": 2.9679838323293404e-08,
"log_odds_chosen": -0.30326423048973083,
"log_odds_ratio": -0.9402921795845032,
"logits/chosen": -2.285403251647949,
"logits/rejected": -2.2570960521698,
"logps/chosen": -0.6499019265174866,
"logps/rejected": -0.485442578792572,
"loss": 1.0074,
"nll_loss": 0.9335571527481079,
"rewards/accuracies": 0.39375001192092896,
"rewards/chosen": -0.06499020010232925,
"rewards/margins": -0.016445934772491455,
"rewards/rejected": -0.0485442578792572,
"step": 1450
},
{
"epoch": 0.8646727864968907,
"grad_norm": 11.875,
"learning_rate": 2.728359000434488e-08,
"log_odds_chosen": -0.25829392671585083,
"log_odds_ratio": -0.8930153846740723,
"logits/chosen": -2.316516399383545,
"logits/rejected": -2.283731460571289,
"logps/chosen": -0.5545108318328857,
"logps/rejected": -0.4498627185821533,
"loss": 1.048,
"nll_loss": 0.9053192138671875,
"rewards/accuracies": 0.4000000059604645,
"rewards/chosen": -0.055451083928346634,
"rewards/margins": -0.010464807972311974,
"rewards/rejected": -0.04498627781867981,
"step": 1460
},
{
"epoch": 0.8705952028427598,
"grad_norm": 10.75,
"learning_rate": 2.498260395725302e-08,
"log_odds_chosen": -0.25851163268089294,
"log_odds_ratio": -0.8944876790046692,
"logits/chosen": -2.281040906906128,
"logits/rejected": -2.26870059967041,
"logps/chosen": -0.6054626703262329,
"logps/rejected": -0.48731446266174316,
"loss": 1.0483,
"nll_loss": 0.9450349807739258,
"rewards/accuracies": 0.4000000059604645,
"rewards/chosen": -0.060546260327100754,
"rewards/margins": -0.011814813129603863,
"rewards/rejected": -0.048731446266174316,
"step": 1470
},
{
"epoch": 0.8765176191886289,
"grad_norm": 10.3125,
"learning_rate": 2.2777864380525426e-08,
"log_odds_chosen": -0.20190663635730743,
"log_odds_ratio": -0.8694218397140503,
"logits/chosen": -2.288378953933716,
"logits/rejected": -2.2683846950531006,
"logps/chosen": -0.5955201387405396,
"logps/rejected": -0.4755355417728424,
"loss": 1.0093,
"nll_loss": 0.8863022923469543,
"rewards/accuracies": 0.4625000059604645,
"rewards/chosen": -0.059552013874053955,
"rewards/margins": -0.01199845876544714,
"rewards/rejected": -0.04755355045199394,
"step": 1480
},
{
"epoch": 0.8824400355344981,
"grad_norm": 11.125,
"learning_rate": 2.0670314305261423e-08,
"log_odds_chosen": -0.21881277859210968,
"log_odds_ratio": -0.8681440353393555,
"logits/chosen": -2.3011648654937744,
"logits/rejected": -2.2739992141723633,
"logps/chosen": -0.5647403597831726,
"logps/rejected": -0.46096763014793396,
"loss": 0.9903,
"nll_loss": 0.9155017733573914,
"rewards/accuracies": 0.4437499940395355,
"rewards/chosen": -0.05647404119372368,
"rewards/margins": -0.010377271100878716,
"rewards/rejected": -0.046096768230199814,
"step": 1490
},
{
"epoch": 0.8883624518803672,
"grad_norm": 15.4375,
"learning_rate": 1.866085519178995e-08,
"log_odds_chosen": -0.21367135643959045,
"log_odds_ratio": -0.8818863034248352,
"logits/chosen": -2.283823013305664,
"logits/rejected": -2.262935161590576,
"logps/chosen": -0.6190184354782104,
"logps/rejected": -0.518616259098053,
"loss": 1.0823,
"nll_loss": 1.0067201852798462,
"rewards/accuracies": 0.42500001192092896,
"rewards/chosen": -0.06190184876322746,
"rewards/margins": -0.010040223598480225,
"rewards/rejected": -0.05186162516474724,
"step": 1500
},
{
"epoch": 0.8942848682262363,
"grad_norm": 10.0625,
"learning_rate": 1.675034654408894e-08,
"log_odds_chosen": -0.2969823479652405,
"log_odds_ratio": -0.9049533605575562,
"logits/chosen": -2.3211445808410645,
"logits/rejected": -2.293593168258667,
"logps/chosen": -0.5710967779159546,
"logps/rejected": -0.44247856736183167,
"loss": 1.0186,
"nll_loss": 0.9544011354446411,
"rewards/accuracies": 0.4000000059604645,
"rewards/chosen": -0.057109683752059937,
"rewards/margins": -0.012861823663115501,
"rewards/rejected": -0.044247858226299286,
"step": 1510
},
{
"epoch": 0.9002072845721054,
"grad_norm": 11.8125,
"learning_rate": 1.4939605542150595e-08,
"log_odds_chosen": -0.20066659152507782,
"log_odds_ratio": -0.880477249622345,
"logits/chosen": -2.306097984313965,
"logits/rejected": -2.2691056728363037,
"logps/chosen": -0.630598247051239,
"logps/rejected": -0.5075589418411255,
"loss": 1.0954,
"nll_loss": 0.9971143007278442,
"rewards/accuracies": 0.48124998807907104,
"rewards/chosen": -0.06305982172489166,
"rewards/margins": -0.01230393536388874,
"rewards/rejected": -0.05075589567422867,
"step": 1520
},
{
"epoch": 0.9061297009179745,
"grad_norm": 11.0,
"learning_rate": 1.3229406692449791e-08,
"log_odds_chosen": -0.14233054220676422,
"log_odds_ratio": -0.8427847623825073,
"logits/chosen": -2.2426674365997314,
"logits/rejected": -2.2174274921417236,
"logps/chosen": -0.5756295919418335,
"logps/rejected": -0.4939804971218109,
"loss": 1.0628,
"nll_loss": 0.9542373418807983,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.05756296589970589,
"rewards/margins": -0.008164914324879646,
"rewards/rejected": -0.04939804598689079,
"step": 1530
},
{
"epoch": 0.9120521172638436,
"grad_norm": 10.6875,
"learning_rate": 1.162048149666503e-08,
"log_odds_chosen": -0.209940105676651,
"log_odds_ratio": -0.8944632411003113,
"logits/chosen": -2.2973880767822266,
"logits/rejected": -2.255645990371704,
"logps/chosen": -0.6111503839492798,
"logps/rejected": -0.49909108877182007,
"loss": 1.0425,
"nll_loss": 0.9487366676330566,
"rewards/accuracies": 0.42500001192092896,
"rewards/chosen": -0.061115045100450516,
"rewards/margins": -0.011205929331481457,
"rewards/rejected": -0.049909114837646484,
"step": 1540
},
{
"epoch": 0.9179745336097128,
"grad_norm": 9.8125,
"learning_rate": 1.0113518138794047e-08,
"log_odds_chosen": -0.25878992676734924,
"log_odds_ratio": -0.899122416973114,
"logits/chosen": -2.2492969036102295,
"logits/rejected": -2.2273764610290527,
"logps/chosen": -0.5986303091049194,
"logps/rejected": -0.47187572717666626,
"loss": 1.0612,
"nll_loss": 0.9412651062011719,
"rewards/accuracies": 0.40625,
"rewards/chosen": -0.059863023459911346,
"rewards/margins": -0.012675456702709198,
"rewards/rejected": -0.04718757048249245,
"step": 1550
},
{
"epoch": 0.9238969499555819,
"grad_norm": 10.375,
"learning_rate": 8.709161190797565e-09,
"log_odds_chosen": -0.14045746624469757,
"log_odds_ratio": -0.8357732892036438,
"logits/chosen": -2.3169333934783936,
"logits/rejected": -2.2879374027252197,
"logps/chosen": -0.5618830919265747,
"logps/rejected": -0.4810701012611389,
"loss": 1.031,
"nll_loss": 0.9191296696662903,
"rewards/accuracies": 0.4625000059604645,
"rewards/chosen": -0.05618830770254135,
"rewards/margins": -0.008081300184130669,
"rewards/rejected": -0.04810700938105583,
"step": 1560
},
{
"epoch": 0.929819366301451,
"grad_norm": 15.3125,
"learning_rate": 7.408011336897141e-09,
"log_odds_chosen": -0.323073148727417,
"log_odds_ratio": -0.9851021766662598,
"logits/chosen": -2.3374483585357666,
"logits/rejected": -2.3257203102111816,
"logps/chosen": -0.7131141424179077,
"logps/rejected": -0.4996616244316101,
"loss": 1.0776,
"nll_loss": 1.01613450050354,
"rewards/accuracies": 0.4375,
"rewards/chosen": -0.07131141424179077,
"rewards/margins": -0.02134525403380394,
"rewards/rejected": -0.04996616020798683,
"step": 1570
},
{
"epoch": 0.9357417826473201,
"grad_norm": 9.5625,
"learning_rate": 6.210625116645135e-09,
"log_odds_chosen": -0.32444125413894653,
"log_odds_ratio": -0.9329547882080078,
"logits/chosen": -2.342031955718994,
"logits/rejected": -2.3026318550109863,
"logps/chosen": -0.6195459365844727,
"logps/rejected": -0.45777615904808044,
"loss": 1.0033,
"nll_loss": 0.8760407567024231,
"rewards/accuracies": 0.38749998807907104,
"rewards/chosen": -0.061954595148563385,
"rewards/margins": -0.0161769799888134,
"rewards/rejected": -0.04577761888504028,
"step": 1580
},
{
"epoch": 0.9416641989931892,
"grad_norm": 10.625,
"learning_rate": 5.117514686876378e-09,
"log_odds_chosen": -0.20949645340442657,
"log_odds_ratio": -0.8756229281425476,
"logits/chosen": -2.30104398727417,
"logits/rejected": -2.2671799659729004,
"logps/chosen": -0.5797516703605652,
"logps/rejected": -0.4763546586036682,
"loss": 1.0455,
"nll_loss": 0.9568120837211609,
"rewards/accuracies": 0.4312500059604645,
"rewards/chosen": -0.057975172996520996,
"rewards/margins": -0.010339704342186451,
"rewards/rejected": -0.04763546586036682,
"step": 1590
},
{
"epoch": 0.9475866153390583,
"grad_norm": 10.0,
"learning_rate": 4.1291476026441565e-09,
"log_odds_chosen": -0.14046767354011536,
"log_odds_ratio": -0.8268812894821167,
"logits/chosen": -2.2659006118774414,
"logits/rejected": -2.245576858520508,
"logps/chosen": -0.5697029829025269,
"logps/rejected": -0.4846652150154114,
"loss": 0.9915,
"nll_loss": 0.8766274452209473,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.056970298290252686,
"rewards/margins": -0.00850378442555666,
"rewards/rejected": -0.0484665185213089,
"step": 1600
},
{
"epoch": 0.9535090316849274,
"grad_norm": 9.8125,
"learning_rate": 3.2459466172331253e-09,
"log_odds_chosen": -0.25180304050445557,
"log_odds_ratio": -0.9306501150131226,
"logits/chosen": -2.274780035018921,
"logits/rejected": -2.255272626876831,
"logps/chosen": -0.6529628038406372,
"logps/rejected": -0.48409169912338257,
"loss": 1.0873,
"nll_loss": 0.9862693548202515,
"rewards/accuracies": 0.40625,
"rewards/chosen": -0.06529629230499268,
"rewards/margins": -0.016887117177248,
"rewards/rejected": -0.048409171402454376,
"step": 1610
},
{
"epoch": 0.9594314480307966,
"grad_norm": 13.125,
"learning_rate": 2.4682895013354854e-09,
"log_odds_chosen": -0.230398491024971,
"log_odds_ratio": -0.8930587768554688,
"logits/chosen": -2.2783544063568115,
"logits/rejected": -2.2587246894836426,
"logps/chosen": -0.6128379702568054,
"logps/rejected": -0.46944743394851685,
"loss": 1.0177,
"nll_loss": 0.9610903859138489,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.06128380447626114,
"rewards/margins": -0.014339059591293335,
"rewards/rejected": -0.0469447486102581,
"step": 1620
},
{
"epoch": 0.9653538643766657,
"grad_norm": 25.0,
"learning_rate": 1.7965088814675677e-09,
"log_odds_chosen": -0.3568347692489624,
"log_odds_ratio": -0.9671844244003296,
"logits/chosen": -2.2762491703033447,
"logits/rejected": -2.2589855194091797,
"logps/chosen": -0.6517866253852844,
"logps/rejected": -0.4649588167667389,
"loss": 1.037,
"nll_loss": 0.9754410982131958,
"rewards/accuracies": 0.34375,
"rewards/chosen": -0.06517866253852844,
"rewards/margins": -0.018682777881622314,
"rewards/rejected": -0.04649588465690613,
"step": 1630
},
{
"epoch": 0.9712762807225348,
"grad_norm": 10.8125,
"learning_rate": 1.2308920976958348e-09,
"log_odds_chosen": -0.1785418540239334,
"log_odds_ratio": -0.8583124876022339,
"logits/chosen": -2.2591869831085205,
"logits/rejected": -2.2376914024353027,
"logps/chosen": -0.5955510139465332,
"logps/rejected": -0.4926881790161133,
"loss": 1.0026,
"nll_loss": 0.9147430658340454,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.05955510213971138,
"rewards/margins": -0.010286283679306507,
"rewards/rejected": -0.04926881566643715,
"step": 1640
},
{
"epoch": 0.9771986970684039,
"grad_norm": 11.9375,
"learning_rate": 7.716810807330276e-10,
"log_odds_chosen": -0.30667099356651306,
"log_odds_ratio": -0.9143903851509094,
"logits/chosen": -2.2759385108947754,
"logits/rejected": -2.2378878593444824,
"logps/chosen": -0.6037041544914246,
"logps/rejected": -0.45009493827819824,
"loss": 1.0344,
"nll_loss": 0.93921959400177,
"rewards/accuracies": 0.35624998807907104,
"rewards/chosen": -0.06037042289972305,
"rewards/margins": -0.015360923483967781,
"rewards/rejected": -0.045009493827819824,
"step": 1650
},
{
"epoch": 0.983121113414273,
"grad_norm": 17.25,
"learning_rate": 4.190722484575804e-10,
"log_odds_chosen": -0.24070534110069275,
"log_odds_ratio": -0.9141713976860046,
"logits/chosen": -2.285658597946167,
"logits/rejected": -2.2573189735412598,
"logps/chosen": -0.6545957326889038,
"logps/rejected": -0.4952670931816101,
"loss": 1.0545,
"nll_loss": 0.9895190000534058,
"rewards/accuracies": 0.4437499940395355,
"rewards/chosen": -0.06545957177877426,
"rewards/margins": -0.01593286357820034,
"rewards/rejected": -0.04952671006321907,
"step": 1660
},
{
"epoch": 0.9890435297601421,
"grad_norm": 9.1875,
"learning_rate": 1.732164218998522e-10,
"log_odds_chosen": -0.2650103271007538,
"log_odds_ratio": -0.8960719108581543,
"logits/chosen": -2.2581698894500732,
"logits/rejected": -2.2162814140319824,
"logps/chosen": -0.6056646704673767,
"logps/rejected": -0.4769059717655182,
"loss": 1.0107,
"nll_loss": 0.9156764149665833,
"rewards/accuracies": 0.39375001192092896,
"rewards/chosen": -0.06056647375226021,
"rewards/margins": -0.0128758754581213,
"rewards/rejected": -0.04769059270620346,
"step": 1670
},
{
"epoch": 0.9949659461060113,
"grad_norm": 12.0,
"learning_rate": 3.4218760731730136e-11,
"log_odds_chosen": -0.21042411029338837,
"log_odds_ratio": -0.8711256980895996,
"logits/chosen": -2.333160638809204,
"logits/rejected": -2.2931103706359863,
"logps/chosen": -0.5873175859451294,
"logps/rejected": -0.47885292768478394,
"loss": 1.066,
"nll_loss": 0.9840106964111328,
"rewards/accuracies": 0.40625,
"rewards/chosen": -0.05873175337910652,
"rewards/margins": -0.010846461169421673,
"rewards/rejected": -0.047885291278362274,
"step": 1680
},
{
"epoch": 0.9997038791827065,
"step": 1688,
"total_flos": 0.0,
"train_loss": 1.076995034918401,
"train_runtime": 25716.0251,
"train_samples_per_second": 2.101,
"train_steps_per_second": 0.066
}
],
"logging_steps": 10,
"max_steps": 1688,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}