{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9997038791827065, "eval_steps": 500, "global_step": 1688, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.005922416345869114, "grad_norm": 26.875, "learning_rate": 2.9585798816568044e-08, "log_odds_chosen": -0.4994420111179352, "log_odds_ratio": -1.0620524883270264, "logits/chosen": -2.227687358856201, "logits/rejected": -2.213762044906616, "logps/chosen": -0.7160366773605347, "logps/rejected": -0.47193747758865356, "loss": 1.3693, "nll_loss": 1.2856990098953247, "rewards/accuracies": 0.30000001192092896, "rewards/chosen": -0.0716036707162857, "rewards/margins": -0.02440992370247841, "rewards/rejected": -0.047193750739097595, "step": 10 }, { "epoch": 0.011844832691738229, "grad_norm": 26.5, "learning_rate": 5.917159763313609e-08, "log_odds_chosen": -0.6077697277069092, "log_odds_ratio": -1.154677152633667, "logits/chosen": -2.1866495609283447, "logits/rejected": -2.1631338596343994, "logps/chosen": -0.8245598077774048, "logps/rejected": -0.4715619683265686, "loss": 1.3378, "nll_loss": 1.228305459022522, "rewards/accuracies": 0.30000001192092896, "rewards/chosen": -0.08245597779750824, "rewards/margins": -0.03529978543519974, "rewards/rejected": -0.0471561960875988, "step": 20 }, { "epoch": 0.017767249037607343, "grad_norm": 29.125, "learning_rate": 8.875739644970414e-08, "log_odds_chosen": -0.5950562357902527, "log_odds_ratio": -1.171638011932373, "logits/chosen": -2.152902126312256, "logits/rejected": -2.1443581581115723, "logps/chosen": -0.854525089263916, "logps/rejected": -0.49298763275146484, "loss": 1.3488, "nll_loss": 1.3134263753890991, "rewards/accuracies": 0.3062500059604645, "rewards/chosen": -0.08545249700546265, "rewards/margins": -0.03615374490618706, "rewards/rejected": -0.049298763275146484, "step": 30 }, { "epoch": 0.023689665383476458, "grad_norm": 31.25, "learning_rate": 1.1834319526627217e-07, "log_odds_chosen": -0.5344940423965454, "log_odds_ratio": -1.0923480987548828, "logits/chosen": -2.219038486480713, "logits/rejected": -2.2063724994659424, "logps/chosen": -0.7574710845947266, "logps/rejected": -0.4638025760650635, "loss": 1.3817, "nll_loss": 1.2359822988510132, "rewards/accuracies": 0.33125001192092896, "rewards/chosen": -0.07574710994958878, "rewards/margins": -0.02936685085296631, "rewards/rejected": -0.04638025909662247, "step": 40 }, { "epoch": 0.029612081729345572, "grad_norm": 28.0, "learning_rate": 1.4792899408284022e-07, "log_odds_chosen": -0.4542032778263092, "log_odds_ratio": -1.0256363153457642, "logits/chosen": -2.1617987155914307, "logits/rejected": -2.146223545074463, "logps/chosen": -0.7006078958511353, "logps/rejected": -0.47175368666648865, "loss": 1.3127, "nll_loss": 1.2409818172454834, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": -0.07006079703569412, "rewards/margins": -0.022885426878929138, "rewards/rejected": -0.047175366431474686, "step": 50 }, { "epoch": 0.035534498075214686, "grad_norm": 23.125, "learning_rate": 1.7751479289940827e-07, "log_odds_chosen": -0.6588231921195984, "log_odds_ratio": -1.230991005897522, "logits/chosen": -2.2183756828308105, "logits/rejected": -2.187129259109497, "logps/chosen": -0.8897625207901001, "logps/rejected": -0.4612082540988922, "loss": 1.3568, "nll_loss": 1.2310936450958252, "rewards/accuracies": 0.36250001192092896, "rewards/chosen": -0.08897626399993896, "rewards/margins": -0.04285542666912079, "rewards/rejected": -0.04612082242965698, "step": 60 }, { "epoch": 0.041456914421083804, "grad_norm": 30.75, "learning_rate": 2.0710059171597633e-07, "log_odds_chosen": -0.5367478132247925, "log_odds_ratio": -1.1100060939788818, "logits/chosen": -2.232348918914795, "logits/rejected": -2.1998302936553955, "logps/chosen": -0.7972711324691772, "logps/rejected": -0.4634431302547455, "loss": 1.3614, "nll_loss": 1.2567493915557861, "rewards/accuracies": 0.375, "rewards/chosen": -0.07972709834575653, "rewards/margins": -0.03338279575109482, "rewards/rejected": -0.04634431377053261, "step": 70 }, { "epoch": 0.047379330766952915, "grad_norm": 28.875, "learning_rate": 2.3668639053254435e-07, "log_odds_chosen": -0.5755403637886047, "log_odds_ratio": -1.156178593635559, "logits/chosen": -2.197105884552002, "logits/rejected": -2.186234474182129, "logps/chosen": -0.7956789135932922, "logps/rejected": -0.4599471688270569, "loss": 1.3327, "nll_loss": 1.219543695449829, "rewards/accuracies": 0.3687500059604645, "rewards/chosen": -0.07956788688898087, "rewards/margins": -0.033573172986507416, "rewards/rejected": -0.04599471390247345, "step": 80 }, { "epoch": 0.05330174711282203, "grad_norm": 25.25, "learning_rate": 2.662721893491124e-07, "log_odds_chosen": -0.5014861226081848, "log_odds_ratio": -1.0729024410247803, "logits/chosen": -2.1807546615600586, "logits/rejected": -2.1571853160858154, "logps/chosen": -0.7198914289474487, "logps/rejected": -0.46573418378829956, "loss": 1.3113, "nll_loss": 1.224487066268921, "rewards/accuracies": 0.3125, "rewards/chosen": -0.07198914140462875, "rewards/margins": -0.025415724143385887, "rewards/rejected": -0.046573419123888016, "step": 90 }, { "epoch": 0.059224163458691144, "grad_norm": 25.125, "learning_rate": 2.9585798816568045e-07, "log_odds_chosen": -0.4174951910972595, "log_odds_ratio": -0.9966305494308472, "logits/chosen": -2.2450003623962402, "logits/rejected": -2.199430465698242, "logps/chosen": -0.6903594732284546, "logps/rejected": -0.4920008182525635, "loss": 1.2864, "nll_loss": 1.2207610607147217, "rewards/accuracies": 0.33125001192092896, "rewards/chosen": -0.06903595477342606, "rewards/margins": -0.01983586512506008, "rewards/rejected": -0.04920008033514023, "step": 100 }, { "epoch": 0.06514657980456026, "grad_norm": 19.375, "learning_rate": 3.254437869822485e-07, "log_odds_chosen": -0.4817837178707123, "log_odds_ratio": -1.0484408140182495, "logits/chosen": -2.195328950881958, "logits/rejected": -2.172029972076416, "logps/chosen": -0.7407166361808777, "logps/rejected": -0.4809334874153137, "loss": 1.2292, "nll_loss": 1.1185578107833862, "rewards/accuracies": 0.3125, "rewards/chosen": -0.07407166808843613, "rewards/margins": -0.025978317484259605, "rewards/rejected": -0.04809335619211197, "step": 110 }, { "epoch": 0.07106899615042937, "grad_norm": 26.25, "learning_rate": 3.5502958579881655e-07, "log_odds_chosen": -0.5086492300033569, "log_odds_ratio": -1.073943018913269, "logits/chosen": -2.2213022708892822, "logits/rejected": -2.210648536682129, "logps/chosen": -0.7544690370559692, "logps/rejected": -0.47401171922683716, "loss": 1.2703, "nll_loss": 1.1549344062805176, "rewards/accuracies": 0.3375000059604645, "rewards/chosen": -0.07544689625501633, "rewards/margins": -0.02804572507739067, "rewards/rejected": -0.04740116745233536, "step": 120 }, { "epoch": 0.07699141249629848, "grad_norm": 121.5, "learning_rate": 3.8461538461538463e-07, "log_odds_chosen": -0.6346783638000488, "log_odds_ratio": -1.208389401435852, "logits/chosen": -2.205939292907715, "logits/rejected": -2.1982388496398926, "logps/chosen": -0.8659466505050659, "logps/rejected": -0.4501543939113617, "loss": 1.3049, "nll_loss": 1.1621254682540894, "rewards/accuracies": 0.34375, "rewards/chosen": -0.08659467846155167, "rewards/margins": -0.0415792390704155, "rewards/rejected": -0.04501544311642647, "step": 130 }, { "epoch": 0.08291382884216761, "grad_norm": 18.625, "learning_rate": 4.1420118343195265e-07, "log_odds_chosen": -0.4532869756221771, "log_odds_ratio": -1.0120022296905518, "logits/chosen": -2.2359938621520996, "logits/rejected": -2.2115871906280518, "logps/chosen": -0.6659095287322998, "logps/rejected": -0.4467584490776062, "loss": 1.2027, "nll_loss": 1.0797432661056519, "rewards/accuracies": 0.33125001192092896, "rewards/chosen": -0.06659095734357834, "rewards/margins": -0.02191510982811451, "rewards/rejected": -0.04467584565281868, "step": 140 }, { "epoch": 0.08883624518803672, "grad_norm": 29.75, "learning_rate": 4.437869822485207e-07, "log_odds_chosen": -0.46737051010131836, "log_odds_ratio": -1.0146253108978271, "logits/chosen": -2.16318941116333, "logits/rejected": -2.1556496620178223, "logps/chosen": -0.7067540287971497, "logps/rejected": -0.47525158524513245, "loss": 1.217, "nll_loss": 1.1824976205825806, "rewards/accuracies": 0.29374998807907104, "rewards/chosen": -0.07067539542913437, "rewards/margins": -0.023150241002440453, "rewards/rejected": -0.047525160014629364, "step": 150 }, { "epoch": 0.09475866153390583, "grad_norm": 20.75, "learning_rate": 4.733727810650887e-07, "log_odds_chosen": -0.31778836250305176, "log_odds_ratio": -0.9325827360153198, "logits/chosen": -2.2458879947662354, "logits/rejected": -2.2277491092681885, "logps/chosen": -0.6050869822502136, "logps/rejected": -0.4580734372138977, "loss": 1.2157, "nll_loss": 1.0979220867156982, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.06050870940089226, "rewards/margins": -0.014701364561915398, "rewards/rejected": -0.04580734297633171, "step": 160 }, { "epoch": 0.10068107787977496, "grad_norm": 14.3125, "learning_rate": 4.999994653198566e-07, "log_odds_chosen": -0.44623684883117676, "log_odds_ratio": -1.0507714748382568, "logits/chosen": -2.273740530014038, "logits/rejected": -2.248004198074341, "logps/chosen": -0.744641900062561, "logps/rejected": -0.4939740300178528, "loss": 1.2442, "nll_loss": 1.0892422199249268, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": -0.07446418702602386, "rewards/margins": -0.0250667966902256, "rewards/rejected": -0.04939739406108856, "step": 170 }, { "epoch": 0.10660349422564407, "grad_norm": 12.5625, "learning_rate": 4.999353064699471e-07, "log_odds_chosen": -0.5144436955451965, "log_odds_ratio": -1.1169707775115967, "logits/chosen": -2.2361178398132324, "logits/rejected": -2.2026758193969727, "logps/chosen": -0.8099610209465027, "logps/rejected": -0.49819788336753845, "loss": 1.1022, "nll_loss": 1.0261476039886475, "rewards/accuracies": 0.33125001192092896, "rewards/chosen": -0.08099609613418579, "rewards/margins": -0.031176313757896423, "rewards/rejected": -0.049819789826869965, "step": 180 }, { "epoch": 0.11252591057151318, "grad_norm": 12.875, "learning_rate": 4.99764243036258e-07, "log_odds_chosen": -0.4125841557979584, "log_odds_ratio": -0.991108775138855, "logits/chosen": -2.268022298812866, "logits/rejected": -2.240299701690674, "logps/chosen": -0.6463659405708313, "logps/rejected": -0.4517286717891693, "loss": 1.1318, "nll_loss": 1.0371661186218262, "rewards/accuracies": 0.35624998807907104, "rewards/chosen": -0.06463660299777985, "rewards/margins": -0.01946372725069523, "rewards/rejected": -0.04517286270856857, "step": 190 }, { "epoch": 0.11844832691738229, "grad_norm": 12.0, "learning_rate": 4.994863481875841e-07, "log_odds_chosen": -0.38528627157211304, "log_odds_ratio": -0.9595619440078735, "logits/chosen": -2.217349052429199, "logits/rejected": -2.1852166652679443, "logps/chosen": -0.6334083676338196, "logps/rejected": -0.4435149133205414, "loss": 1.1246, "nll_loss": 0.9835959672927856, "rewards/accuracies": 0.30000001192092896, "rewards/chosen": -0.06334083527326584, "rewards/margins": -0.01898934319615364, "rewards/rejected": -0.044351495802402496, "step": 200 }, { "epoch": 0.12437074326325141, "grad_norm": 11.5, "learning_rate": 4.991017407876165e-07, "log_odds_chosen": -0.429326593875885, "log_odds_ratio": -1.002436876296997, "logits/chosen": -2.224944591522217, "logits/rejected": -2.1807491779327393, "logps/chosen": -0.7087312936782837, "logps/rejected": -0.49742716550827026, "loss": 1.0953, "nll_loss": 1.0195242166519165, "rewards/accuracies": 0.3375000059604645, "rewards/chosen": -0.07087312638759613, "rewards/margins": -0.021130409091711044, "rewards/rejected": -0.049742721021175385, "step": 210 }, { "epoch": 0.13029315960912052, "grad_norm": 10.6875, "learning_rate": 4.98610585344102e-07, "log_odds_chosen": -0.2424849271774292, "log_odds_ratio": -0.9048135876655579, "logits/chosen": -2.2507550716400146, "logits/rejected": -2.217257499694824, "logps/chosen": -0.6068475246429443, "logps/rejected": -0.4904823899269104, "loss": 1.1278, "nll_loss": 1.0603684186935425, "rewards/accuracies": 0.34375, "rewards/chosen": -0.06068475916981697, "rewards/margins": -0.011636516079306602, "rewards/rejected": -0.04904823377728462, "step": 220 }, { "epoch": 0.13621557595498965, "grad_norm": 9.875, "learning_rate": 4.980130919384768e-07, "log_odds_chosen": -0.5562174916267395, "log_odds_ratio": -1.0973405838012695, "logits/chosen": -2.246185779571533, "logits/rejected": -2.2379026412963867, "logps/chosen": -0.7477759122848511, "logps/rejected": -0.4505345821380615, "loss": 1.1333, "nll_loss": 1.0181388854980469, "rewards/accuracies": 0.28125, "rewards/chosen": -0.07477758824825287, "rewards/margins": -0.029724130406975746, "rewards/rejected": -0.04505345970392227, "step": 230 }, { "epoch": 0.14213799230085875, "grad_norm": 11.5625, "learning_rate": 4.973095161360105e-07, "log_odds_chosen": -0.425253689289093, "log_odds_ratio": -1.0029823780059814, "logits/chosen": -2.242088794708252, "logits/rejected": -2.2122817039489746, "logps/chosen": -0.68077552318573, "logps/rejected": -0.48119717836380005, "loss": 1.1443, "nll_loss": 1.063909649848938, "rewards/accuracies": 0.29374998807907104, "rewards/chosen": -0.06807754933834076, "rewards/margins": -0.01995784044265747, "rewards/rejected": -0.048119716346263885, "step": 240 }, { "epoch": 0.14806040864672787, "grad_norm": 10.3125, "learning_rate": 4.965001588764913e-07, "log_odds_chosen": -0.4351120889186859, "log_odds_ratio": -1.013584852218628, "logits/chosen": -2.2702879905700684, "logits/rejected": -2.2400031089782715, "logps/chosen": -0.6880632638931274, "logps/rejected": -0.4528827667236328, "loss": 1.1299, "nll_loss": 1.0191699266433716, "rewards/accuracies": 0.34375, "rewards/chosen": -0.06880633533000946, "rewards/margins": -0.023518051952123642, "rewards/rejected": -0.04528827592730522, "step": 250 }, { "epoch": 0.15398282499259697, "grad_norm": 11.1875, "learning_rate": 4.955853663455072e-07, "log_odds_chosen": -0.30220693349838257, "log_odds_ratio": -0.9368545413017273, "logits/chosen": -2.257448673248291, "logits/rejected": -2.227647542953491, "logps/chosen": -0.6458665728569031, "logps/rejected": -0.4764745235443115, "loss": 1.0645, "nll_loss": 0.9644678235054016, "rewards/accuracies": 0.375, "rewards/chosen": -0.06458665430545807, "rewards/margins": -0.016939211636781693, "rewards/rejected": -0.04764745384454727, "step": 260 }, { "epoch": 0.1599052413384661, "grad_norm": 10.875, "learning_rate": 4.945655298263713e-07, "log_odds_chosen": -0.41390785574913025, "log_odds_ratio": -0.9837135076522827, "logits/chosen": -2.20629620552063, "logits/rejected": -2.1831986904144287, "logps/chosen": -0.6674059629440308, "logps/rejected": -0.46569353342056274, "loss": 1.1528, "nll_loss": 1.0888841152191162, "rewards/accuracies": 0.3062500059604645, "rewards/chosen": -0.06674060225486755, "rewards/margins": -0.02017124928534031, "rewards/rejected": -0.046569354832172394, "step": 270 }, { "epoch": 0.16582765768433522, "grad_norm": 9.5625, "learning_rate": 4.934410855327585e-07, "log_odds_chosen": -0.3461267352104187, "log_odds_ratio": -0.9425566792488098, "logits/chosen": -2.2884914875030518, "logits/rejected": -2.27152943611145, "logps/chosen": -0.6492639780044556, "logps/rejected": -0.46900925040245056, "loss": 1.0682, "nll_loss": 1.0291364192962646, "rewards/accuracies": 0.34375, "rewards/chosen": -0.0649264007806778, "rewards/margins": -0.018025478348135948, "rewards/rejected": -0.0469009205698967, "step": 280 }, { "epoch": 0.1717500740302043, "grad_norm": 11.0, "learning_rate": 4.922125144221252e-07, "log_odds_chosen": -0.38331133127212524, "log_odds_ratio": -0.9734469652175903, "logits/chosen": -2.2513084411621094, "logits/rejected": -2.199239492416382, "logps/chosen": -0.6518736481666565, "logps/rejected": -0.4689255356788635, "loss": 1.1269, "nll_loss": 1.0506547689437866, "rewards/accuracies": 0.35624998807907104, "rewards/chosen": -0.06518735736608505, "rewards/margins": -0.018294811248779297, "rewards/rejected": -0.04689255356788635, "step": 290 }, { "epoch": 0.17767249037607344, "grad_norm": 10.625, "learning_rate": 4.90880341989989e-07, "log_odds_chosen": -0.295235276222229, "log_odds_ratio": -0.9132793545722961, "logits/chosen": -2.255086660385132, "logits/rejected": -2.2318952083587646, "logps/chosen": -0.6402678489685059, "logps/rejected": -0.48136910796165466, "loss": 1.0909, "nll_loss": 1.0022283792495728, "rewards/accuracies": 0.34375, "rewards/chosen": -0.06402678042650223, "rewards/margins": -0.015889868140220642, "rewards/rejected": -0.048136912286281586, "step": 300 }, { "epoch": 0.18359490672194256, "grad_norm": 9.875, "learning_rate": 4.894451380451589e-07, "log_odds_chosen": -0.4930775761604309, "log_odds_ratio": -1.0459508895874023, "logits/chosen": -2.2340633869171143, "logits/rejected": -2.2229130268096924, "logps/chosen": -0.7189785242080688, "logps/rejected": -0.46092820167541504, "loss": 1.116, "nll_loss": 1.0077855587005615, "rewards/accuracies": 0.3187499940395355, "rewards/chosen": -0.07189784944057465, "rewards/margins": -0.02580503560602665, "rewards/rejected": -0.046092819422483444, "step": 310 }, { "epoch": 0.18951732306781166, "grad_norm": 11.5625, "learning_rate": 4.879075164660124e-07, "log_odds_chosen": -0.29097312688827515, "log_odds_ratio": -0.9061079025268555, "logits/chosen": -2.238163471221924, "logits/rejected": -2.2072105407714844, "logps/chosen": -0.6175664067268372, "logps/rejected": -0.47239384055137634, "loss": 1.0495, "nll_loss": 0.9289931058883667, "rewards/accuracies": 0.33125001192092896, "rewards/chosen": -0.06175662949681282, "rewards/margins": -0.014517253264784813, "rewards/rejected": -0.047239381819963455, "step": 320 }, { "epoch": 0.19543973941368079, "grad_norm": 12.25, "learning_rate": 4.862681349379212e-07, "log_odds_chosen": -0.33382827043533325, "log_odds_ratio": -0.939583420753479, "logits/chosen": -2.244995594024658, "logits/rejected": -2.1931443214416504, "logps/chosen": -0.6333972811698914, "logps/rejected": -0.4775928556919098, "loss": 1.1124, "nll_loss": 1.0409491062164307, "rewards/accuracies": 0.35624998807907104, "rewards/chosen": -0.0633397176861763, "rewards/margins": -0.01558043621480465, "rewards/rejected": -0.0477592833340168, "step": 330 }, { "epoch": 0.2013621557595499, "grad_norm": 9.6875, "learning_rate": 4.8452769467194e-07, "log_odds_chosen": -0.3502793610095978, "log_odds_ratio": -0.9458521604537964, "logits/chosen": -2.2533793449401855, "logits/rejected": -2.231985092163086, "logps/chosen": -0.6348416209220886, "logps/rejected": -0.46244215965270996, "loss": 1.0872, "nll_loss": 0.965823769569397, "rewards/accuracies": 0.35624998807907104, "rewards/chosen": -0.06348416954278946, "rewards/margins": -0.017239956185221672, "rewards/rejected": -0.04624421149492264, "step": 340 }, { "epoch": 0.207284572105419, "grad_norm": 9.625, "learning_rate": 4.82686940104879e-07, "log_odds_chosen": -0.37014713883399963, "log_odds_ratio": -0.9843534231185913, "logits/chosen": -2.296128511428833, "logits/rejected": -2.267141103744507, "logps/chosen": -0.6616524457931519, "logps/rejected": -0.4461567997932434, "loss": 1.0383, "nll_loss": 0.9294153451919556, "rewards/accuracies": 0.34375, "rewards/chosen": -0.0661652460694313, "rewards/margins": -0.02154957316815853, "rewards/rejected": -0.04461567848920822, "step": 350 }, { "epoch": 0.21320698845128813, "grad_norm": 9.6875, "learning_rate": 4.807466585808856e-07, "log_odds_chosen": -0.2995724380016327, "log_odds_ratio": -0.9168221354484558, "logits/chosen": -2.274096727371216, "logits/rejected": -2.2658305168151855, "logps/chosen": -0.5940972566604614, "logps/rejected": -0.46015462279319763, "loss": 1.0942, "nll_loss": 0.9911165237426758, "rewards/accuracies": 0.3687500059604645, "rewards/chosen": -0.0594097301363945, "rewards/margins": -0.013394266366958618, "rewards/rejected": -0.04601546376943588, "step": 360 }, { "epoch": 0.21912940479715723, "grad_norm": 13.5625, "learning_rate": 4.787076800146752e-07, "log_odds_chosen": -0.27963608503341675, "log_odds_ratio": -0.9352908134460449, "logits/chosen": -2.2542636394500732, "logits/rejected": -2.2058660984039307, "logps/chosen": -0.6458699107170105, "logps/rejected": -0.468344509601593, "loss": 1.0125, "nll_loss": 0.9038776159286499, "rewards/accuracies": 0.4375, "rewards/chosen": -0.06458699703216553, "rewards/margins": -0.017752548679709435, "rewards/rejected": -0.046834446489810944, "step": 370 }, { "epoch": 0.22505182114302635, "grad_norm": 10.0, "learning_rate": 4.765708765365526e-07, "log_odds_chosen": -0.2566812038421631, "log_odds_ratio": -0.9025079011917114, "logits/chosen": -2.2573628425598145, "logits/rejected": -2.2479588985443115, "logps/chosen": -0.5893818140029907, "logps/rejected": -0.4597233235836029, "loss": 1.1093, "nll_loss": 0.9725319147109985, "rewards/accuracies": 0.40625, "rewards/chosen": -0.05893818661570549, "rewards/margins": -0.012965850532054901, "rewards/rejected": -0.04597233235836029, "step": 380 }, { "epoch": 0.23097423748889548, "grad_norm": 12.25, "learning_rate": 4.7433716211937587e-07, "log_odds_chosen": -0.4499928057193756, "log_odds_ratio": -1.0134861469268799, "logits/chosen": -2.3190252780914307, "logits/rejected": -2.297466516494751, "logps/chosen": -0.655422568321228, "logps/rejected": -0.43357038497924805, "loss": 1.0471, "nll_loss": 1.008756399154663, "rewards/accuracies": 0.34375, "rewards/chosen": -0.06554224342107773, "rewards/margins": -0.02218521013855934, "rewards/rejected": -0.043357037007808685, "step": 390 }, { "epoch": 0.23689665383476458, "grad_norm": 9.3125, "learning_rate": 4.720074921876245e-07, "log_odds_chosen": -0.3851686120033264, "log_odds_ratio": -0.9778718948364258, "logits/chosen": -2.325918674468994, "logits/rejected": -2.2813212871551514, "logps/chosen": -0.6249781847000122, "logps/rejected": -0.45036381483078003, "loss": 1.0507, "nll_loss": 0.9533747434616089, "rewards/accuracies": 0.3812499940395355, "rewards/chosen": -0.0624978169798851, "rewards/margins": -0.01746143028140068, "rewards/rejected": -0.04503639414906502, "step": 400 }, { "epoch": 0.2428190701806337, "grad_norm": 10.5625, "learning_rate": 4.6958286320873593e-07, "log_odds_chosen": -0.38822251558303833, "log_odds_ratio": -0.9542675018310547, "logits/chosen": -2.2724270820617676, "logits/rejected": -2.27009916305542, "logps/chosen": -0.6122742891311646, "logps/rejected": -0.4287818372249603, "loss": 1.0679, "nll_loss": 1.0051120519638062, "rewards/accuracies": 0.3125, "rewards/chosen": -0.06122744083404541, "rewards/margins": -0.018349256366491318, "rewards/rejected": -0.04287818819284439, "step": 410 }, { "epoch": 0.24874148652650283, "grad_norm": 10.25, "learning_rate": 4.6706431226688804e-07, "log_odds_chosen": -0.30081695318222046, "log_odds_ratio": -0.921572208404541, "logits/chosen": -2.2560360431671143, "logits/rejected": -2.2262086868286133, "logps/chosen": -0.6127408742904663, "logps/rejected": -0.4595797061920166, "loss": 1.0784, "nll_loss": 0.9788911938667297, "rewards/accuracies": 0.375, "rewards/chosen": -0.06127409264445305, "rewards/margins": -0.015316121280193329, "rewards/rejected": -0.04595796763896942, "step": 420 }, { "epoch": 0.25466390287237195, "grad_norm": 9.375, "learning_rate": 4.6445291661940777e-07, "log_odds_chosen": -0.2526037096977234, "log_odds_ratio": -0.8853398561477661, "logits/chosen": -2.274932861328125, "logits/rejected": -2.2737860679626465, "logps/chosen": -0.5831697583198547, "logps/rejected": -0.4629867672920227, "loss": 1.0351, "nll_loss": 0.9002013206481934, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.05831696838140488, "rewards/margins": -0.012018295004963875, "rewards/rejected": -0.04629867523908615, "step": 430 }, { "epoch": 0.26058631921824105, "grad_norm": 13.0, "learning_rate": 4.6174979323599715e-07, "log_odds_chosen": -0.4437042772769928, "log_odds_ratio": -1.0250940322875977, "logits/chosen": -2.2592310905456543, "logits/rejected": -2.2114596366882324, "logps/chosen": -0.7022743821144104, "logps/rejected": -0.4603559374809265, "loss": 1.0967, "nll_loss": 1.0961658954620361, "rewards/accuracies": 0.3187499940395355, "rewards/chosen": -0.07022743672132492, "rewards/margins": -0.02419184148311615, "rewards/rejected": -0.04603559896349907, "step": 440 }, { "epoch": 0.26650873556411014, "grad_norm": 9.5, "learning_rate": 4.5895609832097277e-07, "log_odds_chosen": -0.3050179183483124, "log_odds_ratio": -0.9421980977058411, "logits/chosen": -2.2684884071350098, "logits/rejected": -2.2559661865234375, "logps/chosen": -0.6401418447494507, "logps/rejected": -0.46939319372177124, "loss": 1.0745, "nll_loss": 0.96502685546875, "rewards/accuracies": 0.38749998807907104, "rewards/chosen": -0.06401418894529343, "rewards/margins": -0.01707487180829048, "rewards/rejected": -0.04693932086229324, "step": 450 }, { "epoch": 0.2724311519099793, "grad_norm": 10.1875, "learning_rate": 4.560730268187236e-07, "log_odds_chosen": -0.26763516664505005, "log_odds_ratio": -0.8960734605789185, "logits/chosen": -2.266759157180786, "logits/rejected": -2.230344533920288, "logps/chosen": -0.57380610704422, "logps/rejected": -0.45090922713279724, "loss": 1.0544, "nll_loss": 0.9469722509384155, "rewards/accuracies": 0.4437499940395355, "rewards/chosen": -0.05738060921430588, "rewards/margins": -0.012289688922464848, "rewards/rejected": -0.0450909249484539, "step": 460 }, { "epoch": 0.2783535682558484, "grad_norm": 11.1875, "learning_rate": 4.531018119025989e-07, "log_odds_chosen": -0.19471798837184906, "log_odds_ratio": -0.8877772092819214, "logits/chosen": -2.325700283050537, "logits/rejected": -2.3014023303985596, "logps/chosen": -0.5948117971420288, "logps/rejected": -0.5260331630706787, "loss": 1.0872, "nll_loss": 1.042905569076538, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.059481192380189896, "rewards/margins": -0.006877871695905924, "rewards/rejected": -0.05260331556200981, "step": 470 }, { "epoch": 0.2842759846017175, "grad_norm": 10.1875, "learning_rate": 4.5004372444744376e-07, "log_odds_chosen": -0.20854365825653076, "log_odds_ratio": -0.8700854182243347, "logits/chosen": -2.267329454421997, "logits/rejected": -2.2475056648254395, "logps/chosen": -0.610100269317627, "logps/rejected": -0.49854737520217896, "loss": 1.0582, "nll_loss": 0.982585608959198, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.061010025441646576, "rewards/margins": -0.011155293323099613, "rewards/rejected": -0.04985473304986954, "step": 480 }, { "epoch": 0.2901984009475866, "grad_norm": 11.125, "learning_rate": 4.4690007248600967e-07, "log_odds_chosen": -0.30316418409347534, "log_odds_ratio": -0.9258543848991394, "logits/chosen": -2.260499954223633, "logits/rejected": -2.2460737228393555, "logps/chosen": -0.6183134913444519, "logps/rejected": -0.4603392481803894, "loss": 1.0569, "nll_loss": 0.9751143455505371, "rewards/accuracies": 0.38749998807907104, "rewards/chosen": -0.06183135509490967, "rewards/margins": -0.01579742692410946, "rewards/rejected": -0.04603392630815506, "step": 490 }, { "epoch": 0.29612081729345574, "grad_norm": 9.25, "learning_rate": 4.436722006494701e-07, "log_odds_chosen": -0.4622948169708252, "log_odds_ratio": -1.0724523067474365, "logits/chosen": -2.2528557777404785, "logits/rejected": -2.2317535877227783, "logps/chosen": -0.7585560083389282, "logps/rejected": -0.4601530134677887, "loss": 1.0779, "nll_loss": 1.0056917667388916, "rewards/accuracies": 0.39375001192092896, "rewards/chosen": -0.07585560530424118, "rewards/margins": -0.02984030917286873, "rewards/rejected": -0.04601530730724335, "step": 500 }, { "epoch": 0.30204323363932484, "grad_norm": 10.125, "learning_rate": 4.4036148959228356e-07, "log_odds_chosen": -0.37729692459106445, "log_odds_ratio": -0.9907791018486023, "logits/chosen": -2.285222291946411, "logits/rejected": -2.2465076446533203, "logps/chosen": -0.6608995199203491, "logps/rejected": -0.44408687949180603, "loss": 1.0854, "nll_loss": 0.9470478892326355, "rewards/accuracies": 0.375, "rewards/chosen": -0.0660899430513382, "rewards/margins": -0.02168126031756401, "rewards/rejected": -0.04440869390964508, "step": 510 }, { "epoch": 0.30796564998519393, "grad_norm": 12.9375, "learning_rate": 4.3696935540164705e-07, "log_odds_chosen": -0.3114868998527527, "log_odds_ratio": -0.9284585118293762, "logits/chosen": -2.2520318031311035, "logits/rejected": -2.2336666584014893, "logps/chosen": -0.6092923879623413, "logps/rejected": -0.4560086727142334, "loss": 1.0234, "nll_loss": 0.954501748085022, "rewards/accuracies": 0.4437499940395355, "rewards/chosen": -0.06092924624681473, "rewards/margins": -0.01532837562263012, "rewards/rejected": -0.04560086503624916, "step": 520 }, { "epoch": 0.3138880663310631, "grad_norm": 9.1875, "learning_rate": 4.334972489917947e-07, "log_odds_chosen": -0.22460684180259705, "log_odds_ratio": -0.88166743516922, "logits/chosen": -2.313957691192627, "logits/rejected": -2.2588186264038086, "logps/chosen": -0.6013073325157166, "logps/rejected": -0.47843700647354126, "loss": 1.0456, "nll_loss": 0.9358353614807129, "rewards/accuracies": 0.4375, "rewards/chosen": -0.060130733996629715, "rewards/margins": -0.012287032790482044, "rewards/rejected": -0.047843702137470245, "step": 530 }, { "epoch": 0.3198104826769322, "grad_norm": 11.25, "learning_rate": 4.299466554833997e-07, "log_odds_chosen": -0.33192509412765503, "log_odds_ratio": -0.94036465883255, "logits/chosen": -2.2912707328796387, "logits/rejected": -2.2435359954833984, "logps/chosen": -0.5902704000473022, "logps/rejected": -0.44104498624801636, "loss": 1.0515, "nll_loss": 0.9315252304077148, "rewards/accuracies": 0.3812499940395355, "rewards/chosen": -0.059027038514614105, "rewards/margins": -0.014922534115612507, "rewards/rejected": -0.044104501605033875, "step": 540 }, { "epoch": 0.3257328990228013, "grad_norm": 8.9375, "learning_rate": 4.263190935683449e-07, "log_odds_chosen": -0.25842440128326416, "log_odds_ratio": -0.893360435962677, "logits/chosen": -2.2691588401794434, "logits/rejected": -2.2356011867523193, "logps/chosen": -0.5605894327163696, "logps/rejected": -0.43656760454177856, "loss": 0.9862, "nll_loss": 0.8704695701599121, "rewards/accuracies": 0.4437499940395355, "rewards/chosen": -0.05605894327163696, "rewards/margins": -0.012402191758155823, "rewards/rejected": -0.04365675523877144, "step": 550 }, { "epoch": 0.33165531536867043, "grad_norm": 10.875, "learning_rate": 4.2261611486013437e-07, "log_odds_chosen": -0.3279554545879364, "log_odds_ratio": -0.9397815465927124, "logits/chosen": -2.3104796409606934, "logits/rejected": -2.275190830230713, "logps/chosen": -0.6270398497581482, "logps/rejected": -0.4670359194278717, "loss": 1.0697, "nll_loss": 0.977874755859375, "rewards/accuracies": 0.3812499940395355, "rewards/chosen": -0.06270398944616318, "rewards/margins": -0.01600039377808571, "rewards/rejected": -0.04670359194278717, "step": 560 }, { "epoch": 0.33757773171453953, "grad_norm": 11.125, "learning_rate": 4.188393032302233e-07, "log_odds_chosen": -0.14010918140411377, "log_odds_ratio": -0.8429776430130005, "logits/chosen": -2.2512803077697754, "logits/rejected": -2.1937472820281982, "logps/chosen": -0.5634902715682983, "logps/rejected": -0.5150736570358276, "loss": 1.0249, "nll_loss": 0.931064248085022, "rewards/accuracies": 0.41874998807907104, "rewards/chosen": -0.056349027901887894, "rewards/margins": -0.0048416657373309135, "rewards/rejected": -0.051507361233234406, "step": 570 }, { "epoch": 0.3435001480604086, "grad_norm": 12.75, "learning_rate": 4.1499027413055e-07, "log_odds_chosen": -0.33234935998916626, "log_odds_ratio": -0.9407118558883667, "logits/chosen": -2.258405923843384, "logits/rejected": -2.232956647872925, "logps/chosen": -0.6220130920410156, "logps/rejected": -0.4592718482017517, "loss": 1.0413, "nll_loss": 0.9290376901626587, "rewards/accuracies": 0.3687500059604645, "rewards/chosen": -0.06220130994915962, "rewards/margins": -0.016274118795990944, "rewards/rejected": -0.04592718556523323, "step": 580 }, { "epoch": 0.3494225644062778, "grad_norm": 13.875, "learning_rate": 4.1107067390256056e-07, "log_odds_chosen": -0.35427385568618774, "log_odds_ratio": -0.9841470718383789, "logits/chosen": -2.305126428604126, "logits/rejected": -2.280172824859619, "logps/chosen": -0.696389377117157, "logps/rejected": -0.4881146550178528, "loss": 1.0718, "nll_loss": 1.0334848165512085, "rewards/accuracies": 0.375, "rewards/chosen": -0.0696389377117157, "rewards/margins": -0.02082747593522072, "rewards/rejected": -0.04881146177649498, "step": 590 }, { "epoch": 0.3553449807521469, "grad_norm": 11.6875, "learning_rate": 4.0708217907302047e-07, "log_odds_chosen": -0.3386622369289398, "log_odds_ratio": -0.9444282650947571, "logits/chosen": -2.2589573860168457, "logits/rejected": -2.2278530597686768, "logps/chosen": -0.6211683750152588, "logps/rejected": -0.46438631415367126, "loss": 1.0621, "nll_loss": 0.9823211431503296, "rewards/accuracies": 0.35624998807907104, "rewards/chosen": -0.062116838991642, "rewards/margins": -0.01567821204662323, "rewards/rejected": -0.04643862694501877, "step": 600 }, { "epoch": 0.361267397098016, "grad_norm": 14.4375, "learning_rate": 4.030264956369157e-07, "log_odds_chosen": -0.32127273082733154, "log_odds_ratio": -0.929902195930481, "logits/chosen": -2.297096014022827, "logits/rejected": -2.259603977203369, "logps/chosen": -0.591595470905304, "logps/rejected": -0.4399223327636719, "loss": 1.0497, "nll_loss": 0.9886807203292847, "rewards/accuracies": 0.3375000059604645, "rewards/chosen": -0.0591595396399498, "rewards/margins": -0.01516731083393097, "rewards/rejected": -0.04399223253130913, "step": 610 }, { "epoch": 0.3671898134438851, "grad_norm": 11.1875, "learning_rate": 3.989053583277492e-07, "log_odds_chosen": -0.42405062913894653, "log_odds_ratio": -1.0016412734985352, "logits/chosen": -2.3095479011535645, "logits/rejected": -2.2935452461242676, "logps/chosen": -0.6750982403755188, "logps/rejected": -0.45489102602005005, "loss": 1.0537, "nll_loss": 0.9710051417350769, "rewards/accuracies": 0.3187499940395355, "rewards/chosen": -0.06750981509685516, "rewards/margins": -0.022020723670721054, "rewards/rejected": -0.04548909515142441, "step": 620 }, { "epoch": 0.3731122297897542, "grad_norm": 13.4375, "learning_rate": 3.947205298755447e-07, "log_odds_chosen": -0.25669050216674805, "log_odds_ratio": -0.9015368223190308, "logits/chosen": -2.2679405212402344, "logits/rejected": -2.2386162281036377, "logps/chosen": -0.6160240173339844, "logps/rejected": -0.48336100578308105, "loss": 1.0648, "nll_loss": 0.9532335996627808, "rewards/accuracies": 0.41874998807907104, "rewards/chosen": -0.061602406203746796, "rewards/margins": -0.013266305439174175, "rewards/rejected": -0.04833609610795975, "step": 630 }, { "epoch": 0.3790346461356233, "grad_norm": 10.4375, "learning_rate": 3.9047380025287634e-07, "log_odds_chosen": -0.24768850207328796, "log_odds_ratio": -0.891069769859314, "logits/chosen": -2.275651216506958, "logits/rejected": -2.247177839279175, "logps/chosen": -0.5877569913864136, "logps/rejected": -0.4681660532951355, "loss": 1.0549, "nll_loss": 0.9463118314743042, "rewards/accuracies": 0.4375, "rewards/chosen": -0.058775704354047775, "rewards/margins": -0.011959095485508442, "rewards/rejected": -0.04681660607457161, "step": 640 }, { "epoch": 0.3849570624814925, "grad_norm": 12.1875, "learning_rate": 3.8616698590924523e-07, "log_odds_chosen": -0.2891980707645416, "log_odds_ratio": -0.9127435684204102, "logits/chosen": -2.296032428741455, "logits/rejected": -2.2514827251434326, "logps/chosen": -0.6284441351890564, "logps/rejected": -0.4775362014770508, "loss": 1.0297, "nll_loss": 0.9506929516792297, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": -0.062844417989254, "rewards/margins": -0.01509079895913601, "rewards/rejected": -0.04775362089276314, "step": 650 }, { "epoch": 0.39087947882736157, "grad_norm": 9.9375, "learning_rate": 3.8180192899413123e-07, "log_odds_chosen": -0.3009001314640045, "log_odds_ratio": -0.9173041582107544, "logits/chosen": -2.292931079864502, "logits/rejected": -2.2850821018218994, "logps/chosen": -0.5977297425270081, "logps/rejected": -0.4498085081577301, "loss": 1.066, "nll_loss": 0.9441615343093872, "rewards/accuracies": 0.375, "rewards/chosen": -0.059772975742816925, "rewards/margins": -0.014792119152843952, "rewards/rejected": -0.04498085752129555, "step": 660 }, { "epoch": 0.39680189517323067, "grad_norm": 9.6875, "learning_rate": 3.7738049656905225e-07, "log_odds_chosen": -0.2274588793516159, "log_odds_ratio": -0.871192455291748, "logits/chosen": -2.2281768321990967, "logits/rejected": -2.1852290630340576, "logps/chosen": -0.5783167481422424, "logps/rejected": -0.472917377948761, "loss": 1.0607, "nll_loss": 0.9557689428329468, "rewards/accuracies": 0.4375, "rewards/chosen": -0.05783168226480484, "rewards/margins": -0.010539938695728779, "rewards/rejected": -0.04729173332452774, "step": 670 }, { "epoch": 0.4027243115190998, "grad_norm": 10.375, "learning_rate": 3.7290457980896787e-07, "log_odds_chosen": -0.1645122915506363, "log_odds_ratio": -0.8458727598190308, "logits/chosen": -2.2992634773254395, "logits/rejected": -2.270430564880371, "logps/chosen": -0.5671563148498535, "logps/rejected": -0.4864569306373596, "loss": 1.0284, "nll_loss": 0.9164050817489624, "rewards/accuracies": 0.41874998807907104, "rewards/chosen": -0.05671562999486923, "rewards/margins": -0.00806993618607521, "rewards/rejected": -0.04864569753408432, "step": 680 }, { "epoch": 0.4086467278649689, "grad_norm": 10.125, "learning_rate": 3.68376093193369e-07, "log_odds_chosen": -0.2814542353153229, "log_odds_ratio": -0.9015814661979675, "logits/chosen": -2.3065972328186035, "logits/rejected": -2.2681093215942383, "logps/chosen": -0.5637949109077454, "logps/rejected": -0.4352457523345947, "loss": 1.0214, "nll_loss": 0.91374272108078, "rewards/accuracies": 0.39375001192092896, "rewards/chosen": -0.05637948960065842, "rewards/margins": -0.012854918837547302, "rewards/rejected": -0.043524570763111115, "step": 690 }, { "epoch": 0.414569144210838, "grad_norm": 58.5, "learning_rate": 3.637969736873992e-07, "log_odds_chosen": -0.21553269028663635, "log_odds_ratio": -0.8870409727096558, "logits/chosen": -2.2836763858795166, "logits/rejected": -2.252403736114502, "logps/chosen": -0.5681829452514648, "logps/rejected": -0.465969979763031, "loss": 1.0701, "nll_loss": 0.9871380925178528, "rewards/accuracies": 0.45625001192092896, "rewards/chosen": -0.056818295270204544, "rewards/margins": -0.010221302509307861, "rewards/rejected": -0.04659699648618698, "step": 700 }, { "epoch": 0.4204915605567071, "grad_norm": 10.5, "learning_rate": 3.591691799133587e-07, "log_odds_chosen": -0.19581297039985657, "log_odds_ratio": -0.8488709330558777, "logits/chosen": -2.3274245262145996, "logits/rejected": -2.2992606163024902, "logps/chosen": -0.5645796060562134, "logps/rejected": -0.4613499641418457, "loss": 1.0495, "nll_loss": 0.9565572738647461, "rewards/accuracies": 0.41874998807907104, "rewards/chosen": -0.05645795539021492, "rewards/margins": -0.010322963818907738, "rewards/rejected": -0.04613499343395233, "step": 710 }, { "epoch": 0.42641397690257626, "grad_norm": 9.75, "learning_rate": 3.5449469131294476e-07, "log_odds_chosen": -0.22600612044334412, "log_odds_ratio": -0.8781830668449402, "logits/chosen": -2.2927708625793457, "logits/rejected": -2.2485132217407227, "logps/chosen": -0.5577629804611206, "logps/rejected": -0.44653376936912537, "loss": 1.0248, "nll_loss": 0.9297264814376831, "rewards/accuracies": 0.4375, "rewards/chosen": -0.055776309221982956, "rewards/margins": -0.0111229307949543, "rewards/rejected": -0.044653378427028656, "step": 720 }, { "epoch": 0.43233639324844536, "grad_norm": 9.3125, "learning_rate": 3.497755073005868e-07, "log_odds_chosen": -0.09444288164377213, "log_odds_ratio": -0.8072474598884583, "logits/chosen": -2.290067672729492, "logits/rejected": -2.257514238357544, "logps/chosen": -0.5471974611282349, "logps/rejected": -0.47366800904273987, "loss": 1.0112, "nll_loss": 0.8891817927360535, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -0.054719746112823486, "rewards/margins": -0.007352945860475302, "rewards/rejected": -0.04736679792404175, "step": 730 }, { "epoch": 0.43825880959431446, "grad_norm": 16.75, "learning_rate": 3.4501364640823926e-07, "log_odds_chosen": -0.3251793384552002, "log_odds_ratio": -0.9317482709884644, "logits/chosen": -2.2995355129241943, "logits/rejected": -2.2732508182525635, "logps/chosen": -0.6547442674636841, "logps/rejected": -0.4866989254951477, "loss": 1.0482, "nll_loss": 0.9714682698249817, "rewards/accuracies": 0.38749998807907104, "rewards/chosen": -0.06547442078590393, "rewards/margins": -0.016804538667201996, "rewards/rejected": -0.04866989329457283, "step": 740 }, { "epoch": 0.4441812259401836, "grad_norm": 10.4375, "learning_rate": 3.402111454219966e-07, "log_odds_chosen": -0.17538635432720184, "log_odds_ratio": -0.8506783246994019, "logits/chosen": -2.3090875148773193, "logits/rejected": -2.26053786277771, "logps/chosen": -0.5713698863983154, "logps/rejected": -0.47184181213378906, "loss": 1.0275, "nll_loss": 0.954795241355896, "rewards/accuracies": 0.5, "rewards/chosen": -0.05713699012994766, "rewards/margins": -0.009952803142368793, "rewards/rejected": -0.047184187918901443, "step": 750 }, { "epoch": 0.4501036422860527, "grad_norm": 9.3125, "learning_rate": 3.353700585109005e-07, "log_odds_chosen": -0.19826039671897888, "log_odds_ratio": -0.8637887835502625, "logits/chosen": -2.302405834197998, "logits/rejected": -2.27463698387146, "logps/chosen": -0.5740953683853149, "logps/rejected": -0.4722967743873596, "loss": 1.0239, "nll_loss": 0.963403582572937, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.057409536093473434, "rewards/margins": -0.010179854929447174, "rewards/rejected": -0.04722967743873596, "step": 760 }, { "epoch": 0.4560260586319218, "grad_norm": 9.875, "learning_rate": 3.304924563483129e-07, "log_odds_chosen": -0.22836697101593018, "log_odds_ratio": -0.895135760307312, "logits/chosen": -2.315516948699951, "logits/rejected": -2.3024649620056152, "logps/chosen": -0.6285193562507629, "logps/rejected": -0.48862919211387634, "loss": 1.073, "nll_loss": 1.0095432996749878, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": -0.0628519356250763, "rewards/margins": -0.013989018276333809, "rewards/rejected": -0.04886292293667793, "step": 770 }, { "epoch": 0.46194847497779096, "grad_norm": 11.125, "learning_rate": 3.255804252262283e-07, "log_odds_chosen": -0.19756431877613068, "log_odds_ratio": -0.856968104839325, "logits/chosen": -2.255115032196045, "logits/rejected": -2.226313352584839, "logps/chosen": -0.551701545715332, "logps/rejected": -0.45012766122817993, "loss": 1.0499, "nll_loss": 0.9961403608322144, "rewards/accuracies": 0.4437499940395355, "rewards/chosen": -0.05517015606164932, "rewards/margins": -0.010157393291592598, "rewards/rejected": -0.045012760907411575, "step": 780 }, { "epoch": 0.46787089132366005, "grad_norm": 8.375, "learning_rate": 3.2063606616290626e-07, "log_odds_chosen": -0.3132410943508148, "log_odds_ratio": -0.9298326373100281, "logits/chosen": -2.2360429763793945, "logits/rejected": -2.1973369121551514, "logps/chosen": -0.5941890478134155, "logps/rejected": -0.44506731629371643, "loss": 0.9654, "nll_loss": 0.8383496999740601, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": -0.05941891670227051, "rewards/margins": -0.01491218339651823, "rewards/rejected": -0.044506728649139404, "step": 790 }, { "epoch": 0.47379330766952915, "grad_norm": 15.0625, "learning_rate": 3.1566149400420523e-07, "log_odds_chosen": -0.26251059770584106, "log_odds_ratio": -0.8918318748474121, "logits/chosen": -2.2902214527130127, "logits/rejected": -2.2795047760009766, "logps/chosen": -0.6117950081825256, "logps/rejected": -0.4801320433616638, "loss": 1.0567, "nll_loss": 0.9525865316390991, "rewards/accuracies": 0.4312500059604645, "rewards/chosen": -0.0611795075237751, "rewards/margins": -0.013166295364499092, "rewards/rejected": -0.04801321029663086, "step": 800 }, { "epoch": 0.4797157240153983, "grad_norm": 10.3125, "learning_rate": 3.1065883651900087e-07, "log_odds_chosen": -0.2203420102596283, "log_odds_ratio": -0.8829119801521301, "logits/chosen": -2.2788829803466797, "logits/rejected": -2.2381834983825684, "logps/chosen": -0.5892807841300964, "logps/rejected": -0.48378220200538635, "loss": 1.0678, "nll_loss": 0.9220091104507446, "rewards/accuracies": 0.41874998807907104, "rewards/chosen": -0.05892808362841606, "rewards/margins": -0.01054986473172903, "rewards/rejected": -0.048378217965364456, "step": 810 }, { "epoch": 0.4856381403612674, "grad_norm": 9.875, "learning_rate": 3.056302334890786e-07, "log_odds_chosen": -0.30824679136276245, "log_odds_ratio": -0.9259847402572632, "logits/chosen": -2.288405179977417, "logits/rejected": -2.2682487964630127, "logps/chosen": -0.6053352355957031, "logps/rejected": -0.4507838189601898, "loss": 1.0098, "nll_loss": 0.9126564860343933, "rewards/accuracies": 0.38749998807907104, "rewards/chosen": -0.06053352355957031, "rewards/margins": -0.01545514166355133, "rewards/rejected": -0.04507838934659958, "step": 820 }, { "epoch": 0.4915605567071365, "grad_norm": 12.6875, "learning_rate": 3.0057783579388586e-07, "log_odds_chosen": -0.15970291197299957, "log_odds_ratio": -0.8330586552619934, "logits/chosen": -2.2909493446350098, "logits/rejected": -2.2521986961364746, "logps/chosen": -0.5571908950805664, "logps/rejected": -0.4815686345100403, "loss": 1.0258, "nll_loss": 0.9384473562240601, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.05571908876299858, "rewards/margins": -0.007562229875475168, "rewards/rejected": -0.04815686494112015, "step": 830 }, { "epoch": 0.49748297305300565, "grad_norm": 11.75, "learning_rate": 2.9550380449053907e-07, "log_odds_chosen": -0.18619410693645477, "log_odds_ratio": -0.8525155782699585, "logits/chosen": -2.2423572540283203, "logits/rejected": -2.221928596496582, "logps/chosen": -0.5615742206573486, "logps/rejected": -0.4591636657714844, "loss": 1.0133, "nll_loss": 0.8223134279251099, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.056157421320676804, "rewards/margins": -0.010241055861115456, "rewards/rejected": -0.0459163673222065, "step": 840 }, { "epoch": 0.5034053893988747, "grad_norm": 8.625, "learning_rate": 2.904103098894767e-07, "log_odds_chosen": -0.22144293785095215, "log_odds_ratio": -0.8922742009162903, "logits/chosen": -2.280796527862549, "logits/rejected": -2.2380261421203613, "logps/chosen": -0.5996569991111755, "logps/rejected": -0.4632148742675781, "loss": 1.0102, "nll_loss": 0.9282135963439941, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.059965699911117554, "rewards/margins": -0.013644215650856495, "rewards/rejected": -0.04632148891687393, "step": 850 }, { "epoch": 0.5093278057447439, "grad_norm": 12.625, "learning_rate": 2.852995306261545e-07, "log_odds_chosen": -0.1986076533794403, "log_odds_ratio": -0.8607484698295593, "logits/chosen": -2.306536912918091, "logits/rejected": -2.2707247734069824, "logps/chosen": -0.575395405292511, "logps/rejected": -0.4835848808288574, "loss": 1.074, "nll_loss": 1.0040955543518066, "rewards/accuracies": 0.4437499940395355, "rewards/chosen": -0.05753953382372856, "rewards/margins": -0.009181044064462185, "rewards/rejected": -0.0483584925532341, "step": 860 }, { "epoch": 0.515250222090613, "grad_norm": 10.6875, "learning_rate": 2.801736527291797e-07, "log_odds_chosen": -0.26449286937713623, "log_odds_ratio": -0.9028227925300598, "logits/chosen": -2.275608777999878, "logits/rejected": -2.233181953430176, "logps/chosen": -0.61722731590271, "logps/rejected": -0.4729304313659668, "loss": 1.042, "nll_loss": 0.908827006816864, "rewards/accuracies": 0.39375001192092896, "rewards/chosen": -0.061722736805677414, "rewards/margins": -0.014429694041609764, "rewards/rejected": -0.0472930371761322, "step": 870 }, { "epoch": 0.5211726384364821, "grad_norm": 10.875, "learning_rate": 2.750348686852836e-07, "log_odds_chosen": -0.31994161009788513, "log_odds_ratio": -0.9219182729721069, "logits/chosen": -2.329312324523926, "logits/rejected": -2.2651875019073486, "logps/chosen": -0.6155102252960205, "logps/rejected": -0.4632096290588379, "loss": 1.0724, "nll_loss": 1.0065295696258545, "rewards/accuracies": 0.375, "rewards/chosen": -0.06155102327466011, "rewards/margins": -0.015230064280331135, "rewards/rejected": -0.04632095992565155, "step": 880 }, { "epoch": 0.5270950547823512, "grad_norm": 11.875, "learning_rate": 2.69885376501531e-07, "log_odds_chosen": -0.23163005709648132, "log_odds_ratio": -0.8846963047981262, "logits/chosen": -2.261355400085449, "logits/rejected": -2.2470784187316895, "logps/chosen": -0.6110343933105469, "logps/rejected": -0.4852830767631531, "loss": 1.0546, "nll_loss": 0.9538838267326355, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -0.06110344082117081, "rewards/margins": -0.012575129978358746, "rewards/rejected": -0.04852830991148949, "step": 890 }, { "epoch": 0.5330174711282203, "grad_norm": 11.9375, "learning_rate": 2.647273787651687e-07, "log_odds_chosen": -0.18702737987041473, "log_odds_ratio": -0.8396440744400024, "logits/chosen": -2.2948384284973145, "logits/rejected": -2.2751121520996094, "logps/chosen": -0.5671176910400391, "logps/rejected": -0.47453179955482483, "loss": 1.0288, "nll_loss": 0.9608666300773621, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": -0.056711774319410324, "rewards/margins": -0.009258597157895565, "rewards/rejected": -0.047453176230192184, "step": 900 }, { "epoch": 0.5389398874740894, "grad_norm": 16.75, "learning_rate": 2.5956308170151526e-07, "log_odds_chosen": -0.40357428789138794, "log_odds_ratio": -1.0180401802062988, "logits/chosen": -2.260730504989624, "logits/rejected": -2.2328133583068848, "logps/chosen": -0.7037028670310974, "logps/rejected": -0.4578544497489929, "loss": 1.1183, "nll_loss": 0.9839082956314087, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": -0.07037027925252914, "rewards/margins": -0.02458484098315239, "rewards/rejected": -0.04578544571995735, "step": 910 }, { "epoch": 0.5448623038199586, "grad_norm": 10.3125, "learning_rate": 2.543946942302944e-07, "log_odds_chosen": -0.21979165077209473, "log_odds_ratio": -0.8726961016654968, "logits/chosen": -2.2551956176757812, "logits/rejected": -2.2191715240478516, "logps/chosen": -0.5772194862365723, "logps/rejected": -0.45551061630249023, "loss": 1.0123, "nll_loss": 0.9414900541305542, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -0.057721953839063644, "rewards/margins": -0.012170888483524323, "rewards/rejected": -0.04555106535553932, "step": 920 }, { "epoch": 0.5507847201658277, "grad_norm": 11.1875, "learning_rate": 2.492244270208158e-07, "log_odds_chosen": -0.1632816195487976, "log_odds_ratio": -0.8366379737854004, "logits/chosen": -2.2645580768585205, "logits/rejected": -2.2385404109954834, "logps/chosen": -0.5705746412277222, "logps/rejected": -0.48298463225364685, "loss": 0.9953, "nll_loss": 0.9456483721733093, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.05705747753381729, "rewards/margins": -0.008759009651839733, "rewards/rejected": -0.048298463225364685, "step": 930 }, { "epoch": 0.5567071365116968, "grad_norm": 9.5, "learning_rate": 2.440544915464078e-07, "log_odds_chosen": -0.2142259180545807, "log_odds_ratio": -0.8674869537353516, "logits/chosen": -2.294877290725708, "logits/rejected": -2.2555816173553467, "logps/chosen": -0.5593573451042175, "logps/rejected": -0.45421138405799866, "loss": 1.0237, "nll_loss": 0.9162901043891907, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.055935733020305634, "rewards/margins": -0.01051459088921547, "rewards/rejected": -0.045421142131090164, "step": 940 }, { "epoch": 0.5626295528575659, "grad_norm": 10.0625, "learning_rate": 2.3888709913850593e-07, "log_odds_chosen": -0.21557164192199707, "log_odds_ratio": -0.8706417083740234, "logits/chosen": -2.3428778648376465, "logits/rejected": -2.3050456047058105, "logps/chosen": -0.5851597785949707, "logps/rejected": -0.4760478436946869, "loss": 1.0825, "nll_loss": 0.9482911825180054, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.05851597711443901, "rewards/margins": -0.010911193676292896, "rewards/rejected": -0.04760478436946869, "step": 950 }, { "epoch": 0.568551969203435, "grad_norm": 9.75, "learning_rate": 2.337244600408025e-07, "log_odds_chosen": -0.30868110060691833, "log_odds_ratio": -0.9379078149795532, "logits/chosen": -2.3101601600646973, "logits/rejected": -2.2805612087249756, "logps/chosen": -0.6376503109931946, "logps/rejected": -0.4714363217353821, "loss": 1.051, "nll_loss": 0.983268141746521, "rewards/accuracies": 0.38749998807907104, "rewards/chosen": -0.0637650191783905, "rewards/margins": -0.016621392220258713, "rewards/rejected": -0.04714363440871239, "step": 960 }, { "epoch": 0.5744743855493041, "grad_norm": 11.9375, "learning_rate": 2.2856878246386085e-07, "log_odds_chosen": -0.20517487823963165, "log_odds_ratio": -0.8652151226997375, "logits/chosen": -2.306201457977295, "logits/rejected": -2.283665180206299, "logps/chosen": -0.5846830606460571, "logps/rejected": -0.4740404486656189, "loss": 1.0953, "nll_loss": 1.0276809930801392, "rewards/accuracies": 0.40625, "rewards/chosen": -0.058468304574489594, "rewards/margins": -0.011064260266721249, "rewards/rejected": -0.04740404710173607, "step": 970 }, { "epoch": 0.5803968018951732, "grad_norm": 13.0, "learning_rate": 2.2342227164060035e-07, "log_odds_chosen": -0.2963787019252777, "log_odds_ratio": -0.9264262318611145, "logits/chosen": -2.2660953998565674, "logits/rejected": -2.211947441101074, "logps/chosen": -0.6310227513313293, "logps/rejected": -0.4772140085697174, "loss": 1.0355, "nll_loss": 0.916420578956604, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.06310227513313293, "rewards/margins": -0.015380874276161194, "rewards/rejected": -0.04772140458226204, "step": 980 }, { "epoch": 0.5863192182410424, "grad_norm": 9.5625, "learning_rate": 2.182871288830533e-07, "log_odds_chosen": -0.3251541554927826, "log_odds_ratio": -0.941790759563446, "logits/chosen": -2.293196439743042, "logits/rejected": -2.232034206390381, "logps/chosen": -0.6307833790779114, "logps/rejected": -0.4696255624294281, "loss": 1.0677, "nll_loss": 0.967657208442688, "rewards/accuracies": 0.35624998807907104, "rewards/chosen": -0.06307834386825562, "rewards/margins": -0.016115780919790268, "rewards/rejected": -0.04696255922317505, "step": 990 }, { "epoch": 0.5922416345869115, "grad_norm": 9.6875, "learning_rate": 2.131655506408007e-07, "log_odds_chosen": -0.22425034642219543, "log_odds_ratio": -0.8798470497131348, "logits/chosen": -2.2940893173217773, "logits/rejected": -2.254329204559326, "logps/chosen": -0.5970818400382996, "logps/rejected": -0.48467540740966797, "loss": 1.0208, "nll_loss": 0.9316588640213013, "rewards/accuracies": 0.4375, "rewards/chosen": -0.059708189219236374, "rewards/margins": -0.011240655556321144, "rewards/rejected": -0.04846753552556038, "step": 1000 }, { "epoch": 0.5981640509327806, "grad_norm": 9.5, "learning_rate": 2.0805972756148643e-07, "log_odds_chosen": -0.3093208074569702, "log_odds_ratio": -0.9420243501663208, "logits/chosen": -2.2883636951446533, "logits/rejected": -2.275327682495117, "logps/chosen": -0.6675941348075867, "logps/rejected": -0.47907954454421997, "loss": 1.0708, "nll_loss": 1.0012794733047485, "rewards/accuracies": 0.36250001192092896, "rewards/chosen": -0.06675940752029419, "rewards/margins": -0.01885146275162697, "rewards/rejected": -0.047907955944538116, "step": 1010 }, { "epoch": 0.6040864672786497, "grad_norm": 9.1875, "learning_rate": 2.0297184355381432e-07, "log_odds_chosen": -0.2639048993587494, "log_odds_ratio": -0.89494389295578, "logits/chosen": -2.304008722305298, "logits/rejected": -2.265723705291748, "logps/chosen": -0.5768560767173767, "logps/rejected": -0.4624248445034027, "loss": 1.0328, "nll_loss": 0.9577334523200989, "rewards/accuracies": 0.41874998807907104, "rewards/chosen": -0.05768561363220215, "rewards/margins": -0.011443129740655422, "rewards/rejected": -0.04624248296022415, "step": 1020 }, { "epoch": 0.6100088836245188, "grad_norm": 9.125, "learning_rate": 1.9790407485342638e-07, "log_odds_chosen": -0.3557616174221039, "log_odds_ratio": -0.9650157690048218, "logits/chosen": -2.327831268310547, "logits/rejected": -2.2884087562561035, "logps/chosen": -0.6429619193077087, "logps/rejected": -0.4408210217952728, "loss": 1.0091, "nll_loss": 0.9397379755973816, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.06429620087146759, "rewards/margins": -0.02021409198641777, "rewards/rejected": -0.04408210515975952, "step": 1030 }, { "epoch": 0.6159312999703879, "grad_norm": 10.75, "learning_rate": 1.928585890920641e-07, "log_odds_chosen": -0.1900234967470169, "log_odds_ratio": -0.8621436953544617, "logits/chosen": -2.2921512126922607, "logits/rejected": -2.2576987743377686, "logps/chosen": -0.5736020803451538, "logps/rejected": -0.46828731894493103, "loss": 1.0474, "nll_loss": 0.9162224531173706, "rewards/accuracies": 0.41874998807907104, "rewards/chosen": -0.0573602095246315, "rewards/margins": -0.010531473904848099, "rewards/rejected": -0.046828728169202805, "step": 1040 }, { "epoch": 0.6218537163162571, "grad_norm": 11.875, "learning_rate": 1.8783754437040902e-07, "log_odds_chosen": -0.26852238178253174, "log_odds_ratio": -0.9126049280166626, "logits/chosen": -2.275580883026123, "logits/rejected": -2.2431647777557373, "logps/chosen": -0.5689065456390381, "logps/rejected": -0.44645556807518005, "loss": 1.0095, "nll_loss": 0.9046837091445923, "rewards/accuracies": 0.39375001192092896, "rewards/chosen": -0.05689065903425217, "rewards/margins": -0.012245100922882557, "rewards/rejected": -0.044645555317401886, "step": 1050 }, { "epoch": 0.6277761326621262, "grad_norm": 9.25, "learning_rate": 1.8284308833500118e-07, "log_odds_chosen": -0.2125154435634613, "log_odds_ratio": -0.8751262426376343, "logits/chosen": -2.277667760848999, "logits/rejected": -2.253131866455078, "logps/chosen": -0.5812402963638306, "logps/rejected": -0.47419658303260803, "loss": 1.0476, "nll_loss": 0.93915194272995, "rewards/accuracies": 0.46875, "rewards/chosen": -0.05812402814626694, "rewards/margins": -0.010704366490244865, "rewards/rejected": -0.04741965979337692, "step": 1060 }, { "epoch": 0.6336985490079953, "grad_norm": 11.0625, "learning_rate": 1.7787735725962756e-07, "log_odds_chosen": -0.27183157205581665, "log_odds_ratio": -0.9005556106567383, "logits/chosen": -2.2851767539978027, "logits/rejected": -2.2494091987609863, "logps/chosen": -0.613685667514801, "logps/rejected": -0.47953805327415466, "loss": 1.0919, "nll_loss": 0.9954058527946472, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.06136856600642204, "rewards/margins": -0.013414761051535606, "rewards/rejected": -0.047953806817531586, "step": 1070 }, { "epoch": 0.6396209653538644, "grad_norm": 9.375, "learning_rate": 1.7294247513157616e-07, "log_odds_chosen": -0.22400331497192383, "log_odds_ratio": -0.8672366142272949, "logits/chosen": -2.3089351654052734, "logits/rejected": -2.2596447467803955, "logps/chosen": -0.5711158514022827, "logps/rejected": -0.46820420026779175, "loss": 1.0251, "nll_loss": 0.960826575756073, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.05711158365011215, "rewards/margins": -0.010291163809597492, "rewards/rejected": -0.046820417046546936, "step": 1080 }, { "epoch": 0.6455433816997335, "grad_norm": 10.0625, "learning_rate": 1.6804055274314494e-07, "log_odds_chosen": -0.19274529814720154, "log_odds_ratio": -0.8532935380935669, "logits/chosen": -2.270355224609375, "logits/rejected": -2.248356342315674, "logps/chosen": -0.5621662735939026, "logps/rejected": -0.47271862626075745, "loss": 1.0217, "nll_loss": 0.9073405265808105, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": -0.056216634809970856, "rewards/margins": -0.008944764733314514, "rewards/rejected": -0.04727186635136604, "step": 1090 }, { "epoch": 0.6514657980456026, "grad_norm": 10.25, "learning_rate": 1.6317368678879496e-07, "log_odds_chosen": -0.20030847191810608, "log_odds_ratio": -0.8486258387565613, "logits/chosen": -2.3088138103485107, "logits/rejected": -2.27048659324646, "logps/chosen": -0.5797799825668335, "logps/rejected": -0.4854944348335266, "loss": 1.0725, "nll_loss": 0.9621385335922241, "rewards/accuracies": 0.40625, "rewards/chosen": -0.05797800421714783, "rewards/margins": -0.009428557008504868, "rewards/rejected": -0.04854945093393326, "step": 1100 }, { "epoch": 0.6573882143914718, "grad_norm": 12.125, "learning_rate": 1.5834395896833281e-07, "log_odds_chosen": -0.3109692335128784, "log_odds_ratio": -0.9263485670089722, "logits/chosen": -2.3202879428863525, "logits/rejected": -2.265725612640381, "logps/chosen": -0.6155823469161987, "logps/rejected": -0.4582076966762543, "loss": 1.0499, "nll_loss": 0.9659247398376465, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": -0.06155823543667793, "rewards/margins": -0.015737462788820267, "rewards/rejected": -0.045820772647857666, "step": 1110 }, { "epoch": 0.6633106307373409, "grad_norm": 9.9375, "learning_rate": 1.535534350965075e-07, "log_odds_chosen": -0.25020501017570496, "log_odds_ratio": -0.8859984278678894, "logits/chosen": -2.3179831504821777, "logits/rejected": -2.3054070472717285, "logps/chosen": -0.5626355409622192, "logps/rejected": -0.434339702129364, "loss": 1.0081, "nll_loss": 0.9209376573562622, "rewards/accuracies": 0.41874998807907104, "rewards/chosen": -0.056263554841279984, "rewards/margins": -0.012829584069550037, "rewards/rejected": -0.04343396797776222, "step": 1120 }, { "epoch": 0.66923304708321, "grad_norm": 15.625, "learning_rate": 1.4880416421940154e-07, "log_odds_chosen": -0.23923833668231964, "log_odds_ratio": -0.8853415250778198, "logits/chosen": -2.26355242729187, "logits/rejected": -2.240990161895752, "logps/chosen": -0.6214331388473511, "logps/rejected": -0.4842914938926697, "loss": 1.1113, "nll_loss": 1.0326354503631592, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": -0.062143318355083466, "rewards/margins": -0.01371416263282299, "rewards/rejected": -0.048429153859615326, "step": 1130 }, { "epoch": 0.6751554634290791, "grad_norm": 12.75, "learning_rate": 1.4409817773799459e-07, "log_odds_chosen": -0.23250596225261688, "log_odds_ratio": -0.8853020668029785, "logits/chosen": -2.288491725921631, "logits/rejected": -2.24708890914917, "logps/chosen": -0.6100078225135803, "logps/rejected": -0.4815722405910492, "loss": 1.0552, "nll_loss": 0.9337055087089539, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": -0.061000775545835495, "rewards/margins": -0.01284355204552412, "rewards/rejected": -0.0481572225689888, "step": 1140 }, { "epoch": 0.6810778797749482, "grad_norm": 10.4375, "learning_rate": 1.3943748853927385e-07, "log_odds_chosen": -0.3103570342063904, "log_odds_ratio": -0.9324914216995239, "logits/chosen": -2.28434419631958, "logits/rejected": -2.277893543243408, "logps/chosen": -0.64482182264328, "logps/rejected": -0.46989989280700684, "loss": 1.035, "nll_loss": 0.934810996055603, "rewards/accuracies": 0.3687500059604645, "rewards/chosen": -0.064482182264328, "rewards/margins": -0.017492195591330528, "rewards/rejected": -0.046989988535642624, "step": 1150 }, { "epoch": 0.6870002961208173, "grad_norm": 11.375, "learning_rate": 1.3482409013526436e-07, "log_odds_chosen": -0.3323788642883301, "log_odds_ratio": -0.9415693283081055, "logits/chosen": -2.272247791290283, "logits/rejected": -2.2672269344329834, "logps/chosen": -0.6134747862815857, "logps/rejected": -0.4571937918663025, "loss": 1.0638, "nll_loss": 0.9829813241958618, "rewards/accuracies": 0.3812499940395355, "rewards/chosen": -0.06134747713804245, "rewards/margins": -0.01562810130417347, "rewards/rejected": -0.04571938142180443, "step": 1160 }, { "epoch": 0.6929227124666865, "grad_norm": 9.4375, "learning_rate": 1.302599558103456e-07, "log_odds_chosen": -0.23517660796642303, "log_odds_ratio": -0.8992069363594055, "logits/chosen": -2.3287193775177, "logits/rejected": -2.293454885482788, "logps/chosen": -0.6200941801071167, "logps/rejected": -0.4878036081790924, "loss": 1.0413, "nll_loss": 0.9660770297050476, "rewards/accuracies": 0.4312500059604645, "rewards/chosen": -0.06200941652059555, "rewards/margins": -0.013229051604866982, "rewards/rejected": -0.04878035932779312, "step": 1170 }, { "epoch": 0.6988451288125556, "grad_norm": 13.0625, "learning_rate": 1.257470377772214e-07, "log_odds_chosen": -0.27837398648262024, "log_odds_ratio": -0.9113019704818726, "logits/chosen": -2.3072619438171387, "logits/rejected": -2.282047748565674, "logps/chosen": -0.5952633023262024, "logps/rejected": -0.4496152400970459, "loss": 1.0661, "nll_loss": 0.9518778920173645, "rewards/accuracies": 0.39375001192092896, "rewards/chosen": -0.05952633172273636, "rewards/margins": -0.014564801938831806, "rewards/rejected": -0.04496152698993683, "step": 1180 }, { "epoch": 0.7047675451584247, "grad_norm": 13.5, "learning_rate": 1.2128726634190046e-07, "log_odds_chosen": -0.26337355375289917, "log_odds_ratio": -0.8862990140914917, "logits/chosen": -2.3180294036865234, "logits/rejected": -2.274146556854248, "logps/chosen": -0.5859608054161072, "logps/rejected": -0.44980812072753906, "loss": 1.0204, "nll_loss": 0.91375333070755, "rewards/accuracies": 0.375, "rewards/chosen": -0.05859608203172684, "rewards/margins": -0.01361527293920517, "rewards/rejected": -0.044980812817811966, "step": 1190 }, { "epoch": 0.7106899615042938, "grad_norm": 13.3125, "learning_rate": 1.1688254907804992e-07, "log_odds_chosen": -0.2645830512046814, "log_odds_ratio": -0.9049927592277527, "logits/chosen": -2.2710115909576416, "logits/rejected": -2.2327637672424316, "logps/chosen": -0.6210035085678101, "logps/rejected": -0.48435431718826294, "loss": 1.0683, "nll_loss": 0.9852622747421265, "rewards/accuracies": 0.38749998807907104, "rewards/chosen": -0.06210034340620041, "rewards/margins": -0.01366492174565792, "rewards/rejected": -0.048435427248477936, "step": 1200 }, { "epoch": 0.7166123778501629, "grad_norm": 9.1875, "learning_rate": 1.1253477001106956e-07, "log_odds_chosen": -0.18010739982128143, "log_odds_ratio": -0.848807156085968, "logits/chosen": -2.2503340244293213, "logits/rejected": -2.214433431625366, "logps/chosen": -0.5777139663696289, "logps/rejected": -0.48649734258651733, "loss": 1.0408, "nll_loss": 0.9145431518554688, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": -0.05777139216661453, "rewards/margins": -0.009121658280491829, "rewards/rejected": -0.04864973947405815, "step": 1210 }, { "epoch": 0.722534794196032, "grad_norm": 14.1875, "learning_rate": 1.0824578881224065e-07, "log_odds_chosen": -0.14203877747058868, "log_odds_ratio": -0.8198834657669067, "logits/chosen": -2.323948621749878, "logits/rejected": -2.3119778633117676, "logps/chosen": -0.5389841794967651, "logps/rejected": -0.4636968672275543, "loss": 0.9852, "nll_loss": 0.871512770652771, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.053898416459560394, "rewards/margins": -0.007528733462095261, "rewards/rejected": -0.046369682997465134, "step": 1220 }, { "epoch": 0.728457210541901, "grad_norm": 10.25, "learning_rate": 1.0401744000328918e-07, "log_odds_chosen": -0.19983641803264618, "log_odds_ratio": -0.8725547790527344, "logits/chosen": -2.268932342529297, "logits/rejected": -2.2664635181427, "logps/chosen": -0.5955653786659241, "logps/rejected": -0.4910568296909332, "loss": 1.0167, "nll_loss": 0.9245740175247192, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": -0.059556543827056885, "rewards/margins": -0.010450851172208786, "rewards/rejected": -0.0491056926548481, "step": 1230 }, { "epoch": 0.7343796268877703, "grad_norm": 11.25, "learning_rate": 9.985153217170902e-08, "log_odds_chosen": -0.27591392397880554, "log_odds_ratio": -0.9048240780830383, "logits/chosen": -2.3324825763702393, "logits/rejected": -2.3199105262756348, "logps/chosen": -0.613168478012085, "logps/rejected": -0.47140389680862427, "loss": 1.1081, "nll_loss": 1.0194193124771118, "rewards/accuracies": 0.40625, "rewards/chosen": -0.061316847801208496, "rewards/margins": -0.01417645812034607, "rewards/rejected": -0.047140393406152725, "step": 1240 }, { "epoch": 0.7403020432336394, "grad_norm": 12.25, "learning_rate": 9.574984719717553e-08, "log_odds_chosen": -0.24321213364601135, "log_odds_ratio": -0.89483243227005, "logits/chosen": -2.3112952709198, "logits/rejected": -2.2951555252075195, "logps/chosen": -0.5895348191261292, "logps/rejected": -0.4669637680053711, "loss": 1.0306, "nll_loss": 0.9830119013786316, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.05895348638296127, "rewards/margins": -0.012257112190127373, "rewards/rejected": -0.04669637233018875, "step": 1250 }, { "epoch": 0.7462244595795084, "grad_norm": 9.9375, "learning_rate": 9.171413948938459e-08, "log_odds_chosen": -0.2236686646938324, "log_odds_ratio": -0.879412829875946, "logits/chosen": -2.3061726093292236, "logits/rejected": -2.254133701324463, "logps/chosen": -0.6122428178787231, "logps/rejected": -0.49692878127098083, "loss": 1.0596, "nll_loss": 0.9902396202087402, "rewards/accuracies": 0.4375, "rewards/chosen": -0.061224281787872314, "rewards/margins": -0.011531401425600052, "rewards/rejected": -0.04969288408756256, "step": 1260 }, { "epoch": 0.7521468759253775, "grad_norm": 9.4375, "learning_rate": 8.774613523764049e-08, "log_odds_chosen": -0.26704955101013184, "log_odds_ratio": -0.8915314674377441, "logits/chosen": -2.2866809368133545, "logits/rejected": -2.239720582962036, "logps/chosen": -0.5904482007026672, "logps/rejected": -0.4593755304813385, "loss": 1.0287, "nll_loss": 0.9099699854850769, "rewards/accuracies": 0.39375001192092896, "rewards/chosen": -0.059044819325208664, "rewards/margins": -0.013107270002365112, "rewards/rejected": -0.04593754559755325, "step": 1270 }, { "epoch": 0.7580692922712466, "grad_norm": 11.125, "learning_rate": 8.384753167251412e-08, "log_odds_chosen": -0.2359321415424347, "log_odds_ratio": -0.8834274411201477, "logits/chosen": -2.241650104522705, "logits/rejected": -2.2175180912017822, "logps/chosen": -0.5696910619735718, "logps/rejected": -0.4511106610298157, "loss": 0.9877, "nll_loss": 0.8763992190361023, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.05696910619735718, "rewards/margins": -0.01185804232954979, "rewards/rejected": -0.045111071318387985, "step": 1280 }, { "epoch": 0.7639917086171157, "grad_norm": 10.1875, "learning_rate": 8.001999633988942e-08, "log_odds_chosen": -0.26344627141952515, "log_odds_ratio": -0.8965330123901367, "logits/chosen": -2.317347764968872, "logits/rejected": -2.2693257331848145, "logps/chosen": -0.5864616632461548, "logps/rejected": -0.45855003595352173, "loss": 0.9993, "nll_loss": 0.9034452438354492, "rewards/accuracies": 0.40625, "rewards/chosen": -0.058646153658628464, "rewards/margins": -0.012791156768798828, "rewards/rejected": -0.04585500434041023, "step": 1290 }, { "epoch": 0.769914124962985, "grad_norm": 10.4375, "learning_rate": 7.62651663877042e-08, "log_odds_chosen": -0.17867620289325714, "log_odds_ratio": -0.8561042547225952, "logits/chosen": -2.2582385540008545, "logits/rejected": -2.232391357421875, "logps/chosen": -0.5790480971336365, "logps/rejected": -0.4850679337978363, "loss": 1.0993, "nll_loss": 0.9781789779663086, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -0.057904817163944244, "rewards/margins": -0.009398018009960651, "rewards/rejected": -0.04850679263472557, "step": 1300 }, { "epoch": 0.775836541308854, "grad_norm": 9.0, "learning_rate": 7.258464786569549e-08, "log_odds_chosen": -0.2144562005996704, "log_odds_ratio": -0.8685463070869446, "logits/chosen": -2.322035551071167, "logits/rejected": -2.2717068195343018, "logps/chosen": -0.5770824551582336, "logps/rejected": -0.47164034843444824, "loss": 1.0633, "nll_loss": 0.9638098478317261, "rewards/accuracies": 0.4437499940395355, "rewards/chosen": -0.057708241045475006, "rewards/margins": -0.010544205084443092, "rewards/rejected": -0.04716403782367706, "step": 1310 }, { "epoch": 0.7817589576547231, "grad_norm": 12.375, "learning_rate": 6.898001503844483e-08, "log_odds_chosen": -0.3992167115211487, "log_odds_ratio": -1.0115876197814941, "logits/chosen": -2.3506951332092285, "logits/rejected": -2.3084568977355957, "logps/chosen": -0.7245315313339233, "logps/rejected": -0.4761766493320465, "loss": 1.0509, "nll_loss": 1.0009998083114624, "rewards/accuracies": 0.40625, "rewards/chosen": -0.07245315611362457, "rewards/margins": -0.0248354934155941, "rewards/rejected": -0.04761766642332077, "step": 1320 }, { "epoch": 0.7876813740005922, "grad_norm": 11.3125, "learning_rate": 6.545280971202014e-08, "log_odds_chosen": -0.17274455726146698, "log_odds_ratio": -0.8463727831840515, "logits/chosen": -2.310338020324707, "logits/rejected": -2.2806801795959473, "logps/chosen": -0.5623282194137573, "logps/rejected": -0.46932634711265564, "loss": 1.0128, "nll_loss": 0.9555832147598267, "rewards/accuracies": 0.4437499940395355, "rewards/chosen": -0.05623283237218857, "rewards/margins": -0.009300192818045616, "rewards/rejected": -0.046932633966207504, "step": 1330 }, { "epoch": 0.7936037903464613, "grad_norm": 11.0625, "learning_rate": 6.200454057450022e-08, "log_odds_chosen": -0.2566189169883728, "log_odds_ratio": -0.8830870389938354, "logits/chosen": -2.2640976905822754, "logits/rejected": -2.2190680503845215, "logps/chosen": -0.6031737327575684, "logps/rejected": -0.4697316586971283, "loss": 1.0756, "nll_loss": 0.9159650802612305, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.060317374765872955, "rewards/margins": -0.01334420870989561, "rewards/rejected": -0.04697316139936447, "step": 1340 }, { "epoch": 0.7995262066923304, "grad_norm": 12.0, "learning_rate": 5.863668255066492e-08, "log_odds_chosen": -0.2177290916442871, "log_odds_ratio": -0.8585535287857056, "logits/chosen": -2.262441396713257, "logits/rejected": -2.231968402862549, "logps/chosen": -0.5860260128974915, "logps/rejected": -0.47981762886047363, "loss": 1.0081, "nll_loss": 0.9461213946342468, "rewards/accuracies": 0.3687500059604645, "rewards/chosen": -0.05860259383916855, "rewards/margins": -0.010620838031172752, "rewards/rejected": -0.047981761395931244, "step": 1350 }, { "epoch": 0.8054486230381996, "grad_norm": 9.625, "learning_rate": 5.53506761711274e-08, "log_odds_chosen": -0.21258850395679474, "log_odds_ratio": -0.8654868006706238, "logits/chosen": -2.2940022945404053, "logits/rejected": -2.264361619949341, "logps/chosen": -0.5948741436004639, "logps/rejected": -0.48127132654190063, "loss": 1.0435, "nll_loss": 1.0004308223724365, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.059487421065568924, "rewards/margins": -0.0113602876663208, "rewards/rejected": -0.04812713339924812, "step": 1360 }, { "epoch": 0.8113710393840687, "grad_norm": 14.375, "learning_rate": 5.2147926956177174e-08, "log_odds_chosen": -0.3361436724662781, "log_odds_ratio": -0.9543386697769165, "logits/chosen": -2.2842912673950195, "logits/rejected": -2.2753098011016846, "logps/chosen": -0.6304486989974976, "logps/rejected": -0.4559609293937683, "loss": 1.0422, "nll_loss": 0.9697739481925964, "rewards/accuracies": 0.35624998807907104, "rewards/chosen": -0.06304488331079483, "rewards/margins": -0.0174487866461277, "rewards/rejected": -0.04559609293937683, "step": 1370 }, { "epoch": 0.8172934557299378, "grad_norm": 22.625, "learning_rate": 4.902980481459834e-08, "log_odds_chosen": -0.18400034308433533, "log_odds_ratio": -0.8533352017402649, "logits/chosen": -2.267984390258789, "logits/rejected": -2.240002155303955, "logps/chosen": -0.5833351016044617, "logps/rejected": -0.4882822632789612, "loss": 1.0013, "nll_loss": 0.9279516935348511, "rewards/accuracies": 0.40625, "rewards/chosen": -0.058333516120910645, "rewards/margins": -0.009505288675427437, "rewards/rejected": -0.04882822558283806, "step": 1380 }, { "epoch": 0.8232158720758069, "grad_norm": 11.8125, "learning_rate": 4.5997643457719646e-08, "log_odds_chosen": -0.2714422643184662, "log_odds_ratio": -0.8982048034667969, "logits/chosen": -2.2855401039123535, "logits/rejected": -2.2796995639801025, "logps/chosen": -0.5933629274368286, "logps/rejected": -0.45899391174316406, "loss": 0.9938, "nll_loss": 0.9157652854919434, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": -0.05933629721403122, "rewards/margins": -0.01343690324574709, "rewards/rejected": -0.045899391174316406, "step": 1390 }, { "epoch": 0.829138288421676, "grad_norm": 11.125, "learning_rate": 4.305273982894772e-08, "log_odds_chosen": -0.24461349844932556, "log_odds_ratio": -0.8896273374557495, "logits/chosen": -2.3211405277252197, "logits/rejected": -2.279554843902588, "logps/chosen": -0.6189180612564087, "logps/rejected": -0.4841720461845398, "loss": 1.041, "nll_loss": 0.9456349611282349, "rewards/accuracies": 0.4375, "rewards/chosen": -0.06189180538058281, "rewards/margins": -0.013474604114890099, "rewards/rejected": -0.04841720312833786, "step": 1400 }, { "epoch": 0.8350607047675451, "grad_norm": 11.8125, "learning_rate": 4.0196353549026786e-08, "log_odds_chosen": -0.1991504579782486, "log_odds_ratio": -0.8548718690872192, "logits/chosen": -2.288534641265869, "logits/rejected": -2.2532122135162354, "logps/chosen": -0.5849851965904236, "logps/rejected": -0.48299694061279297, "loss": 1.0681, "nll_loss": 1.0149555206298828, "rewards/accuracies": 0.4375, "rewards/chosen": -0.05849852040410042, "rewards/margins": -0.010198831558227539, "rewards/rejected": -0.04829969257116318, "step": 1410 }, { "epoch": 0.8409831211134142, "grad_norm": 9.625, "learning_rate": 3.742970637726181e-08, "log_odds_chosen": -0.09389691054821014, "log_odds_ratio": -0.8085994720458984, "logits/chosen": -2.3118512630462646, "logits/rejected": -2.2662172317504883, "logps/chosen": -0.5374116897583008, "logps/rejected": -0.4831947386264801, "loss": 1.0166, "nll_loss": 0.9142959713935852, "rewards/accuracies": 0.46875, "rewards/chosen": -0.05374116823077202, "rewards/margins": -0.005421696230769157, "rewards/rejected": -0.04831947013735771, "step": 1420 }, { "epoch": 0.8469055374592834, "grad_norm": 13.0, "learning_rate": 3.4753981688937284e-08, "log_odds_chosen": -0.23033122718334198, "log_odds_ratio": -0.8797691464424133, "logits/chosen": -2.2840065956115723, "logits/rejected": -2.2577414512634277, "logps/chosen": -0.5791336297988892, "logps/rejected": -0.46595969796180725, "loss": 1.0562, "nll_loss": 0.9663812518119812, "rewards/accuracies": 0.40625, "rewards/chosen": -0.05791335552930832, "rewards/margins": -0.011317392811179161, "rewards/rejected": -0.046595968306064606, "step": 1430 }, { "epoch": 0.8528279538051525, "grad_norm": 16.5, "learning_rate": 3.217032396915265e-08, "log_odds_chosen": -0.28934675455093384, "log_odds_ratio": -0.925268292427063, "logits/chosen": -2.294243335723877, "logits/rejected": -2.26255464553833, "logps/chosen": -0.6588538885116577, "logps/rejected": -0.4810880720615387, "loss": 1.0625, "nll_loss": 0.9974772334098816, "rewards/accuracies": 0.4312500059604645, "rewards/chosen": -0.06588538736104965, "rewards/margins": -0.01777658611536026, "rewards/rejected": -0.04810880497097969, "step": 1440 }, { "epoch": 0.8587503701510216, "grad_norm": 13.5625, "learning_rate": 2.9679838323293404e-08, "log_odds_chosen": -0.30326423048973083, "log_odds_ratio": -0.9402921795845032, "logits/chosen": -2.285403251647949, "logits/rejected": -2.2570960521698, "logps/chosen": -0.6499019265174866, "logps/rejected": -0.485442578792572, "loss": 1.0074, "nll_loss": 0.9335571527481079, "rewards/accuracies": 0.39375001192092896, "rewards/chosen": -0.06499020010232925, "rewards/margins": -0.016445934772491455, "rewards/rejected": -0.0485442578792572, "step": 1450 }, { "epoch": 0.8646727864968907, "grad_norm": 11.875, "learning_rate": 2.728359000434488e-08, "log_odds_chosen": -0.25829392671585083, "log_odds_ratio": -0.8930153846740723, "logits/chosen": -2.316516399383545, "logits/rejected": -2.283731460571289, "logps/chosen": -0.5545108318328857, "logps/rejected": -0.4498627185821533, "loss": 1.048, "nll_loss": 0.9053192138671875, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.055451083928346634, "rewards/margins": -0.010464807972311974, "rewards/rejected": -0.04498627781867981, "step": 1460 }, { "epoch": 0.8705952028427598, "grad_norm": 10.75, "learning_rate": 2.498260395725302e-08, "log_odds_chosen": -0.25851163268089294, "log_odds_ratio": -0.8944876790046692, "logits/chosen": -2.281040906906128, "logits/rejected": -2.26870059967041, "logps/chosen": -0.6054626703262329, "logps/rejected": -0.48731446266174316, "loss": 1.0483, "nll_loss": 0.9450349807739258, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.060546260327100754, "rewards/margins": -0.011814813129603863, "rewards/rejected": -0.048731446266174316, "step": 1470 }, { "epoch": 0.8765176191886289, "grad_norm": 10.3125, "learning_rate": 2.2777864380525426e-08, "log_odds_chosen": -0.20190663635730743, "log_odds_ratio": -0.8694218397140503, "logits/chosen": -2.288378953933716, "logits/rejected": -2.2683846950531006, "logps/chosen": -0.5955201387405396, "logps/rejected": -0.4755355417728424, "loss": 1.0093, "nll_loss": 0.8863022923469543, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -0.059552013874053955, "rewards/margins": -0.01199845876544714, "rewards/rejected": -0.04755355045199394, "step": 1480 }, { "epoch": 0.8824400355344981, "grad_norm": 11.125, "learning_rate": 2.0670314305261423e-08, "log_odds_chosen": -0.21881277859210968, "log_odds_ratio": -0.8681440353393555, "logits/chosen": -2.3011648654937744, "logits/rejected": -2.2739992141723633, "logps/chosen": -0.5647403597831726, "logps/rejected": -0.46096763014793396, "loss": 0.9903, "nll_loss": 0.9155017733573914, "rewards/accuracies": 0.4437499940395355, "rewards/chosen": -0.05647404119372368, "rewards/margins": -0.010377271100878716, "rewards/rejected": -0.046096768230199814, "step": 1490 }, { "epoch": 0.8883624518803672, "grad_norm": 15.4375, "learning_rate": 1.866085519178995e-08, "log_odds_chosen": -0.21367135643959045, "log_odds_ratio": -0.8818863034248352, "logits/chosen": -2.283823013305664, "logits/rejected": -2.262935161590576, "logps/chosen": -0.6190184354782104, "logps/rejected": -0.518616259098053, "loss": 1.0823, "nll_loss": 1.0067201852798462, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.06190184876322746, "rewards/margins": -0.010040223598480225, "rewards/rejected": -0.05186162516474724, "step": 1500 }, { "epoch": 0.8942848682262363, "grad_norm": 10.0625, "learning_rate": 1.675034654408894e-08, "log_odds_chosen": -0.2969823479652405, "log_odds_ratio": -0.9049533605575562, "logits/chosen": -2.3211445808410645, "logits/rejected": -2.293593168258667, "logps/chosen": -0.5710967779159546, "logps/rejected": -0.44247856736183167, "loss": 1.0186, "nll_loss": 0.9544011354446411, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.057109683752059937, "rewards/margins": -0.012861823663115501, "rewards/rejected": -0.044247858226299286, "step": 1510 }, { "epoch": 0.9002072845721054, "grad_norm": 11.8125, "learning_rate": 1.4939605542150595e-08, "log_odds_chosen": -0.20066659152507782, "log_odds_ratio": -0.880477249622345, "logits/chosen": -2.306097984313965, "logits/rejected": -2.2691056728363037, "logps/chosen": -0.630598247051239, "logps/rejected": -0.5075589418411255, "loss": 1.0954, "nll_loss": 0.9971143007278442, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": -0.06305982172489166, "rewards/margins": -0.01230393536388874, "rewards/rejected": -0.05075589567422867, "step": 1520 }, { "epoch": 0.9061297009179745, "grad_norm": 11.0, "learning_rate": 1.3229406692449791e-08, "log_odds_chosen": -0.14233054220676422, "log_odds_ratio": -0.8427847623825073, "logits/chosen": -2.2426674365997314, "logits/rejected": -2.2174274921417236, "logps/chosen": -0.5756295919418335, "logps/rejected": -0.4939804971218109, "loss": 1.0628, "nll_loss": 0.9542373418807983, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -0.05756296589970589, "rewards/margins": -0.008164914324879646, "rewards/rejected": -0.04939804598689079, "step": 1530 }, { "epoch": 0.9120521172638436, "grad_norm": 10.6875, "learning_rate": 1.162048149666503e-08, "log_odds_chosen": -0.209940105676651, "log_odds_ratio": -0.8944632411003113, "logits/chosen": -2.2973880767822266, "logits/rejected": -2.255645990371704, "logps/chosen": -0.6111503839492798, "logps/rejected": -0.49909108877182007, "loss": 1.0425, "nll_loss": 0.9487366676330566, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.061115045100450516, "rewards/margins": -0.011205929331481457, "rewards/rejected": -0.049909114837646484, "step": 1540 }, { "epoch": 0.9179745336097128, "grad_norm": 9.8125, "learning_rate": 1.0113518138794047e-08, "log_odds_chosen": -0.25878992676734924, "log_odds_ratio": -0.899122416973114, "logits/chosen": -2.2492969036102295, "logits/rejected": -2.2273764610290527, "logps/chosen": -0.5986303091049194, "logps/rejected": -0.47187572717666626, "loss": 1.0612, "nll_loss": 0.9412651062011719, "rewards/accuracies": 0.40625, "rewards/chosen": -0.059863023459911346, "rewards/margins": -0.012675456702709198, "rewards/rejected": -0.04718757048249245, "step": 1550 }, { "epoch": 0.9238969499555819, "grad_norm": 10.375, "learning_rate": 8.709161190797565e-09, "log_odds_chosen": -0.14045746624469757, "log_odds_ratio": -0.8357732892036438, "logits/chosen": -2.3169333934783936, "logits/rejected": -2.2879374027252197, "logps/chosen": -0.5618830919265747, "logps/rejected": -0.4810701012611389, "loss": 1.031, "nll_loss": 0.9191296696662903, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -0.05618830770254135, "rewards/margins": -0.008081300184130669, "rewards/rejected": -0.04810700938105583, "step": 1560 }, { "epoch": 0.929819366301451, "grad_norm": 15.3125, "learning_rate": 7.408011336897141e-09, "log_odds_chosen": -0.323073148727417, "log_odds_ratio": -0.9851021766662598, "logits/chosen": -2.3374483585357666, "logits/rejected": -2.3257203102111816, "logps/chosen": -0.7131141424179077, "logps/rejected": -0.4996616244316101, "loss": 1.0776, "nll_loss": 1.01613450050354, "rewards/accuracies": 0.4375, "rewards/chosen": -0.07131141424179077, "rewards/margins": -0.02134525403380394, "rewards/rejected": -0.04996616020798683, "step": 1570 }, { "epoch": 0.9357417826473201, "grad_norm": 9.5625, "learning_rate": 6.210625116645135e-09, "log_odds_chosen": -0.32444125413894653, "log_odds_ratio": -0.9329547882080078, "logits/chosen": -2.342031955718994, "logits/rejected": -2.3026318550109863, "logps/chosen": -0.6195459365844727, "logps/rejected": -0.45777615904808044, "loss": 1.0033, "nll_loss": 0.8760407567024231, "rewards/accuracies": 0.38749998807907104, "rewards/chosen": -0.061954595148563385, "rewards/margins": -0.0161769799888134, "rewards/rejected": -0.04577761888504028, "step": 1580 }, { "epoch": 0.9416641989931892, "grad_norm": 10.625, "learning_rate": 5.117514686876378e-09, "log_odds_chosen": -0.20949645340442657, "log_odds_ratio": -0.8756229281425476, "logits/chosen": -2.30104398727417, "logits/rejected": -2.2671799659729004, "logps/chosen": -0.5797516703605652, "logps/rejected": -0.4763546586036682, "loss": 1.0455, "nll_loss": 0.9568120837211609, "rewards/accuracies": 0.4312500059604645, "rewards/chosen": -0.057975172996520996, "rewards/margins": -0.010339704342186451, "rewards/rejected": -0.04763546586036682, "step": 1590 }, { "epoch": 0.9475866153390583, "grad_norm": 10.0, "learning_rate": 4.1291476026441565e-09, "log_odds_chosen": -0.14046767354011536, "log_odds_ratio": -0.8268812894821167, "logits/chosen": -2.2659006118774414, "logits/rejected": -2.245576858520508, "logps/chosen": -0.5697029829025269, "logps/rejected": -0.4846652150154114, "loss": 0.9915, "nll_loss": 0.8766274452209473, "rewards/accuracies": 0.5, "rewards/chosen": -0.056970298290252686, "rewards/margins": -0.00850378442555666, "rewards/rejected": -0.0484665185213089, "step": 1600 }, { "epoch": 0.9535090316849274, "grad_norm": 9.8125, "learning_rate": 3.2459466172331253e-09, "log_odds_chosen": -0.25180304050445557, "log_odds_ratio": -0.9306501150131226, "logits/chosen": -2.274780035018921, "logits/rejected": -2.255272626876831, "logps/chosen": -0.6529628038406372, "logps/rejected": -0.48409169912338257, "loss": 1.0873, "nll_loss": 0.9862693548202515, "rewards/accuracies": 0.40625, "rewards/chosen": -0.06529629230499268, "rewards/margins": -0.016887117177248, "rewards/rejected": -0.048409171402454376, "step": 1610 }, { "epoch": 0.9594314480307966, "grad_norm": 13.125, "learning_rate": 2.4682895013354854e-09, "log_odds_chosen": -0.230398491024971, "log_odds_ratio": -0.8930587768554688, "logits/chosen": -2.2783544063568115, "logits/rejected": -2.2587246894836426, "logps/chosen": -0.6128379702568054, "logps/rejected": -0.46944743394851685, "loss": 1.0177, "nll_loss": 0.9610903859138489, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.06128380447626114, "rewards/margins": -0.014339059591293335, "rewards/rejected": -0.0469447486102581, "step": 1620 }, { "epoch": 0.9653538643766657, "grad_norm": 25.0, "learning_rate": 1.7965088814675677e-09, "log_odds_chosen": -0.3568347692489624, "log_odds_ratio": -0.9671844244003296, "logits/chosen": -2.2762491703033447, "logits/rejected": -2.2589855194091797, "logps/chosen": -0.6517866253852844, "logps/rejected": -0.4649588167667389, "loss": 1.037, "nll_loss": 0.9754410982131958, "rewards/accuracies": 0.34375, "rewards/chosen": -0.06517866253852844, "rewards/margins": -0.018682777881622314, "rewards/rejected": -0.04649588465690613, "step": 1630 }, { "epoch": 0.9712762807225348, "grad_norm": 10.8125, "learning_rate": 1.2308920976958348e-09, "log_odds_chosen": -0.1785418540239334, "log_odds_ratio": -0.8583124876022339, "logits/chosen": -2.2591869831085205, "logits/rejected": -2.2376914024353027, "logps/chosen": -0.5955510139465332, "logps/rejected": -0.4926881790161133, "loss": 1.0026, "nll_loss": 0.9147430658340454, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -0.05955510213971138, "rewards/margins": -0.010286283679306507, "rewards/rejected": -0.04926881566643715, "step": 1640 }, { "epoch": 0.9771986970684039, "grad_norm": 11.9375, "learning_rate": 7.716810807330276e-10, "log_odds_chosen": -0.30667099356651306, "log_odds_ratio": -0.9143903851509094, "logits/chosen": -2.2759385108947754, "logits/rejected": -2.2378878593444824, "logps/chosen": -0.6037041544914246, "logps/rejected": -0.45009493827819824, "loss": 1.0344, "nll_loss": 0.93921959400177, "rewards/accuracies": 0.35624998807907104, "rewards/chosen": -0.06037042289972305, "rewards/margins": -0.015360923483967781, "rewards/rejected": -0.045009493827819824, "step": 1650 }, { "epoch": 0.983121113414273, "grad_norm": 17.25, "learning_rate": 4.190722484575804e-10, "log_odds_chosen": -0.24070534110069275, "log_odds_ratio": -0.9141713976860046, "logits/chosen": -2.285658597946167, "logits/rejected": -2.2573189735412598, "logps/chosen": -0.6545957326889038, "logps/rejected": -0.4952670931816101, "loss": 1.0545, "nll_loss": 0.9895190000534058, "rewards/accuracies": 0.4437499940395355, "rewards/chosen": -0.06545957177877426, "rewards/margins": -0.01593286357820034, "rewards/rejected": -0.04952671006321907, "step": 1660 }, { "epoch": 0.9890435297601421, "grad_norm": 9.1875, "learning_rate": 1.732164218998522e-10, "log_odds_chosen": -0.2650103271007538, "log_odds_ratio": -0.8960719108581543, "logits/chosen": -2.2581698894500732, "logits/rejected": -2.2162814140319824, "logps/chosen": -0.6056646704673767, "logps/rejected": -0.4769059717655182, "loss": 1.0107, "nll_loss": 0.9156764149665833, "rewards/accuracies": 0.39375001192092896, "rewards/chosen": -0.06056647375226021, "rewards/margins": -0.0128758754581213, "rewards/rejected": -0.04769059270620346, "step": 1670 }, { "epoch": 0.9949659461060113, "grad_norm": 12.0, "learning_rate": 3.4218760731730136e-11, "log_odds_chosen": -0.21042411029338837, "log_odds_ratio": -0.8711256980895996, "logits/chosen": -2.333160638809204, "logits/rejected": -2.2931103706359863, "logps/chosen": -0.5873175859451294, "logps/rejected": -0.47885292768478394, "loss": 1.066, "nll_loss": 0.9840106964111328, "rewards/accuracies": 0.40625, "rewards/chosen": -0.05873175337910652, "rewards/margins": -0.010846461169421673, "rewards/rejected": -0.047885291278362274, "step": 1680 }, { "epoch": 0.9997038791827065, "step": 1688, "total_flos": 0.0, "train_loss": 1.076995034918401, "train_runtime": 25716.0251, "train_samples_per_second": 2.101, "train_steps_per_second": 0.066 } ], "logging_steps": 10, "max_steps": 1688, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }