diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,3066 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9997038791827065, + "eval_steps": 500, + "global_step": 1688, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.005922416345869114, + "grad_norm": 26.875, + "learning_rate": 2.9585798816568044e-08, + "log_odds_chosen": -0.4994420111179352, + "log_odds_ratio": -1.0620524883270264, + "logits/chosen": -2.227687358856201, + "logits/rejected": -2.213762044906616, + "logps/chosen": -0.7160366773605347, + "logps/rejected": -0.47193747758865356, + "loss": 1.3693, + "nll_loss": 1.2856990098953247, + "rewards/accuracies": 0.30000001192092896, + "rewards/chosen": -0.0716036707162857, + "rewards/margins": -0.02440992370247841, + "rewards/rejected": -0.047193750739097595, + "step": 10 + }, + { + "epoch": 0.011844832691738229, + "grad_norm": 26.5, + "learning_rate": 5.917159763313609e-08, + "log_odds_chosen": -0.6077697277069092, + "log_odds_ratio": -1.154677152633667, + "logits/chosen": -2.1866495609283447, + "logits/rejected": -2.1631338596343994, + "logps/chosen": -0.8245598077774048, + "logps/rejected": -0.4715619683265686, + "loss": 1.3378, + "nll_loss": 1.228305459022522, + "rewards/accuracies": 0.30000001192092896, + "rewards/chosen": -0.08245597779750824, + "rewards/margins": -0.03529978543519974, + "rewards/rejected": -0.0471561960875988, + "step": 20 + }, + { + "epoch": 0.017767249037607343, + "grad_norm": 29.125, + "learning_rate": 8.875739644970414e-08, + "log_odds_chosen": -0.5950562357902527, + "log_odds_ratio": -1.171638011932373, + "logits/chosen": -2.152902126312256, + "logits/rejected": -2.1443581581115723, + "logps/chosen": -0.854525089263916, + "logps/rejected": -0.49298763275146484, + "loss": 1.3488, + "nll_loss": 1.3134263753890991, + "rewards/accuracies": 0.3062500059604645, + "rewards/chosen": -0.08545249700546265, + "rewards/margins": -0.03615374490618706, + "rewards/rejected": -0.049298763275146484, + "step": 30 + }, + { + "epoch": 0.023689665383476458, + "grad_norm": 31.25, + "learning_rate": 1.1834319526627217e-07, + "log_odds_chosen": -0.5344940423965454, + "log_odds_ratio": -1.0923480987548828, + "logits/chosen": -2.219038486480713, + "logits/rejected": -2.2063724994659424, + "logps/chosen": -0.7574710845947266, + "logps/rejected": -0.4638025760650635, + "loss": 1.3817, + "nll_loss": 1.2359822988510132, + "rewards/accuracies": 0.33125001192092896, + "rewards/chosen": -0.07574710994958878, + "rewards/margins": -0.02936685085296631, + "rewards/rejected": -0.04638025909662247, + "step": 40 + }, + { + "epoch": 0.029612081729345572, + "grad_norm": 28.0, + "learning_rate": 1.4792899408284022e-07, + "log_odds_chosen": -0.4542032778263092, + "log_odds_ratio": -1.0256363153457642, + "logits/chosen": -2.1617987155914307, + "logits/rejected": -2.146223545074463, + "logps/chosen": -0.7006078958511353, + "logps/rejected": -0.47175368666648865, + "loss": 1.3127, + "nll_loss": 1.2409818172454834, + "rewards/accuracies": 0.3499999940395355, + "rewards/chosen": -0.07006079703569412, + "rewards/margins": -0.022885426878929138, + "rewards/rejected": -0.047175366431474686, + "step": 50 + }, + { + "epoch": 0.035534498075214686, + "grad_norm": 23.125, + "learning_rate": 1.7751479289940827e-07, + "log_odds_chosen": -0.6588231921195984, + "log_odds_ratio": -1.230991005897522, + "logits/chosen": -2.2183756828308105, + "logits/rejected": -2.187129259109497, + "logps/chosen": -0.8897625207901001, + "logps/rejected": -0.4612082540988922, + "loss": 1.3568, + "nll_loss": 1.2310936450958252, + "rewards/accuracies": 0.36250001192092896, + "rewards/chosen": -0.08897626399993896, + "rewards/margins": -0.04285542666912079, + "rewards/rejected": -0.04612082242965698, + "step": 60 + }, + { + "epoch": 0.041456914421083804, + "grad_norm": 30.75, + "learning_rate": 2.0710059171597633e-07, + "log_odds_chosen": -0.5367478132247925, + "log_odds_ratio": -1.1100060939788818, + "logits/chosen": -2.232348918914795, + "logits/rejected": -2.1998302936553955, + "logps/chosen": -0.7972711324691772, + "logps/rejected": -0.4634431302547455, + "loss": 1.3614, + "nll_loss": 1.2567493915557861, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.07972709834575653, + "rewards/margins": -0.03338279575109482, + "rewards/rejected": -0.04634431377053261, + "step": 70 + }, + { + "epoch": 0.047379330766952915, + "grad_norm": 28.875, + "learning_rate": 2.3668639053254435e-07, + "log_odds_chosen": -0.5755403637886047, + "log_odds_ratio": -1.156178593635559, + "logits/chosen": -2.197105884552002, + "logits/rejected": -2.186234474182129, + "logps/chosen": -0.7956789135932922, + "logps/rejected": -0.4599471688270569, + "loss": 1.3327, + "nll_loss": 1.219543695449829, + "rewards/accuracies": 0.3687500059604645, + "rewards/chosen": -0.07956788688898087, + "rewards/margins": -0.033573172986507416, + "rewards/rejected": -0.04599471390247345, + "step": 80 + }, + { + "epoch": 0.05330174711282203, + "grad_norm": 25.25, + "learning_rate": 2.662721893491124e-07, + "log_odds_chosen": -0.5014861226081848, + "log_odds_ratio": -1.0729024410247803, + "logits/chosen": -2.1807546615600586, + "logits/rejected": -2.1571853160858154, + "logps/chosen": -0.7198914289474487, + "logps/rejected": -0.46573418378829956, + "loss": 1.3113, + "nll_loss": 1.224487066268921, + "rewards/accuracies": 0.3125, + "rewards/chosen": -0.07198914140462875, + "rewards/margins": -0.025415724143385887, + "rewards/rejected": -0.046573419123888016, + "step": 90 + }, + { + "epoch": 0.059224163458691144, + "grad_norm": 25.125, + "learning_rate": 2.9585798816568045e-07, + "log_odds_chosen": -0.4174951910972595, + "log_odds_ratio": -0.9966305494308472, + "logits/chosen": -2.2450003623962402, + "logits/rejected": -2.199430465698242, + "logps/chosen": -0.6903594732284546, + "logps/rejected": -0.4920008182525635, + "loss": 1.2864, + "nll_loss": 1.2207610607147217, + "rewards/accuracies": 0.33125001192092896, + "rewards/chosen": -0.06903595477342606, + "rewards/margins": -0.01983586512506008, + "rewards/rejected": -0.04920008033514023, + "step": 100 + }, + { + "epoch": 0.06514657980456026, + "grad_norm": 19.375, + "learning_rate": 3.254437869822485e-07, + "log_odds_chosen": -0.4817837178707123, + "log_odds_ratio": -1.0484408140182495, + "logits/chosen": -2.195328950881958, + "logits/rejected": -2.172029972076416, + "logps/chosen": -0.7407166361808777, + "logps/rejected": -0.4809334874153137, + "loss": 1.2292, + "nll_loss": 1.1185578107833862, + "rewards/accuracies": 0.3125, + "rewards/chosen": -0.07407166808843613, + "rewards/margins": -0.025978317484259605, + "rewards/rejected": -0.04809335619211197, + "step": 110 + }, + { + "epoch": 0.07106899615042937, + "grad_norm": 26.25, + "learning_rate": 3.5502958579881655e-07, + "log_odds_chosen": -0.5086492300033569, + "log_odds_ratio": -1.073943018913269, + "logits/chosen": -2.2213022708892822, + "logits/rejected": -2.210648536682129, + "logps/chosen": -0.7544690370559692, + "logps/rejected": -0.47401171922683716, + "loss": 1.2703, + "nll_loss": 1.1549344062805176, + "rewards/accuracies": 0.3375000059604645, + "rewards/chosen": -0.07544689625501633, + "rewards/margins": -0.02804572507739067, + "rewards/rejected": -0.04740116745233536, + "step": 120 + }, + { + "epoch": 0.07699141249629848, + "grad_norm": 121.5, + "learning_rate": 3.8461538461538463e-07, + "log_odds_chosen": -0.6346783638000488, + "log_odds_ratio": -1.208389401435852, + "logits/chosen": -2.205939292907715, + "logits/rejected": -2.1982388496398926, + "logps/chosen": -0.8659466505050659, + "logps/rejected": -0.4501543939113617, + "loss": 1.3049, + "nll_loss": 1.1621254682540894, + "rewards/accuracies": 0.34375, + "rewards/chosen": -0.08659467846155167, + "rewards/margins": -0.0415792390704155, + "rewards/rejected": -0.04501544311642647, + "step": 130 + }, + { + "epoch": 0.08291382884216761, + "grad_norm": 18.625, + "learning_rate": 4.1420118343195265e-07, + "log_odds_chosen": -0.4532869756221771, + "log_odds_ratio": -1.0120022296905518, + "logits/chosen": -2.2359938621520996, + "logits/rejected": -2.2115871906280518, + "logps/chosen": -0.6659095287322998, + "logps/rejected": -0.4467584490776062, + "loss": 1.2027, + "nll_loss": 1.0797432661056519, + "rewards/accuracies": 0.33125001192092896, + "rewards/chosen": -0.06659095734357834, + "rewards/margins": -0.02191510982811451, + "rewards/rejected": -0.04467584565281868, + "step": 140 + }, + { + "epoch": 0.08883624518803672, + "grad_norm": 29.75, + "learning_rate": 4.437869822485207e-07, + "log_odds_chosen": -0.46737051010131836, + "log_odds_ratio": -1.0146253108978271, + "logits/chosen": -2.16318941116333, + "logits/rejected": -2.1556496620178223, + "logps/chosen": -0.7067540287971497, + "logps/rejected": -0.47525158524513245, + "loss": 1.217, + "nll_loss": 1.1824976205825806, + "rewards/accuracies": 0.29374998807907104, + "rewards/chosen": -0.07067539542913437, + "rewards/margins": -0.023150241002440453, + "rewards/rejected": -0.047525160014629364, + "step": 150 + }, + { + "epoch": 0.09475866153390583, + "grad_norm": 20.75, + "learning_rate": 4.733727810650887e-07, + "log_odds_chosen": -0.31778836250305176, + "log_odds_ratio": -0.9325827360153198, + "logits/chosen": -2.2458879947662354, + "logits/rejected": -2.2277491092681885, + "logps/chosen": -0.6050869822502136, + "logps/rejected": -0.4580734372138977, + "loss": 1.2157, + "nll_loss": 1.0979220867156982, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.06050870940089226, + "rewards/margins": -0.014701364561915398, + "rewards/rejected": -0.04580734297633171, + "step": 160 + }, + { + "epoch": 0.10068107787977496, + "grad_norm": 14.3125, + "learning_rate": 4.999994653198566e-07, + "log_odds_chosen": -0.44623684883117676, + "log_odds_ratio": -1.0507714748382568, + "logits/chosen": -2.273740530014038, + "logits/rejected": -2.248004198074341, + "logps/chosen": -0.744641900062561, + "logps/rejected": -0.4939740300178528, + "loss": 1.2442, + "nll_loss": 1.0892422199249268, + "rewards/accuracies": 0.3499999940395355, + "rewards/chosen": -0.07446418702602386, + "rewards/margins": -0.0250667966902256, + "rewards/rejected": -0.04939739406108856, + "step": 170 + }, + { + "epoch": 0.10660349422564407, + "grad_norm": 12.5625, + "learning_rate": 4.999353064699471e-07, + "log_odds_chosen": -0.5144436955451965, + "log_odds_ratio": -1.1169707775115967, + "logits/chosen": -2.2361178398132324, + "logits/rejected": -2.2026758193969727, + "logps/chosen": -0.8099610209465027, + "logps/rejected": -0.49819788336753845, + "loss": 1.1022, + "nll_loss": 1.0261476039886475, + "rewards/accuracies": 0.33125001192092896, + "rewards/chosen": -0.08099609613418579, + "rewards/margins": -0.031176313757896423, + "rewards/rejected": -0.049819789826869965, + "step": 180 + }, + { + "epoch": 0.11252591057151318, + "grad_norm": 12.875, + "learning_rate": 4.99764243036258e-07, + "log_odds_chosen": -0.4125841557979584, + "log_odds_ratio": -0.991108775138855, + "logits/chosen": -2.268022298812866, + "logits/rejected": -2.240299701690674, + "logps/chosen": -0.6463659405708313, + "logps/rejected": -0.4517286717891693, + "loss": 1.1318, + "nll_loss": 1.0371661186218262, + "rewards/accuracies": 0.35624998807907104, + "rewards/chosen": -0.06463660299777985, + "rewards/margins": -0.01946372725069523, + "rewards/rejected": -0.04517286270856857, + "step": 190 + }, + { + "epoch": 0.11844832691738229, + "grad_norm": 12.0, + "learning_rate": 4.994863481875841e-07, + "log_odds_chosen": -0.38528627157211304, + "log_odds_ratio": -0.9595619440078735, + "logits/chosen": -2.217349052429199, + "logits/rejected": -2.1852166652679443, + "logps/chosen": -0.6334083676338196, + "logps/rejected": -0.4435149133205414, + "loss": 1.1246, + "nll_loss": 0.9835959672927856, + "rewards/accuracies": 0.30000001192092896, + "rewards/chosen": -0.06334083527326584, + "rewards/margins": -0.01898934319615364, + "rewards/rejected": -0.044351495802402496, + "step": 200 + }, + { + "epoch": 0.12437074326325141, + "grad_norm": 11.5, + "learning_rate": 4.991017407876165e-07, + "log_odds_chosen": -0.429326593875885, + "log_odds_ratio": -1.002436876296997, + "logits/chosen": -2.224944591522217, + "logits/rejected": -2.1807491779327393, + "logps/chosen": -0.7087312936782837, + "logps/rejected": -0.49742716550827026, + "loss": 1.0953, + "nll_loss": 1.0195242166519165, + "rewards/accuracies": 0.3375000059604645, + "rewards/chosen": -0.07087312638759613, + "rewards/margins": -0.021130409091711044, + "rewards/rejected": -0.049742721021175385, + "step": 210 + }, + { + "epoch": 0.13029315960912052, + "grad_norm": 10.6875, + "learning_rate": 4.98610585344102e-07, + "log_odds_chosen": -0.2424849271774292, + "log_odds_ratio": -0.9048135876655579, + "logits/chosen": -2.2507550716400146, + "logits/rejected": -2.217257499694824, + "logps/chosen": -0.6068475246429443, + "logps/rejected": -0.4904823899269104, + "loss": 1.1278, + "nll_loss": 1.0603684186935425, + "rewards/accuracies": 0.34375, + "rewards/chosen": -0.06068475916981697, + "rewards/margins": -0.011636516079306602, + "rewards/rejected": -0.04904823377728462, + "step": 220 + }, + { + "epoch": 0.13621557595498965, + "grad_norm": 9.875, + "learning_rate": 4.980130919384768e-07, + "log_odds_chosen": -0.5562174916267395, + "log_odds_ratio": -1.0973405838012695, + "logits/chosen": -2.246185779571533, + "logits/rejected": -2.2379026412963867, + "logps/chosen": -0.7477759122848511, + "logps/rejected": -0.4505345821380615, + "loss": 1.1333, + "nll_loss": 1.0181388854980469, + "rewards/accuracies": 0.28125, + "rewards/chosen": -0.07477758824825287, + "rewards/margins": -0.029724130406975746, + "rewards/rejected": -0.04505345970392227, + "step": 230 + }, + { + "epoch": 0.14213799230085875, + "grad_norm": 11.5625, + "learning_rate": 4.973095161360105e-07, + "log_odds_chosen": -0.425253689289093, + "log_odds_ratio": -1.0029823780059814, + "logits/chosen": -2.242088794708252, + "logits/rejected": -2.2122817039489746, + "logps/chosen": -0.68077552318573, + "logps/rejected": -0.48119717836380005, + "loss": 1.1443, + "nll_loss": 1.063909649848938, + "rewards/accuracies": 0.29374998807907104, + "rewards/chosen": -0.06807754933834076, + "rewards/margins": -0.01995784044265747, + "rewards/rejected": -0.048119716346263885, + "step": 240 + }, + { + "epoch": 0.14806040864672787, + "grad_norm": 10.3125, + "learning_rate": 4.965001588764913e-07, + "log_odds_chosen": -0.4351120889186859, + "log_odds_ratio": -1.013584852218628, + "logits/chosen": -2.2702879905700684, + "logits/rejected": -2.2400031089782715, + "logps/chosen": -0.6880632638931274, + "logps/rejected": -0.4528827667236328, + "loss": 1.1299, + "nll_loss": 1.0191699266433716, + "rewards/accuracies": 0.34375, + "rewards/chosen": -0.06880633533000946, + "rewards/margins": -0.023518051952123642, + "rewards/rejected": -0.04528827592730522, + "step": 250 + }, + { + "epoch": 0.15398282499259697, + "grad_norm": 11.1875, + "learning_rate": 4.955853663455072e-07, + "log_odds_chosen": -0.30220693349838257, + "log_odds_ratio": -0.9368545413017273, + "logits/chosen": -2.257448673248291, + "logits/rejected": -2.227647542953491, + "logps/chosen": -0.6458665728569031, + "logps/rejected": -0.4764745235443115, + "loss": 1.0645, + "nll_loss": 0.9644678235054016, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.06458665430545807, + "rewards/margins": -0.016939211636781693, + "rewards/rejected": -0.04764745384454727, + "step": 260 + }, + { + "epoch": 0.1599052413384661, + "grad_norm": 10.875, + "learning_rate": 4.945655298263713e-07, + "log_odds_chosen": -0.41390785574913025, + "log_odds_ratio": -0.9837135076522827, + "logits/chosen": -2.20629620552063, + "logits/rejected": -2.1831986904144287, + "logps/chosen": -0.6674059629440308, + "logps/rejected": -0.46569353342056274, + "loss": 1.1528, + "nll_loss": 1.0888841152191162, + "rewards/accuracies": 0.3062500059604645, + "rewards/chosen": -0.06674060225486755, + "rewards/margins": -0.02017124928534031, + "rewards/rejected": -0.046569354832172394, + "step": 270 + }, + { + "epoch": 0.16582765768433522, + "grad_norm": 9.5625, + "learning_rate": 4.934410855327585e-07, + "log_odds_chosen": -0.3461267352104187, + "log_odds_ratio": -0.9425566792488098, + "logits/chosen": -2.2884914875030518, + "logits/rejected": -2.27152943611145, + "logps/chosen": -0.6492639780044556, + "logps/rejected": -0.46900925040245056, + "loss": 1.0682, + "nll_loss": 1.0291364192962646, + "rewards/accuracies": 0.34375, + "rewards/chosen": -0.0649264007806778, + "rewards/margins": -0.018025478348135948, + "rewards/rejected": -0.0469009205698967, + "step": 280 + }, + { + "epoch": 0.1717500740302043, + "grad_norm": 11.0, + "learning_rate": 4.922125144221252e-07, + "log_odds_chosen": -0.38331133127212524, + "log_odds_ratio": -0.9734469652175903, + "logits/chosen": -2.2513084411621094, + "logits/rejected": -2.199239492416382, + "logps/chosen": -0.6518736481666565, + "logps/rejected": -0.4689255356788635, + "loss": 1.1269, + "nll_loss": 1.0506547689437866, + "rewards/accuracies": 0.35624998807907104, + "rewards/chosen": -0.06518735736608505, + "rewards/margins": -0.018294811248779297, + "rewards/rejected": -0.04689255356788635, + "step": 290 + }, + { + "epoch": 0.17767249037607344, + "grad_norm": 10.625, + "learning_rate": 4.90880341989989e-07, + "log_odds_chosen": -0.295235276222229, + "log_odds_ratio": -0.9132793545722961, + "logits/chosen": -2.255086660385132, + "logits/rejected": -2.2318952083587646, + "logps/chosen": -0.6402678489685059, + "logps/rejected": -0.48136910796165466, + "loss": 1.0909, + "nll_loss": 1.0022283792495728, + "rewards/accuracies": 0.34375, + "rewards/chosen": -0.06402678042650223, + "rewards/margins": -0.015889868140220642, + "rewards/rejected": -0.048136912286281586, + "step": 300 + }, + { + "epoch": 0.18359490672194256, + "grad_norm": 9.875, + "learning_rate": 4.894451380451589e-07, + "log_odds_chosen": -0.4930775761604309, + "log_odds_ratio": -1.0459508895874023, + "logits/chosen": -2.2340633869171143, + "logits/rejected": -2.2229130268096924, + "logps/chosen": -0.7189785242080688, + "logps/rejected": -0.46092820167541504, + "loss": 1.116, + "nll_loss": 1.0077855587005615, + "rewards/accuracies": 0.3187499940395355, + "rewards/chosen": -0.07189784944057465, + "rewards/margins": -0.02580503560602665, + "rewards/rejected": -0.046092819422483444, + "step": 310 + }, + { + "epoch": 0.18951732306781166, + "grad_norm": 11.5625, + "learning_rate": 4.879075164660124e-07, + "log_odds_chosen": -0.29097312688827515, + "log_odds_ratio": -0.9061079025268555, + "logits/chosen": -2.238163471221924, + "logits/rejected": -2.2072105407714844, + "logps/chosen": -0.6175664067268372, + "logps/rejected": -0.47239384055137634, + "loss": 1.0495, + "nll_loss": 0.9289931058883667, + "rewards/accuracies": 0.33125001192092896, + "rewards/chosen": -0.06175662949681282, + "rewards/margins": -0.014517253264784813, + "rewards/rejected": -0.047239381819963455, + "step": 320 + }, + { + "epoch": 0.19543973941368079, + "grad_norm": 12.25, + "learning_rate": 4.862681349379212e-07, + "log_odds_chosen": -0.33382827043533325, + "log_odds_ratio": -0.939583420753479, + "logits/chosen": -2.244995594024658, + "logits/rejected": -2.1931443214416504, + "logps/chosen": -0.6333972811698914, + "logps/rejected": -0.4775928556919098, + "loss": 1.1124, + "nll_loss": 1.0409491062164307, + "rewards/accuracies": 0.35624998807907104, + "rewards/chosen": -0.0633397176861763, + "rewards/margins": -0.01558043621480465, + "rewards/rejected": -0.0477592833340168, + "step": 330 + }, + { + "epoch": 0.2013621557595499, + "grad_norm": 9.6875, + "learning_rate": 4.8452769467194e-07, + "log_odds_chosen": -0.3502793610095978, + "log_odds_ratio": -0.9458521604537964, + "logits/chosen": -2.2533793449401855, + "logits/rejected": -2.231985092163086, + "logps/chosen": -0.6348416209220886, + "logps/rejected": -0.46244215965270996, + "loss": 1.0872, + "nll_loss": 0.965823769569397, + "rewards/accuracies": 0.35624998807907104, + "rewards/chosen": -0.06348416954278946, + "rewards/margins": -0.017239956185221672, + "rewards/rejected": -0.04624421149492264, + "step": 340 + }, + { + "epoch": 0.207284572105419, + "grad_norm": 9.625, + "learning_rate": 4.82686940104879e-07, + "log_odds_chosen": -0.37014713883399963, + "log_odds_ratio": -0.9843534231185913, + "logits/chosen": -2.296128511428833, + "logits/rejected": -2.267141103744507, + "logps/chosen": -0.6616524457931519, + "logps/rejected": -0.4461567997932434, + "loss": 1.0383, + "nll_loss": 0.9294153451919556, + "rewards/accuracies": 0.34375, + "rewards/chosen": -0.0661652460694313, + "rewards/margins": -0.02154957316815853, + "rewards/rejected": -0.04461567848920822, + "step": 350 + }, + { + "epoch": 0.21320698845128813, + "grad_norm": 9.6875, + "learning_rate": 4.807466585808856e-07, + "log_odds_chosen": -0.2995724380016327, + "log_odds_ratio": -0.9168221354484558, + "logits/chosen": -2.274096727371216, + "logits/rejected": -2.2658305168151855, + "logps/chosen": -0.5940972566604614, + "logps/rejected": -0.46015462279319763, + "loss": 1.0942, + "nll_loss": 0.9911165237426758, + "rewards/accuracies": 0.3687500059604645, + "rewards/chosen": -0.0594097301363945, + "rewards/margins": -0.013394266366958618, + "rewards/rejected": -0.04601546376943588, + "step": 360 + }, + { + "epoch": 0.21912940479715723, + "grad_norm": 13.5625, + "learning_rate": 4.787076800146752e-07, + "log_odds_chosen": -0.27963608503341675, + "log_odds_ratio": -0.9352908134460449, + "logits/chosen": -2.2542636394500732, + "logits/rejected": -2.2058660984039307, + "logps/chosen": -0.6458699107170105, + "logps/rejected": -0.468344509601593, + "loss": 1.0125, + "nll_loss": 0.9038776159286499, + "rewards/accuracies": 0.4375, + "rewards/chosen": -0.06458699703216553, + "rewards/margins": -0.017752548679709435, + "rewards/rejected": -0.046834446489810944, + "step": 370 + }, + { + "epoch": 0.22505182114302635, + "grad_norm": 10.0, + "learning_rate": 4.765708765365526e-07, + "log_odds_chosen": -0.2566812038421631, + "log_odds_ratio": -0.9025079011917114, + "logits/chosen": -2.2573628425598145, + "logits/rejected": -2.2479588985443115, + "logps/chosen": -0.5893818140029907, + "logps/rejected": -0.4597233235836029, + "loss": 1.1093, + "nll_loss": 0.9725319147109985, + "rewards/accuracies": 0.40625, + "rewards/chosen": -0.05893818661570549, + "rewards/margins": -0.012965850532054901, + "rewards/rejected": -0.04597233235836029, + "step": 380 + }, + { + "epoch": 0.23097423748889548, + "grad_norm": 12.25, + "learning_rate": 4.7433716211937587e-07, + "log_odds_chosen": -0.4499928057193756, + "log_odds_ratio": -1.0134861469268799, + "logits/chosen": -2.3190252780914307, + "logits/rejected": -2.297466516494751, + "logps/chosen": -0.655422568321228, + "logps/rejected": -0.43357038497924805, + "loss": 1.0471, + "nll_loss": 1.008756399154663, + "rewards/accuracies": 0.34375, + "rewards/chosen": -0.06554224342107773, + "rewards/margins": -0.02218521013855934, + "rewards/rejected": -0.043357037007808685, + "step": 390 + }, + { + "epoch": 0.23689665383476458, + "grad_norm": 9.3125, + "learning_rate": 4.720074921876245e-07, + "log_odds_chosen": -0.3851686120033264, + "log_odds_ratio": -0.9778718948364258, + "logits/chosen": -2.325918674468994, + "logits/rejected": -2.2813212871551514, + "logps/chosen": -0.6249781847000122, + "logps/rejected": -0.45036381483078003, + "loss": 1.0507, + "nll_loss": 0.9533747434616089, + "rewards/accuracies": 0.3812499940395355, + "rewards/chosen": -0.0624978169798851, + "rewards/margins": -0.01746143028140068, + "rewards/rejected": -0.04503639414906502, + "step": 400 + }, + { + "epoch": 0.2428190701806337, + "grad_norm": 10.5625, + "learning_rate": 4.6958286320873593e-07, + "log_odds_chosen": -0.38822251558303833, + "log_odds_ratio": -0.9542675018310547, + "logits/chosen": -2.2724270820617676, + "logits/rejected": -2.27009916305542, + "logps/chosen": -0.6122742891311646, + "logps/rejected": -0.4287818372249603, + "loss": 1.0679, + "nll_loss": 1.0051120519638062, + "rewards/accuracies": 0.3125, + "rewards/chosen": -0.06122744083404541, + "rewards/margins": -0.018349256366491318, + "rewards/rejected": -0.04287818819284439, + "step": 410 + }, + { + "epoch": 0.24874148652650283, + "grad_norm": 10.25, + "learning_rate": 4.6706431226688804e-07, + "log_odds_chosen": -0.30081695318222046, + "log_odds_ratio": -0.921572208404541, + "logits/chosen": -2.2560360431671143, + "logits/rejected": -2.2262086868286133, + "logps/chosen": -0.6127408742904663, + "logps/rejected": -0.4595797061920166, + "loss": 1.0784, + "nll_loss": 0.9788911938667297, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.06127409264445305, + "rewards/margins": -0.015316121280193329, + "rewards/rejected": -0.04595796763896942, + "step": 420 + }, + { + "epoch": 0.25466390287237195, + "grad_norm": 9.375, + "learning_rate": 4.6445291661940777e-07, + "log_odds_chosen": -0.2526037096977234, + "log_odds_ratio": -0.8853398561477661, + "logits/chosen": -2.274932861328125, + "logits/rejected": -2.2737860679626465, + "logps/chosen": -0.5831697583198547, + "logps/rejected": -0.4629867672920227, + "loss": 1.0351, + "nll_loss": 0.9002013206481934, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.05831696838140488, + "rewards/margins": -0.012018295004963875, + "rewards/rejected": -0.04629867523908615, + "step": 430 + }, + { + "epoch": 0.26058631921824105, + "grad_norm": 13.0, + "learning_rate": 4.6174979323599715e-07, + "log_odds_chosen": -0.4437042772769928, + "log_odds_ratio": -1.0250940322875977, + "logits/chosen": -2.2592310905456543, + "logits/rejected": -2.2114596366882324, + "logps/chosen": -0.7022743821144104, + "logps/rejected": -0.4603559374809265, + "loss": 1.0967, + "nll_loss": 1.0961658954620361, + "rewards/accuracies": 0.3187499940395355, + "rewards/chosen": -0.07022743672132492, + "rewards/margins": -0.02419184148311615, + "rewards/rejected": -0.04603559896349907, + "step": 440 + }, + { + "epoch": 0.26650873556411014, + "grad_norm": 9.5, + "learning_rate": 4.5895609832097277e-07, + "log_odds_chosen": -0.3050179183483124, + "log_odds_ratio": -0.9421980977058411, + "logits/chosen": -2.2684884071350098, + "logits/rejected": -2.2559661865234375, + "logps/chosen": -0.6401418447494507, + "logps/rejected": -0.46939319372177124, + "loss": 1.0745, + "nll_loss": 0.96502685546875, + "rewards/accuracies": 0.38749998807907104, + "rewards/chosen": -0.06401418894529343, + "rewards/margins": -0.01707487180829048, + "rewards/rejected": -0.04693932086229324, + "step": 450 + }, + { + "epoch": 0.2724311519099793, + "grad_norm": 10.1875, + "learning_rate": 4.560730268187236e-07, + "log_odds_chosen": -0.26763516664505005, + "log_odds_ratio": -0.8960734605789185, + "logits/chosen": -2.266759157180786, + "logits/rejected": -2.230344533920288, + "logps/chosen": -0.57380610704422, + "logps/rejected": -0.45090922713279724, + "loss": 1.0544, + "nll_loss": 0.9469722509384155, + "rewards/accuracies": 0.4437499940395355, + "rewards/chosen": -0.05738060921430588, + "rewards/margins": -0.012289688922464848, + "rewards/rejected": -0.0450909249484539, + "step": 460 + }, + { + "epoch": 0.2783535682558484, + "grad_norm": 11.1875, + "learning_rate": 4.531018119025989e-07, + "log_odds_chosen": -0.19471798837184906, + "log_odds_ratio": -0.8877772092819214, + "logits/chosen": -2.325700283050537, + "logits/rejected": -2.3014023303985596, + "logps/chosen": -0.5948117971420288, + "logps/rejected": -0.5260331630706787, + "loss": 1.0872, + "nll_loss": 1.042905569076538, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.059481192380189896, + "rewards/margins": -0.006877871695905924, + "rewards/rejected": -0.05260331556200981, + "step": 470 + }, + { + "epoch": 0.2842759846017175, + "grad_norm": 10.1875, + "learning_rate": 4.5004372444744376e-07, + "log_odds_chosen": -0.20854365825653076, + "log_odds_ratio": -0.8700854182243347, + "logits/chosen": -2.267329454421997, + "logits/rejected": -2.2475056648254395, + "logps/chosen": -0.610100269317627, + "logps/rejected": -0.49854737520217896, + "loss": 1.0582, + "nll_loss": 0.982585608959198, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.061010025441646576, + "rewards/margins": -0.011155293323099613, + "rewards/rejected": -0.04985473304986954, + "step": 480 + }, + { + "epoch": 0.2901984009475866, + "grad_norm": 11.125, + "learning_rate": 4.4690007248600967e-07, + "log_odds_chosen": -0.30316418409347534, + "log_odds_ratio": -0.9258543848991394, + "logits/chosen": -2.260499954223633, + "logits/rejected": -2.2460737228393555, + "logps/chosen": -0.6183134913444519, + "logps/rejected": -0.4603392481803894, + "loss": 1.0569, + "nll_loss": 0.9751143455505371, + "rewards/accuracies": 0.38749998807907104, + "rewards/chosen": -0.06183135509490967, + "rewards/margins": -0.01579742692410946, + "rewards/rejected": -0.04603392630815506, + "step": 490 + }, + { + "epoch": 0.29612081729345574, + "grad_norm": 9.25, + "learning_rate": 4.436722006494701e-07, + "log_odds_chosen": -0.4622948169708252, + "log_odds_ratio": -1.0724523067474365, + "logits/chosen": -2.2528557777404785, + "logits/rejected": -2.2317535877227783, + "logps/chosen": -0.7585560083389282, + "logps/rejected": -0.4601530134677887, + "loss": 1.0779, + "nll_loss": 1.0056917667388916, + "rewards/accuracies": 0.39375001192092896, + "rewards/chosen": -0.07585560530424118, + "rewards/margins": -0.02984030917286873, + "rewards/rejected": -0.04601530730724335, + "step": 500 + }, + { + "epoch": 0.30204323363932484, + "grad_norm": 10.125, + "learning_rate": 4.4036148959228356e-07, + "log_odds_chosen": -0.37729692459106445, + "log_odds_ratio": -0.9907791018486023, + "logits/chosen": -2.285222291946411, + "logits/rejected": -2.2465076446533203, + "logps/chosen": -0.6608995199203491, + "logps/rejected": -0.44408687949180603, + "loss": 1.0854, + "nll_loss": 0.9470478892326355, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.0660899430513382, + "rewards/margins": -0.02168126031756401, + "rewards/rejected": -0.04440869390964508, + "step": 510 + }, + { + "epoch": 0.30796564998519393, + "grad_norm": 12.9375, + "learning_rate": 4.3696935540164705e-07, + "log_odds_chosen": -0.3114868998527527, + "log_odds_ratio": -0.9284585118293762, + "logits/chosen": -2.2520318031311035, + "logits/rejected": -2.2336666584014893, + "logps/chosen": -0.6092923879623413, + "logps/rejected": -0.4560086727142334, + "loss": 1.0234, + "nll_loss": 0.954501748085022, + "rewards/accuracies": 0.4437499940395355, + "rewards/chosen": -0.06092924624681473, + "rewards/margins": -0.01532837562263012, + "rewards/rejected": -0.04560086503624916, + "step": 520 + }, + { + "epoch": 0.3138880663310631, + "grad_norm": 9.1875, + "learning_rate": 4.334972489917947e-07, + "log_odds_chosen": -0.22460684180259705, + "log_odds_ratio": -0.88166743516922, + "logits/chosen": -2.313957691192627, + "logits/rejected": -2.2588186264038086, + "logps/chosen": -0.6013073325157166, + "logps/rejected": -0.47843700647354126, + "loss": 1.0456, + "nll_loss": 0.9358353614807129, + "rewards/accuracies": 0.4375, + "rewards/chosen": -0.060130733996629715, + "rewards/margins": -0.012287032790482044, + "rewards/rejected": -0.047843702137470245, + "step": 530 + }, + { + "epoch": 0.3198104826769322, + "grad_norm": 11.25, + "learning_rate": 4.299466554833997e-07, + "log_odds_chosen": -0.33192509412765503, + "log_odds_ratio": -0.94036465883255, + "logits/chosen": -2.2912707328796387, + "logits/rejected": -2.2435359954833984, + "logps/chosen": -0.5902704000473022, + "logps/rejected": -0.44104498624801636, + "loss": 1.0515, + "nll_loss": 0.9315252304077148, + "rewards/accuracies": 0.3812499940395355, + "rewards/chosen": -0.059027038514614105, + "rewards/margins": -0.014922534115612507, + "rewards/rejected": -0.044104501605033875, + "step": 540 + }, + { + "epoch": 0.3257328990228013, + "grad_norm": 8.9375, + "learning_rate": 4.263190935683449e-07, + "log_odds_chosen": -0.25842440128326416, + "log_odds_ratio": -0.893360435962677, + "logits/chosen": -2.2691588401794434, + "logits/rejected": -2.2356011867523193, + "logps/chosen": -0.5605894327163696, + "logps/rejected": -0.43656760454177856, + "loss": 0.9862, + "nll_loss": 0.8704695701599121, + "rewards/accuracies": 0.4437499940395355, + "rewards/chosen": -0.05605894327163696, + "rewards/margins": -0.012402191758155823, + "rewards/rejected": -0.04365675523877144, + "step": 550 + }, + { + "epoch": 0.33165531536867043, + "grad_norm": 10.875, + "learning_rate": 4.2261611486013437e-07, + "log_odds_chosen": -0.3279554545879364, + "log_odds_ratio": -0.9397815465927124, + "logits/chosen": -2.3104796409606934, + "logits/rejected": -2.275190830230713, + "logps/chosen": -0.6270398497581482, + "logps/rejected": -0.4670359194278717, + "loss": 1.0697, + "nll_loss": 0.977874755859375, + "rewards/accuracies": 0.3812499940395355, + "rewards/chosen": -0.06270398944616318, + "rewards/margins": -0.01600039377808571, + "rewards/rejected": -0.04670359194278717, + "step": 560 + }, + { + "epoch": 0.33757773171453953, + "grad_norm": 11.125, + "learning_rate": 4.188393032302233e-07, + "log_odds_chosen": -0.14010918140411377, + "log_odds_ratio": -0.8429776430130005, + "logits/chosen": -2.2512803077697754, + "logits/rejected": -2.1937472820281982, + "logps/chosen": -0.5634902715682983, + "logps/rejected": -0.5150736570358276, + "loss": 1.0249, + "nll_loss": 0.931064248085022, + "rewards/accuracies": 0.41874998807907104, + "rewards/chosen": -0.056349027901887894, + "rewards/margins": -0.0048416657373309135, + "rewards/rejected": -0.051507361233234406, + "step": 570 + }, + { + "epoch": 0.3435001480604086, + "grad_norm": 12.75, + "learning_rate": 4.1499027413055e-07, + "log_odds_chosen": -0.33234935998916626, + "log_odds_ratio": -0.9407118558883667, + "logits/chosen": -2.258405923843384, + "logits/rejected": -2.232956647872925, + "logps/chosen": -0.6220130920410156, + "logps/rejected": -0.4592718482017517, + "loss": 1.0413, + "nll_loss": 0.9290376901626587, + "rewards/accuracies": 0.3687500059604645, + "rewards/chosen": -0.06220130994915962, + "rewards/margins": -0.016274118795990944, + "rewards/rejected": -0.04592718556523323, + "step": 580 + }, + { + "epoch": 0.3494225644062778, + "grad_norm": 13.875, + "learning_rate": 4.1107067390256056e-07, + "log_odds_chosen": -0.35427385568618774, + "log_odds_ratio": -0.9841470718383789, + "logits/chosen": -2.305126428604126, + "logits/rejected": -2.280172824859619, + "logps/chosen": -0.696389377117157, + "logps/rejected": -0.4881146550178528, + "loss": 1.0718, + "nll_loss": 1.0334848165512085, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.0696389377117157, + "rewards/margins": -0.02082747593522072, + "rewards/rejected": -0.04881146177649498, + "step": 590 + }, + { + "epoch": 0.3553449807521469, + "grad_norm": 11.6875, + "learning_rate": 4.0708217907302047e-07, + "log_odds_chosen": -0.3386622369289398, + "log_odds_ratio": -0.9444282650947571, + "logits/chosen": -2.2589573860168457, + "logits/rejected": -2.2278530597686768, + "logps/chosen": -0.6211683750152588, + "logps/rejected": -0.46438631415367126, + "loss": 1.0621, + "nll_loss": 0.9823211431503296, + "rewards/accuracies": 0.35624998807907104, + "rewards/chosen": -0.062116838991642, + "rewards/margins": -0.01567821204662323, + "rewards/rejected": -0.04643862694501877, + "step": 600 + }, + { + "epoch": 0.361267397098016, + "grad_norm": 14.4375, + "learning_rate": 4.030264956369157e-07, + "log_odds_chosen": -0.32127273082733154, + "log_odds_ratio": -0.929902195930481, + "logits/chosen": -2.297096014022827, + "logits/rejected": -2.259603977203369, + "logps/chosen": -0.591595470905304, + "logps/rejected": -0.4399223327636719, + "loss": 1.0497, + "nll_loss": 0.9886807203292847, + "rewards/accuracies": 0.3375000059604645, + "rewards/chosen": -0.0591595396399498, + "rewards/margins": -0.01516731083393097, + "rewards/rejected": -0.04399223253130913, + "step": 610 + }, + { + "epoch": 0.3671898134438851, + "grad_norm": 11.1875, + "learning_rate": 3.989053583277492e-07, + "log_odds_chosen": -0.42405062913894653, + "log_odds_ratio": -1.0016412734985352, + "logits/chosen": -2.3095479011535645, + "logits/rejected": -2.2935452461242676, + "logps/chosen": -0.6750982403755188, + "logps/rejected": -0.45489102602005005, + "loss": 1.0537, + "nll_loss": 0.9710051417350769, + "rewards/accuracies": 0.3187499940395355, + "rewards/chosen": -0.06750981509685516, + "rewards/margins": -0.022020723670721054, + "rewards/rejected": -0.04548909515142441, + "step": 620 + }, + { + "epoch": 0.3731122297897542, + "grad_norm": 13.4375, + "learning_rate": 3.947205298755447e-07, + "log_odds_chosen": -0.25669050216674805, + "log_odds_ratio": -0.9015368223190308, + "logits/chosen": -2.2679405212402344, + "logits/rejected": -2.2386162281036377, + "logps/chosen": -0.6160240173339844, + "logps/rejected": -0.48336100578308105, + "loss": 1.0648, + "nll_loss": 0.9532335996627808, + "rewards/accuracies": 0.41874998807907104, + "rewards/chosen": -0.061602406203746796, + "rewards/margins": -0.013266305439174175, + "rewards/rejected": -0.04833609610795975, + "step": 630 + }, + { + "epoch": 0.3790346461356233, + "grad_norm": 10.4375, + "learning_rate": 3.9047380025287634e-07, + "log_odds_chosen": -0.24768850207328796, + "log_odds_ratio": -0.891069769859314, + "logits/chosen": -2.275651216506958, + "logits/rejected": -2.247177839279175, + "logps/chosen": -0.5877569913864136, + "logps/rejected": -0.4681660532951355, + "loss": 1.0549, + "nll_loss": 0.9463118314743042, + "rewards/accuracies": 0.4375, + "rewards/chosen": -0.058775704354047775, + "rewards/margins": -0.011959095485508442, + "rewards/rejected": -0.04681660607457161, + "step": 640 + }, + { + "epoch": 0.3849570624814925, + "grad_norm": 12.1875, + "learning_rate": 3.8616698590924523e-07, + "log_odds_chosen": -0.2891980707645416, + "log_odds_ratio": -0.9127435684204102, + "logits/chosen": -2.296032428741455, + "logits/rejected": -2.2514827251434326, + "logps/chosen": -0.6284441351890564, + "logps/rejected": -0.4775362014770508, + "loss": 1.0297, + "nll_loss": 0.9506929516792297, + "rewards/accuracies": 0.4124999940395355, + "rewards/chosen": -0.062844417989254, + "rewards/margins": -0.01509079895913601, + "rewards/rejected": -0.04775362089276314, + "step": 650 + }, + { + "epoch": 0.39087947882736157, + "grad_norm": 9.9375, + "learning_rate": 3.8180192899413123e-07, + "log_odds_chosen": -0.3009001314640045, + "log_odds_ratio": -0.9173041582107544, + "logits/chosen": -2.292931079864502, + "logits/rejected": -2.2850821018218994, + "logps/chosen": -0.5977297425270081, + "logps/rejected": -0.4498085081577301, + "loss": 1.066, + "nll_loss": 0.9441615343093872, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.059772975742816925, + "rewards/margins": -0.014792119152843952, + "rewards/rejected": -0.04498085752129555, + "step": 660 + }, + { + "epoch": 0.39680189517323067, + "grad_norm": 9.6875, + "learning_rate": 3.7738049656905225e-07, + "log_odds_chosen": -0.2274588793516159, + "log_odds_ratio": -0.871192455291748, + "logits/chosen": -2.2281768321990967, + "logits/rejected": -2.1852290630340576, + "logps/chosen": -0.5783167481422424, + "logps/rejected": -0.472917377948761, + "loss": 1.0607, + "nll_loss": 0.9557689428329468, + "rewards/accuracies": 0.4375, + "rewards/chosen": -0.05783168226480484, + "rewards/margins": -0.010539938695728779, + "rewards/rejected": -0.04729173332452774, + "step": 670 + }, + { + "epoch": 0.4027243115190998, + "grad_norm": 10.375, + "learning_rate": 3.7290457980896787e-07, + "log_odds_chosen": -0.1645122915506363, + "log_odds_ratio": -0.8458727598190308, + "logits/chosen": -2.2992634773254395, + "logits/rejected": -2.270430564880371, + "logps/chosen": -0.5671563148498535, + "logps/rejected": -0.4864569306373596, + "loss": 1.0284, + "nll_loss": 0.9164050817489624, + "rewards/accuracies": 0.41874998807907104, + "rewards/chosen": -0.05671562999486923, + "rewards/margins": -0.00806993618607521, + "rewards/rejected": -0.04864569753408432, + "step": 680 + }, + { + "epoch": 0.4086467278649689, + "grad_norm": 10.125, + "learning_rate": 3.68376093193369e-07, + "log_odds_chosen": -0.2814542353153229, + "log_odds_ratio": -0.9015814661979675, + "logits/chosen": -2.3065972328186035, + "logits/rejected": -2.2681093215942383, + "logps/chosen": -0.5637949109077454, + "logps/rejected": -0.4352457523345947, + "loss": 1.0214, + "nll_loss": 0.91374272108078, + "rewards/accuracies": 0.39375001192092896, + "rewards/chosen": -0.05637948960065842, + "rewards/margins": -0.012854918837547302, + "rewards/rejected": -0.043524570763111115, + "step": 690 + }, + { + "epoch": 0.414569144210838, + "grad_norm": 58.5, + "learning_rate": 3.637969736873992e-07, + "log_odds_chosen": -0.21553269028663635, + "log_odds_ratio": -0.8870409727096558, + "logits/chosen": -2.2836763858795166, + "logits/rejected": -2.252403736114502, + "logps/chosen": -0.5681829452514648, + "logps/rejected": -0.465969979763031, + "loss": 1.0701, + "nll_loss": 0.9871380925178528, + "rewards/accuracies": 0.45625001192092896, + "rewards/chosen": -0.056818295270204544, + "rewards/margins": -0.010221302509307861, + "rewards/rejected": -0.04659699648618698, + "step": 700 + }, + { + "epoch": 0.4204915605567071, + "grad_norm": 10.5, + "learning_rate": 3.591691799133587e-07, + "log_odds_chosen": -0.19581297039985657, + "log_odds_ratio": -0.8488709330558777, + "logits/chosen": -2.3274245262145996, + "logits/rejected": -2.2992606163024902, + "logps/chosen": -0.5645796060562134, + "logps/rejected": -0.4613499641418457, + "loss": 1.0495, + "nll_loss": 0.9565572738647461, + "rewards/accuracies": 0.41874998807907104, + "rewards/chosen": -0.05645795539021492, + "rewards/margins": -0.010322963818907738, + "rewards/rejected": -0.04613499343395233, + "step": 710 + }, + { + "epoch": 0.42641397690257626, + "grad_norm": 9.75, + "learning_rate": 3.5449469131294476e-07, + "log_odds_chosen": -0.22600612044334412, + "log_odds_ratio": -0.8781830668449402, + "logits/chosen": -2.2927708625793457, + "logits/rejected": -2.2485132217407227, + "logps/chosen": -0.5577629804611206, + "logps/rejected": -0.44653376936912537, + "loss": 1.0248, + "nll_loss": 0.9297264814376831, + "rewards/accuracies": 0.4375, + "rewards/chosen": -0.055776309221982956, + "rewards/margins": -0.0111229307949543, + "rewards/rejected": -0.044653378427028656, + "step": 720 + }, + { + "epoch": 0.43233639324844536, + "grad_norm": 9.3125, + "learning_rate": 3.497755073005868e-07, + "log_odds_chosen": -0.09444288164377213, + "log_odds_ratio": -0.8072474598884583, + "logits/chosen": -2.290067672729492, + "logits/rejected": -2.257514238357544, + "logps/chosen": -0.5471974611282349, + "logps/rejected": -0.47366800904273987, + "loss": 1.0112, + "nll_loss": 0.8891817927360535, + "rewards/accuracies": 0.48750001192092896, + "rewards/chosen": -0.054719746112823486, + "rewards/margins": -0.007352945860475302, + "rewards/rejected": -0.04736679792404175, + "step": 730 + }, + { + "epoch": 0.43825880959431446, + "grad_norm": 16.75, + "learning_rate": 3.4501364640823926e-07, + "log_odds_chosen": -0.3251793384552002, + "log_odds_ratio": -0.9317482709884644, + "logits/chosen": -2.2995355129241943, + "logits/rejected": -2.2732508182525635, + "logps/chosen": -0.6547442674636841, + "logps/rejected": -0.4866989254951477, + "loss": 1.0482, + "nll_loss": 0.9714682698249817, + "rewards/accuracies": 0.38749998807907104, + "rewards/chosen": -0.06547442078590393, + "rewards/margins": -0.016804538667201996, + "rewards/rejected": -0.04866989329457283, + "step": 740 + }, + { + "epoch": 0.4441812259401836, + "grad_norm": 10.4375, + "learning_rate": 3.402111454219966e-07, + "log_odds_chosen": -0.17538635432720184, + "log_odds_ratio": -0.8506783246994019, + "logits/chosen": -2.3090875148773193, + "logits/rejected": -2.26053786277771, + "logps/chosen": -0.5713698863983154, + "logps/rejected": -0.47184181213378906, + "loss": 1.0275, + "nll_loss": 0.954795241355896, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.05713699012994766, + "rewards/margins": -0.009952803142368793, + "rewards/rejected": -0.047184187918901443, + "step": 750 + }, + { + "epoch": 0.4501036422860527, + "grad_norm": 9.3125, + "learning_rate": 3.353700585109005e-07, + "log_odds_chosen": -0.19826039671897888, + "log_odds_ratio": -0.8637887835502625, + "logits/chosen": -2.302405834197998, + "logits/rejected": -2.27463698387146, + "logps/chosen": -0.5740953683853149, + "logps/rejected": -0.4722967743873596, + "loss": 1.0239, + "nll_loss": 0.963403582572937, + "rewards/accuracies": 0.42500001192092896, + "rewards/chosen": -0.057409536093473434, + "rewards/margins": -0.010179854929447174, + "rewards/rejected": -0.04722967743873596, + "step": 760 + }, + { + "epoch": 0.4560260586319218, + "grad_norm": 9.875, + "learning_rate": 3.304924563483129e-07, + "log_odds_chosen": -0.22836697101593018, + "log_odds_ratio": -0.895135760307312, + "logits/chosen": -2.315516948699951, + "logits/rejected": -2.3024649620056152, + "logps/chosen": -0.6285193562507629, + "logps/rejected": -0.48862919211387634, + "loss": 1.073, + "nll_loss": 1.0095432996749878, + "rewards/accuracies": 0.4937500059604645, + "rewards/chosen": -0.0628519356250763, + "rewards/margins": -0.013989018276333809, + "rewards/rejected": -0.04886292293667793, + "step": 770 + }, + { + "epoch": 0.46194847497779096, + "grad_norm": 11.125, + "learning_rate": 3.255804252262283e-07, + "log_odds_chosen": -0.19756431877613068, + "log_odds_ratio": -0.856968104839325, + "logits/chosen": -2.255115032196045, + "logits/rejected": -2.226313352584839, + "logps/chosen": -0.551701545715332, + "logps/rejected": -0.45012766122817993, + "loss": 1.0499, + "nll_loss": 0.9961403608322144, + "rewards/accuracies": 0.4437499940395355, + "rewards/chosen": -0.05517015606164932, + "rewards/margins": -0.010157393291592598, + "rewards/rejected": -0.045012760907411575, + "step": 780 + }, + { + "epoch": 0.46787089132366005, + "grad_norm": 8.375, + "learning_rate": 3.2063606616290626e-07, + "log_odds_chosen": -0.3132410943508148, + "log_odds_ratio": -0.9298326373100281, + "logits/chosen": -2.2360429763793945, + "logits/rejected": -2.1973369121551514, + "logps/chosen": -0.5941890478134155, + "logps/rejected": -0.44506731629371643, + "loss": 0.9654, + "nll_loss": 0.8383496999740601, + "rewards/accuracies": 0.4124999940395355, + "rewards/chosen": -0.05941891670227051, + "rewards/margins": -0.01491218339651823, + "rewards/rejected": -0.044506728649139404, + "step": 790 + }, + { + "epoch": 0.47379330766952915, + "grad_norm": 15.0625, + "learning_rate": 3.1566149400420523e-07, + "log_odds_chosen": -0.26251059770584106, + "log_odds_ratio": -0.8918318748474121, + "logits/chosen": -2.2902214527130127, + "logits/rejected": -2.2795047760009766, + "logps/chosen": -0.6117950081825256, + "logps/rejected": -0.4801320433616638, + "loss": 1.0567, + "nll_loss": 0.9525865316390991, + "rewards/accuracies": 0.4312500059604645, + "rewards/chosen": -0.0611795075237751, + "rewards/margins": -0.013166295364499092, + "rewards/rejected": -0.04801321029663086, + "step": 800 + }, + { + "epoch": 0.4797157240153983, + "grad_norm": 10.3125, + "learning_rate": 3.1065883651900087e-07, + "log_odds_chosen": -0.2203420102596283, + "log_odds_ratio": -0.8829119801521301, + "logits/chosen": -2.2788829803466797, + "logits/rejected": -2.2381834983825684, + "logps/chosen": -0.5892807841300964, + "logps/rejected": -0.48378220200538635, + "loss": 1.0678, + "nll_loss": 0.9220091104507446, + "rewards/accuracies": 0.41874998807907104, + "rewards/chosen": -0.05892808362841606, + "rewards/margins": -0.01054986473172903, + "rewards/rejected": -0.048378217965364456, + "step": 810 + }, + { + "epoch": 0.4856381403612674, + "grad_norm": 9.875, + "learning_rate": 3.056302334890786e-07, + "log_odds_chosen": -0.30824679136276245, + "log_odds_ratio": -0.9259847402572632, + "logits/chosen": -2.288405179977417, + "logits/rejected": -2.2682487964630127, + "logps/chosen": -0.6053352355957031, + "logps/rejected": -0.4507838189601898, + "loss": 1.0098, + "nll_loss": 0.9126564860343933, + "rewards/accuracies": 0.38749998807907104, + "rewards/chosen": -0.06053352355957031, + "rewards/margins": -0.01545514166355133, + "rewards/rejected": -0.04507838934659958, + "step": 820 + }, + { + "epoch": 0.4915605567071365, + "grad_norm": 12.6875, + "learning_rate": 3.0057783579388586e-07, + "log_odds_chosen": -0.15970291197299957, + "log_odds_ratio": -0.8330586552619934, + "logits/chosen": -2.2909493446350098, + "logits/rejected": -2.2521986961364746, + "logps/chosen": -0.5571908950805664, + "logps/rejected": -0.4815686345100403, + "loss": 1.0258, + "nll_loss": 0.9384473562240601, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.05571908876299858, + "rewards/margins": -0.007562229875475168, + "rewards/rejected": -0.04815686494112015, + "step": 830 + }, + { + "epoch": 0.49748297305300565, + "grad_norm": 11.75, + "learning_rate": 2.9550380449053907e-07, + "log_odds_chosen": -0.18619410693645477, + "log_odds_ratio": -0.8525155782699585, + "logits/chosen": -2.2423572540283203, + "logits/rejected": -2.221928596496582, + "logps/chosen": -0.5615742206573486, + "logps/rejected": -0.4591636657714844, + "loss": 1.0133, + "nll_loss": 0.8223134279251099, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": -0.056157421320676804, + "rewards/margins": -0.010241055861115456, + "rewards/rejected": -0.0459163673222065, + "step": 840 + }, + { + "epoch": 0.5034053893988747, + "grad_norm": 8.625, + "learning_rate": 2.904103098894767e-07, + "log_odds_chosen": -0.22144293785095215, + "log_odds_ratio": -0.8922742009162903, + "logits/chosen": -2.280796527862549, + "logits/rejected": -2.2380261421203613, + "logps/chosen": -0.5996569991111755, + "logps/rejected": -0.4632148742675781, + "loss": 1.0102, + "nll_loss": 0.9282135963439941, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.059965699911117554, + "rewards/margins": -0.013644215650856495, + "rewards/rejected": -0.04632148891687393, + "step": 850 + }, + { + "epoch": 0.5093278057447439, + "grad_norm": 12.625, + "learning_rate": 2.852995306261545e-07, + "log_odds_chosen": -0.1986076533794403, + "log_odds_ratio": -0.8607484698295593, + "logits/chosen": -2.306536912918091, + "logits/rejected": -2.2707247734069824, + "logps/chosen": -0.575395405292511, + "logps/rejected": -0.4835848808288574, + "loss": 1.074, + "nll_loss": 1.0040955543518066, + "rewards/accuracies": 0.4437499940395355, + "rewards/chosen": -0.05753953382372856, + "rewards/margins": -0.009181044064462185, + "rewards/rejected": -0.0483584925532341, + "step": 860 + }, + { + "epoch": 0.515250222090613, + "grad_norm": 10.6875, + "learning_rate": 2.801736527291797e-07, + "log_odds_chosen": -0.26449286937713623, + "log_odds_ratio": -0.9028227925300598, + "logits/chosen": -2.275608777999878, + "logits/rejected": -2.233181953430176, + "logps/chosen": -0.61722731590271, + "logps/rejected": -0.4729304313659668, + "loss": 1.042, + "nll_loss": 0.908827006816864, + "rewards/accuracies": 0.39375001192092896, + "rewards/chosen": -0.061722736805677414, + "rewards/margins": -0.014429694041609764, + "rewards/rejected": -0.0472930371761322, + "step": 870 + }, + { + "epoch": 0.5211726384364821, + "grad_norm": 10.875, + "learning_rate": 2.750348686852836e-07, + "log_odds_chosen": -0.31994161009788513, + "log_odds_ratio": -0.9219182729721069, + "logits/chosen": -2.329312324523926, + "logits/rejected": -2.2651875019073486, + "logps/chosen": -0.6155102252960205, + "logps/rejected": -0.4632096290588379, + "loss": 1.0724, + "nll_loss": 1.0065295696258545, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.06155102327466011, + "rewards/margins": -0.015230064280331135, + "rewards/rejected": -0.04632095992565155, + "step": 880 + }, + { + "epoch": 0.5270950547823512, + "grad_norm": 11.875, + "learning_rate": 2.69885376501531e-07, + "log_odds_chosen": -0.23163005709648132, + "log_odds_ratio": -0.8846963047981262, + "logits/chosen": -2.261355400085449, + "logits/rejected": -2.2470784187316895, + "logps/chosen": -0.6110343933105469, + "logps/rejected": -0.4852830767631531, + "loss": 1.0546, + "nll_loss": 0.9538838267326355, + "rewards/accuracies": 0.4625000059604645, + "rewards/chosen": -0.06110344082117081, + "rewards/margins": -0.012575129978358746, + "rewards/rejected": -0.04852830991148949, + "step": 890 + }, + { + "epoch": 0.5330174711282203, + "grad_norm": 11.9375, + "learning_rate": 2.647273787651687e-07, + "log_odds_chosen": -0.18702737987041473, + "log_odds_ratio": -0.8396440744400024, + "logits/chosen": -2.2948384284973145, + "logits/rejected": -2.2751121520996094, + "logps/chosen": -0.5671176910400391, + "logps/rejected": -0.47453179955482483, + "loss": 1.0288, + "nll_loss": 0.9608666300773621, + "rewards/accuracies": 0.48124998807907104, + "rewards/chosen": -0.056711774319410324, + "rewards/margins": -0.009258597157895565, + "rewards/rejected": -0.047453176230192184, + "step": 900 + }, + { + "epoch": 0.5389398874740894, + "grad_norm": 16.75, + "learning_rate": 2.5956308170151526e-07, + "log_odds_chosen": -0.40357428789138794, + "log_odds_ratio": -1.0180401802062988, + "logits/chosen": -2.260730504989624, + "logits/rejected": -2.2328133583068848, + "logps/chosen": -0.7037028670310974, + "logps/rejected": -0.4578544497489929, + "loss": 1.1183, + "nll_loss": 0.9839082956314087, + "rewards/accuracies": 0.4124999940395355, + "rewards/chosen": -0.07037027925252914, + "rewards/margins": -0.02458484098315239, + "rewards/rejected": -0.04578544571995735, + "step": 910 + }, + { + "epoch": 0.5448623038199586, + "grad_norm": 10.3125, + "learning_rate": 2.543946942302944e-07, + "log_odds_chosen": -0.21979165077209473, + "log_odds_ratio": -0.8726961016654968, + "logits/chosen": -2.2551956176757812, + "logits/rejected": -2.2191715240478516, + "logps/chosen": -0.5772194862365723, + "logps/rejected": -0.45551061630249023, + "loss": 1.0123, + "nll_loss": 0.9414900541305542, + "rewards/accuracies": 0.48750001192092896, + "rewards/chosen": -0.057721953839063644, + "rewards/margins": -0.012170888483524323, + "rewards/rejected": -0.04555106535553932, + "step": 920 + }, + { + "epoch": 0.5507847201658277, + "grad_norm": 11.1875, + "learning_rate": 2.492244270208158e-07, + "log_odds_chosen": -0.1632816195487976, + "log_odds_ratio": -0.8366379737854004, + "logits/chosen": -2.2645580768585205, + "logits/rejected": -2.2385404109954834, + "logps/chosen": -0.5705746412277222, + "logps/rejected": -0.48298463225364685, + "loss": 0.9953, + "nll_loss": 0.9456483721733093, + "rewards/accuracies": 0.512499988079071, + "rewards/chosen": -0.05705747753381729, + "rewards/margins": -0.008759009651839733, + "rewards/rejected": -0.048298463225364685, + "step": 930 + }, + { + "epoch": 0.5567071365116968, + "grad_norm": 9.5, + "learning_rate": 2.440544915464078e-07, + "log_odds_chosen": -0.2142259180545807, + "log_odds_ratio": -0.8674869537353516, + "logits/chosen": -2.294877290725708, + "logits/rejected": -2.2555816173553467, + "logps/chosen": -0.5593573451042175, + "logps/rejected": -0.45421138405799866, + "loss": 1.0237, + "nll_loss": 0.9162901043891907, + "rewards/accuracies": 0.42500001192092896, + "rewards/chosen": -0.055935733020305634, + "rewards/margins": -0.01051459088921547, + "rewards/rejected": -0.045421142131090164, + "step": 940 + }, + { + "epoch": 0.5626295528575659, + "grad_norm": 10.0625, + "learning_rate": 2.3888709913850593e-07, + "log_odds_chosen": -0.21557164192199707, + "log_odds_ratio": -0.8706417083740234, + "logits/chosen": -2.3428778648376465, + "logits/rejected": -2.3050456047058105, + "logps/chosen": -0.5851597785949707, + "logps/rejected": -0.4760478436946869, + "loss": 1.0825, + "nll_loss": 0.9482911825180054, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.05851597711443901, + "rewards/margins": -0.010911193676292896, + "rewards/rejected": -0.04760478436946869, + "step": 950 + }, + { + "epoch": 0.568551969203435, + "grad_norm": 9.75, + "learning_rate": 2.337244600408025e-07, + "log_odds_chosen": -0.30868110060691833, + "log_odds_ratio": -0.9379078149795532, + "logits/chosen": -2.3101601600646973, + "logits/rejected": -2.2805612087249756, + "logps/chosen": -0.6376503109931946, + "logps/rejected": -0.4714363217353821, + "loss": 1.051, + "nll_loss": 0.983268141746521, + "rewards/accuracies": 0.38749998807907104, + "rewards/chosen": -0.0637650191783905, + "rewards/margins": -0.016621392220258713, + "rewards/rejected": -0.04714363440871239, + "step": 960 + }, + { + "epoch": 0.5744743855493041, + "grad_norm": 11.9375, + "learning_rate": 2.2856878246386085e-07, + "log_odds_chosen": -0.20517487823963165, + "log_odds_ratio": -0.8652151226997375, + "logits/chosen": -2.306201457977295, + "logits/rejected": -2.283665180206299, + "logps/chosen": -0.5846830606460571, + "logps/rejected": -0.4740404486656189, + "loss": 1.0953, + "nll_loss": 1.0276809930801392, + "rewards/accuracies": 0.40625, + "rewards/chosen": -0.058468304574489594, + "rewards/margins": -0.011064260266721249, + "rewards/rejected": -0.04740404710173607, + "step": 970 + }, + { + "epoch": 0.5803968018951732, + "grad_norm": 13.0, + "learning_rate": 2.2342227164060035e-07, + "log_odds_chosen": -0.2963787019252777, + "log_odds_ratio": -0.9264262318611145, + "logits/chosen": -2.2660953998565674, + "logits/rejected": -2.211947441101074, + "logps/chosen": -0.6310227513313293, + "logps/rejected": -0.4772140085697174, + "loss": 1.0355, + "nll_loss": 0.916420578956604, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.06310227513313293, + "rewards/margins": -0.015380874276161194, + "rewards/rejected": -0.04772140458226204, + "step": 980 + }, + { + "epoch": 0.5863192182410424, + "grad_norm": 9.5625, + "learning_rate": 2.182871288830533e-07, + "log_odds_chosen": -0.3251541554927826, + "log_odds_ratio": -0.941790759563446, + "logits/chosen": -2.293196439743042, + "logits/rejected": -2.232034206390381, + "logps/chosen": -0.6307833790779114, + "logps/rejected": -0.4696255624294281, + "loss": 1.0677, + "nll_loss": 0.967657208442688, + "rewards/accuracies": 0.35624998807907104, + "rewards/chosen": -0.06307834386825562, + "rewards/margins": -0.016115780919790268, + "rewards/rejected": -0.04696255922317505, + "step": 990 + }, + { + "epoch": 0.5922416345869115, + "grad_norm": 9.6875, + "learning_rate": 2.131655506408007e-07, + "log_odds_chosen": -0.22425034642219543, + "log_odds_ratio": -0.8798470497131348, + "logits/chosen": -2.2940893173217773, + "logits/rejected": -2.254329204559326, + "logps/chosen": -0.5970818400382996, + "logps/rejected": -0.48467540740966797, + "loss": 1.0208, + "nll_loss": 0.9316588640213013, + "rewards/accuracies": 0.4375, + "rewards/chosen": -0.059708189219236374, + "rewards/margins": -0.011240655556321144, + "rewards/rejected": -0.04846753552556038, + "step": 1000 + }, + { + "epoch": 0.5981640509327806, + "grad_norm": 9.5, + "learning_rate": 2.0805972756148643e-07, + "log_odds_chosen": -0.3093208074569702, + "log_odds_ratio": -0.9420243501663208, + "logits/chosen": -2.2883636951446533, + "logits/rejected": -2.275327682495117, + "logps/chosen": -0.6675941348075867, + "logps/rejected": -0.47907954454421997, + "loss": 1.0708, + "nll_loss": 1.0012794733047485, + "rewards/accuracies": 0.36250001192092896, + "rewards/chosen": -0.06675940752029419, + "rewards/margins": -0.01885146275162697, + "rewards/rejected": -0.047907955944538116, + "step": 1010 + }, + { + "epoch": 0.6040864672786497, + "grad_norm": 9.1875, + "learning_rate": 2.0297184355381432e-07, + "log_odds_chosen": -0.2639048993587494, + "log_odds_ratio": -0.89494389295578, + "logits/chosen": -2.304008722305298, + "logits/rejected": -2.265723705291748, + "logps/chosen": -0.5768560767173767, + "logps/rejected": -0.4624248445034027, + "loss": 1.0328, + "nll_loss": 0.9577334523200989, + "rewards/accuracies": 0.41874998807907104, + "rewards/chosen": -0.05768561363220215, + "rewards/margins": -0.011443129740655422, + "rewards/rejected": -0.04624248296022415, + "step": 1020 + }, + { + "epoch": 0.6100088836245188, + "grad_norm": 9.125, + "learning_rate": 1.9790407485342638e-07, + "log_odds_chosen": -0.3557616174221039, + "log_odds_ratio": -0.9650157690048218, + "logits/chosen": -2.327831268310547, + "logits/rejected": -2.2884087562561035, + "logps/chosen": -0.6429619193077087, + "logps/rejected": -0.4408210217952728, + "loss": 1.0091, + "nll_loss": 0.9397379755973816, + "rewards/accuracies": 0.42500001192092896, + "rewards/chosen": -0.06429620087146759, + "rewards/margins": -0.02021409198641777, + "rewards/rejected": -0.04408210515975952, + "step": 1030 + }, + { + "epoch": 0.6159312999703879, + "grad_norm": 10.75, + "learning_rate": 1.928585890920641e-07, + "log_odds_chosen": -0.1900234967470169, + "log_odds_ratio": -0.8621436953544617, + "logits/chosen": -2.2921512126922607, + "logits/rejected": -2.2576987743377686, + "logps/chosen": -0.5736020803451538, + "logps/rejected": -0.46828731894493103, + "loss": 1.0474, + "nll_loss": 0.9162224531173706, + "rewards/accuracies": 0.41874998807907104, + "rewards/chosen": -0.0573602095246315, + "rewards/margins": -0.010531473904848099, + "rewards/rejected": -0.046828728169202805, + "step": 1040 + }, + { + "epoch": 0.6218537163162571, + "grad_norm": 11.875, + "learning_rate": 1.8783754437040902e-07, + "log_odds_chosen": -0.26852238178253174, + "log_odds_ratio": -0.9126049280166626, + "logits/chosen": -2.275580883026123, + "logits/rejected": -2.2431647777557373, + "logps/chosen": -0.5689065456390381, + "logps/rejected": -0.44645556807518005, + "loss": 1.0095, + "nll_loss": 0.9046837091445923, + "rewards/accuracies": 0.39375001192092896, + "rewards/chosen": -0.05689065903425217, + "rewards/margins": -0.012245100922882557, + "rewards/rejected": -0.044645555317401886, + "step": 1050 + }, + { + "epoch": 0.6277761326621262, + "grad_norm": 9.25, + "learning_rate": 1.8284308833500118e-07, + "log_odds_chosen": -0.2125154435634613, + "log_odds_ratio": -0.8751262426376343, + "logits/chosen": -2.277667760848999, + "logits/rejected": -2.253131866455078, + "logps/chosen": -0.5812402963638306, + "logps/rejected": -0.47419658303260803, + "loss": 1.0476, + "nll_loss": 0.93915194272995, + "rewards/accuracies": 0.46875, + "rewards/chosen": -0.05812402814626694, + "rewards/margins": -0.010704366490244865, + "rewards/rejected": -0.04741965979337692, + "step": 1060 + }, + { + "epoch": 0.6336985490079953, + "grad_norm": 11.0625, + "learning_rate": 1.7787735725962756e-07, + "log_odds_chosen": -0.27183157205581665, + "log_odds_ratio": -0.9005556106567383, + "logits/chosen": -2.2851767539978027, + "logits/rejected": -2.2494091987609863, + "logps/chosen": -0.613685667514801, + "logps/rejected": -0.47953805327415466, + "loss": 1.0919, + "nll_loss": 0.9954058527946472, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.06136856600642204, + "rewards/margins": -0.013414761051535606, + "rewards/rejected": -0.047953806817531586, + "step": 1070 + }, + { + "epoch": 0.6396209653538644, + "grad_norm": 9.375, + "learning_rate": 1.7294247513157616e-07, + "log_odds_chosen": -0.22400331497192383, + "log_odds_ratio": -0.8672366142272949, + "logits/chosen": -2.3089351654052734, + "logits/rejected": -2.2596447467803955, + "logps/chosen": -0.5711158514022827, + "logps/rejected": -0.46820420026779175, + "loss": 1.0251, + "nll_loss": 0.960826575756073, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.05711158365011215, + "rewards/margins": -0.010291163809597492, + "rewards/rejected": -0.046820417046546936, + "step": 1080 + }, + { + "epoch": 0.6455433816997335, + "grad_norm": 10.0625, + "learning_rate": 1.6804055274314494e-07, + "log_odds_chosen": -0.19274529814720154, + "log_odds_ratio": -0.8532935380935669, + "logits/chosen": -2.270355224609375, + "logits/rejected": -2.248356342315674, + "logps/chosen": -0.5621662735939026, + "logps/rejected": -0.47271862626075745, + "loss": 1.0217, + "nll_loss": 0.9073405265808105, + "rewards/accuracies": 0.4124999940395355, + "rewards/chosen": -0.056216634809970856, + "rewards/margins": -0.008944764733314514, + "rewards/rejected": -0.04727186635136604, + "step": 1090 + }, + { + "epoch": 0.6514657980456026, + "grad_norm": 10.25, + "learning_rate": 1.6317368678879496e-07, + "log_odds_chosen": -0.20030847191810608, + "log_odds_ratio": -0.8486258387565613, + "logits/chosen": -2.3088138103485107, + "logits/rejected": -2.27048659324646, + "logps/chosen": -0.5797799825668335, + "logps/rejected": -0.4854944348335266, + "loss": 1.0725, + "nll_loss": 0.9621385335922241, + "rewards/accuracies": 0.40625, + "rewards/chosen": -0.05797800421714783, + "rewards/margins": -0.009428557008504868, + "rewards/rejected": -0.04854945093393326, + "step": 1100 + }, + { + "epoch": 0.6573882143914718, + "grad_norm": 12.125, + "learning_rate": 1.5834395896833281e-07, + "log_odds_chosen": -0.3109692335128784, + "log_odds_ratio": -0.9263485670089722, + "logits/chosen": -2.3202879428863525, + "logits/rejected": -2.265725612640381, + "logps/chosen": -0.6155823469161987, + "logps/rejected": -0.4582076966762543, + "loss": 1.0499, + "nll_loss": 0.9659247398376465, + "rewards/accuracies": 0.4124999940395355, + "rewards/chosen": -0.06155823543667793, + "rewards/margins": -0.015737462788820267, + "rewards/rejected": -0.045820772647857666, + "step": 1110 + }, + { + "epoch": 0.6633106307373409, + "grad_norm": 9.9375, + "learning_rate": 1.535534350965075e-07, + "log_odds_chosen": -0.25020501017570496, + "log_odds_ratio": -0.8859984278678894, + "logits/chosen": -2.3179831504821777, + "logits/rejected": -2.3054070472717285, + "logps/chosen": -0.5626355409622192, + "logps/rejected": -0.434339702129364, + "loss": 1.0081, + "nll_loss": 0.9209376573562622, + "rewards/accuracies": 0.41874998807907104, + "rewards/chosen": -0.056263554841279984, + "rewards/margins": -0.012829584069550037, + "rewards/rejected": -0.04343396797776222, + "step": 1120 + }, + { + "epoch": 0.66923304708321, + "grad_norm": 15.625, + "learning_rate": 1.4880416421940154e-07, + "log_odds_chosen": -0.23923833668231964, + "log_odds_ratio": -0.8853415250778198, + "logits/chosen": -2.26355242729187, + "logits/rejected": -2.240990161895752, + "logps/chosen": -0.6214331388473511, + "logps/rejected": -0.4842914938926697, + "loss": 1.1113, + "nll_loss": 1.0326354503631592, + "rewards/accuracies": 0.4124999940395355, + "rewards/chosen": -0.062143318355083466, + "rewards/margins": -0.01371416263282299, + "rewards/rejected": -0.048429153859615326, + "step": 1130 + }, + { + "epoch": 0.6751554634290791, + "grad_norm": 12.75, + "learning_rate": 1.4409817773799459e-07, + "log_odds_chosen": -0.23250596225261688, + "log_odds_ratio": -0.8853020668029785, + "logits/chosen": -2.288491725921631, + "logits/rejected": -2.24708890914917, + "logps/chosen": -0.6100078225135803, + "logps/rejected": -0.4815722405910492, + "loss": 1.0552, + "nll_loss": 0.9337055087089539, + "rewards/accuracies": 0.4124999940395355, + "rewards/chosen": -0.061000775545835495, + "rewards/margins": -0.01284355204552412, + "rewards/rejected": -0.0481572225689888, + "step": 1140 + }, + { + "epoch": 0.6810778797749482, + "grad_norm": 10.4375, + "learning_rate": 1.3943748853927385e-07, + "log_odds_chosen": -0.3103570342063904, + "log_odds_ratio": -0.9324914216995239, + "logits/chosen": -2.28434419631958, + "logits/rejected": -2.277893543243408, + "logps/chosen": -0.64482182264328, + "logps/rejected": -0.46989989280700684, + "loss": 1.035, + "nll_loss": 0.934810996055603, + "rewards/accuracies": 0.3687500059604645, + "rewards/chosen": -0.064482182264328, + "rewards/margins": -0.017492195591330528, + "rewards/rejected": -0.046989988535642624, + "step": 1150 + }, + { + "epoch": 0.6870002961208173, + "grad_norm": 11.375, + "learning_rate": 1.3482409013526436e-07, + "log_odds_chosen": -0.3323788642883301, + "log_odds_ratio": -0.9415693283081055, + "logits/chosen": -2.272247791290283, + "logits/rejected": -2.2672269344329834, + "logps/chosen": -0.6134747862815857, + "logps/rejected": -0.4571937918663025, + "loss": 1.0638, + "nll_loss": 0.9829813241958618, + "rewards/accuracies": 0.3812499940395355, + "rewards/chosen": -0.06134747713804245, + "rewards/margins": -0.01562810130417347, + "rewards/rejected": -0.04571938142180443, + "step": 1160 + }, + { + "epoch": 0.6929227124666865, + "grad_norm": 9.4375, + "learning_rate": 1.302599558103456e-07, + "log_odds_chosen": -0.23517660796642303, + "log_odds_ratio": -0.8992069363594055, + "logits/chosen": -2.3287193775177, + "logits/rejected": -2.293454885482788, + "logps/chosen": -0.6200941801071167, + "logps/rejected": -0.4878036081790924, + "loss": 1.0413, + "nll_loss": 0.9660770297050476, + "rewards/accuracies": 0.4312500059604645, + "rewards/chosen": -0.06200941652059555, + "rewards/margins": -0.013229051604866982, + "rewards/rejected": -0.04878035932779312, + "step": 1170 + }, + { + "epoch": 0.6988451288125556, + "grad_norm": 13.0625, + "learning_rate": 1.257470377772214e-07, + "log_odds_chosen": -0.27837398648262024, + "log_odds_ratio": -0.9113019704818726, + "logits/chosen": -2.3072619438171387, + "logits/rejected": -2.282047748565674, + "logps/chosen": -0.5952633023262024, + "logps/rejected": -0.4496152400970459, + "loss": 1.0661, + "nll_loss": 0.9518778920173645, + "rewards/accuracies": 0.39375001192092896, + "rewards/chosen": -0.05952633172273636, + "rewards/margins": -0.014564801938831806, + "rewards/rejected": -0.04496152698993683, + "step": 1180 + }, + { + "epoch": 0.7047675451584247, + "grad_norm": 13.5, + "learning_rate": 1.2128726634190046e-07, + "log_odds_chosen": -0.26337355375289917, + "log_odds_ratio": -0.8862990140914917, + "logits/chosen": -2.3180294036865234, + "logits/rejected": -2.274146556854248, + "logps/chosen": -0.5859608054161072, + "logps/rejected": -0.44980812072753906, + "loss": 1.0204, + "nll_loss": 0.91375333070755, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.05859608203172684, + "rewards/margins": -0.01361527293920517, + "rewards/rejected": -0.044980812817811966, + "step": 1190 + }, + { + "epoch": 0.7106899615042938, + "grad_norm": 13.3125, + "learning_rate": 1.1688254907804992e-07, + "log_odds_chosen": -0.2645830512046814, + "log_odds_ratio": -0.9049927592277527, + "logits/chosen": -2.2710115909576416, + "logits/rejected": -2.2327637672424316, + "logps/chosen": -0.6210035085678101, + "logps/rejected": -0.48435431718826294, + "loss": 1.0683, + "nll_loss": 0.9852622747421265, + "rewards/accuracies": 0.38749998807907104, + "rewards/chosen": -0.06210034340620041, + "rewards/margins": -0.01366492174565792, + "rewards/rejected": -0.048435427248477936, + "step": 1200 + }, + { + "epoch": 0.7166123778501629, + "grad_norm": 9.1875, + "learning_rate": 1.1253477001106956e-07, + "log_odds_chosen": -0.18010739982128143, + "log_odds_ratio": -0.848807156085968, + "logits/chosen": -2.2503340244293213, + "logits/rejected": -2.214433431625366, + "logps/chosen": -0.5777139663696289, + "logps/rejected": -0.48649734258651733, + "loss": 1.0408, + "nll_loss": 0.9145431518554688, + "rewards/accuracies": 0.48124998807907104, + "rewards/chosen": -0.05777139216661453, + "rewards/margins": -0.009121658280491829, + "rewards/rejected": -0.04864973947405815, + "step": 1210 + }, + { + "epoch": 0.722534794196032, + "grad_norm": 14.1875, + "learning_rate": 1.0824578881224065e-07, + "log_odds_chosen": -0.14203877747058868, + "log_odds_ratio": -0.8198834657669067, + "logits/chosen": -2.323948621749878, + "logits/rejected": -2.3119778633117676, + "logps/chosen": -0.5389841794967651, + "logps/rejected": -0.4636968672275543, + "loss": 0.9852, + "nll_loss": 0.871512770652771, + "rewards/accuracies": 0.42500001192092896, + "rewards/chosen": -0.053898416459560394, + "rewards/margins": -0.007528733462095261, + "rewards/rejected": -0.046369682997465134, + "step": 1220 + }, + { + "epoch": 0.728457210541901, + "grad_norm": 10.25, + "learning_rate": 1.0401744000328918e-07, + "log_odds_chosen": -0.19983641803264618, + "log_odds_ratio": -0.8725547790527344, + "logits/chosen": -2.268932342529297, + "logits/rejected": -2.2664635181427, + "logps/chosen": -0.5955653786659241, + "logps/rejected": -0.4910568296909332, + "loss": 1.0167, + "nll_loss": 0.9245740175247192, + "rewards/accuracies": 0.4124999940395355, + "rewards/chosen": -0.059556543827056885, + "rewards/margins": -0.010450851172208786, + "rewards/rejected": -0.0491056926548481, + "step": 1230 + }, + { + "epoch": 0.7343796268877703, + "grad_norm": 11.25, + "learning_rate": 9.985153217170902e-08, + "log_odds_chosen": -0.27591392397880554, + "log_odds_ratio": -0.9048240780830383, + "logits/chosen": -2.3324825763702393, + "logits/rejected": -2.3199105262756348, + "logps/chosen": -0.613168478012085, + "logps/rejected": -0.47140389680862427, + "loss": 1.1081, + "nll_loss": 1.0194193124771118, + "rewards/accuracies": 0.40625, + "rewards/chosen": -0.061316847801208496, + "rewards/margins": -0.01417645812034607, + "rewards/rejected": -0.047140393406152725, + "step": 1240 + }, + { + "epoch": 0.7403020432336394, + "grad_norm": 12.25, + "learning_rate": 9.574984719717553e-08, + "log_odds_chosen": -0.24321213364601135, + "log_odds_ratio": -0.89483243227005, + "logits/chosen": -2.3112952709198, + "logits/rejected": -2.2951555252075195, + "logps/chosen": -0.5895348191261292, + "logps/rejected": -0.4669637680053711, + "loss": 1.0306, + "nll_loss": 0.9830119013786316, + "rewards/accuracies": 0.42500001192092896, + "rewards/chosen": -0.05895348638296127, + "rewards/margins": -0.012257112190127373, + "rewards/rejected": -0.04669637233018875, + "step": 1250 + }, + { + "epoch": 0.7462244595795084, + "grad_norm": 9.9375, + "learning_rate": 9.171413948938459e-08, + "log_odds_chosen": -0.2236686646938324, + "log_odds_ratio": -0.879412829875946, + "logits/chosen": -2.3061726093292236, + "logits/rejected": -2.254133701324463, + "logps/chosen": -0.6122428178787231, + "logps/rejected": -0.49692878127098083, + "loss": 1.0596, + "nll_loss": 0.9902396202087402, + "rewards/accuracies": 0.4375, + "rewards/chosen": -0.061224281787872314, + "rewards/margins": -0.011531401425600052, + "rewards/rejected": -0.04969288408756256, + "step": 1260 + }, + { + "epoch": 0.7521468759253775, + "grad_norm": 9.4375, + "learning_rate": 8.774613523764049e-08, + "log_odds_chosen": -0.26704955101013184, + "log_odds_ratio": -0.8915314674377441, + "logits/chosen": -2.2866809368133545, + "logits/rejected": -2.239720582962036, + "logps/chosen": -0.5904482007026672, + "logps/rejected": -0.4593755304813385, + "loss": 1.0287, + "nll_loss": 0.9099699854850769, + "rewards/accuracies": 0.39375001192092896, + "rewards/chosen": -0.059044819325208664, + "rewards/margins": -0.013107270002365112, + "rewards/rejected": -0.04593754559755325, + "step": 1270 + }, + { + "epoch": 0.7580692922712466, + "grad_norm": 11.125, + "learning_rate": 8.384753167251412e-08, + "log_odds_chosen": -0.2359321415424347, + "log_odds_ratio": -0.8834274411201477, + "logits/chosen": -2.241650104522705, + "logits/rejected": -2.2175180912017822, + "logps/chosen": -0.5696910619735718, + "logps/rejected": -0.4511106610298157, + "loss": 0.9877, + "nll_loss": 0.8763992190361023, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.05696910619735718, + "rewards/margins": -0.01185804232954979, + "rewards/rejected": -0.045111071318387985, + "step": 1280 + }, + { + "epoch": 0.7639917086171157, + "grad_norm": 10.1875, + "learning_rate": 8.001999633988942e-08, + "log_odds_chosen": -0.26344627141952515, + "log_odds_ratio": -0.8965330123901367, + "logits/chosen": -2.317347764968872, + "logits/rejected": -2.2693257331848145, + "logps/chosen": -0.5864616632461548, + "logps/rejected": -0.45855003595352173, + "loss": 0.9993, + "nll_loss": 0.9034452438354492, + "rewards/accuracies": 0.40625, + "rewards/chosen": -0.058646153658628464, + "rewards/margins": -0.012791156768798828, + "rewards/rejected": -0.04585500434041023, + "step": 1290 + }, + { + "epoch": 0.769914124962985, + "grad_norm": 10.4375, + "learning_rate": 7.62651663877042e-08, + "log_odds_chosen": -0.17867620289325714, + "log_odds_ratio": -0.8561042547225952, + "logits/chosen": -2.2582385540008545, + "logits/rejected": -2.232391357421875, + "logps/chosen": -0.5790480971336365, + "logps/rejected": -0.4850679337978363, + "loss": 1.0993, + "nll_loss": 0.9781789779663086, + "rewards/accuracies": 0.4625000059604645, + "rewards/chosen": -0.057904817163944244, + "rewards/margins": -0.009398018009960651, + "rewards/rejected": -0.04850679263472557, + "step": 1300 + }, + { + "epoch": 0.775836541308854, + "grad_norm": 9.0, + "learning_rate": 7.258464786569549e-08, + "log_odds_chosen": -0.2144562005996704, + "log_odds_ratio": -0.8685463070869446, + "logits/chosen": -2.322035551071167, + "logits/rejected": -2.2717068195343018, + "logps/chosen": -0.5770824551582336, + "logps/rejected": -0.47164034843444824, + "loss": 1.0633, + "nll_loss": 0.9638098478317261, + "rewards/accuracies": 0.4437499940395355, + "rewards/chosen": -0.057708241045475006, + "rewards/margins": -0.010544205084443092, + "rewards/rejected": -0.04716403782367706, + "step": 1310 + }, + { + "epoch": 0.7817589576547231, + "grad_norm": 12.375, + "learning_rate": 6.898001503844483e-08, + "log_odds_chosen": -0.3992167115211487, + "log_odds_ratio": -1.0115876197814941, + "logits/chosen": -2.3506951332092285, + "logits/rejected": -2.3084568977355957, + "logps/chosen": -0.7245315313339233, + "logps/rejected": -0.4761766493320465, + "loss": 1.0509, + "nll_loss": 1.0009998083114624, + "rewards/accuracies": 0.40625, + "rewards/chosen": -0.07245315611362457, + "rewards/margins": -0.0248354934155941, + "rewards/rejected": -0.04761766642332077, + "step": 1320 + }, + { + "epoch": 0.7876813740005922, + "grad_norm": 11.3125, + "learning_rate": 6.545280971202014e-08, + "log_odds_chosen": -0.17274455726146698, + "log_odds_ratio": -0.8463727831840515, + "logits/chosen": -2.310338020324707, + "logits/rejected": -2.2806801795959473, + "logps/chosen": -0.5623282194137573, + "logps/rejected": -0.46932634711265564, + "loss": 1.0128, + "nll_loss": 0.9555832147598267, + "rewards/accuracies": 0.4437499940395355, + "rewards/chosen": -0.05623283237218857, + "rewards/margins": -0.009300192818045616, + "rewards/rejected": -0.046932633966207504, + "step": 1330 + }, + { + "epoch": 0.7936037903464613, + "grad_norm": 11.0625, + "learning_rate": 6.200454057450022e-08, + "log_odds_chosen": -0.2566189169883728, + "log_odds_ratio": -0.8830870389938354, + "logits/chosen": -2.2640976905822754, + "logits/rejected": -2.2190680503845215, + "logps/chosen": -0.6031737327575684, + "logps/rejected": -0.4697316586971283, + "loss": 1.0756, + "nll_loss": 0.9159650802612305, + "rewards/accuracies": 0.42500001192092896, + "rewards/chosen": -0.060317374765872955, + "rewards/margins": -0.01334420870989561, + "rewards/rejected": -0.04697316139936447, + "step": 1340 + }, + { + "epoch": 0.7995262066923304, + "grad_norm": 12.0, + "learning_rate": 5.863668255066492e-08, + "log_odds_chosen": -0.2177290916442871, + "log_odds_ratio": -0.8585535287857056, + "logits/chosen": -2.262441396713257, + "logits/rejected": -2.231968402862549, + "logps/chosen": -0.5860260128974915, + "logps/rejected": -0.47981762886047363, + "loss": 1.0081, + "nll_loss": 0.9461213946342468, + "rewards/accuracies": 0.3687500059604645, + "rewards/chosen": -0.05860259383916855, + "rewards/margins": -0.010620838031172752, + "rewards/rejected": -0.047981761395931244, + "step": 1350 + }, + { + "epoch": 0.8054486230381996, + "grad_norm": 9.625, + "learning_rate": 5.53506761711274e-08, + "log_odds_chosen": -0.21258850395679474, + "log_odds_ratio": -0.8654868006706238, + "logits/chosen": -2.2940022945404053, + "logits/rejected": -2.264361619949341, + "logps/chosen": -0.5948741436004639, + "logps/rejected": -0.48127132654190063, + "loss": 1.0435, + "nll_loss": 1.0004308223724365, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.059487421065568924, + "rewards/margins": -0.0113602876663208, + "rewards/rejected": -0.04812713339924812, + "step": 1360 + }, + { + "epoch": 0.8113710393840687, + "grad_norm": 14.375, + "learning_rate": 5.2147926956177174e-08, + "log_odds_chosen": -0.3361436724662781, + "log_odds_ratio": -0.9543386697769165, + "logits/chosen": -2.2842912673950195, + "logits/rejected": -2.2753098011016846, + "logps/chosen": -0.6304486989974976, + "logps/rejected": -0.4559609293937683, + "loss": 1.0422, + "nll_loss": 0.9697739481925964, + "rewards/accuracies": 0.35624998807907104, + "rewards/chosen": -0.06304488331079483, + "rewards/margins": -0.0174487866461277, + "rewards/rejected": -0.04559609293937683, + "step": 1370 + }, + { + "epoch": 0.8172934557299378, + "grad_norm": 22.625, + "learning_rate": 4.902980481459834e-08, + "log_odds_chosen": -0.18400034308433533, + "log_odds_ratio": -0.8533352017402649, + "logits/chosen": -2.267984390258789, + "logits/rejected": -2.240002155303955, + "logps/chosen": -0.5833351016044617, + "logps/rejected": -0.4882822632789612, + "loss": 1.0013, + "nll_loss": 0.9279516935348511, + "rewards/accuracies": 0.40625, + "rewards/chosen": -0.058333516120910645, + "rewards/margins": -0.009505288675427437, + "rewards/rejected": -0.04882822558283806, + "step": 1380 + }, + { + "epoch": 0.8232158720758069, + "grad_norm": 11.8125, + "learning_rate": 4.5997643457719646e-08, + "log_odds_chosen": -0.2714422643184662, + "log_odds_ratio": -0.8982048034667969, + "logits/chosen": -2.2855401039123535, + "logits/rejected": -2.2796995639801025, + "logps/chosen": -0.5933629274368286, + "logps/rejected": -0.45899391174316406, + "loss": 0.9938, + "nll_loss": 0.9157652854919434, + "rewards/accuracies": 0.4124999940395355, + "rewards/chosen": -0.05933629721403122, + "rewards/margins": -0.01343690324574709, + "rewards/rejected": -0.045899391174316406, + "step": 1390 + }, + { + "epoch": 0.829138288421676, + "grad_norm": 11.125, + "learning_rate": 4.305273982894772e-08, + "log_odds_chosen": -0.24461349844932556, + "log_odds_ratio": -0.8896273374557495, + "logits/chosen": -2.3211405277252197, + "logits/rejected": -2.279554843902588, + "logps/chosen": -0.6189180612564087, + "logps/rejected": -0.4841720461845398, + "loss": 1.041, + "nll_loss": 0.9456349611282349, + "rewards/accuracies": 0.4375, + "rewards/chosen": -0.06189180538058281, + "rewards/margins": -0.013474604114890099, + "rewards/rejected": -0.04841720312833786, + "step": 1400 + }, + { + "epoch": 0.8350607047675451, + "grad_norm": 11.8125, + "learning_rate": 4.0196353549026786e-08, + "log_odds_chosen": -0.1991504579782486, + "log_odds_ratio": -0.8548718690872192, + "logits/chosen": -2.288534641265869, + "logits/rejected": -2.2532122135162354, + "logps/chosen": -0.5849851965904236, + "logps/rejected": -0.48299694061279297, + "loss": 1.0681, + "nll_loss": 1.0149555206298828, + "rewards/accuracies": 0.4375, + "rewards/chosen": -0.05849852040410042, + "rewards/margins": -0.010198831558227539, + "rewards/rejected": -0.04829969257116318, + "step": 1410 + }, + { + "epoch": 0.8409831211134142, + "grad_norm": 9.625, + "learning_rate": 3.742970637726181e-08, + "log_odds_chosen": -0.09389691054821014, + "log_odds_ratio": -0.8085994720458984, + "logits/chosen": -2.3118512630462646, + "logits/rejected": -2.2662172317504883, + "logps/chosen": -0.5374116897583008, + "logps/rejected": -0.4831947386264801, + "loss": 1.0166, + "nll_loss": 0.9142959713935852, + "rewards/accuracies": 0.46875, + "rewards/chosen": -0.05374116823077202, + "rewards/margins": -0.005421696230769157, + "rewards/rejected": -0.04831947013735771, + "step": 1420 + }, + { + "epoch": 0.8469055374592834, + "grad_norm": 13.0, + "learning_rate": 3.4753981688937284e-08, + "log_odds_chosen": -0.23033122718334198, + "log_odds_ratio": -0.8797691464424133, + "logits/chosen": -2.2840065956115723, + "logits/rejected": -2.2577414512634277, + "logps/chosen": -0.5791336297988892, + "logps/rejected": -0.46595969796180725, + "loss": 1.0562, + "nll_loss": 0.9663812518119812, + "rewards/accuracies": 0.40625, + "rewards/chosen": -0.05791335552930832, + "rewards/margins": -0.011317392811179161, + "rewards/rejected": -0.046595968306064606, + "step": 1430 + }, + { + "epoch": 0.8528279538051525, + "grad_norm": 16.5, + "learning_rate": 3.217032396915265e-08, + "log_odds_chosen": -0.28934675455093384, + "log_odds_ratio": -0.925268292427063, + "logits/chosen": -2.294243335723877, + "logits/rejected": -2.26255464553833, + "logps/chosen": -0.6588538885116577, + "logps/rejected": -0.4810880720615387, + "loss": 1.0625, + "nll_loss": 0.9974772334098816, + "rewards/accuracies": 0.4312500059604645, + "rewards/chosen": -0.06588538736104965, + "rewards/margins": -0.01777658611536026, + "rewards/rejected": -0.04810880497097969, + "step": 1440 + }, + { + "epoch": 0.8587503701510216, + "grad_norm": 13.5625, + "learning_rate": 2.9679838323293404e-08, + "log_odds_chosen": -0.30326423048973083, + "log_odds_ratio": -0.9402921795845032, + "logits/chosen": -2.285403251647949, + "logits/rejected": -2.2570960521698, + "logps/chosen": -0.6499019265174866, + "logps/rejected": -0.485442578792572, + "loss": 1.0074, + "nll_loss": 0.9335571527481079, + "rewards/accuracies": 0.39375001192092896, + "rewards/chosen": -0.06499020010232925, + "rewards/margins": -0.016445934772491455, + "rewards/rejected": -0.0485442578792572, + "step": 1450 + }, + { + "epoch": 0.8646727864968907, + "grad_norm": 11.875, + "learning_rate": 2.728359000434488e-08, + "log_odds_chosen": -0.25829392671585083, + "log_odds_ratio": -0.8930153846740723, + "logits/chosen": -2.316516399383545, + "logits/rejected": -2.283731460571289, + "logps/chosen": -0.5545108318328857, + "logps/rejected": -0.4498627185821533, + "loss": 1.048, + "nll_loss": 0.9053192138671875, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.055451083928346634, + "rewards/margins": -0.010464807972311974, + "rewards/rejected": -0.04498627781867981, + "step": 1460 + }, + { + "epoch": 0.8705952028427598, + "grad_norm": 10.75, + "learning_rate": 2.498260395725302e-08, + "log_odds_chosen": -0.25851163268089294, + "log_odds_ratio": -0.8944876790046692, + "logits/chosen": -2.281040906906128, + "logits/rejected": -2.26870059967041, + "logps/chosen": -0.6054626703262329, + "logps/rejected": -0.48731446266174316, + "loss": 1.0483, + "nll_loss": 0.9450349807739258, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.060546260327100754, + "rewards/margins": -0.011814813129603863, + "rewards/rejected": -0.048731446266174316, + "step": 1470 + }, + { + "epoch": 0.8765176191886289, + "grad_norm": 10.3125, + "learning_rate": 2.2777864380525426e-08, + "log_odds_chosen": -0.20190663635730743, + "log_odds_ratio": -0.8694218397140503, + "logits/chosen": -2.288378953933716, + "logits/rejected": -2.2683846950531006, + "logps/chosen": -0.5955201387405396, + "logps/rejected": -0.4755355417728424, + "loss": 1.0093, + "nll_loss": 0.8863022923469543, + "rewards/accuracies": 0.4625000059604645, + "rewards/chosen": -0.059552013874053955, + "rewards/margins": -0.01199845876544714, + "rewards/rejected": -0.04755355045199394, + "step": 1480 + }, + { + "epoch": 0.8824400355344981, + "grad_norm": 11.125, + "learning_rate": 2.0670314305261423e-08, + "log_odds_chosen": -0.21881277859210968, + "log_odds_ratio": -0.8681440353393555, + "logits/chosen": -2.3011648654937744, + "logits/rejected": -2.2739992141723633, + "logps/chosen": -0.5647403597831726, + "logps/rejected": -0.46096763014793396, + "loss": 0.9903, + "nll_loss": 0.9155017733573914, + "rewards/accuracies": 0.4437499940395355, + "rewards/chosen": -0.05647404119372368, + "rewards/margins": -0.010377271100878716, + "rewards/rejected": -0.046096768230199814, + "step": 1490 + }, + { + "epoch": 0.8883624518803672, + "grad_norm": 15.4375, + "learning_rate": 1.866085519178995e-08, + "log_odds_chosen": -0.21367135643959045, + "log_odds_ratio": -0.8818863034248352, + "logits/chosen": -2.283823013305664, + "logits/rejected": -2.262935161590576, + "logps/chosen": -0.6190184354782104, + "logps/rejected": -0.518616259098053, + "loss": 1.0823, + "nll_loss": 1.0067201852798462, + "rewards/accuracies": 0.42500001192092896, + "rewards/chosen": -0.06190184876322746, + "rewards/margins": -0.010040223598480225, + "rewards/rejected": -0.05186162516474724, + "step": 1500 + }, + { + "epoch": 0.8942848682262363, + "grad_norm": 10.0625, + "learning_rate": 1.675034654408894e-08, + "log_odds_chosen": -0.2969823479652405, + "log_odds_ratio": -0.9049533605575562, + "logits/chosen": -2.3211445808410645, + "logits/rejected": -2.293593168258667, + "logps/chosen": -0.5710967779159546, + "logps/rejected": -0.44247856736183167, + "loss": 1.0186, + "nll_loss": 0.9544011354446411, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.057109683752059937, + "rewards/margins": -0.012861823663115501, + "rewards/rejected": -0.044247858226299286, + "step": 1510 + }, + { + "epoch": 0.9002072845721054, + "grad_norm": 11.8125, + "learning_rate": 1.4939605542150595e-08, + "log_odds_chosen": -0.20066659152507782, + "log_odds_ratio": -0.880477249622345, + "logits/chosen": -2.306097984313965, + "logits/rejected": -2.2691056728363037, + "logps/chosen": -0.630598247051239, + "logps/rejected": -0.5075589418411255, + "loss": 1.0954, + "nll_loss": 0.9971143007278442, + "rewards/accuracies": 0.48124998807907104, + "rewards/chosen": -0.06305982172489166, + "rewards/margins": -0.01230393536388874, + "rewards/rejected": -0.05075589567422867, + "step": 1520 + }, + { + "epoch": 0.9061297009179745, + "grad_norm": 11.0, + "learning_rate": 1.3229406692449791e-08, + "log_odds_chosen": -0.14233054220676422, + "log_odds_ratio": -0.8427847623825073, + "logits/chosen": -2.2426674365997314, + "logits/rejected": -2.2174274921417236, + "logps/chosen": -0.5756295919418335, + "logps/rejected": -0.4939804971218109, + "loss": 1.0628, + "nll_loss": 0.9542373418807983, + "rewards/accuracies": 0.48750001192092896, + "rewards/chosen": -0.05756296589970589, + "rewards/margins": -0.008164914324879646, + "rewards/rejected": -0.04939804598689079, + "step": 1530 + }, + { + "epoch": 0.9120521172638436, + "grad_norm": 10.6875, + "learning_rate": 1.162048149666503e-08, + "log_odds_chosen": -0.209940105676651, + "log_odds_ratio": -0.8944632411003113, + "logits/chosen": -2.2973880767822266, + "logits/rejected": -2.255645990371704, + "logps/chosen": -0.6111503839492798, + "logps/rejected": -0.49909108877182007, + "loss": 1.0425, + "nll_loss": 0.9487366676330566, + "rewards/accuracies": 0.42500001192092896, + "rewards/chosen": -0.061115045100450516, + "rewards/margins": -0.011205929331481457, + "rewards/rejected": -0.049909114837646484, + "step": 1540 + }, + { + "epoch": 0.9179745336097128, + "grad_norm": 9.8125, + "learning_rate": 1.0113518138794047e-08, + "log_odds_chosen": -0.25878992676734924, + "log_odds_ratio": -0.899122416973114, + "logits/chosen": -2.2492969036102295, + "logits/rejected": -2.2273764610290527, + "logps/chosen": -0.5986303091049194, + "logps/rejected": -0.47187572717666626, + "loss": 1.0612, + "nll_loss": 0.9412651062011719, + "rewards/accuracies": 0.40625, + "rewards/chosen": -0.059863023459911346, + "rewards/margins": -0.012675456702709198, + "rewards/rejected": -0.04718757048249245, + "step": 1550 + }, + { + "epoch": 0.9238969499555819, + "grad_norm": 10.375, + "learning_rate": 8.709161190797565e-09, + "log_odds_chosen": -0.14045746624469757, + "log_odds_ratio": -0.8357732892036438, + "logits/chosen": -2.3169333934783936, + "logits/rejected": -2.2879374027252197, + "logps/chosen": -0.5618830919265747, + "logps/rejected": -0.4810701012611389, + "loss": 1.031, + "nll_loss": 0.9191296696662903, + "rewards/accuracies": 0.4625000059604645, + "rewards/chosen": -0.05618830770254135, + "rewards/margins": -0.008081300184130669, + "rewards/rejected": -0.04810700938105583, + "step": 1560 + }, + { + "epoch": 0.929819366301451, + "grad_norm": 15.3125, + "learning_rate": 7.408011336897141e-09, + "log_odds_chosen": -0.323073148727417, + "log_odds_ratio": -0.9851021766662598, + "logits/chosen": -2.3374483585357666, + "logits/rejected": -2.3257203102111816, + "logps/chosen": -0.7131141424179077, + "logps/rejected": -0.4996616244316101, + "loss": 1.0776, + "nll_loss": 1.01613450050354, + "rewards/accuracies": 0.4375, + "rewards/chosen": -0.07131141424179077, + "rewards/margins": -0.02134525403380394, + "rewards/rejected": -0.04996616020798683, + "step": 1570 + }, + { + "epoch": 0.9357417826473201, + "grad_norm": 9.5625, + "learning_rate": 6.210625116645135e-09, + "log_odds_chosen": -0.32444125413894653, + "log_odds_ratio": -0.9329547882080078, + "logits/chosen": -2.342031955718994, + "logits/rejected": -2.3026318550109863, + "logps/chosen": -0.6195459365844727, + "logps/rejected": -0.45777615904808044, + "loss": 1.0033, + "nll_loss": 0.8760407567024231, + "rewards/accuracies": 0.38749998807907104, + "rewards/chosen": -0.061954595148563385, + "rewards/margins": -0.0161769799888134, + "rewards/rejected": -0.04577761888504028, + "step": 1580 + }, + { + "epoch": 0.9416641989931892, + "grad_norm": 10.625, + "learning_rate": 5.117514686876378e-09, + "log_odds_chosen": -0.20949645340442657, + "log_odds_ratio": -0.8756229281425476, + "logits/chosen": -2.30104398727417, + "logits/rejected": -2.2671799659729004, + "logps/chosen": -0.5797516703605652, + "logps/rejected": -0.4763546586036682, + "loss": 1.0455, + "nll_loss": 0.9568120837211609, + "rewards/accuracies": 0.4312500059604645, + "rewards/chosen": -0.057975172996520996, + "rewards/margins": -0.010339704342186451, + "rewards/rejected": -0.04763546586036682, + "step": 1590 + }, + { + "epoch": 0.9475866153390583, + "grad_norm": 10.0, + "learning_rate": 4.1291476026441565e-09, + "log_odds_chosen": -0.14046767354011536, + "log_odds_ratio": -0.8268812894821167, + "logits/chosen": -2.2659006118774414, + "logits/rejected": -2.245576858520508, + "logps/chosen": -0.5697029829025269, + "logps/rejected": -0.4846652150154114, + "loss": 0.9915, + "nll_loss": 0.8766274452209473, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.056970298290252686, + "rewards/margins": -0.00850378442555666, + "rewards/rejected": -0.0484665185213089, + "step": 1600 + }, + { + "epoch": 0.9535090316849274, + "grad_norm": 9.8125, + "learning_rate": 3.2459466172331253e-09, + "log_odds_chosen": -0.25180304050445557, + "log_odds_ratio": -0.9306501150131226, + "logits/chosen": -2.274780035018921, + "logits/rejected": -2.255272626876831, + "logps/chosen": -0.6529628038406372, + "logps/rejected": -0.48409169912338257, + "loss": 1.0873, + "nll_loss": 0.9862693548202515, + "rewards/accuracies": 0.40625, + "rewards/chosen": -0.06529629230499268, + "rewards/margins": -0.016887117177248, + "rewards/rejected": -0.048409171402454376, + "step": 1610 + }, + { + "epoch": 0.9594314480307966, + "grad_norm": 13.125, + "learning_rate": 2.4682895013354854e-09, + "log_odds_chosen": -0.230398491024971, + "log_odds_ratio": -0.8930587768554688, + "logits/chosen": -2.2783544063568115, + "logits/rejected": -2.2587246894836426, + "logps/chosen": -0.6128379702568054, + "logps/rejected": -0.46944743394851685, + "loss": 1.0177, + "nll_loss": 0.9610903859138489, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.06128380447626114, + "rewards/margins": -0.014339059591293335, + "rewards/rejected": -0.0469447486102581, + "step": 1620 + }, + { + "epoch": 0.9653538643766657, + "grad_norm": 25.0, + "learning_rate": 1.7965088814675677e-09, + "log_odds_chosen": -0.3568347692489624, + "log_odds_ratio": -0.9671844244003296, + "logits/chosen": -2.2762491703033447, + "logits/rejected": -2.2589855194091797, + "logps/chosen": -0.6517866253852844, + "logps/rejected": -0.4649588167667389, + "loss": 1.037, + "nll_loss": 0.9754410982131958, + "rewards/accuracies": 0.34375, + "rewards/chosen": -0.06517866253852844, + "rewards/margins": -0.018682777881622314, + "rewards/rejected": -0.04649588465690613, + "step": 1630 + }, + { + "epoch": 0.9712762807225348, + "grad_norm": 10.8125, + "learning_rate": 1.2308920976958348e-09, + "log_odds_chosen": -0.1785418540239334, + "log_odds_ratio": -0.8583124876022339, + "logits/chosen": -2.2591869831085205, + "logits/rejected": -2.2376914024353027, + "logps/chosen": -0.5955510139465332, + "logps/rejected": -0.4926881790161133, + "loss": 1.0026, + "nll_loss": 0.9147430658340454, + "rewards/accuracies": 0.48750001192092896, + "rewards/chosen": -0.05955510213971138, + "rewards/margins": -0.010286283679306507, + "rewards/rejected": -0.04926881566643715, + "step": 1640 + }, + { + "epoch": 0.9771986970684039, + "grad_norm": 11.9375, + "learning_rate": 7.716810807330276e-10, + "log_odds_chosen": -0.30667099356651306, + "log_odds_ratio": -0.9143903851509094, + "logits/chosen": -2.2759385108947754, + "logits/rejected": -2.2378878593444824, + "logps/chosen": -0.6037041544914246, + "logps/rejected": -0.45009493827819824, + "loss": 1.0344, + "nll_loss": 0.93921959400177, + "rewards/accuracies": 0.35624998807907104, + "rewards/chosen": -0.06037042289972305, + "rewards/margins": -0.015360923483967781, + "rewards/rejected": -0.045009493827819824, + "step": 1650 + }, + { + "epoch": 0.983121113414273, + "grad_norm": 17.25, + "learning_rate": 4.190722484575804e-10, + "log_odds_chosen": -0.24070534110069275, + "log_odds_ratio": -0.9141713976860046, + "logits/chosen": -2.285658597946167, + "logits/rejected": -2.2573189735412598, + "logps/chosen": -0.6545957326889038, + "logps/rejected": -0.4952670931816101, + "loss": 1.0545, + "nll_loss": 0.9895190000534058, + "rewards/accuracies": 0.4437499940395355, + "rewards/chosen": -0.06545957177877426, + "rewards/margins": -0.01593286357820034, + "rewards/rejected": -0.04952671006321907, + "step": 1660 + }, + { + "epoch": 0.9890435297601421, + "grad_norm": 9.1875, + "learning_rate": 1.732164218998522e-10, + "log_odds_chosen": -0.2650103271007538, + "log_odds_ratio": -0.8960719108581543, + "logits/chosen": -2.2581698894500732, + "logits/rejected": -2.2162814140319824, + "logps/chosen": -0.6056646704673767, + "logps/rejected": -0.4769059717655182, + "loss": 1.0107, + "nll_loss": 0.9156764149665833, + "rewards/accuracies": 0.39375001192092896, + "rewards/chosen": -0.06056647375226021, + "rewards/margins": -0.0128758754581213, + "rewards/rejected": -0.04769059270620346, + "step": 1670 + }, + { + "epoch": 0.9949659461060113, + "grad_norm": 12.0, + "learning_rate": 3.4218760731730136e-11, + "log_odds_chosen": -0.21042411029338837, + "log_odds_ratio": -0.8711256980895996, + "logits/chosen": -2.333160638809204, + "logits/rejected": -2.2931103706359863, + "logps/chosen": -0.5873175859451294, + "logps/rejected": -0.47885292768478394, + "loss": 1.066, + "nll_loss": 0.9840106964111328, + "rewards/accuracies": 0.40625, + "rewards/chosen": -0.05873175337910652, + "rewards/margins": -0.010846461169421673, + "rewards/rejected": -0.047885291278362274, + "step": 1680 + }, + { + "epoch": 0.9997038791827065, + "step": 1688, + "total_flos": 0.0, + "train_loss": 1.076995034918401, + "train_runtime": 25716.0251, + "train_samples_per_second": 2.101, + "train_steps_per_second": 0.066 + } + ], + "logging_steps": 10, + "max_steps": 1688, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}