{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.999080761654629, "eval_steps": 500, "global_step": 951, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.005252790544977019, "grad_norm": 33.25, "learning_rate": 1.0416666666666667e-07, "log_odds_chosen": -0.12333051860332489, "log_odds_ratio": -0.8621311187744141, "logits/chosen": -2.540858030319214, "logits/rejected": -2.1144332885742188, "logps/chosen": -1.1002752780914307, "logps/rejected": -1.0134268999099731, "loss": 2.3046, "nll_loss": 1.4424240589141846, "rewards/accuracies": 0.45625001192092896, "rewards/chosen": -1.1002752780914307, "rewards/margins": -0.08684836328029633, "rewards/rejected": -1.0134268999099731, "step": 5 }, { "epoch": 0.010505581089954037, "grad_norm": 30.125, "learning_rate": 2.0833333333333333e-07, "log_odds_chosen": -0.1254591941833496, "log_odds_ratio": -0.8488509058952332, "logits/chosen": -2.521646022796631, "logits/rejected": -2.12934947013855, "logps/chosen": -1.0548789501190186, "logps/rejected": -0.9548781514167786, "loss": 2.2337, "nll_loss": 1.3848837614059448, "rewards/accuracies": 0.453125, "rewards/chosen": -1.0548789501190186, "rewards/margins": -0.10000075399875641, "rewards/rejected": -0.9548781514167786, "step": 10 }, { "epoch": 0.015758371634931056, "grad_norm": 28.25, "learning_rate": 3.1249999999999997e-07, "log_odds_chosen": -0.092379130423069, "log_odds_ratio": -0.839794933795929, "logits/chosen": -2.496335744857788, "logits/rejected": -2.134352445602417, "logps/chosen": -1.0547659397125244, "logps/rejected": -0.9888293147087097, "loss": 2.2323, "nll_loss": 1.3924893140792847, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -1.0547659397125244, "rewards/margins": -0.06593648344278336, "rewards/rejected": -0.9888293147087097, "step": 15 }, { "epoch": 0.021011162179908074, "grad_norm": 27.75, "learning_rate": 4.1666666666666667e-07, "log_odds_chosen": -0.08341892063617706, "log_odds_ratio": -0.845537006855011, "logits/chosen": -2.502532720565796, "logits/rejected": -2.0534327030181885, "logps/chosen": -1.0713450908660889, "logps/rejected": -1.0228570699691772, "loss": 2.2615, "nll_loss": 1.415948748588562, "rewards/accuracies": 0.46875, "rewards/chosen": -1.0713450908660889, "rewards/margins": -0.04848797246813774, "rewards/rejected": -1.0228570699691772, "step": 20 }, { "epoch": 0.026263952724885097, "grad_norm": 31.625, "learning_rate": 5.208333333333334e-07, "log_odds_chosen": -0.05041329935193062, "log_odds_ratio": -0.8150845766067505, "logits/chosen": -2.3506855964660645, "logits/rejected": -2.041471481323242, "logps/chosen": -1.0723893642425537, "logps/rejected": -1.0427805185317993, "loss": 2.2269, "nll_loss": 1.411767601966858, "rewards/accuracies": 0.49687498807907104, "rewards/chosen": -1.0723893642425537, "rewards/margins": -0.029608914628624916, "rewards/rejected": -1.0427805185317993, "step": 25 }, { "epoch": 0.03151674326986211, "grad_norm": 34.75, "learning_rate": 6.249999999999999e-07, "log_odds_chosen": -0.16907325387001038, "log_odds_ratio": -0.8892423510551453, "logits/chosen": -2.4877123832702637, "logits/rejected": -2.091643810272217, "logps/chosen": -1.0780900716781616, "logps/rejected": -0.960413932800293, "loss": 2.2862, "nll_loss": 1.3969789743423462, "rewards/accuracies": 0.45625001192092896, "rewards/chosen": -1.0780900716781616, "rewards/margins": -0.1176760345697403, "rewards/rejected": -0.960413932800293, "step": 30 }, { "epoch": 0.036769533814839134, "grad_norm": 24.625, "learning_rate": 7.291666666666666e-07, "log_odds_chosen": -0.12296156585216522, "log_odds_ratio": -0.8445537686347961, "logits/chosen": -2.460153579711914, "logits/rejected": -2.100581169128418, "logps/chosen": -0.9918639063835144, "logps/rejected": -0.8978347778320312, "loss": 2.1014, "nll_loss": 1.256840467453003, "rewards/accuracies": 0.43437498807907104, "rewards/chosen": -0.9918639063835144, "rewards/margins": -0.09402903914451599, "rewards/rejected": -0.8978347778320312, "step": 35 }, { "epoch": 0.04202232435981615, "grad_norm": 20.375, "learning_rate": 8.333333333333333e-07, "log_odds_chosen": -0.10392768681049347, "log_odds_ratio": -0.8281729817390442, "logits/chosen": -2.4672484397888184, "logits/rejected": -2.1189260482788086, "logps/chosen": -0.9796692132949829, "logps/rejected": -0.8947553634643555, "loss": 2.0709, "nll_loss": 1.2427122592926025, "rewards/accuracies": 0.4781250059604645, "rewards/chosen": -0.9796692132949829, "rewards/margins": -0.08491390943527222, "rewards/rejected": -0.8947553634643555, "step": 40 }, { "epoch": 0.04727511490479317, "grad_norm": 25.75, "learning_rate": 9.374999999999999e-07, "log_odds_chosen": -0.07403279840946198, "log_odds_ratio": -0.8119841814041138, "logits/chosen": -2.5748581886291504, "logits/rejected": -2.2311367988586426, "logps/chosen": -0.9425970911979675, "logps/rejected": -0.8925843238830566, "loss": 1.966, "nll_loss": 1.1540277004241943, "rewards/accuracies": 0.4781250059604645, "rewards/chosen": -0.9425970911979675, "rewards/margins": -0.05001285672187805, "rewards/rejected": -0.8925843238830566, "step": 45 }, { "epoch": 0.05252790544977019, "grad_norm": 15.5625, "learning_rate": 1.0416666666666667e-06, "log_odds_chosen": -0.015203160233795643, "log_odds_ratio": -0.7965196371078491, "logits/chosen": -2.517662763595581, "logits/rejected": -2.291977882385254, "logps/chosen": -1.0069010257720947, "logps/rejected": -0.9928563833236694, "loss": 2.001, "nll_loss": 1.2044353485107422, "rewards/accuracies": 0.49687498807907104, "rewards/chosen": -1.0069010257720947, "rewards/margins": -0.014044714160263538, "rewards/rejected": -0.9928563833236694, "step": 50 }, { "epoch": 0.05778069599474721, "grad_norm": 19.125, "learning_rate": 1.1458333333333333e-06, "log_odds_chosen": -0.06918958574533463, "log_odds_ratio": -0.8064200282096863, "logits/chosen": -2.7286930084228516, "logits/rejected": -2.3158278465270996, "logps/chosen": -0.9621369242668152, "logps/rejected": -0.9042080044746399, "loss": 1.9673, "nll_loss": 1.1608707904815674, "rewards/accuracies": 0.5, "rewards/chosen": -0.9621369242668152, "rewards/margins": -0.05792900174856186, "rewards/rejected": -0.9042080044746399, "step": 55 }, { "epoch": 0.06303348653972422, "grad_norm": 20.375, "learning_rate": 1.2499999999999999e-06, "log_odds_chosen": -0.055296190083026886, "log_odds_ratio": -0.795842170715332, "logits/chosen": -2.733304500579834, "logits/rejected": -2.257201671600342, "logps/chosen": -0.9258626699447632, "logps/rejected": -0.8971433639526367, "loss": 1.9557, "nll_loss": 1.159847378730774, "rewards/accuracies": 0.45625001192092896, "rewards/chosen": -0.9258626699447632, "rewards/margins": -0.028719374909996986, "rewards/rejected": -0.8971433639526367, "step": 60 }, { "epoch": 0.06828627708470125, "grad_norm": 18.25, "learning_rate": 1.3541666666666667e-06, "log_odds_chosen": -0.05717029422521591, "log_odds_ratio": -0.7737418413162231, "logits/chosen": -2.6654744148254395, "logits/rejected": -2.187049627304077, "logps/chosen": -0.8003360033035278, "logps/rejected": -0.7723677754402161, "loss": 1.8696, "nll_loss": 1.0958433151245117, "rewards/accuracies": 0.4437499940395355, "rewards/chosen": -0.8003360033035278, "rewards/margins": -0.02796824648976326, "rewards/rejected": -0.7723677754402161, "step": 65 }, { "epoch": 0.07353906762967827, "grad_norm": 19.375, "learning_rate": 1.4583333333333333e-06, "log_odds_chosen": 0.002531373407691717, "log_odds_ratio": -0.7339381575584412, "logits/chosen": -2.5733718872070312, "logits/rejected": -2.1028685569763184, "logps/chosen": -0.7143228054046631, "logps/rejected": -0.718761146068573, "loss": 1.7947, "nll_loss": 1.0607960224151611, "rewards/accuracies": 0.5218750238418579, "rewards/chosen": -0.7143228054046631, "rewards/margins": 0.004438319243490696, "rewards/rejected": -0.718761146068573, "step": 70 }, { "epoch": 0.07879185817465528, "grad_norm": 15.9375, "learning_rate": 1.5624999999999999e-06, "log_odds_chosen": 0.06011660769581795, "log_odds_ratio": -0.7009418606758118, "logits/chosen": -2.5496840476989746, "logits/rejected": -2.0580315589904785, "logps/chosen": -0.6317678689956665, "logps/rejected": -0.6753242611885071, "loss": 1.6452, "nll_loss": 0.9442570805549622, "rewards/accuracies": 0.528124988079071, "rewards/chosen": -0.6317678689956665, "rewards/margins": 0.04355642572045326, "rewards/rejected": -0.6753242611885071, "step": 75 }, { "epoch": 0.0840446487196323, "grad_norm": 14.875, "learning_rate": 1.6666666666666667e-06, "log_odds_chosen": 0.10804717242717743, "log_odds_ratio": -0.6780250072479248, "logits/chosen": -2.371317148208618, "logits/rejected": -1.9558740854263306, "logps/chosen": -0.5971282124519348, "logps/rejected": -0.6553691029548645, "loss": 1.6518, "nll_loss": 0.9737834930419922, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.5971282124519348, "rewards/margins": 0.05824087932705879, "rewards/rejected": -0.6553691029548645, "step": 80 }, { "epoch": 0.08929743926460933, "grad_norm": 15.375, "learning_rate": 1.7708333333333332e-06, "log_odds_chosen": 0.13051114976406097, "log_odds_ratio": -0.6608899235725403, "logits/chosen": -2.441239833831787, "logits/rejected": -2.080503225326538, "logps/chosen": -0.5396751165390015, "logps/rejected": -0.6057919263839722, "loss": 1.6033, "nll_loss": 0.9424022436141968, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.5396751165390015, "rewards/margins": 0.06611678004264832, "rewards/rejected": -0.6057919263839722, "step": 85 }, { "epoch": 0.09455022980958634, "grad_norm": 15.5625, "learning_rate": 1.8749999999999998e-06, "log_odds_chosen": 0.19523096084594727, "log_odds_ratio": -0.6398605108261108, "logits/chosen": -2.388965606689453, "logits/rejected": -2.051954507827759, "logps/chosen": -0.514168381690979, "logps/rejected": -0.6006937623023987, "loss": 1.5701, "nll_loss": 0.9302393794059753, "rewards/accuracies": 0.6468750238418579, "rewards/chosen": -0.514168381690979, "rewards/margins": 0.08652535825967789, "rewards/rejected": -0.6006937623023987, "step": 90 }, { "epoch": 0.09980302035456336, "grad_norm": 10.625, "learning_rate": 1.9791666666666666e-06, "log_odds_chosen": 0.12450599670410156, "log_odds_ratio": -0.6654147505760193, "logits/chosen": -2.3805699348449707, "logits/rejected": -2.010688304901123, "logps/chosen": -0.49114733934402466, "logps/rejected": -0.5494757890701294, "loss": 1.5446, "nll_loss": 0.8791642189025879, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.49114733934402466, "rewards/margins": 0.05832843855023384, "rewards/rejected": -0.5494757890701294, "step": 95 }, { "epoch": 0.10505581089954039, "grad_norm": 7.8125, "learning_rate": 1.9998919935516766e-06, "log_odds_chosen": 0.17239874601364136, "log_odds_ratio": -0.6507178544998169, "logits/chosen": -2.2754485607147217, "logits/rejected": -2.040553569793701, "logps/chosen": -0.485573947429657, "logps/rejected": -0.5674648284912109, "loss": 1.4726, "nll_loss": 0.8218661546707153, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.485573947429657, "rewards/margins": 0.08189092576503754, "rewards/rejected": -0.5674648284912109, "step": 100 }, { "epoch": 0.1103086014445174, "grad_norm": 8.3125, "learning_rate": 1.999453257340926e-06, "log_odds_chosen": 0.2180129736661911, "log_odds_ratio": -0.6303091049194336, "logits/chosen": -2.4427425861358643, "logits/rejected": -2.181597948074341, "logps/chosen": -0.4835621416568756, "logps/rejected": -0.5780085325241089, "loss": 1.4945, "nll_loss": 0.8642352223396301, "rewards/accuracies": 0.6343749761581421, "rewards/chosen": -0.4835621416568756, "rewards/margins": 0.09444637596607208, "rewards/rejected": -0.5780085325241089, "step": 105 }, { "epoch": 0.11556139198949442, "grad_norm": 8.0, "learning_rate": 1.998677188931617e-06, "log_odds_chosen": 0.27974802255630493, "log_odds_ratio": -0.6000305414199829, "logits/chosen": -2.4073500633239746, "logits/rejected": -2.158104419708252, "logps/chosen": -0.4692881107330322, "logps/rejected": -0.5915614366531372, "loss": 1.5236, "nll_loss": 0.9235590100288391, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.4692881107330322, "rewards/margins": 0.12227334082126617, "rewards/rejected": -0.5915614366531372, "step": 110 }, { "epoch": 0.12081418253447143, "grad_norm": 7.9375, "learning_rate": 1.997564050259824e-06, "log_odds_chosen": 0.28100112080574036, "log_odds_ratio": -0.601650595664978, "logits/chosen": -2.3918166160583496, "logits/rejected": -2.029897689819336, "logps/chosen": -0.4723443388938904, "logps/rejected": -0.5918693542480469, "loss": 1.5166, "nll_loss": 0.9149250984191895, "rewards/accuracies": 0.671875, "rewards/chosen": -0.4723443388938904, "rewards/margins": 0.11952495574951172, "rewards/rejected": -0.5918693542480469, "step": 115 }, { "epoch": 0.12606697307944845, "grad_norm": 8.8125, "learning_rate": 1.996114217028476e-06, "log_odds_chosen": 0.25655943155288696, "log_odds_ratio": -0.6146520376205444, "logits/chosen": -2.470524311065674, "logits/rejected": -2.134540557861328, "logps/chosen": -0.477255642414093, "logps/rejected": -0.5925866961479187, "loss": 1.5111, "nll_loss": 0.8964211344718933, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.477255642414093, "rewards/margins": 0.11533106863498688, "rewards/rejected": -0.5925866961479187, "step": 120 }, { "epoch": 0.1313197636244255, "grad_norm": 8.75, "learning_rate": 1.994328178580548e-06, "log_odds_chosen": 0.2803216576576233, "log_odds_ratio": -0.601326584815979, "logits/chosen": -2.367903232574463, "logits/rejected": -2.018990993499756, "logps/chosen": -0.46639877557754517, "logps/rejected": -0.5851758718490601, "loss": 1.481, "nll_loss": 0.8796539306640625, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.46639877557754517, "rewards/margins": 0.11877720057964325, "rewards/rejected": -0.5851758718490601, "step": 125 }, { "epoch": 0.1365725541694025, "grad_norm": 8.3125, "learning_rate": 1.9922065377339033e-06, "log_odds_chosen": 0.2894327640533447, "log_odds_ratio": -0.6087297201156616, "logits/chosen": -2.5040173530578613, "logits/rejected": -2.2061374187469482, "logps/chosen": -0.4694454073905945, "logps/rejected": -0.5906943678855896, "loss": 1.4968, "nll_loss": 0.888100266456604, "rewards/accuracies": 0.6656249761581421, "rewards/chosen": -0.4694454073905945, "rewards/margins": 0.12124893814325333, "rewards/rejected": -0.5906943678855896, "step": 130 }, { "epoch": 0.14182534471437952, "grad_norm": 7.375, "learning_rate": 1.98975001057783e-06, "log_odds_chosen": 0.30140143632888794, "log_odds_ratio": -0.5964145660400391, "logits/chosen": -2.4213032722473145, "logits/rejected": -2.004279375076294, "logps/chosen": -0.44823235273361206, "logps/rejected": -0.583377480506897, "loss": 1.4442, "nll_loss": 0.8478012084960938, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.44823235273361206, "rewards/margins": 0.13514509797096252, "rewards/rejected": -0.583377480506897, "step": 135 }, { "epoch": 0.14707813525935653, "grad_norm": 9.4375, "learning_rate": 1.986959426231349e-06, "log_odds_chosen": 0.33596453070640564, "log_odds_ratio": -0.5885840654373169, "logits/chosen": -2.471541166305542, "logits/rejected": -2.1307930946350098, "logps/chosen": -0.47856172919273376, "logps/rejected": -0.6193875074386597, "loss": 1.4974, "nll_loss": 0.9087700843811035, "rewards/accuracies": 0.684374988079071, "rewards/chosen": -0.47856172919273376, "rewards/margins": 0.14082582294940948, "rewards/rejected": -0.6193875074386597, "step": 140 }, { "epoch": 0.15233092580433355, "grad_norm": 8.0625, "learning_rate": 1.9838357265633724e-06, "log_odds_chosen": 0.35230931639671326, "log_odds_ratio": -0.5799855589866638, "logits/chosen": -2.4745469093322754, "logits/rejected": -2.0399346351623535, "logps/chosen": -0.45584583282470703, "logps/rejected": -0.6081861257553101, "loss": 1.4708, "nll_loss": 0.8907746076583862, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.45584583282470703, "rewards/margins": 0.15234029293060303, "rewards/rejected": -0.6081861257553101, "step": 145 }, { "epoch": 0.15758371634931057, "grad_norm": 7.59375, "learning_rate": 1.9803799658748095e-06, "log_odds_chosen": 0.32377585768699646, "log_odds_ratio": -0.5951502919197083, "logits/chosen": -2.3601431846618652, "logits/rejected": -2.0099222660064697, "logps/chosen": -0.46314555406570435, "logps/rejected": -0.6009119153022766, "loss": 1.4988, "nll_loss": 0.9036917686462402, "rewards/accuracies": 0.6875, "rewards/chosen": -0.46314555406570435, "rewards/margins": 0.13776634633541107, "rewards/rejected": -0.6009119153022766, "step": 150 }, { "epoch": 0.16283650689428758, "grad_norm": 9.75, "learning_rate": 1.9765933105427177e-06, "log_odds_chosen": 0.29054537415504456, "log_odds_ratio": -0.6080166101455688, "logits/chosen": -2.429213762283325, "logits/rejected": -2.1127424240112305, "logps/chosen": -0.48361191153526306, "logps/rejected": -0.6187745928764343, "loss": 1.5015, "nll_loss": 0.8934603929519653, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.48361191153526306, "rewards/margins": 0.13516271114349365, "rewards/rejected": -0.6187745928764343, "step": 155 }, { "epoch": 0.1680892974392646, "grad_norm": 14.0625, "learning_rate": 1.972477038626636e-06, "log_odds_chosen": 0.27817827463150024, "log_odds_ratio": -0.6112152338027954, "logits/chosen": -2.4246554374694824, "logits/rejected": -2.0224289894104004, "logps/chosen": -0.49589210748672485, "logps/rejected": -0.6248718500137329, "loss": 1.4978, "nll_loss": 0.886622428894043, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.49589210748672485, "rewards/margins": 0.12897971272468567, "rewards/rejected": -0.6248718500137329, "step": 160 }, { "epoch": 0.17334208798424164, "grad_norm": 10.875, "learning_rate": 1.9680325394372147e-06, "log_odds_chosen": 0.35008612275123596, "log_odds_ratio": -0.5786347389221191, "logits/chosen": -2.506772756576538, "logits/rejected": -2.057096004486084, "logps/chosen": -0.46079978346824646, "logps/rejected": -0.6112517714500427, "loss": 1.4896, "nll_loss": 0.9109176397323608, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.46079978346824646, "rewards/margins": 0.15045206248760223, "rewards/rejected": -0.6112517714500427, "step": 165 }, { "epoch": 0.17859487852921865, "grad_norm": 15.1875, "learning_rate": 1.9632613130673015e-06, "log_odds_chosen": 0.33634597063064575, "log_odds_ratio": -0.589142918586731, "logits/chosen": -2.467883348464966, "logits/rejected": -1.9834989309310913, "logps/chosen": -0.4864015579223633, "logps/rejected": -0.6304683089256287, "loss": 1.4988, "nll_loss": 0.9096533060073853, "rewards/accuracies": 0.690625011920929, "rewards/chosen": -0.4864015579223633, "rewards/margins": 0.144066721200943, "rewards/rejected": -0.6304683089256287, "step": 170 }, { "epoch": 0.18384766907419567, "grad_norm": 26.75, "learning_rate": 1.9581649698856357e-06, "log_odds_chosen": 0.351374089717865, "log_odds_ratio": -0.5786073207855225, "logits/chosen": -2.3902525901794434, "logits/rejected": -2.0138325691223145, "logps/chosen": -0.45923271775245667, "logps/rejected": -0.6129686236381531, "loss": 1.477, "nll_loss": 0.8983781933784485, "rewards/accuracies": 0.715624988079071, "rewards/chosen": -0.45923271775245667, "rewards/margins": 0.1537359207868576, "rewards/rejected": -0.6129686236381531, "step": 175 }, { "epoch": 0.18910045961917268, "grad_norm": 8.5625, "learning_rate": 1.952745229993319e-06, "log_odds_chosen": 0.3817608952522278, "log_odds_ratio": -0.5729137659072876, "logits/chosen": -2.52931547164917, "logits/rejected": -2.1916394233703613, "logps/chosen": -0.48729705810546875, "logps/rejected": -0.6591955423355103, "loss": 1.4891, "nll_loss": 0.9161707758903503, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.48729705810546875, "rewards/margins": 0.1718985140323639, "rewards/rejected": -0.6591955423355103, "step": 180 }, { "epoch": 0.1943532501641497, "grad_norm": 8.5, "learning_rate": 1.947003922643256e-06, "log_odds_chosen": 0.379459023475647, "log_odds_ratio": -0.5737109184265137, "logits/chosen": -2.282898426055908, "logits/rejected": -1.9805419445037842, "logps/chosen": -0.47503146529197693, "logps/rejected": -0.6488234400749207, "loss": 1.4454, "nll_loss": 0.8717378377914429, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -0.47503146529197693, "rewards/margins": 0.1737920045852661, "rewards/rejected": -0.6488234400749207, "step": 185 }, { "epoch": 0.19960604070912671, "grad_norm": 10.125, "learning_rate": 1.9409429856227482e-06, "log_odds_chosen": 0.4121369421482086, "log_odds_ratio": -0.5561366081237793, "logits/chosen": -2.488356113433838, "logits/rejected": -2.0776686668395996, "logps/chosen": -0.4683772921562195, "logps/rejected": -0.647982656955719, "loss": 1.4436, "nll_loss": 0.8874515295028687, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.4683772921562195, "rewards/margins": 0.17960533499717712, "rewards/rejected": -0.647982656955719, "step": 190 }, { "epoch": 0.20485883125410373, "grad_norm": 11.5, "learning_rate": 1.934564464599461e-06, "log_odds_chosen": 0.32919231057167053, "log_odds_ratio": -0.5908551812171936, "logits/chosen": -2.501392364501953, "logits/rejected": -2.0592591762542725, "logps/chosen": -0.49434512853622437, "logps/rejected": -0.6509113311767578, "loss": 1.4187, "nll_loss": 0.8278582692146301, "rewards/accuracies": 0.6968749761581421, "rewards/chosen": -0.49434512853622437, "rewards/margins": 0.15656621754169464, "rewards/rejected": -0.6509113311767578, "step": 195 }, { "epoch": 0.21011162179908077, "grad_norm": 12.8125, "learning_rate": 1.927870512430972e-06, "log_odds_chosen": 0.42371082305908203, "log_odds_ratio": -0.5525480508804321, "logits/chosen": -2.4069533348083496, "logits/rejected": -2.019406795501709, "logps/chosen": -0.4768436551094055, "logps/rejected": -0.6629732251167297, "loss": 1.4572, "nll_loss": 0.9046151041984558, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.4768436551094055, "rewards/margins": 0.18612954020500183, "rewards/rejected": -0.6629732251167297, "step": 200 }, { "epoch": 0.2153644123440578, "grad_norm": 9.0, "learning_rate": 1.9208633884381526e-06, "log_odds_chosen": 0.42966872453689575, "log_odds_ratio": -0.5522044897079468, "logits/chosen": -2.430342197418213, "logits/rejected": -2.0743634700775146, "logps/chosen": -0.4722970426082611, "logps/rejected": -0.6621736884117126, "loss": 1.4295, "nll_loss": 0.8772872090339661, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4722970426082611, "rewards/margins": 0.18987664580345154, "rewards/rejected": -0.6621736884117126, "step": 205 }, { "epoch": 0.2206172028890348, "grad_norm": 9.1875, "learning_rate": 1.9135454576426007e-06, "log_odds_chosen": 0.40302562713623047, "log_odds_ratio": -0.5604028105735779, "logits/chosen": -2.412562847137451, "logits/rejected": -2.0246427059173584, "logps/chosen": -0.4761424660682678, "logps/rejected": -0.661251425743103, "loss": 1.3993, "nll_loss": 0.8388580083847046, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.4761424660682678, "rewards/margins": 0.18510892987251282, "rewards/rejected": -0.661251425743103, "step": 210 }, { "epoch": 0.22586999343401182, "grad_norm": 7.875, "learning_rate": 1.905919189968415e-06, "log_odds_chosen": 0.4606761932373047, "log_odds_ratio": -0.5445691347122192, "logits/chosen": -2.4419312477111816, "logits/rejected": -2.030771493911743, "logps/chosen": -0.4771277904510498, "logps/rejected": -0.6932464838027954, "loss": 1.4377, "nll_loss": 0.8931263089179993, "rewards/accuracies": 0.734375, "rewards/chosen": -0.4771277904510498, "rewards/margins": 0.21611860394477844, "rewards/rejected": -0.6932464838027954, "step": 215 }, { "epoch": 0.23112278397898883, "grad_norm": 7.78125, "learning_rate": 1.897987159408548e-06, "log_odds_chosen": 0.4278109073638916, "log_odds_ratio": -0.5563892722129822, "logits/chosen": -2.4070868492126465, "logits/rejected": -2.033133029937744, "logps/chosen": -0.4777792990207672, "logps/rejected": -0.6746242642402649, "loss": 1.3836, "nll_loss": 0.827177882194519, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.4777792990207672, "rewards/margins": 0.19684496521949768, "rewards/rejected": -0.6746242642402649, "step": 220 }, { "epoch": 0.23637557452396585, "grad_norm": 9.4375, "learning_rate": 1.8897520431560433e-06, "log_odds_chosen": 0.39412638545036316, "log_odds_ratio": -0.5616167187690735, "logits/chosen": -2.437281608581543, "logits/rejected": -2.0233240127563477, "logps/chosen": -0.49209141731262207, "logps/rejected": -0.670540988445282, "loss": 1.3984, "nll_loss": 0.8367835879325867, "rewards/accuracies": 0.71875, "rewards/chosen": -0.49209141731262207, "rewards/margins": 0.17844951152801514, "rewards/rejected": -0.670540988445282, "step": 225 }, { "epoch": 0.24162836506894286, "grad_norm": 9.4375, "learning_rate": 1.8812166207004366e-06, "log_odds_chosen": 0.45934948325157166, "log_odds_ratio": -0.5536540746688843, "logits/chosen": -2.4575705528259277, "logits/rejected": -2.0787205696105957, "logps/chosen": -0.4777277112007141, "logps/rejected": -0.6928449869155884, "loss": 1.3871, "nll_loss": 0.8334070444107056, "rewards/accuracies": 0.7593749761581421, "rewards/chosen": -0.4777277112007141, "rewards/margins": 0.21511724591255188, "rewards/rejected": -0.6928449869155884, "step": 230 }, { "epoch": 0.2468811556139199, "grad_norm": 7.71875, "learning_rate": 1.8723837728896337e-06, "log_odds_chosen": 0.45329445600509644, "log_odds_ratio": -0.5616171360015869, "logits/chosen": -2.522167682647705, "logits/rejected": -2.1475300788879395, "logps/chosen": -0.4945332407951355, "logps/rejected": -0.7140644788742065, "loss": 1.4402, "nll_loss": 0.878614068031311, "rewards/accuracies": 0.7093750238418579, "rewards/chosen": -0.4945332407951355, "rewards/margins": 0.21953122317790985, "rewards/rejected": -0.7140644788742065, "step": 235 }, { "epoch": 0.2521339461588969, "grad_norm": 7.75, "learning_rate": 1.8632564809575738e-06, "log_odds_chosen": 0.4688095152378082, "log_odds_ratio": -0.5438790917396545, "logits/chosen": -2.512554168701172, "logits/rejected": -2.105734348297119, "logps/chosen": -0.48634210228919983, "logps/rejected": -0.7048304677009583, "loss": 1.4387, "nll_loss": 0.8948429226875305, "rewards/accuracies": 0.753125011920929, "rewards/chosen": -0.48634210228919983, "rewards/margins": 0.21848826110363007, "rewards/rejected": -0.7048304677009583, "step": 240 }, { "epoch": 0.2573867367038739, "grad_norm": 9.9375, "learning_rate": 1.8538378255180138e-06, "log_odds_chosen": 0.488097608089447, "log_odds_ratio": -0.5403500199317932, "logits/chosen": -2.3577160835266113, "logits/rejected": -2.0601189136505127, "logps/chosen": -0.5090717077255249, "logps/rejected": -0.7453780174255371, "loss": 1.4193, "nll_loss": 0.878923773765564, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.5090717077255249, "rewards/margins": 0.23630623519420624, "rewards/rejected": -0.7453780174255371, "step": 245 }, { "epoch": 0.262639527248851, "grad_norm": 8.4375, "learning_rate": 1.8441309855247707e-06, "log_odds_chosen": 0.6032781004905701, "log_odds_ratio": -0.5000559091567993, "logits/chosen": -2.403979539871216, "logits/rejected": -2.1050338745117188, "logps/chosen": -0.5098007917404175, "logps/rejected": -0.8097056150436401, "loss": 1.4018, "nll_loss": 0.9017453193664551, "rewards/accuracies": 0.796875, "rewards/chosen": -0.5098007917404175, "rewards/margins": 0.29990485310554504, "rewards/rejected": -0.8097056150436401, "step": 250 }, { "epoch": 0.267892317793828, "grad_norm": 7.90625, "learning_rate": 1.83413923719877e-06, "log_odds_chosen": 0.5410558581352234, "log_odds_ratio": -0.5238425135612488, "logits/chosen": -2.42203688621521, "logits/rejected": -2.095054864883423, "logps/chosen": -0.49079209566116333, "logps/rejected": -0.763100266456604, "loss": 1.3797, "nll_loss": 0.8558791875839233, "rewards/accuracies": 0.778124988079071, "rewards/chosen": -0.49079209566116333, "rewards/margins": 0.2723081707954407, "rewards/rejected": -0.763100266456604, "step": 255 }, { "epoch": 0.273145108338805, "grad_norm": 10.1875, "learning_rate": 1.8238659529222668e-06, "log_odds_chosen": 0.5387502908706665, "log_odds_ratio": -0.5273549556732178, "logits/chosen": -2.458590269088745, "logits/rejected": -2.1467177867889404, "logps/chosen": -0.5123028755187988, "logps/rejected": -0.781539797782898, "loss": 1.4312, "nll_loss": 0.9038845300674438, "rewards/accuracies": 0.765625, "rewards/chosen": -0.5123028755187988, "rewards/margins": 0.2692369818687439, "rewards/rejected": -0.781539797782898, "step": 260 }, { "epoch": 0.278397898883782, "grad_norm": 12.3125, "learning_rate": 1.8133146001006117e-06, "log_odds_chosen": 0.585041880607605, "log_odds_ratio": -0.5241442322731018, "logits/chosen": -2.434957504272461, "logits/rejected": -2.08172345161438, "logps/chosen": -0.5419186353683472, "logps/rejected": -0.8563257455825806, "loss": 1.4995, "nll_loss": 0.9753583669662476, "rewards/accuracies": 0.7593749761581421, "rewards/chosen": -0.5419186353683472, "rewards/margins": 0.314407080411911, "rewards/rejected": -0.8563257455825806, "step": 265 }, { "epoch": 0.28365068942875904, "grad_norm": 12.8125, "learning_rate": 1.8024887399919408e-06, "log_odds_chosen": 0.686429500579834, "log_odds_ratio": -0.49835652112960815, "logits/chosen": -2.493675947189331, "logits/rejected": -2.192899465560913, "logps/chosen": -0.533765435218811, "logps/rejected": -0.9061405062675476, "loss": 1.4053, "nll_loss": 0.9069935083389282, "rewards/accuracies": 0.765625, "rewards/chosen": -0.533765435218811, "rewards/margins": 0.3723750710487366, "rewards/rejected": -0.9061405062675476, "step": 270 }, { "epoch": 0.28890347997373605, "grad_norm": 11.5, "learning_rate": 1.7913920265051946e-06, "log_odds_chosen": 0.7045778036117554, "log_odds_ratio": -0.49370041489601135, "logits/chosen": -2.4899590015411377, "logits/rejected": -2.1618402004241943, "logps/chosen": -0.5214771032333374, "logps/rejected": -0.9220815896987915, "loss": 1.4176, "nll_loss": 0.92388916015625, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.5214771032333374, "rewards/margins": 0.4006044268608093, "rewards/rejected": -0.9220815896987915, "step": 275 }, { "epoch": 0.29415627051871307, "grad_norm": 9.5625, "learning_rate": 1.780028204966859e-06, "log_odds_chosen": 0.6810405254364014, "log_odds_ratio": -0.4989449381828308, "logits/chosen": -2.3327696323394775, "logits/rejected": -2.0119078159332275, "logps/chosen": -0.5228633880615234, "logps/rejected": -0.8827990293502808, "loss": 1.405, "nll_loss": 0.9060786962509155, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5228633880615234, "rewards/margins": 0.3599356710910797, "rewards/rejected": -0.8827990293502808, "step": 280 }, { "epoch": 0.2994090610636901, "grad_norm": 10.8125, "learning_rate": 1.768401110856859e-06, "log_odds_chosen": 0.7910138964653015, "log_odds_ratio": -0.47219276428222656, "logits/chosen": -2.465003252029419, "logits/rejected": -2.085939407348633, "logps/chosen": -0.5146728754043579, "logps/rejected": -0.9470351934432983, "loss": 1.3015, "nll_loss": 0.8292847871780396, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.5146728754043579, "rewards/margins": 0.43236231803894043, "rewards/rejected": -0.9470351934432983, "step": 285 }, { "epoch": 0.3046618516086671, "grad_norm": 13.6875, "learning_rate": 1.7565146685140167e-06, "log_odds_chosen": 0.771044135093689, "log_odds_ratio": -0.4853692948818207, "logits/chosen": -2.4471678733825684, "logits/rejected": -2.1012349128723145, "logps/chosen": -0.5462040901184082, "logps/rejected": -0.9886453747749329, "loss": 1.3976, "nll_loss": 0.9122269749641418, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.5462040901184082, "rewards/margins": 0.4424411654472351, "rewards/rejected": -0.9886453747749329, "step": 290 }, { "epoch": 0.3099146421536441, "grad_norm": 12.0, "learning_rate": 1.7443728898115224e-06, "log_odds_chosen": 0.6316434144973755, "log_odds_ratio": -0.5107887983322144, "logits/chosen": -2.432225465774536, "logits/rejected": -2.0828986167907715, "logps/chosen": -0.5212147235870361, "logps/rejected": -0.8626314997673035, "loss": 1.3532, "nll_loss": 0.8424150347709656, "rewards/accuracies": 0.7406250238418579, "rewards/chosen": -0.5212147235870361, "rewards/margins": 0.34141671657562256, "rewards/rejected": -0.8626314997673035, "step": 295 }, { "epoch": 0.31516743269862113, "grad_norm": 19.875, "learning_rate": 1.7319798728028616e-06, "log_odds_chosen": 0.8003711700439453, "log_odds_ratio": -0.4749313294887543, "logits/chosen": -2.4634110927581787, "logits/rejected": -2.111607313156128, "logps/chosen": -0.5615866780281067, "logps/rejected": -1.0098183155059814, "loss": 1.4088, "nll_loss": 0.9338866472244263, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.5615866780281067, "rewards/margins": 0.44823163747787476, "rewards/rejected": -1.0098183155059814, "step": 300 }, { "epoch": 0.32042022324359815, "grad_norm": 17.25, "learning_rate": 1.719339800338651e-06, "log_odds_chosen": 0.8279815912246704, "log_odds_ratio": -0.4675443172454834, "logits/chosen": -2.5601465702056885, "logits/rejected": -2.2116811275482178, "logps/chosen": -0.5433454513549805, "logps/rejected": -1.0226011276245117, "loss": 1.3768, "nll_loss": 0.9092954397201538, "rewards/accuracies": 0.8031250238418579, "rewards/chosen": -0.5433454513549805, "rewards/margins": 0.479255735874176, "rewards/rejected": -1.0226011276245117, "step": 305 }, { "epoch": 0.32567301378857516, "grad_norm": 15.3125, "learning_rate": 1.7064569386548585e-06, "log_odds_chosen": 0.859075665473938, "log_odds_ratio": -0.4543831944465637, "logits/chosen": -2.531367301940918, "logits/rejected": -2.2318122386932373, "logps/chosen": -0.5256025195121765, "logps/rejected": -1.0284937620162964, "loss": 1.3533, "nll_loss": 0.8989534378051758, "rewards/accuracies": 0.8031250238418579, "rewards/chosen": -0.5256025195121765, "rewards/margins": 0.5028911828994751, "rewards/rejected": -1.0284937620162964, "step": 310 }, { "epoch": 0.3309258043335522, "grad_norm": 16.625, "learning_rate": 1.6933356359328754e-06, "log_odds_chosen": 0.7117995619773865, "log_odds_ratio": -0.4905334413051605, "logits/chosen": -2.5292108058929443, "logits/rejected": -2.1856768131256104, "logps/chosen": -0.5284509658813477, "logps/rejected": -0.915407657623291, "loss": 1.371, "nll_loss": 0.8804505467414856, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5284509658813477, "rewards/margins": 0.3869567811489105, "rewards/rejected": -0.915407657623291, "step": 315 }, { "epoch": 0.3361785948785292, "grad_norm": 17.875, "learning_rate": 1.679980320831934e-06, "log_odds_chosen": 0.7291110754013062, "log_odds_ratio": -0.4787971079349518, "logits/chosen": -2.4682400226593018, "logits/rejected": -2.2220332622528076, "logps/chosen": -0.5479062795639038, "logps/rejected": -0.9491809606552124, "loss": 1.3781, "nll_loss": 0.8992602229118347, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.5479062795639038, "rewards/margins": 0.40127477049827576, "rewards/rejected": -0.9491809606552124, "step": 320 }, { "epoch": 0.34143138542350626, "grad_norm": 32.0, "learning_rate": 1.6663955009943602e-06, "log_odds_chosen": 0.9077841639518738, "log_odds_ratio": -0.4515516757965088, "logits/chosen": -2.4324584007263184, "logits/rejected": -2.178394317626953, "logps/chosen": -0.5766757726669312, "logps/rejected": -1.1069071292877197, "loss": 1.374, "nll_loss": 0.9224408268928528, "rewards/accuracies": 0.828125, "rewards/chosen": -0.5766757726669312, "rewards/margins": 0.530231237411499, "rewards/rejected": -1.1069071292877197, "step": 325 }, { "epoch": 0.3466841759684833, "grad_norm": 23.0, "learning_rate": 1.6525857615241685e-06, "log_odds_chosen": 0.733812689781189, "log_odds_ratio": -0.4906436800956726, "logits/chosen": -2.523135185241699, "logits/rejected": -2.1835999488830566, "logps/chosen": -0.5466452836990356, "logps/rejected": -0.9662971496582031, "loss": 1.4195, "nll_loss": 0.9288629293441772, "rewards/accuracies": 0.8031250238418579, "rewards/chosen": -0.5466452836990356, "rewards/margins": 0.4196518361568451, "rewards/rejected": -0.9662971496582031, "step": 330 }, { "epoch": 0.3519369665134603, "grad_norm": 21.875, "learning_rate": 1.6385557634395136e-06, "log_odds_chosen": 0.7822979688644409, "log_odds_ratio": -0.47422999143600464, "logits/chosen": -2.4535679817199707, "logits/rejected": -2.2028393745422363, "logps/chosen": -0.5340802669525146, "logps/rejected": -0.9806568026542664, "loss": 1.3555, "nll_loss": 0.881304144859314, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5340802669525146, "rewards/margins": 0.44657665491104126, "rewards/rejected": -0.9806568026542664, "step": 335 }, { "epoch": 0.3571897570584373, "grad_norm": 21.5, "learning_rate": 1.624310242099518e-06, "log_odds_chosen": 0.7664231061935425, "log_odds_ratio": -0.48080235719680786, "logits/chosen": -2.453505039215088, "logits/rejected": -2.18292498588562, "logps/chosen": -0.5327800512313843, "logps/rejected": -0.9523041844367981, "loss": 1.4089, "nll_loss": 0.9281209111213684, "rewards/accuracies": 0.796875, "rewards/chosen": -0.5327800512313843, "rewards/margins": 0.4195241332054138, "rewards/rejected": -0.9523041844367981, "step": 340 }, { "epoch": 0.3624425476034143, "grad_norm": 24.5, "learning_rate": 1.609854005606009e-06, "log_odds_chosen": 0.9470375776290894, "log_odds_ratio": -0.4272763729095459, "logits/chosen": -2.5423166751861572, "logits/rejected": -2.210846424102783, "logps/chosen": -0.5365777015686035, "logps/rejected": -1.076774001121521, "loss": 1.3329, "nll_loss": 0.9056490063667297, "rewards/accuracies": 0.815625011920929, "rewards/chosen": -0.5365777015686035, "rewards/margins": 0.5401962995529175, "rewards/rejected": -1.076774001121521, "step": 345 }, { "epoch": 0.36769533814839134, "grad_norm": 15.125, "learning_rate": 1.5951919331807048e-06, "log_odds_chosen": 0.9901137351989746, "log_odds_ratio": -0.43201208114624023, "logits/chosen": -2.3910915851593018, "logits/rejected": -2.085310935974121, "logps/chosen": -0.5491678714752197, "logps/rejected": -1.141390085220337, "loss": 1.3711, "nll_loss": 0.9390678405761719, "rewards/accuracies": 0.796875, "rewards/chosen": -0.5491678714752197, "rewards/margins": 0.5922220945358276, "rewards/rejected": -1.141390085220337, "step": 350 }, { "epoch": 0.37294812869336835, "grad_norm": 19.25, "learning_rate": 1.5803289735183949e-06, "log_odds_chosen": 0.9613128900527954, "log_odds_ratio": -0.43703293800354004, "logits/chosen": -2.404744863510132, "logits/rejected": -2.0907814502716064, "logps/chosen": -0.5635210871696472, "logps/rejected": -1.1492810249328613, "loss": 1.3534, "nll_loss": 0.9164144396781921, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.5635210871696472, "rewards/margins": 0.5857599377632141, "rewards/rejected": -1.1492810249328613, "step": 355 }, { "epoch": 0.37820091923834537, "grad_norm": 32.25, "learning_rate": 1.5652701431166717e-06, "log_odds_chosen": 0.9359542727470398, "log_odds_ratio": -0.4396037459373474, "logits/chosen": -2.4650635719299316, "logits/rejected": -2.122915267944336, "logps/chosen": -0.5267240405082703, "logps/rejected": -1.0681325197219849, "loss": 1.3381, "nll_loss": 0.8984518051147461, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.5267240405082703, "rewards/margins": 0.5414084792137146, "rewards/rejected": -1.0681325197219849, "step": 360 }, { "epoch": 0.3834537097833224, "grad_norm": 22.5, "learning_rate": 1.550020524582781e-06, "log_odds_chosen": 0.9607855677604675, "log_odds_ratio": -0.4296341836452484, "logits/chosen": -2.556321620941162, "logits/rejected": -2.233931064605713, "logps/chosen": -0.5581452250480652, "logps/rejected": -1.131134033203125, "loss": 1.2919, "nll_loss": 0.8622277975082397, "rewards/accuracies": 0.84375, "rewards/chosen": -0.5581452250480652, "rewards/margins": 0.5729888677597046, "rewards/rejected": -1.131134033203125, "step": 365 }, { "epoch": 0.3887065003282994, "grad_norm": 20.375, "learning_rate": 1.5345852649181553e-06, "log_odds_chosen": 0.9939554333686829, "log_odds_ratio": -0.4331156313419342, "logits/chosen": -2.4889018535614014, "logits/rejected": -2.2245144844055176, "logps/chosen": -0.5625091791152954, "logps/rejected": -1.159073829650879, "loss": 1.3688, "nll_loss": 0.9356663823127747, "rewards/accuracies": 0.840624988079071, "rewards/chosen": -0.5625091791152954, "rewards/margins": 0.5965645909309387, "rewards/rejected": -1.159073829650879, "step": 370 }, { "epoch": 0.3939592908732764, "grad_norm": 23.625, "learning_rate": 1.5189695737812151e-06, "log_odds_chosen": 1.057094931602478, "log_odds_ratio": -0.4173505902290344, "logits/chosen": -2.63775634765625, "logits/rejected": -2.2736358642578125, "logps/chosen": -0.5382205843925476, "logps/rejected": -1.1550116539001465, "loss": 1.3662, "nll_loss": 0.9488565325737, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.5382205843925476, "rewards/margins": 0.6167910099029541, "rewards/rejected": -1.1550116539001465, "step": 375 }, { "epoch": 0.39921208141825343, "grad_norm": 20.375, "learning_rate": 1.5031787217290216e-06, "log_odds_chosen": 1.2109272480010986, "log_odds_ratio": -0.40476536750793457, "logits/chosen": -2.441784143447876, "logits/rejected": -2.141080856323242, "logps/chosen": -0.5574549436569214, "logps/rejected": -1.3256219625473022, "loss": 1.3395, "nll_loss": 0.9347711801528931, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.5574549436569214, "rewards/margins": 0.7681670188903809, "rewards/rejected": -1.3256219625473022, "step": 380 }, { "epoch": 0.40446487196323044, "grad_norm": 22.125, "learning_rate": 1.487218038438377e-06, "log_odds_chosen": 1.0492345094680786, "log_odds_ratio": -0.41920414566993713, "logits/chosen": -2.4877960681915283, "logits/rejected": -2.2220120429992676, "logps/chosen": -0.5476792454719543, "logps/rejected": -1.1791220903396606, "loss": 1.3255, "nll_loss": 0.9063073992729187, "rewards/accuracies": 0.8218749761581421, "rewards/chosen": -0.5476792454719543, "rewards/margins": 0.6314427256584167, "rewards/rejected": -1.1791220903396606, "step": 385 }, { "epoch": 0.40971766250820746, "grad_norm": 52.5, "learning_rate": 1.4710929109069672e-06, "log_odds_chosen": 1.1698648929595947, "log_odds_ratio": -0.4003461003303528, "logits/chosen": -2.450030565261841, "logits/rejected": -2.1449716091156006, "logps/chosen": -0.5605112314224243, "logps/rejected": -1.2878248691558838, "loss": 1.3523, "nll_loss": 0.9519191980361938, "rewards/accuracies": 0.84375, "rewards/chosen": -0.5605112314224243, "rewards/margins": 0.7273136377334595, "rewards/rejected": -1.2878248691558838, "step": 390 }, { "epoch": 0.41497045305318453, "grad_norm": 23.75, "learning_rate": 1.4548087816351614e-06, "log_odds_chosen": 1.1297777891159058, "log_odds_ratio": -0.41146859526634216, "logits/chosen": -2.514195442199707, "logits/rejected": -2.1877148151397705, "logps/chosen": -0.5281041860580444, "logps/rejected": -1.2085294723510742, "loss": 1.2817, "nll_loss": 0.8702155947685242, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.5281041860580444, "rewards/margins": 0.6804252862930298, "rewards/rejected": -1.2085294723510742, "step": 395 }, { "epoch": 0.42022324359816154, "grad_norm": 68.0, "learning_rate": 1.4383711467890773e-06, "log_odds_chosen": 1.1593742370605469, "log_odds_ratio": -0.4072793424129486, "logits/chosen": -2.410384178161621, "logits/rejected": -2.1880173683166504, "logps/chosen": -0.5577239990234375, "logps/rejected": -1.2925007343292236, "loss": 1.2823, "nll_loss": 0.8749955892562866, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.5577239990234375, "rewards/margins": 0.7347767353057861, "rewards/rejected": -1.2925007343292236, "step": 400 }, { "epoch": 0.42547603414313856, "grad_norm": 26.75, "learning_rate": 1.4217855543455323e-06, "log_odds_chosen": 1.0840833187103271, "log_odds_ratio": -0.4106718599796295, "logits/chosen": -2.384483575820923, "logits/rejected": -2.11120343208313, "logps/chosen": -0.5574430227279663, "logps/rejected": -1.2079960107803345, "loss": 1.3143, "nll_loss": 0.9036461710929871, "rewards/accuracies": 0.8343750238418579, "rewards/chosen": -0.5574430227279663, "rewards/margins": 0.6505529880523682, "rewards/rejected": -1.2079960107803345, "step": 405 }, { "epoch": 0.4307288246881156, "grad_norm": 22.625, "learning_rate": 1.4050576022195082e-06, "log_odds_chosen": 0.8836471438407898, "log_odds_ratio": -0.4627167582511902, "logits/chosen": -2.4845831394195557, "logits/rejected": -2.3066840171813965, "logps/chosen": -0.5467715263366699, "logps/rejected": -1.0581128597259521, "loss": 1.3641, "nll_loss": 0.9014018774032593, "rewards/accuracies": 0.796875, "rewards/chosen": -0.5467715263366699, "rewards/margins": 0.5113412141799927, "rewards/rejected": -1.0581128597259521, "step": 410 }, { "epoch": 0.4359816152330926, "grad_norm": 34.0, "learning_rate": 1.3881929363747626e-06, "log_odds_chosen": 1.0594258308410645, "log_odds_ratio": -0.4148578643798828, "logits/chosen": -2.3405816555023193, "logits/rejected": -2.115149974822998, "logps/chosen": -0.5290128588676453, "logps/rejected": -1.1592894792556763, "loss": 1.3394, "nll_loss": 0.9245734214782715, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.5290128588676453, "rewards/margins": 0.6302765607833862, "rewards/rejected": -1.1592894792556763, "step": 415 }, { "epoch": 0.4412344057780696, "grad_norm": 47.0, "learning_rate": 1.3711972489182206e-06, "log_odds_chosen": 1.4167802333831787, "log_odds_ratio": -0.3603227734565735, "logits/chosen": -2.4658875465393066, "logits/rejected": -2.18940806388855, "logps/chosen": -0.5862340331077576, "logps/rejected": -1.5004864931106567, "loss": 1.305, "nll_loss": 0.9447038769721985, "rewards/accuracies": 0.875, "rewards/chosen": -0.5862340331077576, "rewards/margins": 0.9142524003982544, "rewards/rejected": -1.5004864931106567, "step": 420 }, { "epoch": 0.4464871963230466, "grad_norm": 19.75, "learning_rate": 1.3540762761787936e-06, "log_odds_chosen": 1.2667293548583984, "log_odds_ratio": -0.3922019898891449, "logits/chosen": -2.449897289276123, "logits/rejected": -2.1496291160583496, "logps/chosen": -0.5754435658454895, "logps/rejected": -1.3866373300552368, "loss": 1.2676, "nll_loss": 0.8754428625106812, "rewards/accuracies": 0.859375, "rewards/chosen": -0.5754435658454895, "rewards/margins": 0.8111938238143921, "rewards/rejected": -1.3866373300552368, "step": 425 }, { "epoch": 0.45173998686802364, "grad_norm": 39.0, "learning_rate": 1.3368357967712725e-06, "log_odds_chosen": 1.156019687652588, "log_odds_ratio": -0.395340234041214, "logits/chosen": -2.553677797317505, "logits/rejected": -2.2673325538635254, "logps/chosen": -0.5371165871620178, "logps/rejected": -1.2541286945343018, "loss": 1.3117, "nll_loss": 0.9164005517959595, "rewards/accuracies": 0.840624988079071, "rewards/chosen": -0.5371165871620178, "rewards/margins": 0.7170120477676392, "rewards/rejected": -1.2541286945343018, "step": 430 }, { "epoch": 0.45699277741300065, "grad_norm": 44.25, "learning_rate": 1.3194816296459482e-06, "log_odds_chosen": 1.1215965747833252, "log_odds_ratio": -0.40178972482681274, "logits/chosen": -2.4841268062591553, "logits/rejected": -2.2464358806610107, "logps/chosen": -0.6227961182594299, "logps/rejected": -1.3194401264190674, "loss": 1.3687, "nll_loss": 0.9668703079223633, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.6227961182594299, "rewards/margins": 0.6966440081596375, "rewards/rejected": -1.3194401264190674, "step": 435 }, { "epoch": 0.46224556795797767, "grad_norm": 30.625, "learning_rate": 1.302019632124619e-06, "log_odds_chosen": 1.4459072351455688, "log_odds_ratio": -0.3312341868877411, "logits/chosen": -2.497469902038574, "logits/rejected": -2.215177297592163, "logps/chosen": -0.5155361294746399, "logps/rejected": -1.4294028282165527, "loss": 1.227, "nll_loss": 0.8957819938659668, "rewards/accuracies": 0.8968750238418579, "rewards/chosen": -0.5155361294746399, "rewards/margins": 0.9138666391372681, "rewards/rejected": -1.4294028282165527, "step": 440 }, { "epoch": 0.4674983585029547, "grad_norm": 27.5, "learning_rate": 1.284455697923646e-06, "log_odds_chosen": 1.5342215299606323, "log_odds_ratio": -0.3261391222476959, "logits/chosen": -2.5461294651031494, "logits/rejected": -2.2099266052246094, "logps/chosen": -0.5843450427055359, "logps/rejected": -1.5760066509246826, "loss": 1.312, "nll_loss": 0.9858700037002563, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.5843450427055359, "rewards/margins": 0.991661548614502, "rewards/rejected": -1.5760066509246826, "step": 445 }, { "epoch": 0.4727511490479317, "grad_norm": 43.0, "learning_rate": 1.2667957551647261e-06, "log_odds_chosen": 1.2222964763641357, "log_odds_ratio": -0.3712048828601837, "logits/chosen": -2.5557785034179688, "logits/rejected": -2.261915922164917, "logps/chosen": -0.5360510945320129, "logps/rejected": -1.2696157693862915, "loss": 1.233, "nll_loss": 0.8618295788764954, "rewards/accuracies": 0.8656250238418579, "rewards/chosen": -0.5360510945320129, "rewards/margins": 0.7335647344589233, "rewards/rejected": -1.2696157693862915, "step": 450 }, { "epoch": 0.4780039395929087, "grad_norm": 50.5, "learning_rate": 1.24904576437405e-06, "log_odds_chosen": 1.1964861154556274, "log_odds_ratio": -0.380424439907074, "logits/chosen": -2.387500762939453, "logits/rejected": -2.2171878814697266, "logps/chosen": -0.5144879221916199, "logps/rejected": -1.2391068935394287, "loss": 1.182, "nll_loss": 0.801527202129364, "rewards/accuracies": 0.8843749761581421, "rewards/chosen": -0.5144879221916199, "rewards/margins": 0.7246190309524536, "rewards/rejected": -1.2391068935394287, "step": 455 }, { "epoch": 0.4832567301378857, "grad_norm": 34.5, "learning_rate": 1.2312117164705265e-06, "log_odds_chosen": 1.319461703300476, "log_odds_ratio": -0.37714654207229614, "logits/chosen": -2.5138354301452637, "logits/rejected": -2.2482171058654785, "logps/chosen": -0.5467159748077393, "logps/rejected": -1.3964442014694214, "loss": 1.2877, "nll_loss": 0.9105404019355774, "rewards/accuracies": 0.840624988079071, "rewards/chosen": -0.5467159748077393, "rewards/margins": 0.8497281074523926, "rewards/rejected": -1.3964442014694214, "step": 460 }, { "epoch": 0.4885095206828628, "grad_norm": 43.5, "learning_rate": 1.2132996307437468e-06, "log_odds_chosen": 1.3355519771575928, "log_odds_ratio": -0.3902519941329956, "logits/chosen": -2.482901096343994, "logits/rejected": -2.2286696434020996, "logps/chosen": -0.566125750541687, "logps/rejected": -1.4363183975219727, "loss": 1.3035, "nll_loss": 0.9132728576660156, "rewards/accuracies": 0.815625011920929, "rewards/chosen": -0.566125750541687, "rewards/margins": 0.8701925277709961, "rewards/rejected": -1.4363183975219727, "step": 465 }, { "epoch": 0.4937623112278398, "grad_norm": 83.0, "learning_rate": 1.1953155528223725e-06, "log_odds_chosen": 1.1865278482437134, "log_odds_ratio": -0.392407089471817, "logits/chosen": -2.425886869430542, "logits/rejected": -2.155287265777588, "logps/chosen": -0.5029312968254089, "logps/rejected": -1.2368618249893188, "loss": 1.2357, "nll_loss": 0.8432880640029907, "rewards/accuracies": 0.840624988079071, "rewards/chosen": -0.5029312968254089, "rewards/margins": 0.7339304089546204, "rewards/rejected": -1.2368618249893188, "step": 470 }, { "epoch": 0.4990151017728168, "grad_norm": 40.5, "learning_rate": 1.1772655526336367e-06, "log_odds_chosen": 1.4356929063796997, "log_odds_ratio": -0.3839671313762665, "logits/chosen": -2.398430585861206, "logits/rejected": -2.104560136795044, "logps/chosen": -0.5578696131706238, "logps/rejected": -1.5088526010513306, "loss": 1.2412, "nll_loss": 0.8572656512260437, "rewards/accuracies": 0.84375, "rewards/chosen": -0.5578696131706238, "rewards/margins": 0.9509830474853516, "rewards/rejected": -1.5088526010513306, "step": 475 }, { "epoch": 0.5042678923177938, "grad_norm": 28.25, "learning_rate": 1.1591557223546393e-06, "log_odds_chosen": 1.148279070854187, "log_odds_ratio": -0.3996050953865051, "logits/chosen": -2.365521192550659, "logits/rejected": -2.152665615081787, "logps/chosen": -0.566467821598053, "logps/rejected": -1.2856696844100952, "loss": 1.3237, "nll_loss": 0.9241225123405457, "rewards/accuracies": 0.8531249761581421, "rewards/chosen": -0.566467821598053, "rewards/margins": 0.719201922416687, "rewards/rejected": -1.2856696844100952, "step": 480 }, { "epoch": 0.5095206828627709, "grad_norm": 36.25, "learning_rate": 1.1409921743561381e-06, "log_odds_chosen": 1.1759016513824463, "log_odds_ratio": -0.41472458839416504, "logits/chosen": -2.404526472091675, "logits/rejected": -2.2163596153259277, "logps/chosen": -0.5324310064315796, "logps/rejected": -1.2714060544967651, "loss": 1.293, "nll_loss": 0.8782441020011902, "rewards/accuracies": 0.84375, "rewards/chosen": -0.5324310064315796, "rewards/margins": 0.7389749884605408, "rewards/rejected": -1.2714060544967651, "step": 485 }, { "epoch": 0.5147734734077478, "grad_norm": 223.0, "learning_rate": 1.1227810391395199e-06, "log_odds_chosen": 1.385846734046936, "log_odds_ratio": -0.3814238905906677, "logits/chosen": -2.4934306144714355, "logits/rejected": -2.2085797786712646, "logps/chosen": -0.5657092928886414, "logps/rejected": -1.4650784730911255, "loss": 1.2852, "nll_loss": 0.9037421345710754, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.5657092928886414, "rewards/margins": 0.8993691205978394, "rewards/rejected": -1.4650784730911255, "step": 490 }, { "epoch": 0.5200262639527249, "grad_norm": 27.625, "learning_rate": 1.1045284632676535e-06, "log_odds_chosen": 1.637117624282837, "log_odds_ratio": -0.36074963212013245, "logits/chosen": -2.505157947540283, "logits/rejected": -2.18147611618042, "logps/chosen": -0.5794259905815125, "logps/rejected": -1.7134405374526978, "loss": 1.2555, "nll_loss": 0.8947887420654297, "rewards/accuracies": 0.84375, "rewards/chosen": -0.5794259905815125, "rewards/margins": 1.1340144872665405, "rewards/rejected": -1.7134405374526978, "step": 495 }, { "epoch": 0.525279054497702, "grad_norm": 25.375, "learning_rate": 1.0862406072903223e-06, "log_odds_chosen": 1.4640438556671143, "log_odds_ratio": -0.36846035718917847, "logits/chosen": -2.5681748390197754, "logits/rejected": -2.232964038848877, "logps/chosen": -0.5701361298561096, "logps/rejected": -1.5233440399169922, "loss": 1.2435, "nll_loss": 0.8750120997428894, "rewards/accuracies": 0.84375, "rewards/chosen": -0.5701361298561096, "rewards/margins": 0.9532078504562378, "rewards/rejected": -1.5233440399169922, "step": 500 }, { "epoch": 0.5305318450426789, "grad_norm": 23.75, "learning_rate": 1.067923643664936e-06, "log_odds_chosen": 1.4654853343963623, "log_odds_ratio": -0.35504215955734253, "logits/chosen": -2.502295970916748, "logits/rejected": -2.181178569793701, "logps/chosen": -0.5419307947158813, "logps/rejected": -1.5056110620498657, "loss": 1.2431, "nll_loss": 0.8880621790885925, "rewards/accuracies": 0.8812500238418579, "rewards/chosen": -0.5419307947158813, "rewards/margins": 0.9636803865432739, "rewards/rejected": -1.5056110620498657, "step": 505 }, { "epoch": 0.535784635587656, "grad_norm": 35.0, "learning_rate": 1.0495837546732222e-06, "log_odds_chosen": 1.5194576978683472, "log_odds_ratio": -0.37253108620643616, "logits/chosen": -2.413229465484619, "logits/rejected": -2.184525728225708, "logps/chosen": -0.5820909738540649, "logps/rejected": -1.6039245128631592, "loss": 1.3383, "nll_loss": 0.9657222032546997, "rewards/accuracies": 0.8531249761581421, "rewards/chosen": -0.5820909738540649, "rewards/margins": 1.0218335390090942, "rewards/rejected": -1.6039245128631592, "step": 510 }, { "epoch": 0.541037426132633, "grad_norm": 32.25, "learning_rate": 1.0312271303346038e-06, "log_odds_chosen": 1.314542531967163, "log_odds_ratio": -0.396615594625473, "logits/chosen": -2.545009136199951, "logits/rejected": -2.301347017288208, "logps/chosen": -0.562983512878418, "logps/rejected": -1.4147989749908447, "loss": 1.3396, "nll_loss": 0.9429594278335571, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.562983512878418, "rewards/margins": 0.851815402507782, "rewards/rejected": -1.4147989749908447, "step": 515 }, { "epoch": 0.54629021667761, "grad_norm": 47.75, "learning_rate": 1.0128599663169628e-06, "log_odds_chosen": 1.084162950515747, "log_odds_ratio": -0.4125159680843353, "logits/chosen": -2.4878952503204346, "logits/rejected": -2.245314359664917, "logps/chosen": -0.5130459666252136, "logps/rejected": -1.1407145261764526, "loss": 1.3142, "nll_loss": 0.901726245880127, "rewards/accuracies": 0.8343750238418579, "rewards/chosen": -0.5130459666252136, "rewards/margins": 0.6276686191558838, "rewards/rejected": -1.1407145261764526, "step": 520 }, { "epoch": 0.551543007222587, "grad_norm": 74.0, "learning_rate": 9.944884618454995e-07, "log_odds_chosen": 1.5892114639282227, "log_odds_ratio": -0.3318895697593689, "logits/chosen": -2.5057709217071533, "logits/rejected": -2.110414505004883, "logps/chosen": -0.5387485027313232, "logps/rejected": -1.5842351913452148, "loss": 1.2507, "nll_loss": 0.9187744855880737, "rewards/accuracies": 0.8812500238418579, "rewards/chosen": -0.5387485027313232, "rewards/margins": 1.0454866886138916, "rewards/rejected": -1.5842351913452148, "step": 525 }, { "epoch": 0.556795797767564, "grad_norm": 73.0, "learning_rate": 9.7611881761039e-07, "log_odds_chosen": 1.6785354614257812, "log_odds_ratio": -0.3325541019439697, "logits/chosen": -2.462970733642578, "logits/rejected": -2.220999240875244, "logps/chosen": -0.6112784147262573, "logps/rejected": -1.7486165761947632, "loss": 1.3345, "nll_loss": 1.0019125938415527, "rewards/accuracies": 0.887499988079071, "rewards/chosen": -0.6112784147262573, "rewards/margins": 1.1373381614685059, "rewards/rejected": -1.7486165761947632, "step": 530 }, { "epoch": 0.562048588312541, "grad_norm": 36.0, "learning_rate": 9.57757233673949e-07, "log_odds_chosen": 1.4563804864883423, "log_odds_ratio": -0.36100301146507263, "logits/chosen": -2.4625449180603027, "logits/rejected": -2.1974194049835205, "logps/chosen": -0.5516290664672852, "logps/rejected": -1.515852928161621, "loss": 1.2346, "nll_loss": 0.8735913038253784, "rewards/accuracies": 0.84375, "rewards/chosen": -0.5516290664672852, "rewards/margins": 0.9642238616943359, "rewards/rejected": -1.515852928161621, "step": 535 }, { "epoch": 0.5673013788575181, "grad_norm": 36.0, "learning_rate": 9.394099073780066e-07, "log_odds_chosen": 1.4258034229278564, "log_odds_ratio": -0.36102384328842163, "logits/chosen": -2.5518240928649902, "logits/rejected": -2.2731943130493164, "logps/chosen": -0.5590797662734985, "logps/rejected": -1.4916408061981201, "loss": 1.2649, "nll_loss": 0.9038828015327454, "rewards/accuracies": 0.8343750238418579, "rewards/chosen": -0.5590797662734985, "rewards/margins": 0.9325610399246216, "rewards/rejected": -1.4916408061981201, "step": 540 }, { "epoch": 0.572554169402495, "grad_norm": 64.0, "learning_rate": 9.210830312521991e-07, "log_odds_chosen": 1.605653166770935, "log_odds_ratio": -0.338408887386322, "logits/chosen": -2.5818705558776855, "logits/rejected": -2.311086416244507, "logps/chosen": -0.5466338992118835, "logps/rejected": -1.6157076358795166, "loss": 1.3041, "nll_loss": 0.9657169580459595, "rewards/accuracies": 0.875, "rewards/chosen": -0.5466338992118835, "rewards/margins": 1.0690736770629883, "rewards/rejected": -1.6157076358795166, "step": 545 }, { "epoch": 0.5778069599474721, "grad_norm": 57.25, "learning_rate": 9.027827909238901e-07, "log_odds_chosen": 1.8266319036483765, "log_odds_ratio": -0.3148033320903778, "logits/chosen": -2.48435115814209, "logits/rejected": -2.166586399078369, "logps/chosen": -0.5606757402420044, "logps/rejected": -1.8262403011322021, "loss": 1.2896, "nll_loss": 0.97479248046875, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": -0.5606757402420044, "rewards/margins": 1.2655649185180664, "rewards/rejected": -1.8262403011322021, "step": 550 }, { "epoch": 0.5830597504924491, "grad_norm": 47.5, "learning_rate": 8.845153630304139e-07, "log_odds_chosen": 1.663627028465271, "log_odds_ratio": -0.3311775028705597, "logits/chosen": -2.4467196464538574, "logits/rejected": -2.2170791625976562, "logps/chosen": -0.5954256057739258, "logps/rejected": -1.7486213445663452, "loss": 1.2756, "nll_loss": 0.9444006085395813, "rewards/accuracies": 0.878125011920929, "rewards/chosen": -0.5954256057739258, "rewards/margins": 1.1531956195831299, "rewards/rejected": -1.7486213445663452, "step": 555 }, { "epoch": 0.5883125410374261, "grad_norm": 45.5, "learning_rate": 8.662869131343606e-07, "log_odds_chosen": 1.4104127883911133, "log_odds_ratio": -0.39170485734939575, "logits/chosen": -2.5256340503692627, "logits/rejected": -2.213099241256714, "logps/chosen": -0.5794434547424316, "logps/rejected": -1.5348830223083496, "loss": 1.3375, "nll_loss": 0.9457686543464661, "rewards/accuracies": 0.84375, "rewards/chosen": -0.5794434547424316, "rewards/margins": 0.955439567565918, "rewards/rejected": -1.5348830223083496, "step": 560 }, { "epoch": 0.5935653315824031, "grad_norm": 72.0, "learning_rate": 8.481035936425926e-07, "log_odds_chosen": 1.1931443214416504, "log_odds_ratio": -0.3968736529350281, "logits/chosen": -2.56657338142395, "logits/rejected": -2.191765785217285, "logps/chosen": -0.5020140409469604, "logps/rejected": -1.227325201034546, "loss": 1.2792, "nll_loss": 0.8822978138923645, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.5020140409469604, "rewards/margins": 0.7253111600875854, "rewards/rejected": -1.227325201034546, "step": 565 }, { "epoch": 0.5988181221273802, "grad_norm": 37.25, "learning_rate": 8.29971541729707e-07, "log_odds_chosen": 1.549736738204956, "log_odds_ratio": -0.3515177369117737, "logits/chosen": -2.526639461517334, "logits/rejected": -2.2129909992218018, "logps/chosen": -0.5579209923744202, "logps/rejected": -1.5522905588150024, "loss": 1.2671, "nll_loss": 0.9156067967414856, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.5579209923744202, "rewards/margins": 0.9943695068359375, "rewards/rejected": -1.5522905588150024, "step": 570 }, { "epoch": 0.6040709126723572, "grad_norm": 78.5, "learning_rate": 8.118968772666338e-07, "log_odds_chosen": 1.9918029308319092, "log_odds_ratio": -0.33105817437171936, "logits/chosen": -2.5553669929504395, "logits/rejected": -2.255253791809082, "logps/chosen": -0.6138916015625, "logps/rejected": -2.058006763458252, "loss": 1.261, "nll_loss": 0.9299631118774414, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.6138916015625, "rewards/margins": 1.4441156387329102, "rewards/rejected": -2.058006763458252, "step": 575 }, { "epoch": 0.6093237032173342, "grad_norm": 32.25, "learning_rate": 7.938857007550796e-07, "log_odds_chosen": 1.5095994472503662, "log_odds_ratio": -0.36659660935401917, "logits/chosen": -2.4949142932891846, "logits/rejected": -2.217616558074951, "logps/chosen": -0.5693143606185913, "logps/rejected": -1.5770564079284668, "loss": 1.2795, "nll_loss": 0.9128750562667847, "rewards/accuracies": 0.8531249761581421, "rewards/chosen": -0.5693143606185913, "rewards/margins": 1.007741928100586, "rewards/rejected": -1.5770564079284668, "step": 580 }, { "epoch": 0.6145764937623113, "grad_norm": 44.5, "learning_rate": 7.759440912685042e-07, "log_odds_chosen": 1.313231348991394, "log_odds_ratio": -0.39206627011299133, "logits/chosen": -2.4366495609283447, "logits/rejected": -2.1927928924560547, "logps/chosen": -0.5398006439208984, "logps/rejected": -1.4002869129180908, "loss": 1.2987, "nll_loss": 0.9065971374511719, "rewards/accuracies": 0.8218749761581421, "rewards/chosen": -0.5398006439208984, "rewards/margins": 0.8604865074157715, "rewards/rejected": -1.4002869129180908, "step": 585 }, { "epoch": 0.6198292843072882, "grad_norm": 41.75, "learning_rate": 7.580781044003324e-07, "log_odds_chosen": 1.5099523067474365, "log_odds_ratio": -0.37858808040618896, "logits/chosen": -2.5282700061798096, "logits/rejected": -2.1985023021698, "logps/chosen": -0.554128110408783, "logps/rejected": -1.5762214660644531, "loss": 1.2642, "nll_loss": 0.885593593120575, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.554128110408783, "rewards/margins": 1.022093415260315, "rewards/rejected": -1.5762214660644531, "step": 590 }, { "epoch": 0.6250820748522653, "grad_norm": 94.0, "learning_rate": 7.402937702200904e-07, "log_odds_chosen": 1.7455905675888062, "log_odds_ratio": -0.3350276052951813, "logits/chosen": -2.5306236743927, "logits/rejected": -2.249689817428589, "logps/chosen": -0.5238341093063354, "logps/rejected": -1.7180259227752686, "loss": 1.2212, "nll_loss": 0.8861449956893921, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.5238341093063354, "rewards/margins": 1.1941916942596436, "rewards/rejected": -1.7180259227752686, "step": 595 }, { "epoch": 0.6303348653972423, "grad_norm": 57.0, "learning_rate": 7.225970912381556e-07, "log_odds_chosen": 1.5003291368484497, "log_odds_ratio": -0.391081303358078, "logits/chosen": -2.381641387939453, "logits/rejected": -2.1322736740112305, "logps/chosen": -0.5944348573684692, "logps/rejected": -1.6424591541290283, "loss": 1.3066, "nll_loss": 0.9154736399650574, "rewards/accuracies": 0.8218749761581421, "rewards/chosen": -0.5944348573684692, "rewards/margins": 1.048024296760559, "rewards/rejected": -1.6424591541290283, "step": 600 }, { "epoch": 0.6355876559422193, "grad_norm": 41.0, "learning_rate": 7.049940403798089e-07, "log_odds_chosen": 1.531709909439087, "log_odds_ratio": -0.3830433487892151, "logits/chosen": -2.4697697162628174, "logits/rejected": -2.217533826828003, "logps/chosen": -0.5523134469985962, "logps/rejected": -1.5712653398513794, "loss": 1.314, "nll_loss": 0.9309525489807129, "rewards/accuracies": 0.840624988079071, "rewards/chosen": -0.5523134469985962, "rewards/margins": 1.0189517736434937, "rewards/rejected": -1.5712653398513794, "step": 605 }, { "epoch": 0.6408404464871963, "grad_norm": 46.5, "learning_rate": 6.874905589692733e-07, "log_odds_chosen": 1.6414533853530884, "log_odds_ratio": -0.34355098009109497, "logits/chosen": -2.509610176086426, "logits/rejected": -2.1736972332000732, "logps/chosen": -0.5539788007736206, "logps/rejected": -1.6842210292816162, "loss": 1.2389, "nll_loss": 0.8953197598457336, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": -0.5539788007736206, "rewards/margins": 1.1302422285079956, "rewards/rejected": -1.6842210292816162, "step": 610 }, { "epoch": 0.6460932370321734, "grad_norm": 32.25, "learning_rate": 6.700925547244171e-07, "log_odds_chosen": 1.9415044784545898, "log_odds_ratio": -0.31946122646331787, "logits/chosen": -2.4332690238952637, "logits/rejected": -2.26471209526062, "logps/chosen": -0.6300308704376221, "logps/rejected": -2.049290180206299, "loss": 1.2482, "nll_loss": 0.9287741780281067, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.6300308704376221, "rewards/margins": 1.4192593097686768, "rewards/rejected": -2.049290180206299, "step": 615 }, { "epoch": 0.6513460275771503, "grad_norm": 32.5, "learning_rate": 6.528058997627995e-07, "log_odds_chosen": 1.9388889074325562, "log_odds_ratio": -0.3166273534297943, "logits/chosen": -2.5412425994873047, "logits/rejected": -2.1768264770507812, "logps/chosen": -0.5474293828010559, "logps/rejected": -1.9378162622451782, "loss": 1.2866, "nll_loss": 0.9699424505233765, "rewards/accuracies": 0.878125011920929, "rewards/chosen": -0.5474293828010559, "rewards/margins": 1.3903871774673462, "rewards/rejected": -1.9378162622451782, "step": 620 }, { "epoch": 0.6565988181221274, "grad_norm": 40.0, "learning_rate": 6.35636428619734e-07, "log_odds_chosen": 1.7123737335205078, "log_odds_ratio": -0.34193840622901917, "logits/chosen": -2.5048129558563232, "logits/rejected": -2.1842281818389893, "logps/chosen": -0.5440694093704224, "logps/rejected": -1.7357890605926514, "loss": 1.2903, "nll_loss": 0.9483565092086792, "rewards/accuracies": 0.859375, "rewards/chosen": -0.5440694093704224, "rewards/margins": 1.1917197704315186, "rewards/rejected": -1.7357890605926514, "step": 625 }, { "epoch": 0.6618516086671044, "grad_norm": 46.25, "learning_rate": 6.185899362790338e-07, "log_odds_chosen": 1.6516172885894775, "log_odds_ratio": -0.3549567461013794, "logits/chosen": -2.4393770694732666, "logits/rejected": -2.138049602508545, "logps/chosen": -0.5555499196052551, "logps/rejected": -1.7016226053237915, "loss": 1.2573, "nll_loss": 0.9023006558418274, "rewards/accuracies": 0.840624988079071, "rewards/chosen": -0.5555499196052551, "rewards/margins": 1.1460726261138916, "rewards/rejected": -1.7016226053237915, "step": 630 }, { "epoch": 0.6671043992120814, "grad_norm": 101.5, "learning_rate": 6.016721762171098e-07, "log_odds_chosen": 1.636366605758667, "log_odds_ratio": -0.3687242567539215, "logits/chosen": -2.469954252243042, "logits/rejected": -2.2552268505096436, "logps/chosen": -0.6394462585449219, "logps/rejected": -1.7851154804229736, "loss": 1.3697, "nll_loss": 1.000967025756836, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.6394462585449219, "rewards/margins": 1.1456692218780518, "rewards/rejected": -1.7851154804229736, "step": 635 }, { "epoch": 0.6723571897570584, "grad_norm": 64.5, "learning_rate": 5.848888584610726e-07, "log_odds_chosen": 1.693683385848999, "log_odds_ratio": -0.34921011328697205, "logits/chosen": -2.486765146255493, "logits/rejected": -2.2645862102508545, "logps/chosen": -0.5731798410415649, "logps/rejected": -1.7742217779159546, "loss": 1.261, "nll_loss": 0.9118081331253052, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5731798410415649, "rewards/margins": 1.2010419368743896, "rewards/rejected": -1.7742217779159546, "step": 640 }, { "epoch": 0.6776099803020355, "grad_norm": 58.5, "learning_rate": 5.682456476615072e-07, "log_odds_chosen": 1.4461402893066406, "log_odds_ratio": -0.3787740170955658, "logits/chosen": -2.355269432067871, "logits/rejected": -2.16302490234375, "logps/chosen": -0.5690776705741882, "logps/rejected": -1.5551892518997192, "loss": 1.2771, "nll_loss": 0.8983281254768372, "rewards/accuracies": 0.846875011920929, "rewards/chosen": -0.5690776705741882, "rewards/margins": 0.9861115217208862, "rewards/rejected": -1.5551892518997192, "step": 645 }, { "epoch": 0.6828627708470125, "grad_norm": 36.75, "learning_rate": 5.517481611805539e-07, "log_odds_chosen": 1.5578912496566772, "log_odds_ratio": -0.35105592012405396, "logits/chosen": -2.3847219944000244, "logits/rejected": -2.130415439605713, "logps/chosen": -0.537613570690155, "logps/rejected": -1.5832931995391846, "loss": 1.246, "nll_loss": 0.8949264287948608, "rewards/accuracies": 0.84375, "rewards/chosen": -0.537613570690155, "rewards/margins": 1.0456795692443848, "rewards/rejected": -1.5832931995391846, "step": 650 }, { "epoch": 0.6881155613919895, "grad_norm": 28.25, "learning_rate": 5.354019671959599e-07, "log_odds_chosen": 1.4725126028060913, "log_odds_ratio": -0.38070547580718994, "logits/chosen": -2.3801114559173584, "logits/rejected": -2.134171724319458, "logps/chosen": -0.5319515466690063, "logps/rejected": -1.5217872858047485, "loss": 1.3054, "nll_loss": 0.9246999621391296, "rewards/accuracies": 0.84375, "rewards/chosen": -0.5319515466690063, "rewards/margins": 0.9898357391357422, "rewards/rejected": -1.5217872858047485, "step": 655 }, { "epoch": 0.6933683519369666, "grad_norm": 35.5, "learning_rate": 5.192125828217202e-07, "log_odds_chosen": 1.628064513206482, "log_odds_ratio": -0.370327889919281, "logits/chosen": -2.5233168601989746, "logits/rejected": -2.1562933921813965, "logps/chosen": -0.5629066824913025, "logps/rejected": -1.6909490823745728, "loss": 1.2606, "nll_loss": 0.8903215527534485, "rewards/accuracies": 0.846875011920929, "rewards/chosen": -0.5629066824913025, "rewards/margins": 1.128042459487915, "rewards/rejected": -1.6909490823745728, "step": 660 }, { "epoch": 0.6986211424819435, "grad_norm": 51.5, "learning_rate": 5.031854722459652e-07, "log_odds_chosen": 1.8480112552642822, "log_odds_ratio": -0.3127003610134125, "logits/chosen": -2.4370510578155518, "logits/rejected": -2.0890867710113525, "logps/chosen": -0.5302228927612305, "logps/rejected": -1.8121706247329712, "loss": 1.2074, "nll_loss": 0.8947356939315796, "rewards/accuracies": 0.878125011920929, "rewards/chosen": -0.5302228927612305, "rewards/margins": 1.2819478511810303, "rewards/rejected": -1.8121706247329712, "step": 665 }, { "epoch": 0.7038739330269206, "grad_norm": 31.5, "learning_rate": 4.873260448867004e-07, "log_odds_chosen": 2.02109956741333, "log_odds_ratio": -0.31728652119636536, "logits/chosen": -2.470301628112793, "logits/rejected": -2.2189319133758545, "logps/chosen": -0.6230054497718811, "logps/rejected": -2.0598232746124268, "loss": 1.3239, "nll_loss": 1.0066121816635132, "rewards/accuracies": 0.8812500238418579, "rewards/chosen": -0.6230054497718811, "rewards/margins": 1.4368176460266113, "rewards/rejected": -2.0598232746124268, "step": 670 }, { "epoch": 0.7091267235718975, "grad_norm": 68.0, "learning_rate": 4.7163965356604117e-07, "log_odds_chosen": 1.897443413734436, "log_odds_ratio": -0.3486331105232239, "logits/chosen": -2.554206132888794, "logits/rejected": -2.1669750213623047, "logps/chosen": -0.64203941822052, "logps/rejected": -2.0166876316070557, "loss": 1.3553, "nll_loss": 1.0066633224487305, "rewards/accuracies": 0.84375, "rewards/chosen": -0.64203941822052, "rewards/margins": 1.3746483325958252, "rewards/rejected": -2.0166876316070557, "step": 675 }, { "epoch": 0.7143795141168746, "grad_norm": 34.5, "learning_rate": 4.561315927035445e-07, "log_odds_chosen": 1.707550048828125, "log_odds_ratio": -0.34410637617111206, "logits/chosen": -2.440441846847534, "logits/rejected": -2.1145124435424805, "logps/chosen": -0.5574239492416382, "logps/rejected": -1.7339591979980469, "loss": 1.2025, "nll_loss": 0.8583625555038452, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5574239492416382, "rewards/margins": 1.1765353679656982, "rewards/rejected": -1.7339591979980469, "step": 680 }, { "epoch": 0.7196323046618516, "grad_norm": 64.5, "learning_rate": 4.408070965292533e-07, "log_odds_chosen": 1.7007535696029663, "log_odds_ratio": -0.35346347093582153, "logits/chosen": -2.456326961517334, "logits/rejected": -2.1892619132995605, "logps/chosen": -0.5550821423530579, "logps/rejected": -1.7205698490142822, "loss": 1.2778, "nll_loss": 0.9243642091751099, "rewards/accuracies": 0.84375, "rewards/chosen": -0.5550821423530579, "rewards/margins": 1.1654876470565796, "rewards/rejected": -1.7205698490142822, "step": 685 }, { "epoch": 0.7248850952068286, "grad_norm": 42.25, "learning_rate": 4.256713373170564e-07, "log_odds_chosen": 1.5547049045562744, "log_odds_ratio": -0.36127448081970215, "logits/chosen": -2.46553373336792, "logits/rejected": -2.2510862350463867, "logps/chosen": -0.603643536567688, "logps/rejected": -1.6664206981658936, "loss": 1.3272, "nll_loss": 0.9659638404846191, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.603643536567688, "rewards/margins": 1.0627771615982056, "rewards/rejected": -1.6664206981658936, "step": 690 }, { "epoch": 0.7301378857518056, "grad_norm": 38.5, "learning_rate": 4.1072942363896025e-07, "log_odds_chosen": 1.6411514282226562, "log_odds_ratio": -0.3377731442451477, "logits/chosen": -2.5552942752838135, "logits/rejected": -2.229196071624756, "logps/chosen": -0.5576506853103638, "logps/rejected": -1.6714286804199219, "loss": 1.2942, "nll_loss": 0.9564154744148254, "rewards/accuracies": 0.878125011920929, "rewards/chosen": -0.5576506853103638, "rewards/margins": 1.1137781143188477, "rewards/rejected": -1.6714286804199219, "step": 695 }, { "epoch": 0.7353906762967827, "grad_norm": 26.125, "learning_rate": 3.9598639864085925e-07, "log_odds_chosen": 1.2980868816375732, "log_odds_ratio": -0.3892515301704407, "logits/chosen": -2.417532444000244, "logits/rejected": -2.2620291709899902, "logps/chosen": -0.5406171083450317, "logps/rejected": -1.3745439052581787, "loss": 1.283, "nll_loss": 0.893776535987854, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.5406171083450317, "rewards/margins": 0.833926796913147, "rewards/rejected": -1.3745439052581787, "step": 700 }, { "epoch": 0.7406434668417596, "grad_norm": 57.75, "learning_rate": 3.8144723834039073e-07, "log_odds_chosen": 1.3730871677398682, "log_odds_ratio": -0.38403210043907166, "logits/chosen": -2.492102861404419, "logits/rejected": -2.1305251121520996, "logps/chosen": -0.5153442621231079, "logps/rejected": -1.4106855392456055, "loss": 1.2797, "nll_loss": 0.8956896662712097, "rewards/accuracies": 0.8343750238418579, "rewards/chosen": -0.5153442621231079, "rewards/margins": 0.8953412175178528, "rewards/rejected": -1.4106855392456055, "step": 705 }, { "epoch": 0.7458962573867367, "grad_norm": 28.25, "learning_rate": 3.6711684994744486e-07, "log_odds_chosen": 1.7186520099639893, "log_odds_ratio": -0.33004146814346313, "logits/chosen": -2.537470817565918, "logits/rejected": -2.23635196685791, "logps/chosen": -0.4957657754421234, "logps/rejected": -1.6590726375579834, "loss": 1.2277, "nll_loss": 0.8976136445999146, "rewards/accuracies": 0.859375, "rewards/chosen": -0.4957657754421234, "rewards/margins": 1.1633068323135376, "rewards/rejected": -1.6590726375579834, "step": 710 }, { "epoch": 0.7511490479317138, "grad_norm": 50.25, "learning_rate": 3.530000702078999e-07, "log_odds_chosen": 1.9104875326156616, "log_odds_ratio": -0.30225199460983276, "logits/chosen": -2.41103196144104, "logits/rejected": -2.163609743118286, "logps/chosen": -0.535643458366394, "logps/rejected": -1.8592544794082642, "loss": 1.2363, "nll_loss": 0.9340142011642456, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.535643458366394, "rewards/margins": 1.3236110210418701, "rewards/rejected": -1.8592544794082642, "step": 715 }, { "epoch": 0.7564018384766907, "grad_norm": 215.0, "learning_rate": 3.391016637711389e-07, "log_odds_chosen": 1.9387279748916626, "log_odds_ratio": -0.32732483744621277, "logits/chosen": -2.503373861312866, "logits/rejected": -2.184051990509033, "logps/chosen": -0.6013236045837402, "logps/rejected": -1.979087233543396, "loss": 1.2995, "nll_loss": 0.9722166061401367, "rewards/accuracies": 0.8656250238418579, "rewards/chosen": -0.6013236045837402, "rewards/margins": 1.3777637481689453, "rewards/rejected": -1.979087233543396, "step": 720 }, { "epoch": 0.7616546290216678, "grad_norm": 64.5, "learning_rate": 3.2542632158190133e-07, "log_odds_chosen": 1.8217693567276, "log_odds_ratio": -0.3460733890533447, "logits/chosen": -2.4695355892181396, "logits/rejected": -2.266535758972168, "logps/chosen": -0.5930324792861938, "logps/rejected": -1.8648335933685303, "loss": 1.2692, "nll_loss": 0.9231220483779907, "rewards/accuracies": 0.875, "rewards/chosen": -0.5930324792861938, "rewards/margins": 1.271801233291626, "rewards/rejected": -1.8648335933685303, "step": 725 }, { "epoch": 0.7669074195666448, "grad_norm": 60.0, "learning_rate": 3.1197865929701017e-07, "log_odds_chosen": 1.9611870050430298, "log_odds_ratio": -0.3502156138420105, "logits/chosen": -2.595439910888672, "logits/rejected": -2.2361018657684326, "logps/chosen": -0.5836862921714783, "logps/rejected": -2.0304791927337646, "loss": 1.3186, "nll_loss": 0.9683855175971985, "rewards/accuracies": 0.840624988079071, "rewards/chosen": -0.5836862921714783, "rewards/margins": 1.4467928409576416, "rewards/rejected": -2.0304791927337646, "step": 730 }, { "epoch": 0.7721602101116218, "grad_norm": 63.75, "learning_rate": 2.987632157275114e-07, "log_odds_chosen": 1.6977773904800415, "log_odds_ratio": -0.3493327796459198, "logits/chosen": -2.5089340209960938, "logits/rejected": -2.2651724815368652, "logps/chosen": -0.5790574550628662, "logps/rejected": -1.748196005821228, "loss": 1.2328, "nll_loss": 0.883512020111084, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.5790574550628662, "rewards/margins": 1.1691386699676514, "rewards/rejected": -1.748196005821228, "step": 735 }, { "epoch": 0.7774130006565988, "grad_norm": 47.0, "learning_rate": 2.8578445130674833e-07, "log_odds_chosen": 1.5758211612701416, "log_odds_ratio": -0.3468172550201416, "logits/chosen": -2.4574217796325684, "logits/rejected": -2.2445011138916016, "logps/chosen": -0.5336965322494507, "logps/rejected": -1.6166375875473022, "loss": 1.2211, "nll_loss": 0.8742717504501343, "rewards/accuracies": 0.8531249761581421, "rewards/chosen": -0.5336965322494507, "rewards/margins": 1.082940936088562, "rewards/rejected": -1.6166375875473022, "step": 740 }, { "epoch": 0.7826657912015759, "grad_norm": 33.25, "learning_rate": 2.73046746584891e-07, "log_odds_chosen": 1.6906464099884033, "log_odds_ratio": -0.3406273126602173, "logits/chosen": -2.5112786293029785, "logits/rejected": -2.2304630279541016, "logps/chosen": -0.5315414667129517, "logps/rejected": -1.6976295709609985, "loss": 1.2098, "nll_loss": 0.8692021369934082, "rewards/accuracies": 0.859375, "rewards/chosen": -0.5315414667129517, "rewards/margins": 1.1660881042480469, "rewards/rejected": -1.6976295709609985, "step": 745 }, { "epoch": 0.7879185817465528, "grad_norm": 40.25, "learning_rate": 2.605544007504279e-07, "log_odds_chosen": 1.7450376749038696, "log_odds_ratio": -0.32459336519241333, "logits/chosen": -2.553576946258545, "logits/rejected": -2.259354591369629, "logps/chosen": -0.5844911336898804, "logps/rejected": -1.801825761795044, "loss": 1.2855, "nll_loss": 0.9608856439590454, "rewards/accuracies": 0.8656250238418579, "rewards/chosen": -0.5844911336898804, "rewards/margins": 1.217334508895874, "rewards/rejected": -1.801825761795044, "step": 750 }, { "epoch": 0.7931713722915299, "grad_norm": 36.5, "learning_rate": 2.4831163017911683e-07, "log_odds_chosen": 1.651958703994751, "log_odds_ratio": -0.34634822607040405, "logits/chosen": -2.405233144760132, "logits/rejected": -2.138745069503784, "logps/chosen": -0.5561404228210449, "logps/rejected": -1.6944749355316162, "loss": 1.2428, "nll_loss": 0.8964967727661133, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.5561404228210449, "rewards/margins": 1.1383345127105713, "rewards/rejected": -1.6944749355316162, "step": 755 }, { "epoch": 0.7984241628365069, "grad_norm": 46.75, "learning_rate": 2.3632256701088814e-07, "log_odds_chosen": 1.698676347732544, "log_odds_ratio": -0.3407271206378937, "logits/chosen": -2.5164520740509033, "logits/rejected": -2.169098377227783, "logps/chosen": -0.546515166759491, "logps/rejected": -1.726548433303833, "loss": 1.2007, "nll_loss": 0.8599587678909302, "rewards/accuracies": 0.84375, "rewards/chosen": -0.546515166759491, "rewards/margins": 1.1800330877304077, "rewards/rejected": -1.726548433303833, "step": 760 }, { "epoch": 0.8036769533814839, "grad_norm": 31.625, "learning_rate": 2.245912577551785e-07, "log_odds_chosen": 1.7021366357803345, "log_odds_ratio": -0.36240798234939575, "logits/chosen": -2.583963632583618, "logits/rejected": -2.3067448139190674, "logps/chosen": -0.610865592956543, "logps/rejected": -1.795292854309082, "loss": 1.3449, "nll_loss": 0.9824475049972534, "rewards/accuracies": 0.840624988079071, "rewards/chosen": -0.610865592956543, "rewards/margins": 1.184427261352539, "rewards/rejected": -1.795292854309082, "step": 765 }, { "epoch": 0.8089297439264609, "grad_norm": 97.5, "learning_rate": 2.131216619251659e-07, "log_odds_chosen": 1.825273871421814, "log_odds_ratio": -0.3238641917705536, "logits/chosen": -2.533202648162842, "logits/rejected": -2.3293657302856445, "logps/chosen": -0.6178978681564331, "logps/rejected": -1.9215917587280273, "loss": 1.3183, "nll_loss": 0.9943979978561401, "rewards/accuracies": 0.8656250238418579, "rewards/chosen": -0.6178978681564331, "rewards/margins": 1.3036938905715942, "rewards/rejected": -1.9215917587280273, "step": 770 }, { "epoch": 0.814182534471438, "grad_norm": 63.0, "learning_rate": 2.0191765070136768e-07, "log_odds_chosen": 1.8990042209625244, "log_odds_ratio": -0.3358913064002991, "logits/chosen": -2.4345898628234863, "logits/rejected": -2.134831190109253, "logps/chosen": -0.5476903915405273, "logps/rejected": -1.865012526512146, "loss": 1.2756, "nll_loss": 0.9396783709526062, "rewards/accuracies": 0.878125011920929, "rewards/chosen": -0.5476903915405273, "rewards/margins": 1.317322015762329, "rewards/rejected": -1.865012526512146, "step": 775 }, { "epoch": 0.8194353250164149, "grad_norm": 102.5, "learning_rate": 1.9098300562505264e-07, "log_odds_chosen": 1.6969549655914307, "log_odds_ratio": -0.3712518811225891, "logits/chosen": -2.4698281288146973, "logits/rejected": -2.181797981262207, "logps/chosen": -0.5788697004318237, "logps/rejected": -1.7834043502807617, "loss": 1.2457, "nll_loss": 0.8744741678237915, "rewards/accuracies": 0.815625011920929, "rewards/chosen": -0.5788697004318237, "rewards/margins": 1.2045344114303589, "rewards/rejected": -1.7834043502807617, "step": 780 }, { "epoch": 0.824688115561392, "grad_norm": 28.625, "learning_rate": 1.803214173219072e-07, "log_odds_chosen": 1.9696476459503174, "log_odds_ratio": -0.30190950632095337, "logits/chosen": -2.483811616897583, "logits/rejected": -2.173767328262329, "logps/chosen": -0.535027265548706, "logps/rejected": -1.9312782287597656, "loss": 1.2045, "nll_loss": 0.902554988861084, "rewards/accuracies": 0.875, "rewards/chosen": -0.535027265548706, "rewards/margins": 1.3962510824203491, "rewards/rejected": -1.9312782287597656, "step": 785 }, { "epoch": 0.8299409061063691, "grad_norm": 31.125, "learning_rate": 1.6993648425638796e-07, "log_odds_chosen": 1.6274923086166382, "log_odds_ratio": -0.3982171416282654, "logits/chosen": -2.5815181732177734, "logits/rejected": -2.206310987472534, "logps/chosen": -0.5905428528785706, "logps/rejected": -1.7506492137908936, "loss": 1.3427, "nll_loss": 0.9444986581802368, "rewards/accuracies": 0.8343750238418579, "rewards/chosen": -0.5905428528785706, "rewards/margins": 1.1601064205169678, "rewards/rejected": -1.7506492137908936, "step": 790 }, { "epoch": 0.835193696651346, "grad_norm": 61.0, "learning_rate": 1.5983171151717921e-07, "log_odds_chosen": 1.5922825336456299, "log_odds_ratio": -0.3533628284931183, "logits/chosen": -2.4570369720458984, "logits/rejected": -2.210930824279785, "logps/chosen": -0.581910252571106, "logps/rejected": -1.6624376773834229, "loss": 1.2185, "nll_loss": 0.8651579022407532, "rewards/accuracies": 0.8656250238418579, "rewards/chosen": -0.581910252571106, "rewards/margins": 1.080527424812317, "rewards/rejected": -1.6624376773834229, "step": 795 }, { "epoch": 0.8404464871963231, "grad_norm": 56.25, "learning_rate": 1.5001050963416716e-07, "log_odds_chosen": 1.7499481439590454, "log_odds_ratio": -0.3268365263938904, "logits/chosen": -2.4593491554260254, "logits/rejected": -2.1416468620300293, "logps/chosen": -0.5591254234313965, "logps/rejected": -1.7770287990570068, "loss": 1.1877, "nll_loss": 0.8609007596969604, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.5591254234313965, "rewards/margins": 1.2179033756256104, "rewards/rejected": -1.7770287990570068, "step": 800 }, { "epoch": 0.8456992777413, "grad_norm": 59.25, "learning_rate": 1.4047619342732908e-07, "log_odds_chosen": 1.5950630903244019, "log_odds_ratio": -0.3615456819534302, "logits/chosen": -2.5065274238586426, "logits/rejected": -2.24869441986084, "logps/chosen": -0.6077946424484253, "logps/rejected": -1.7057603597640991, "loss": 1.273, "nll_loss": 0.911415696144104, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.6077946424484253, "rewards/margins": 1.0979657173156738, "rewards/rejected": -1.7057603597640991, "step": 805 }, { "epoch": 0.8509520682862771, "grad_norm": 30.0, "learning_rate": 1.3123198088792577e-07, "log_odds_chosen": 1.6475883722305298, "log_odds_ratio": -0.37195760011672974, "logits/chosen": -2.4656014442443848, "logits/rejected": -2.1296868324279785, "logps/chosen": -0.5928062200546265, "logps/rejected": -1.764866828918457, "loss": 1.2998, "nll_loss": 0.9278379678726196, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5928062200546265, "rewards/margins": 1.172060489654541, "rewards/rejected": -1.764866828918457, "step": 810 }, { "epoch": 0.8562048588312541, "grad_norm": 48.25, "learning_rate": 1.2228099209237607e-07, "log_odds_chosen": 1.6707931756973267, "log_odds_ratio": -0.35219767689704895, "logits/chosen": -2.416558027267456, "logits/rejected": -2.1250758171081543, "logps/chosen": -0.577375054359436, "logps/rejected": -1.7357622385025024, "loss": 1.3212, "nll_loss": 0.969050407409668, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.577375054359436, "rewards/margins": 1.1583871841430664, "rewards/rejected": -1.7357622385025024, "step": 815 }, { "epoch": 0.8614576493762311, "grad_norm": 48.5, "learning_rate": 1.1362624814917842e-07, "log_odds_chosen": 1.469254732131958, "log_odds_ratio": -0.3808806836605072, "logits/chosen": -2.428011178970337, "logits/rejected": -2.1460485458374023, "logps/chosen": -0.549521803855896, "logps/rejected": -1.5279179811477661, "loss": 1.2772, "nll_loss": 0.8962807655334473, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.549521803855896, "rewards/margins": 0.9783961176872253, "rewards/rejected": -1.5279179811477661, "step": 820 }, { "epoch": 0.8667104399212081, "grad_norm": 34.5, "learning_rate": 1.0527067017923652e-07, "log_odds_chosen": 1.5520200729370117, "log_odds_ratio": -0.3591814637184143, "logits/chosen": -2.5619750022888184, "logits/rejected": -2.3039004802703857, "logps/chosen": -0.5574966073036194, "logps/rejected": -1.6048591136932373, "loss": 1.2779, "nll_loss": 0.918703556060791, "rewards/accuracies": 0.840624988079071, "rewards/chosen": -0.5574966073036194, "rewards/margins": 1.0473625659942627, "rewards/rejected": -1.6048591136932373, "step": 825 }, { "epoch": 0.8719632304661852, "grad_norm": 39.0, "learning_rate": 9.721707832993231e-08, "log_odds_chosen": 1.7053543329238892, "log_odds_ratio": -0.33547329902648926, "logits/chosen": -2.483564615249634, "logits/rejected": -2.2165513038635254, "logps/chosen": -0.5104734301567078, "logps/rejected": -1.664214849472046, "loss": 1.1955, "nll_loss": 0.8600661158561707, "rewards/accuracies": 0.8531249761581421, "rewards/chosen": -0.5104734301567078, "rewards/margins": 1.1537415981292725, "rewards/rejected": -1.664214849472046, "step": 830 }, { "epoch": 0.8772160210111621, "grad_norm": 34.25, "learning_rate": 8.946819082327828e-08, "log_odds_chosen": 1.5886516571044922, "log_odds_ratio": -0.3529045283794403, "logits/chosen": -2.3829362392425537, "logits/rejected": -2.1005430221557617, "logps/chosen": -0.5660222172737122, "logps/rejected": -1.645013451576233, "loss": 1.2596, "nll_loss": 0.9066807627677917, "rewards/accuracies": 0.8531249761581421, "rewards/chosen": -0.5660222172737122, "rewards/margins": 1.078991174697876, "rewards/rejected": -1.645013451576233, "step": 835 }, { "epoch": 0.8824688115561392, "grad_norm": 36.75, "learning_rate": 8.202662303847297e-08, "log_odds_chosen": 1.7980045080184937, "log_odds_ratio": -0.3362274765968323, "logits/chosen": -2.490861654281616, "logits/rejected": -2.1576590538024902, "logps/chosen": -0.5558806657791138, "logps/rejected": -1.7892097234725952, "loss": 1.2791, "nll_loss": 0.9428805112838745, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.5558806657791138, "rewards/margins": 1.2333290576934814, "rewards/rejected": -1.7892097234725952, "step": 840 }, { "epoch": 0.8877216021011162, "grad_norm": 55.5, "learning_rate": 7.48948866291661e-08, "log_odds_chosen": 1.7913442850112915, "log_odds_ratio": -0.32501915097236633, "logits/chosen": -2.5119128227233887, "logits/rejected": -2.193650960922241, "logps/chosen": -0.5597657561302185, "logps/rejected": -1.8090870380401611, "loss": 1.234, "nll_loss": 0.9089807271957397, "rewards/accuracies": 0.890625, "rewards/chosen": -0.5597657561302185, "rewards/margins": 1.2493212223052979, "rewards/rejected": -1.8090870380401611, "step": 845 }, { "epoch": 0.8929743926460932, "grad_norm": 39.5, "learning_rate": 6.80753886757336e-08, "log_odds_chosen": 1.5741755962371826, "log_odds_ratio": -0.34667596220970154, "logits/chosen": -2.4587669372558594, "logits/rejected": -2.187401056289673, "logps/chosen": -0.5418094396591187, "logps/rejected": -1.594808578491211, "loss": 1.2259, "nll_loss": 0.8791839480400085, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": -0.5418094396591187, "rewards/margins": 1.0529991388320923, "rewards/rejected": -1.594808578491211, "step": 850 }, { "epoch": 0.8982271831910703, "grad_norm": 25.625, "learning_rate": 6.157043087284797e-08, "log_odds_chosen": 1.708722710609436, "log_odds_ratio": -0.34805282950401306, "logits/chosen": -2.472571849822998, "logits/rejected": -2.1671009063720703, "logps/chosen": -0.5452659130096436, "logps/rejected": -1.7160043716430664, "loss": 1.2583, "nll_loss": 0.9102743268013, "rewards/accuracies": 0.8343750238418579, "rewards/chosen": -0.5452659130096436, "rewards/margins": 1.1707384586334229, "rewards/rejected": -1.7160043716430664, "step": 855 }, { "epoch": 0.9034799737360473, "grad_norm": 30.75, "learning_rate": 5.538220875261734e-08, "log_odds_chosen": 1.7142833471298218, "log_odds_ratio": -0.31549376249313354, "logits/chosen": -2.5251572132110596, "logits/rejected": -2.228562593460083, "logps/chosen": -0.5315389633178711, "logps/rejected": -1.699853539466858, "loss": 1.2153, "nll_loss": 0.8998427391052246, "rewards/accuracies": 0.8812500238418579, "rewards/chosen": -0.5315389633178711, "rewards/margins": 1.1683146953582764, "rewards/rejected": -1.699853539466858, "step": 860 }, { "epoch": 0.9087327642810243, "grad_norm": 50.75, "learning_rate": 4.9512810943557083e-08, "log_odds_chosen": 1.7466316223144531, "log_odds_ratio": -0.3088250756263733, "logits/chosen": -2.492593288421631, "logits/rejected": -2.1745035648345947, "logps/chosen": -0.5664678812026978, "logps/rejected": -1.7472212314605713, "loss": 1.2514, "nll_loss": 0.9425439834594727, "rewards/accuracies": 0.8968750238418579, "rewards/chosen": -0.5664678812026978, "rewards/margins": 1.1807533502578735, "rewards/rejected": -1.7472212314605713, "step": 865 }, { "epoch": 0.9139855548260013, "grad_norm": 42.25, "learning_rate": 4.396421846564235e-08, "log_odds_chosen": 1.420175313949585, "log_odds_ratio": -0.39961543679237366, "logits/chosen": -2.5364463329315186, "logits/rejected": -2.272904634475708, "logps/chosen": -0.5728206038475037, "logps/rejected": -1.547858715057373, "loss": 1.3665, "nll_loss": 0.9668663144111633, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.5728206038475037, "rewards/margins": 0.9750380516052246, "rewards/rejected": -1.547858715057373, "step": 870 }, { "epoch": 0.9192383453709784, "grad_norm": 88.0, "learning_rate": 3.87383040616811e-08, "log_odds_chosen": 1.8361127376556396, "log_odds_ratio": -0.3314815163612366, "logits/chosen": -2.5305237770080566, "logits/rejected": -2.205706834793091, "logps/chosen": -0.5290949940681458, "logps/rejected": -1.7841472625732422, "loss": 1.2038, "nll_loss": 0.8723037838935852, "rewards/accuracies": 0.871874988079071, "rewards/chosen": -0.5290949940681458, "rewards/margins": 1.2550525665283203, "rewards/rejected": -1.7841472625732422, "step": 875 }, { "epoch": 0.9244911359159553, "grad_norm": 59.25, "learning_rate": 3.383683156523187e-08, "log_odds_chosen": 1.5235865116119385, "log_odds_ratio": -0.3648485541343689, "logits/chosen": -2.4326975345611572, "logits/rejected": -2.0849132537841797, "logps/chosen": -0.5309010744094849, "logps/rejected": -1.563246726989746, "loss": 1.2608, "nll_loss": 0.8959411382675171, "rewards/accuracies": 0.875, "rewards/chosen": -0.5309010744094849, "rewards/margins": 1.0323456525802612, "rewards/rejected": -1.563246726989746, "step": 880 }, { "epoch": 0.9297439264609324, "grad_norm": 33.25, "learning_rate": 2.9261455305280014e-08, "log_odds_chosen": 1.715073585510254, "log_odds_ratio": -0.3189467787742615, "logits/chosen": -2.4626471996307373, "logits/rejected": -2.1226587295532227, "logps/chosen": -0.5422563552856445, "logps/rejected": -1.709451675415039, "loss": 1.2541, "nll_loss": 0.9351384043693542, "rewards/accuracies": 0.8843749761581421, "rewards/chosen": -0.5422563552856445, "rewards/margins": 1.1671955585479736, "rewards/rejected": -1.709451675415039, "step": 885 }, { "epoch": 0.9349967170059094, "grad_norm": 60.0, "learning_rate": 2.5013719547874788e-08, "log_odds_chosen": 1.6406991481781006, "log_odds_ratio": -0.37183278799057007, "logits/chosen": -2.503505229949951, "logits/rejected": -2.177072525024414, "logps/chosen": -0.5782598257064819, "logps/rejected": -1.7351022958755493, "loss": 1.2879, "nll_loss": 0.9161151051521301, "rewards/accuracies": 0.8531249761581421, "rewards/chosen": -0.5782598257064819, "rewards/margins": 1.1568424701690674, "rewards/rejected": -1.7351022958755493, "step": 890 }, { "epoch": 0.9402495075508864, "grad_norm": 30.5, "learning_rate": 2.1095057974913177e-08, "log_odds_chosen": 1.5425198078155518, "log_odds_ratio": -0.3476109504699707, "logits/chosen": -2.463806390762329, "logits/rejected": -2.2360615730285645, "logps/chosen": -0.5494548082351685, "logps/rejected": -1.5607731342315674, "loss": 1.2287, "nll_loss": 0.8811271786689758, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.5494548082351685, "rewards/margins": 1.011318325996399, "rewards/rejected": -1.5607731342315674, "step": 895 }, { "epoch": 0.9455022980958634, "grad_norm": 40.25, "learning_rate": 1.7506793200248504e-08, "log_odds_chosen": 1.79372239112854, "log_odds_ratio": -0.34891271591186523, "logits/chosen": -2.4137704372406006, "logits/rejected": -2.1525025367736816, "logps/chosen": -0.5806652307510376, "logps/rejected": -1.8389291763305664, "loss": 1.2788, "nll_loss": 0.9298731684684753, "rewards/accuracies": 0.8531249761581421, "rewards/chosen": -0.5806652307510376, "rewards/margins": 1.2582640647888184, "rewards/rejected": -1.8389291763305664, "step": 900 }, { "epoch": 0.9507550886408405, "grad_norm": 33.75, "learning_rate": 1.4250136323285866e-08, "log_odds_chosen": 1.7694854736328125, "log_odds_ratio": -0.339056134223938, "logits/chosen": -2.458627223968506, "logits/rejected": -2.133309841156006, "logps/chosen": -0.5246182680130005, "logps/rejected": -1.748004674911499, "loss": 1.2399, "nll_loss": 0.9008275866508484, "rewards/accuracies": 0.840624988079071, "rewards/chosen": -0.5246182680130005, "rewards/margins": 1.2233861684799194, "rewards/rejected": -1.748004674911499, "step": 905 }, { "epoch": 0.9560078791858174, "grad_norm": 42.25, "learning_rate": 1.1326186520215885e-08, "log_odds_chosen": 1.4994810819625854, "log_odds_ratio": -0.3889666199684143, "logits/chosen": -2.42987322807312, "logits/rejected": -2.2474777698516846, "logps/chosen": -0.5686417818069458, "logps/rejected": -1.6017091274261475, "loss": 1.3525, "nll_loss": 0.9635759592056274, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5686417818069458, "rewards/margins": 1.0330675840377808, "rewards/rejected": -1.6017091274261475, "step": 910 }, { "epoch": 0.9612606697307945, "grad_norm": 77.0, "learning_rate": 8.735930673024805e-09, "log_odds_chosen": 1.6517369747161865, "log_odds_ratio": -0.34624212980270386, "logits/chosen": -2.3800384998321533, "logits/rejected": -2.0897443294525146, "logps/chosen": -0.5255088806152344, "logps/rejected": -1.6485977172851562, "loss": 1.3009, "nll_loss": 0.9546435475349426, "rewards/accuracies": 0.8656250238418579, "rewards/chosen": -0.5255088806152344, "rewards/margins": 1.1230888366699219, "rewards/rejected": -1.6485977172851562, "step": 915 }, { "epoch": 0.9665134602757715, "grad_norm": 30.875, "learning_rate": 6.480243036404598e-09, "log_odds_chosen": 1.8001991510391235, "log_odds_ratio": -0.3332251012325287, "logits/chosen": -2.499809980392456, "logits/rejected": -2.291926860809326, "logps/chosen": -0.5624955892562866, "logps/rejected": -1.824375867843628, "loss": 1.2743, "nll_loss": 0.941113293170929, "rewards/accuracies": 0.84375, "rewards/chosen": -0.5624955892562866, "rewards/margins": 1.2618802785873413, "rewards/rejected": -1.824375867843628, "step": 920 }, { "epoch": 0.9717662508207485, "grad_norm": 44.75, "learning_rate": 4.559884942677783e-09, "log_odds_chosen": 1.4665955305099487, "log_odds_ratio": -0.38308554887771606, "logits/chosen": -2.397916078567505, "logits/rejected": -2.142017126083374, "logps/chosen": -0.5283843874931335, "logps/rejected": -1.4880872964859009, "loss": 1.2288, "nll_loss": 0.8457143902778625, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5283843874931335, "rewards/margins": 0.9597029685974121, "rewards/rejected": -1.4880872964859009, "step": 925 }, { "epoch": 0.9770190413657256, "grad_norm": 47.75, "learning_rate": 2.9755045448351944e-09, "log_odds_chosen": 1.4579670429229736, "log_odds_ratio": -0.3762872815132141, "logits/chosen": -2.4584195613861084, "logits/rejected": -2.198525905609131, "logps/chosen": -0.5691961050033569, "logps/rejected": -1.5534415245056152, "loss": 1.2789, "nll_loss": 0.9026187062263489, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.5691961050033569, "rewards/margins": 0.9842453002929688, "rewards/rejected": -1.5534415245056152, "step": 930 }, { "epoch": 0.9822718319107026, "grad_norm": 49.0, "learning_rate": 1.7276365977730856e-09, "log_odds_chosen": 1.5441417694091797, "log_odds_ratio": -0.3624028265476227, "logits/chosen": -2.535742998123169, "logits/rejected": -2.1748859882354736, "logps/chosen": -0.5510035753250122, "logps/rejected": -1.6074680089950562, "loss": 1.2792, "nll_loss": 0.9167704582214355, "rewards/accuracies": 0.8531249761581421, "rewards/chosen": -0.5510035753250122, "rewards/margins": 1.056464433670044, "rewards/rejected": -1.6074680089950562, "step": 935 }, { "epoch": 0.9875246224556796, "grad_norm": 50.0, "learning_rate": 8.16702277804504e-10, "log_odds_chosen": 1.6150617599487305, "log_odds_ratio": -0.3433099687099457, "logits/chosen": -2.4907350540161133, "logits/rejected": -2.166508674621582, "logps/chosen": -0.5353943109512329, "logps/rejected": -1.6448442935943604, "loss": 1.2318, "nll_loss": 0.888446033000946, "rewards/accuracies": 0.8656250238418579, "rewards/chosen": -0.5353943109512329, "rewards/margins": 1.1094499826431274, "rewards/rejected": -1.6448442935943604, "step": 940 }, { "epoch": 0.9927774130006566, "grad_norm": 44.75, "learning_rate": 2.430090405054486e-10, "log_odds_chosen": 1.457880973815918, "log_odds_ratio": -0.36118173599243164, "logits/chosen": -2.4720263481140137, "logits/rejected": -2.178345203399658, "logps/chosen": -0.5418224334716797, "logps/rejected": -1.4925849437713623, "loss": 1.309, "nll_loss": 0.9478532671928406, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.5418224334716797, "rewards/margins": 0.9507624506950378, "rewards/rejected": -1.4925849437713623, "step": 945 }, { "epoch": 0.9980302035456337, "grad_norm": 33.0, "learning_rate": 6.750516943321294e-12, "log_odds_chosen": 1.7491207122802734, "log_odds_ratio": -0.319837361574173, "logits/chosen": -2.4439542293548584, "logits/rejected": -2.1569535732269287, "logps/chosen": -0.5160128474235535, "logps/rejected": -1.7111313343048096, "loss": 1.2106, "nll_loss": 0.8908060193061829, "rewards/accuracies": 0.871874988079071, "rewards/chosen": -0.5160128474235535, "rewards/margins": 1.1951183080673218, "rewards/rejected": -1.7111313343048096, "step": 950 }, { "epoch": 0.999080761654629, "step": 951, "total_flos": 0.0, "train_loss": 1.3879666121600178, "train_runtime": 22584.718, "train_samples_per_second": 2.697, "train_steps_per_second": 0.042 } ], "logging_steps": 5, "max_steps": 951, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }