{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9952, "eval_steps": 500, "global_step": 351, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.042666666666666665, "grad_norm": 1944.0, "learning_rate": 6.944444444444445e-06, "log_odds_chosen": 3.244816541671753, "log_odds_ratio": -7.320008754730225, "logps/chosen": -23.434091567993164, "logps/rejected": -26.67911148071289, "loss": 385.5662, "nll_loss": 9.728838920593262, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -11.717045783996582, "rewards/margins": 1.6225097179412842, "rewards/rejected": -13.339555740356445, "step": 5 }, { "epoch": 0.08533333333333333, "grad_norm": 480.0, "learning_rate": 1.388888888888889e-05, "log_odds_chosen": 3.327153444290161, "log_odds_ratio": -6.861664772033691, "logps/chosen": -23.437702178955078, "logps/rejected": -26.76658058166504, "loss": 359.3477, "nll_loss": 8.957830429077148, "rewards/accuracies": 0.484375, "rewards/chosen": -11.718851089477539, "rewards/margins": 1.664438247680664, "rewards/rejected": -13.38329029083252, "step": 10 }, { "epoch": 0.128, "grad_norm": 478.0, "learning_rate": 2.0833333333333336e-05, "log_odds_chosen": 7.689353942871094, "log_odds_ratio": -6.320155620574951, "logps/chosen": -22.647563934326172, "logps/rejected": -30.33740234375, "loss": 335.2714, "nll_loss": 8.804255485534668, "rewards/accuracies": 0.534375011920929, "rewards/chosen": -11.323781967163086, "rewards/margins": 3.8449196815490723, "rewards/rejected": -15.168701171875, "step": 15 }, { "epoch": 0.17066666666666666, "grad_norm": 1224.0, "learning_rate": 2.777777777777778e-05, "log_odds_chosen": -1.4896892309188843, "log_odds_ratio": -8.66812515258789, "logps/chosen": -19.805835723876953, "logps/rejected": -18.316551208496094, "loss": 347.0143, "nll_loss": 7.7210588455200195, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": -9.902917861938477, "rewards/margins": -0.7446417212486267, "rewards/rejected": -9.158275604248047, "step": 20 }, { "epoch": 0.21333333333333335, "grad_norm": 454.0, "learning_rate": 3.472222222222222e-05, "log_odds_chosen": -0.5688272714614868, "log_odds_ratio": -3.2956409454345703, "logps/chosen": -8.265535354614258, "logps/rejected": -7.680577278137207, "loss": 147.1787, "nll_loss": 3.784611463546753, "rewards/accuracies": 0.53125, "rewards/chosen": -4.132767677307129, "rewards/margins": -0.2924785017967224, "rewards/rejected": -3.8402886390686035, "step": 25 }, { "epoch": 0.256, "grad_norm": 248.0, "learning_rate": 4.166666666666667e-05, "log_odds_chosen": 0.4854741096496582, "log_odds_ratio": -0.7124366760253906, "logps/chosen": -1.896276831626892, "logps/rejected": -2.31614089012146, "loss": 63.2151, "nll_loss": 2.18540620803833, "rewards/accuracies": 0.659375011920929, "rewards/chosen": -0.948138415813446, "rewards/margins": 0.2099320888519287, "rewards/rejected": -1.15807044506073, "step": 30 }, { "epoch": 0.2986666666666667, "grad_norm": 244.0, "learning_rate": 4.8611111111111115e-05, "log_odds_chosen": 1.1114161014556885, "log_odds_ratio": -0.5872758030891418, "logps/chosen": -2.1694393157958984, "logps/rejected": -3.173887014389038, "loss": 56.2731, "nll_loss": 2.2425026893615723, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -1.0847196578979492, "rewards/margins": 0.5022238492965698, "rewards/rejected": -1.586943507194519, "step": 35 }, { "epoch": 0.3413333333333333, "grad_norm": 876.0, "learning_rate": 4.998010925565448e-05, "log_odds_chosen": 1.8376754522323608, "log_odds_ratio": -0.5564089417457581, "logps/chosen": -2.5326688289642334, "logps/rejected": -4.283413887023926, "loss": 56.2911, "nll_loss": 2.51664137840271, "rewards/accuracies": 0.71875, "rewards/chosen": -1.2663344144821167, "rewards/margins": 0.8753722906112671, "rewards/rejected": -2.141706943511963, "step": 40 }, { "epoch": 0.384, "grad_norm": 840.0, "learning_rate": 4.989935734988098e-05, "log_odds_chosen": 2.519404649734497, "log_odds_ratio": -0.607566773891449, "logps/chosen": -2.6822659969329834, "logps/rejected": -5.1160101890563965, "loss": 48.1337, "nll_loss": 2.4153072834014893, "rewards/accuracies": 0.684374988079071, "rewards/chosen": -1.3411329984664917, "rewards/margins": 1.2168718576431274, "rewards/rejected": -2.5580050945281982, "step": 45 }, { "epoch": 0.4266666666666667, "grad_norm": 246.0, "learning_rate": 4.975670171853926e-05, "log_odds_chosen": 2.2282567024230957, "log_odds_ratio": -0.5513948202133179, "logps/chosen": -1.9226570129394531, "logps/rejected": -4.054492950439453, "loss": 41.9951, "nll_loss": 2.021462917327881, "rewards/accuracies": 0.7281249761581421, "rewards/chosen": -0.9613285064697266, "rewards/margins": 1.06591796875, "rewards/rejected": -2.0272464752197266, "step": 50 }, { "epoch": 0.4693333333333333, "grad_norm": 255.0, "learning_rate": 4.9552497026005974e-05, "log_odds_chosen": 3.256324052810669, "log_odds_ratio": -0.6231921911239624, "logps/chosen": -2.7280972003936768, "logps/rejected": -5.880007743835449, "loss": 42.3838, "nll_loss": 2.3024609088897705, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -1.3640486001968384, "rewards/margins": 1.5759552717208862, "rewards/rejected": -2.9400038719177246, "step": 55 }, { "epoch": 0.512, "grad_norm": 384.0, "learning_rate": 4.928725095732169e-05, "log_odds_chosen": 4.082178115844727, "log_odds_ratio": -0.5644229054450989, "logps/chosen": -2.1776318550109863, "logps/rejected": -6.1262030601501465, "loss": 30.4184, "nll_loss": 2.084578514099121, "rewards/accuracies": 0.7718750238418579, "rewards/chosen": -1.0888159275054932, "rewards/margins": 1.9742858409881592, "rewards/rejected": -3.0631015300750732, "step": 60 }, { "epoch": 0.5546666666666666, "grad_norm": 298.0, "learning_rate": 4.896162295600589e-05, "log_odds_chosen": 3.969560146331787, "log_odds_ratio": -0.49455374479293823, "logps/chosen": -2.0065150260925293, "logps/rejected": -5.84859561920166, "loss": 27.8168, "nll_loss": 2.0055079460144043, "rewards/accuracies": 0.75, "rewards/chosen": -1.0032575130462646, "rewards/margins": 1.9210401773452759, "rewards/rejected": -2.92429780960083, "step": 65 }, { "epoch": 0.5973333333333334, "grad_norm": 324.0, "learning_rate": 4.8576422584576514e-05, "log_odds_chosen": 4.770110130310059, "log_odds_ratio": -0.6286700963973999, "logps/chosen": -2.9133007526397705, "logps/rejected": -7.5522050857543945, "loss": 35.0427, "nll_loss": 2.424912452697754, "rewards/accuracies": 0.721875011920929, "rewards/chosen": -1.4566503763198853, "rewards/margins": 2.3194520473480225, "rewards/rejected": -3.7761025428771973, "step": 70 }, { "epoch": 0.64, "grad_norm": 560.0, "learning_rate": 4.813260751184992e-05, "log_odds_chosen": 2.769090175628662, "log_odds_ratio": -0.5607115030288696, "logps/chosen": -2.347092390060425, "logps/rejected": -5.006721019744873, "loss": 35.9478, "nll_loss": 2.062216281890869, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -1.1735461950302124, "rewards/margins": 1.3298143148422241, "rewards/rejected": -2.5033605098724365, "step": 75 }, { "epoch": 0.6826666666666666, "grad_norm": 366.0, "learning_rate": 4.763128113202537e-05, "log_odds_chosen": 3.7859439849853516, "log_odds_ratio": -0.5916112065315247, "logps/chosen": -2.050175189971924, "logps/rejected": -5.733065605163574, "loss": 29.9308, "nll_loss": 1.9341661930084229, "rewards/accuracies": 0.715624988079071, "rewards/chosen": -1.025087594985962, "rewards/margins": 1.8414453268051147, "rewards/rejected": -2.866532802581787, "step": 80 }, { "epoch": 0.7253333333333334, "grad_norm": 404.0, "learning_rate": 4.707368982147318e-05, "log_odds_chosen": 3.4313957691192627, "log_odds_ratio": -0.5835500955581665, "logps/chosen": -2.4331655502319336, "logps/rejected": -5.7494306564331055, "loss": 30.1888, "nll_loss": 2.095024585723877, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.2165827751159668, "rewards/margins": 1.658132553100586, "rewards/rejected": -2.8747153282165527, "step": 85 }, { "epoch": 0.768, "grad_norm": 502.0, "learning_rate": 4.6461219840046654e-05, "log_odds_chosen": 4.333449363708496, "log_odds_ratio": -0.5407252311706543, "logps/chosen": -2.572343349456787, "logps/rejected": -6.752326011657715, "loss": 24.4015, "nll_loss": 2.214782238006592, "rewards/accuracies": 0.778124988079071, "rewards/chosen": -1.2861716747283936, "rewards/margins": 2.0899910926818848, "rewards/rejected": -3.3761630058288574, "step": 90 }, { "epoch": 0.8106666666666666, "grad_norm": 206.0, "learning_rate": 4.579539388462173e-05, "log_odds_chosen": 2.9368910789489746, "log_odds_ratio": -0.596178412437439, "logps/chosen": -2.0651535987854004, "logps/rejected": -4.870631217956543, "loss": 30.807, "nll_loss": 2.019071102142334, "rewards/accuracies": 0.6875, "rewards/chosen": -1.0325767993927002, "rewards/margins": 1.4027388095855713, "rewards/rejected": -2.4353156089782715, "step": 95 }, { "epoch": 0.8533333333333334, "grad_norm": 664.0, "learning_rate": 4.5077867303432546e-05, "log_odds_chosen": 4.723878860473633, "log_odds_ratio": -0.5207824110984802, "logps/chosen": -1.8639392852783203, "logps/rejected": -6.444918155670166, "loss": 25.71, "nll_loss": 1.9058935642242432, "rewards/accuracies": 0.75, "rewards/chosen": -0.9319696426391602, "rewards/margins": 2.290489435195923, "rewards/rejected": -3.222459077835083, "step": 100 }, { "epoch": 0.896, "grad_norm": 460.0, "learning_rate": 4.431042398061499e-05, "log_odds_chosen": 5.0844526290893555, "log_odds_ratio": -0.5295128226280212, "logps/chosen": -2.28594708442688, "logps/rejected": -7.24350118637085, "loss": 23.1814, "nll_loss": 1.9962854385375977, "rewards/accuracies": 0.7593749761581421, "rewards/chosen": -1.14297354221344, "rewards/margins": 2.4787771701812744, "rewards/rejected": -3.621750593185425, "step": 105 }, { "epoch": 0.9386666666666666, "grad_norm": 288.0, "learning_rate": 4.34949719011896e-05, "log_odds_chosen": 4.126296043395996, "log_odds_ratio": -0.5034898519515991, "logps/chosen": -2.1573965549468994, "logps/rejected": -6.1412553787231445, "loss": 20.2386, "nll_loss": 1.9906526803970337, "rewards/accuracies": 0.7406250238418579, "rewards/chosen": -1.0786982774734497, "rewards/margins": 1.9919294118881226, "rewards/rejected": -3.0706276893615723, "step": 110 }, { "epoch": 0.9813333333333333, "grad_norm": 219.0, "learning_rate": 4.263353840751022e-05, "log_odds_chosen": 3.84074068069458, "log_odds_ratio": -0.5733323097229004, "logps/chosen": -2.011295795440674, "logps/rejected": -5.717989444732666, "loss": 25.07, "nll_loss": 1.920058012008667, "rewards/accuracies": 0.71875, "rewards/chosen": -1.005647897720337, "rewards/margins": 1.853346586227417, "rewards/rejected": -2.858994722366333, "step": 115 }, { "epoch": 1.024, "grad_norm": 306.0, "learning_rate": 4.172826515897146e-05, "log_odds_chosen": 4.661204814910889, "log_odds_ratio": -0.4891470968723297, "logps/chosen": -2.1154136657714844, "logps/rejected": -6.599400520324707, "loss": 18.1475, "nll_loss": 1.8821449279785156, "rewards/accuracies": 0.78125, "rewards/chosen": -1.0577068328857422, "rewards/margins": 2.241992712020874, "rewards/rejected": -3.2997002601623535, "step": 120 }, { "epoch": 1.0666666666666667, "grad_norm": 276.0, "learning_rate": 4.078140280750597e-05, "log_odds_chosen": 5.181860446929932, "log_odds_ratio": -0.43918895721435547, "logps/chosen": -1.882028579711914, "logps/rejected": -6.872792720794678, "loss": 9.8689, "nll_loss": 1.7425817251205444, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.941014289855957, "rewards/margins": 2.4953818321228027, "rewards/rejected": -3.436396360397339, "step": 125 }, { "epoch": 1.1093333333333333, "grad_norm": 652.0, "learning_rate": 3.9795305402109195e-05, "log_odds_chosen": 6.771618843078613, "log_odds_ratio": -0.4078306555747986, "logps/chosen": -2.1886353492736816, "logps/rejected": -8.770003318786621, "loss": 10.1183, "nll_loss": 2.0149312019348145, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -1.0943176746368408, "rewards/margins": 3.290684461593628, "rewards/rejected": -4.3850016593933105, "step": 130 }, { "epoch": 1.152, "grad_norm": 664.0, "learning_rate": 3.8772424536302564e-05, "log_odds_chosen": 6.775271415710449, "log_odds_ratio": -0.4933851361274719, "logps/chosen": -2.101372241973877, "logps/rejected": -8.653040885925293, "loss": 10.4441, "nll_loss": 2.0559275150299072, "rewards/accuracies": 0.765625, "rewards/chosen": -1.0506861209869385, "rewards/margins": 3.275834321975708, "rewards/rejected": -4.3265204429626465, "step": 135 }, { "epoch": 1.1946666666666665, "grad_norm": 308.0, "learning_rate": 3.771530325308579e-05, "log_odds_chosen": 5.011104106903076, "log_odds_ratio": -0.3648762106895447, "logps/chosen": -1.885170340538025, "logps/rejected": -6.628064155578613, "loss": 7.9507, "nll_loss": 1.7945435047149658, "rewards/accuracies": 0.8125, "rewards/chosen": -0.9425851702690125, "rewards/margins": 2.3714470863342285, "rewards/rejected": -3.3140320777893066, "step": 140 }, { "epoch": 1.2373333333333334, "grad_norm": 856.0, "learning_rate": 3.662656972253127e-05, "log_odds_chosen": 6.560722351074219, "log_odds_ratio": -0.3384131193161011, "logps/chosen": -2.5444369316101074, "logps/rejected": -8.864481925964355, "loss": 10.1386, "nll_loss": 2.3033275604248047, "rewards/accuracies": 0.84375, "rewards/chosen": -1.2722184658050537, "rewards/margins": 3.160022258758545, "rewards/rejected": -4.432240962982178, "step": 145 }, { "epoch": 1.28, "grad_norm": 484.0, "learning_rate": 3.550893070773914e-05, "log_odds_chosen": 4.28635311126709, "log_odds_ratio": -0.39339035749435425, "logps/chosen": -1.7995922565460205, "logps/rejected": -5.857707500457764, "loss": 10.8643, "nll_loss": 1.821944236755371, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.8997961282730103, "rewards/margins": 2.029057741165161, "rewards/rejected": -2.928853750228882, "step": 150 }, { "epoch": 1.3226666666666667, "grad_norm": 402.0, "learning_rate": 3.436516483539781e-05, "log_odds_chosen": 3.875488758087158, "log_odds_ratio": -0.3778989017009735, "logps/chosen": -1.592816948890686, "logps/rejected": -5.23328161239624, "loss": 10.4548, "nll_loss": 1.670241355895996, "rewards/accuracies": 0.8343750238418579, "rewards/chosen": -0.796408474445343, "rewards/margins": 1.8202323913574219, "rewards/rejected": -2.61664080619812, "step": 155 }, { "epoch": 1.3653333333333333, "grad_norm": 1032.0, "learning_rate": 3.3198115687680115e-05, "log_odds_chosen": 5.857348442077637, "log_odds_ratio": -0.4748494029045105, "logps/chosen": -2.9357197284698486, "logps/rejected": -8.583894729614258, "loss": 16.9983, "nll_loss": 2.4070534706115723, "rewards/accuracies": 0.809374988079071, "rewards/chosen": -1.4678598642349243, "rewards/margins": 2.824087142944336, "rewards/rejected": -4.291947364807129, "step": 160 }, { "epoch": 1.408, "grad_norm": 350.0, "learning_rate": 3.201068473265007e-05, "log_odds_chosen": 5.250009059906006, "log_odds_ratio": -0.4220094680786133, "logps/chosen": -1.8191912174224854, "logps/rejected": -6.8444318771362305, "loss": 6.9106, "nll_loss": 1.7983490228652954, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.9095956087112427, "rewards/margins": 2.512620449066162, "rewards/rejected": -3.4222159385681152, "step": 165 }, { "epoch": 1.4506666666666668, "grad_norm": 410.0, "learning_rate": 3.0805824110756064e-05, "log_odds_chosen": 4.978752613067627, "log_odds_ratio": -0.3623356521129608, "logps/chosen": -1.7844550609588623, "logps/rejected": -6.520079135894775, "loss": 8.704, "nll_loss": 1.795832633972168, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.8922275304794312, "rewards/margins": 2.367812395095825, "rewards/rejected": -3.2600395679473877, "step": 170 }, { "epoch": 1.4933333333333334, "grad_norm": 604.0, "learning_rate": 2.958652929534456e-05, "log_odds_chosen": 5.599987506866455, "log_odds_ratio": -0.4776592254638672, "logps/chosen": -1.9634740352630615, "logps/rejected": -7.329955101013184, "loss": 5.1659, "nll_loss": 1.689762830734253, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.9817370176315308, "rewards/margins": 2.6832404136657715, "rewards/rejected": -3.664977550506592, "step": 175 }, { "epoch": 1.536, "grad_norm": 720.0, "learning_rate": 2.8355831645441388e-05, "log_odds_chosen": 6.7552642822265625, "log_odds_ratio": -0.47188645601272583, "logps/chosen": -2.2946345806121826, "logps/rejected": -8.818719863891602, "loss": 1.0241, "nll_loss": 1.8295665979385376, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -1.1473172903060913, "rewards/margins": 3.262042284011841, "rewards/rejected": -4.409359931945801, "step": 180 }, { "epoch": 1.5786666666666667, "grad_norm": 588.0, "learning_rate": 2.7116790869315582e-05, "log_odds_chosen": 6.408926963806152, "log_odds_ratio": -0.3623564839363098, "logps/chosen": -1.9395840167999268, "logps/rejected": -8.078444480895996, "loss": -3.5024, "nll_loss": 1.8322137594223022, "rewards/accuracies": 0.840624988079071, "rewards/chosen": -0.9697920083999634, "rewards/margins": 3.069430112838745, "rewards/rejected": -4.039222240447998, "step": 185 }, { "epoch": 1.6213333333333333, "grad_norm": 680.0, "learning_rate": 2.587248741756253e-05, "log_odds_chosen": 5.76962423324585, "log_odds_ratio": -0.43182724714279175, "logps/chosen": -2.518749952316284, "logps/rejected": -8.067339897155762, "loss": 12.9187, "nll_loss": 2.242504596710205, "rewards/accuracies": 0.8343750238418579, "rewards/chosen": -1.259374976158142, "rewards/margins": 2.774294853210449, "rewards/rejected": -4.033669948577881, "step": 190 }, { "epoch": 1.6640000000000001, "grad_norm": 344.0, "learning_rate": 2.4626014824618415e-05, "log_odds_chosen": 5.181513786315918, "log_odds_ratio": -0.4605328440666199, "logps/chosen": -2.409707546234131, "logps/rejected": -7.369992256164551, "loss": 10.867, "nll_loss": 2.1529345512390137, "rewards/accuracies": 0.8125, "rewards/chosen": -1.2048537731170654, "rewards/margins": 2.480142116546631, "rewards/rejected": -3.6849961280822754, "step": 195 }, { "epoch": 1.7066666666666666, "grad_norm": 300.0, "learning_rate": 2.3380472017746202e-05, "log_odds_chosen": 4.388461589813232, "log_odds_ratio": -0.3886472284793854, "logps/chosen": -1.969788908958435, "logps/rejected": -6.0931878089904785, "loss": 4.992, "nll_loss": 1.8285843133926392, "rewards/accuracies": 0.828125, "rewards/chosen": -0.9848944544792175, "rewards/margins": 2.061699628829956, "rewards/rejected": -3.0465939044952393, "step": 200 }, { "epoch": 1.7493333333333334, "grad_norm": 426.0, "learning_rate": 2.2138955612614207e-05, "log_odds_chosen": 4.932843208312988, "log_odds_ratio": -0.3715705871582031, "logps/chosen": -2.034646511077881, "logps/rejected": -6.7041802406311035, "loss": 2.4883, "nll_loss": 1.8913071155548096, "rewards/accuracies": 0.840624988079071, "rewards/chosen": -1.0173232555389404, "rewards/margins": 2.3347668647766113, "rewards/rejected": -3.3520901203155518, "step": 205 }, { "epoch": 1.792, "grad_norm": 568.0, "learning_rate": 2.090455221462156e-05, "log_odds_chosen": 6.162611484527588, "log_odds_ratio": -0.35488948225975037, "logps/chosen": -2.7630133628845215, "logps/rejected": -8.680257797241211, "loss": 8.013, "nll_loss": 2.3462085723876953, "rewards/accuracies": 0.8531249761581421, "rewards/chosen": -1.3815066814422607, "rewards/margins": 2.9586222171783447, "rewards/rejected": -4.3401288986206055, "step": 210 }, { "epoch": 1.8346666666666667, "grad_norm": 444.0, "learning_rate": 1.9680330745110954e-05, "log_odds_chosen": 6.676821231842041, "log_odds_ratio": -0.35103148221969604, "logps/chosen": -2.7386841773986816, "logps/rejected": -9.172264099121094, "loss": 3.6042, "nll_loss": 2.2856781482696533, "rewards/accuracies": 0.84375, "rewards/chosen": -1.3693420886993408, "rewards/margins": 3.2167904376983643, "rewards/rejected": -4.586132049560547, "step": 215 }, { "epoch": 1.8773333333333333, "grad_norm": 304.0, "learning_rate": 1.8469334811546542e-05, "log_odds_chosen": 5.581591606140137, "log_odds_ratio": -0.32436496019363403, "logps/chosen": -1.8424360752105713, "logps/rejected": -7.145682334899902, "loss": -3.5616, "nll_loss": 1.7858359813690186, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.9212180376052856, "rewards/margins": 2.651623010635376, "rewards/rejected": -3.572841167449951, "step": 220 }, { "epoch": 1.92, "grad_norm": 380.0, "learning_rate": 1.7274575140626318e-05, "log_odds_chosen": 5.548121452331543, "log_odds_ratio": -0.35478144884109497, "logps/chosen": -1.7758629322052002, "logps/rejected": -7.074243068695068, "loss": -4.049, "nll_loss": 1.7282772064208984, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.8879314661026001, "rewards/margins": 2.6491901874542236, "rewards/rejected": -3.537121534347534, "step": 225 }, { "epoch": 1.9626666666666668, "grad_norm": 480.0, "learning_rate": 1.609902209314108e-05, "log_odds_chosen": 4.978342533111572, "log_odds_ratio": -0.4749108850955963, "logps/chosen": -2.2677643299102783, "logps/rejected": -7.024735927581787, "loss": 8.6972, "nll_loss": 1.9503904581069946, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -1.1338821649551392, "rewards/margins": 2.378485918045044, "rewards/rejected": -3.5123679637908936, "step": 230 }, { "epoch": 2.005333333333333, "grad_norm": 312.0, "learning_rate": 1.4945598279189565e-05, "log_odds_chosen": 6.838204383850098, "log_odds_ratio": -0.2882954180240631, "logps/chosen": -1.9998537302017212, "logps/rejected": -8.556573867797852, "loss": -13.2073, "nll_loss": 1.872807264328003, "rewards/accuracies": 0.887499988079071, "rewards/chosen": -0.9999268651008606, "rewards/margins": 3.2783596515655518, "rewards/rejected": -4.278286933898926, "step": 235 }, { "epoch": 2.048, "grad_norm": 454.0, "learning_rate": 1.3817171292109183e-05, "log_odds_chosen": 7.710375785827637, "log_odds_ratio": -0.20927873253822327, "logps/chosen": -1.8049392700195312, "logps/rejected": -9.193501472473145, "loss": -23.6918, "nll_loss": 1.7197338342666626, "rewards/accuracies": 0.918749988079071, "rewards/chosen": -0.9024696350097656, "rewards/margins": 3.6942811012268066, "rewards/rejected": -4.596750736236572, "step": 240 }, { "epoch": 2.0906666666666665, "grad_norm": 502.0, "learning_rate": 1.271654657918722e-05, "log_odds_chosen": 7.912774562835693, "log_odds_ratio": -0.21029043197631836, "logps/chosen": -1.9993797540664673, "logps/rejected": -9.580835342407227, "loss": -17.0599, "nll_loss": 1.8537139892578125, "rewards/accuracies": 0.909375011920929, "rewards/chosen": -0.9996898770332336, "rewards/margins": 3.790727138519287, "rewards/rejected": -4.790417671203613, "step": 245 }, { "epoch": 2.1333333333333333, "grad_norm": 474.0, "learning_rate": 1.1646460466876783e-05, "log_odds_chosen": 10.823402404785156, "log_odds_ratio": -0.1927870213985443, "logps/chosen": -2.2192864418029785, "logps/rejected": -12.710413932800293, "loss": -30.9129, "nll_loss": 1.8327054977416992, "rewards/accuracies": 0.8968750238418579, "rewards/chosen": -1.1096432209014893, "rewards/margins": 5.2455644607543945, "rewards/rejected": -6.3552069664001465, "step": 250 }, { "epoch": 2.176, "grad_norm": 968.0, "learning_rate": 1.0609573357858166e-05, "log_odds_chosen": 10.115427017211914, "log_odds_ratio": -0.26323577761650085, "logps/chosen": -2.626338481903076, "logps/rejected": -12.449792861938477, "loss": -20.4821, "nll_loss": 2.039127826690674, "rewards/accuracies": 0.8656250238418579, "rewards/chosen": -1.313169240951538, "rewards/margins": 4.911727428436279, "rewards/rejected": -6.224896430969238, "step": 255 }, { "epoch": 2.2186666666666666, "grad_norm": 496.0, "learning_rate": 9.608463116858542e-06, "log_odds_chosen": 10.273801803588867, "log_odds_ratio": -0.2681754231452942, "logps/chosen": -2.019514322280884, "logps/rejected": -11.975286483764648, "loss": -22.5762, "nll_loss": 1.8232285976409912, "rewards/accuracies": 0.8968750238418579, "rewards/chosen": -1.009757161140442, "rewards/margins": 4.9778852462768555, "rewards/rejected": -5.987643241882324, "step": 260 }, { "epoch": 2.2613333333333334, "grad_norm": 844.0, "learning_rate": 8.645618661674142e-06, "log_odds_chosen": 10.029523849487305, "log_odds_ratio": -0.1975027322769165, "logps/chosen": -1.7940292358398438, "logps/rejected": -11.495455741882324, "loss": -28.7693, "nll_loss": 1.6812047958374023, "rewards/accuracies": 0.9468749761581421, "rewards/chosen": -0.8970146179199219, "rewards/margins": 4.85071325302124, "rewards/rejected": -5.747727870941162, "step": 265 }, { "epoch": 2.304, "grad_norm": 832.0, "learning_rate": 7.723433775328384e-06, "log_odds_chosen": 9.740096092224121, "log_odds_ratio": -0.1778937429189682, "logps/chosen": -1.8428449630737305, "logps/rejected": -11.24230670928955, "loss": -29.1488, "nll_loss": 1.835345983505249, "rewards/accuracies": 0.918749988079071, "rewards/chosen": -0.9214224815368652, "rewards/margins": 4.69973087310791, "rewards/rejected": -5.621153354644775, "step": 270 }, { "epoch": 2.3466666666666667, "grad_norm": 724.0, "learning_rate": 6.844201154750177e-06, "log_odds_chosen": 8.547750473022461, "log_odds_ratio": -0.3056946098804474, "logps/chosen": -2.104405164718628, "logps/rejected": -10.345699310302734, "loss": -11.4456, "nll_loss": 2.045605182647705, "rewards/accuracies": 0.9156249761581421, "rewards/chosen": -1.052202582359314, "rewards/margins": 4.120646953582764, "rewards/rejected": -5.172849655151367, "step": 275 }, { "epoch": 2.389333333333333, "grad_norm": 764.0, "learning_rate": 6.010106710768052e-06, "log_odds_chosen": 8.403626441955566, "log_odds_ratio": -0.31066903471946716, "logps/chosen": -1.7713654041290283, "logps/rejected": -9.849652290344238, "loss": -22.0833, "nll_loss": 1.7295347452163696, "rewards/accuracies": 0.8843749761581421, "rewards/chosen": -0.8856827020645142, "rewards/margins": 4.0391435623168945, "rewards/rejected": -4.924826145172119, "step": 280 }, { "epoch": 2.432, "grad_norm": 600.0, "learning_rate": 5.223224133591476e-06, "log_odds_chosen": 8.72407054901123, "log_odds_ratio": -0.20049254596233368, "logps/chosen": -1.742493987083435, "logps/rejected": -10.135185241699219, "loss": -25.8772, "nll_loss": 1.6694942712783813, "rewards/accuracies": 0.918749988079071, "rewards/chosen": -0.8712469935417175, "rewards/margins": 4.196345329284668, "rewards/rejected": -5.067592620849609, "step": 285 }, { "epoch": 2.474666666666667, "grad_norm": 572.0, "learning_rate": 4.4855097372902135e-06, "log_odds_chosen": 10.502706527709961, "log_odds_ratio": -0.20751452445983887, "logps/chosen": -2.106323719024658, "logps/rejected": -12.293268203735352, "loss": -34.1987, "nll_loss": 1.7795928716659546, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -1.053161859512329, "rewards/margins": 5.093472957611084, "rewards/rejected": -6.146634101867676, "step": 290 }, { "epoch": 2.517333333333333, "grad_norm": 612.0, "learning_rate": 3.798797596089351e-06, "log_odds_chosen": 10.75947380065918, "log_odds_ratio": -0.24149751663208008, "logps/chosen": -1.9401658773422241, "logps/rejected": -12.338597297668457, "loss": -39.145, "nll_loss": 1.7528741359710693, "rewards/accuracies": 0.918749988079071, "rewards/chosen": -0.9700829386711121, "rewards/margins": 5.199215888977051, "rewards/rejected": -6.1692986488342285, "step": 295 }, { "epoch": 2.56, "grad_norm": 756.0, "learning_rate": 3.164794984571759e-06, "log_odds_chosen": 9.287260055541992, "log_odds_ratio": -0.22732312977313995, "logps/chosen": -1.9922412633895874, "logps/rejected": -10.951237678527832, "loss": -26.7303, "nll_loss": 1.8050663471221924, "rewards/accuracies": 0.8812500238418579, "rewards/chosen": -0.9961206316947937, "rewards/margins": 4.479498386383057, "rewards/rejected": -5.475618839263916, "step": 300 }, { "epoch": 2.602666666666667, "grad_norm": 504.0, "learning_rate": 2.58507813312448e-06, "log_odds_chosen": 9.52094841003418, "log_odds_ratio": -0.24382726848125458, "logps/chosen": -1.6575381755828857, "logps/rejected": -10.844666481018066, "loss": -28.7465, "nll_loss": 1.7088956832885742, "rewards/accuracies": 0.903124988079071, "rewards/chosen": -0.8287690877914429, "rewards/margins": 4.593564033508301, "rewards/rejected": -5.422333240509033, "step": 305 }, { "epoch": 2.6453333333333333, "grad_norm": 776.0, "learning_rate": 2.0610883091816525e-06, "log_odds_chosen": 8.335237503051758, "log_odds_ratio": -0.29422852396965027, "logps/chosen": -2.0911993980407715, "logps/rejected": -10.096451759338379, "loss": -18.1084, "nll_loss": 1.8924099206924438, "rewards/accuracies": 0.909375011920929, "rewards/chosen": -1.0455996990203857, "rewards/margins": 4.002626419067383, "rewards/rejected": -5.0482258796691895, "step": 310 }, { "epoch": 2.6879999999999997, "grad_norm": 748.0, "learning_rate": 1.59412823400657e-06, "log_odds_chosen": 8.92529582977295, "log_odds_ratio": -0.2323012799024582, "logps/chosen": -2.046330213546753, "logps/rejected": -10.661474227905273, "loss": -24.1607, "nll_loss": 1.8576195240020752, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -1.0231651067733765, "rewards/margins": 4.307572364807129, "rewards/rejected": -5.330737113952637, "step": 315 }, { "epoch": 2.7306666666666666, "grad_norm": 888.0, "learning_rate": 1.1853588439213442e-06, "log_odds_chosen": 9.666345596313477, "log_odds_ratio": -0.16688935458660126, "logps/chosen": -1.6839125156402588, "logps/rejected": -11.016705513000488, "loss": -33.6397, "nll_loss": 1.7162431478500366, "rewards/accuracies": 0.921875, "rewards/chosen": -0.8419562578201294, "rewards/margins": 4.666396141052246, "rewards/rejected": -5.508352756500244, "step": 320 }, { "epoch": 2.7733333333333334, "grad_norm": 776.0, "learning_rate": 8.357964040363209e-07, "log_odds_chosen": 9.806904792785645, "log_odds_ratio": -0.22234347462654114, "logps/chosen": -1.9922107458114624, "logps/rejected": -11.46212100982666, "loss": -28.8822, "nll_loss": 1.8163951635360718, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.9961053729057312, "rewards/margins": 4.734955310821533, "rewards/rejected": -5.73106050491333, "step": 325 }, { "epoch": 2.816, "grad_norm": 556.0, "learning_rate": 5.463099816548579e-07, "log_odds_chosen": 10.028252601623535, "log_odds_ratio": -0.2060905247926712, "logps/chosen": -1.965572714805603, "logps/rejected": -11.667535781860352, "loss": -29.5679, "nll_loss": 1.859195351600647, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.9827863574028015, "rewards/margins": 4.850982189178467, "rewards/rejected": -5.833767890930176, "step": 330 }, { "epoch": 2.8586666666666667, "grad_norm": 660.0, "learning_rate": 3.1761928563510955e-07, "log_odds_chosen": 8.567400932312012, "log_odds_ratio": -0.2573620676994324, "logps/chosen": -1.9988925457000732, "logps/rejected": -10.231236457824707, "loss": -22.4237, "nll_loss": 1.8354896306991577, "rewards/accuracies": 0.8968750238418579, "rewards/chosen": -0.9994462728500366, "rewards/margins": 4.116171836853027, "rewards/rejected": -5.1156182289123535, "step": 335 }, { "epoch": 2.9013333333333335, "grad_norm": 482.0, "learning_rate": 1.5029287708036854e-07, "log_odds_chosen": 9.851056098937988, "log_odds_ratio": -0.17909038066864014, "logps/chosen": -1.8972835540771484, "logps/rejected": -11.43578815460205, "loss": -34.3968, "nll_loss": 1.7508747577667236, "rewards/accuracies": 0.9281250238418579, "rewards/chosen": -0.9486417770385742, "rewards/margins": 4.769252777099609, "rewards/rejected": -5.717894077301025, "step": 340 }, { "epoch": 2.944, "grad_norm": 708.0, "learning_rate": 4.474675580662113e-08, "log_odds_chosen": 9.49111270904541, "log_odds_ratio": -0.23770084977149963, "logps/chosen": -1.9169508218765259, "logps/rejected": -11.103437423706055, "loss": -26.6563, "nll_loss": 1.734938383102417, "rewards/accuracies": 0.8968750238418579, "rewards/chosen": -0.9584754109382629, "rewards/margins": 4.59324312210083, "rewards/rejected": -5.551718711853027, "step": 345 }, { "epoch": 2.986666666666667, "grad_norm": 520.0, "learning_rate": 1.2433261014244136e-09, "log_odds_chosen": 9.099141120910645, "log_odds_ratio": -0.22082491219043732, "logps/chosen": -1.970796823501587, "logps/rejected": -10.720368385314941, "loss": -25.9912, "nll_loss": 1.78976309299469, "rewards/accuracies": 0.903124988079071, "rewards/chosen": -0.9853984117507935, "rewards/margins": 4.374785900115967, "rewards/rejected": -5.360184192657471, "step": 350 }, { "epoch": 2.9952, "step": 351, "total_flos": 0.0, "train_loss": 25.09100561196308, "train_runtime": 7757.8492, "train_samples_per_second": 2.9, "train_steps_per_second": 0.045 } ], "logging_steps": 5, "max_steps": 351, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }