{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.9881235154394297, "eval_steps": 500, "global_step": 312, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06334125098970704, "grad_norm": 1566.8653564453125, "learning_rate": 6.25e-07, "log_odds_chosen": -2.225217342376709, "log_odds_ratio": -13.344230651855469, "logps/chosen": -26.207233428955078, "logps/rejected": -23.98147964477539, "loss": 166.3142, "nll_loss": 10.39463996887207, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -1.3103615045547485, "rewards/margins": -0.11128749698400497, "rewards/rejected": -1.1990740299224854, "step": 5 }, { "epoch": 0.12668250197941408, "grad_norm": 954.8382568359375, "learning_rate": 1.40625e-06, "log_odds_chosen": -0.6191844344139099, "log_odds_ratio": -14.636589050292969, "logps/chosen": -27.205514907836914, "logps/rejected": -26.585662841796875, "loss": 169.2441, "nll_loss": 10.57776165008545, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": -1.3602757453918457, "rewards/margins": -0.030992573127150536, "rewards/rejected": -1.3292831182479858, "step": 10 }, { "epoch": 0.19002375296912113, "grad_norm": 355.5755615234375, "learning_rate": 2.1875000000000002e-06, "log_odds_chosen": 2.383756160736084, "log_odds_ratio": -11.047012329101562, "logps/chosen": -24.0307559967041, "logps/rejected": -26.413951873779297, "loss": 171.9778, "nll_loss": 10.748617172241211, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -1.201537847518921, "rewards/margins": 0.11915971338748932, "rewards/rejected": -1.320697546005249, "step": 15 }, { "epoch": 0.25336500395882816, "grad_norm": 281.12158203125, "learning_rate": 2.96875e-06, "log_odds_chosen": -5.130737781524658, "log_odds_ratio": -15.810519218444824, "logps/chosen": -29.556884765625, "logps/rejected": -24.425743103027344, "loss": 175.1297, "nll_loss": 10.945618629455566, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -1.477844476699829, "rewards/margins": -0.2565571963787079, "rewards/rejected": -1.2212872505187988, "step": 20 }, { "epoch": 0.3167062549485352, "grad_norm": 199.64466857910156, "learning_rate": 3.7500000000000005e-06, "log_odds_chosen": 0.16405829787254333, "log_odds_ratio": -13.288398742675781, "logps/chosen": -26.04940414428711, "logps/rejected": -26.214153289794922, "loss": 163.7713, "nll_loss": 10.235721588134766, "rewards/accuracies": 0.4906249940395355, "rewards/chosen": -1.302470326423645, "rewards/margins": 0.008237527683377266, "rewards/rejected": -1.310707688331604, "step": 25 }, { "epoch": 0.38004750593824227, "grad_norm": 195.967529296875, "learning_rate": 4.53125e-06, "log_odds_chosen": -4.0349016189575195, "log_odds_ratio": -13.333663940429688, "logps/chosen": -25.42357635498047, "logps/rejected": -21.388246536254883, "loss": 162.8051, "nll_loss": 10.175333976745605, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -1.271178960800171, "rewards/margins": -0.20176656544208527, "rewards/rejected": -1.069412350654602, "step": 30 }, { "epoch": 0.4433887569279493, "grad_norm": 149.69757080078125, "learning_rate": 4.999370587356267e-06, "log_odds_chosen": 1.761802315711975, "log_odds_ratio": -10.454744338989258, "logps/chosen": -21.783058166503906, "logps/rejected": -23.544517517089844, "loss": 149.4591, "nll_loss": 9.341216087341309, "rewards/accuracies": 0.4906249940395355, "rewards/chosen": -1.0891529321670532, "rewards/margins": 0.08807289600372314, "rewards/rejected": -1.1772258281707764, "step": 35 }, { "epoch": 0.5067300079176563, "grad_norm": 347.0418395996094, "learning_rate": 4.992293334332821e-06, "log_odds_chosen": 0.6469250321388245, "log_odds_ratio": -9.636307716369629, "logps/chosen": -20.772506713867188, "logps/rejected": -21.41815185546875, "loss": 147.193, "nll_loss": 9.199592590332031, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -1.0386254787445068, "rewards/margins": 0.032282136380672455, "rewards/rejected": -1.0709075927734375, "step": 40 }, { "epoch": 0.5700712589073634, "grad_norm": 450.3840026855469, "learning_rate": 4.977374404419838e-06, "log_odds_chosen": -0.41757726669311523, "log_odds_ratio": -10.531739234924316, "logps/chosen": -21.214900970458984, "logps/rejected": -20.79790496826172, "loss": 130.2091, "nll_loss": 8.138101577758789, "rewards/accuracies": 0.46562498807907104, "rewards/chosen": -1.060745120048523, "rewards/margins": -0.02084996923804283, "rewards/rejected": -1.0398951768875122, "step": 45 }, { "epoch": 0.6334125098970704, "grad_norm": 678.9149169921875, "learning_rate": 4.954660738090297e-06, "log_odds_chosen": -0.34605133533477783, "log_odds_ratio": -6.101162910461426, "logps/chosen": -13.776224136352539, "logps/rejected": -13.429880142211914, "loss": 99.1414, "nll_loss": 6.196375846862793, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": -0.688811182975769, "rewards/margins": -0.01731729507446289, "rewards/rejected": -0.6714939475059509, "step": 50 }, { "epoch": 0.6967537608867775, "grad_norm": 641.2720947265625, "learning_rate": 4.924223800941718e-06, "log_odds_chosen": -2.388538360595703, "log_odds_ratio": -5.602341651916504, "logps/chosen": -10.671758651733398, "logps/rejected": -8.287276268005371, "loss": 73.5918, "nll_loss": 4.599525451660156, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5335879325866699, "rewards/margins": -0.11922411620616913, "rewards/rejected": -0.4143638014793396, "step": 55 }, { "epoch": 0.7600950118764845, "grad_norm": 189.5673370361328, "learning_rate": 4.886159358838952e-06, "log_odds_chosen": -0.5304662585258484, "log_odds_ratio": -2.2777798175811768, "logps/chosen": -5.11728572845459, "logps/rejected": -4.571669578552246, "loss": 47.0867, "nll_loss": 2.9429495334625244, "rewards/accuracies": 0.484375, "rewards/chosen": -0.2558642327785492, "rewards/margins": -0.027280762791633606, "rewards/rejected": -0.22858352959156036, "step": 60 }, { "epoch": 0.8234362628661916, "grad_norm": 120.43927764892578, "learning_rate": 4.8405871765993435e-06, "log_odds_chosen": -0.2079111635684967, "log_odds_ratio": -1.2568554878234863, "logps/chosen": -2.487165927886963, "logps/rejected": -2.300987482070923, "loss": 37.4116, "nll_loss": 2.3382601737976074, "rewards/accuracies": 0.45625001192092896, "rewards/chosen": -0.12435829639434814, "rewards/margins": -0.009308922104537487, "rewards/rejected": -0.11504938453435898, "step": 65 }, { "epoch": 0.8867775138558986, "grad_norm": 85.35801696777344, "learning_rate": 4.7876506411683e-06, "log_odds_chosen": 0.16362255811691284, "log_odds_ratio": -0.9617290496826172, "logps/chosen": -1.9240213632583618, "logps/rejected": -2.0835094451904297, "loss": 34.9569, "nll_loss": 2.1848533153533936, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.09620106220245361, "rewards/margins": 0.007974403910338879, "rewards/rejected": -0.10417548567056656, "step": 70 }, { "epoch": 0.9501187648456056, "grad_norm": 67.66626739501953, "learning_rate": 4.72751631047092e-06, "log_odds_chosen": 0.08022845536470413, "log_odds_ratio": -0.8937705755233765, "logps/chosen": -1.7418140172958374, "logps/rejected": -1.8140445947647095, "loss": 32.5467, "nll_loss": 2.0342295169830322, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.08709069341421127, "rewards/margins": 0.003611528780311346, "rewards/rejected": -0.09070222824811935, "step": 75 }, { "epoch": 1.0253365003958828, "grad_norm": 67.96096801757812, "learning_rate": 4.660373389359137e-06, "log_odds_chosen": 0.220803365111351, "log_odds_ratio": -0.8697079420089722, "logps/chosen": -1.7520455121994019, "logps/rejected": -1.9549840688705444, "loss": 36.1895, "nll_loss": 2.0106112957000732, "rewards/accuracies": 0.5472221970558167, "rewards/chosen": -0.08760227262973785, "rewards/margins": 0.01014692522585392, "rewards/rejected": -0.09774920344352722, "step": 80 }, { "epoch": 1.0886777513855899, "grad_norm": 55.48482894897461, "learning_rate": 4.586433134303257e-06, "log_odds_chosen": 0.2403596192598343, "log_odds_ratio": -0.8039296865463257, "logps/chosen": -1.6341924667358398, "logps/rejected": -1.8617744445800781, "loss": 29.8033, "nll_loss": 1.862810730934143, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": -0.08170963078737259, "rewards/margins": 0.011379102244973183, "rewards/rejected": -0.09308873116970062, "step": 85 }, { "epoch": 1.152019002375297, "grad_norm": 67.8202896118164, "learning_rate": 4.505928188700946e-06, "log_odds_chosen": 0.4101799428462982, "log_odds_ratio": -0.825219452381134, "logps/chosen": -1.640615463256836, "logps/rejected": -2.0214312076568604, "loss": 29.6306, "nll_loss": 1.8520597219467163, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.0820307806134224, "rewards/margins": 0.019040774554014206, "rewards/rejected": -0.1010715514421463, "step": 90 }, { "epoch": 1.2153602533650039, "grad_norm": 62.16216278076172, "learning_rate": 4.4191118508950286e-06, "log_odds_chosen": -0.0038254274986684322, "log_odds_ratio": -0.9625605344772339, "logps/chosen": -1.7339099645614624, "logps/rejected": -1.7298612594604492, "loss": 30.1161, "nll_loss": 1.8824611902236938, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -0.08669550716876984, "rewards/margins": -0.0002024378627538681, "rewards/rejected": -0.08649305999279022, "step": 95 }, { "epoch": 1.278701504354711, "grad_norm": 61.8267936706543, "learning_rate": 4.326257277203194e-06, "log_odds_chosen": 0.19552679359912872, "log_odds_ratio": -0.8283188939094543, "logps/chosen": -1.5923553705215454, "logps/rejected": -1.7873952388763428, "loss": 28.8905, "nll_loss": 1.8059203624725342, "rewards/accuracies": 0.53125, "rewards/chosen": -0.07961776852607727, "rewards/margins": 0.00975199043750763, "rewards/rejected": -0.0893697664141655, "step": 100 }, { "epoch": 1.342042755344418, "grad_norm": 51.51774215698242, "learning_rate": 4.227656622467162e-06, "log_odds_chosen": 0.08534004539251328, "log_odds_ratio": -0.8807274103164673, "logps/chosen": -1.6110286712646484, "logps/rejected": -1.6765788793563843, "loss": 28.3032, "nll_loss": 1.769310712814331, "rewards/accuracies": 0.5218750238418579, "rewards/chosen": -0.08055143058300018, "rewards/margins": 0.0032775108702480793, "rewards/rejected": -0.08382894098758698, "step": 105 }, { "epoch": 1.405384006334125, "grad_norm": 63.78523635864258, "learning_rate": 4.123620120825459e-06, "log_odds_chosen": 0.01861184649169445, "log_odds_ratio": -0.907670795917511, "logps/chosen": -1.5369993448257446, "logps/rejected": -1.5605800151824951, "loss": 27.58, "nll_loss": 1.7242324352264404, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": -0.07684997469186783, "rewards/margins": 0.0011790260905399919, "rewards/rejected": -0.07802899926900864, "step": 110 }, { "epoch": 1.468725257323832, "grad_norm": 46.49420928955078, "learning_rate": 4.01447510960205e-06, "log_odds_chosen": 0.17250235378742218, "log_odds_ratio": -0.7509892582893372, "logps/chosen": -1.469199776649475, "logps/rejected": -1.6311143636703491, "loss": 28.0838, "nll_loss": 1.7559159994125366, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.07345998287200928, "rewards/margins": 0.008095743134617805, "rewards/rejected": -0.08155572414398193, "step": 115 }, { "epoch": 1.5320665083135392, "grad_norm": 116.36309814453125, "learning_rate": 3.900564999382007e-06, "log_odds_chosen": 0.18579277396202087, "log_odds_ratio": -0.859032928943634, "logps/chosen": -1.5738681554794312, "logps/rejected": -1.7462905645370483, "loss": 28.7531, "nll_loss": 1.7980244159698486, "rewards/accuracies": 0.515625, "rewards/chosen": -0.07869341224431992, "rewards/margins": 0.008621118031442165, "rewards/rejected": -0.08731453120708466, "step": 120 }, { "epoch": 1.5954077593032463, "grad_norm": 49.367523193359375, "learning_rate": 3.782248193514766e-06, "log_odds_chosen": 0.03143889456987381, "log_odds_ratio": -0.9138350486755371, "logps/chosen": -1.5310590267181396, "logps/rejected": -1.5749614238739014, "loss": 27.4813, "nll_loss": 1.7188360691070557, "rewards/accuracies": 0.4906249940395355, "rewards/chosen": -0.07655295729637146, "rewards/margins": 0.002195121254771948, "rewards/rejected": -0.07874806970357895, "step": 125 }, { "epoch": 1.6587490102929534, "grad_norm": 82.95511627197266, "learning_rate": 3.6598969604445854e-06, "log_odds_chosen": 0.23761017620563507, "log_odds_ratio": -0.773469090461731, "logps/chosen": -1.401659369468689, "logps/rejected": -1.619177222251892, "loss": 26.8694, "nll_loss": 1.6810429096221924, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.07008296996355057, "rewards/margins": 0.010875897482037544, "rewards/rejected": -0.08095885813236237, "step": 130 }, { "epoch": 1.7220902612826603, "grad_norm": 40.690589904785156, "learning_rate": 3.533896262416302e-06, "log_odds_chosen": 0.13715331256389618, "log_odds_ratio": -0.7931355237960815, "logps/chosen": -1.3749868869781494, "logps/rejected": -1.4867315292358398, "loss": 26.6296, "nll_loss": 1.666666030883789, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.06874934583902359, "rewards/margins": 0.005587225314229727, "rewards/rejected": -0.07433657348155975, "step": 135 }, { "epoch": 1.7854315122723674, "grad_norm": 50.21955108642578, "learning_rate": 3.4046425442416807e-06, "log_odds_chosen": 0.1383642852306366, "log_odds_ratio": -0.8326429128646851, "logps/chosen": -1.381520390510559, "logps/rejected": -1.5053998231887817, "loss": 25.8257, "nll_loss": 1.617205262184143, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.06907602399587631, "rewards/margins": 0.0061939675360918045, "rewards/rejected": -0.07526998966932297, "step": 140 }, { "epoch": 1.8487727632620743, "grad_norm": 66.92253875732422, "learning_rate": 3.272542485937369e-06, "log_odds_chosen": 0.1765662282705307, "log_odds_ratio": -0.7538725137710571, "logps/chosen": -1.3644089698791504, "logps/rejected": -1.5174672603607178, "loss": 26.2355, "nll_loss": 1.644059419631958, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": -0.06822045147418976, "rewards/margins": 0.00765291228890419, "rewards/rejected": -0.07587336748838425, "step": 145 }, { "epoch": 1.9121140142517814, "grad_norm": 76.11998748779297, "learning_rate": 3.138011723159107e-06, "log_odds_chosen": 0.030925732105970383, "log_odds_ratio": -0.8653675317764282, "logps/chosen": -1.4222267866134644, "logps/rejected": -1.4430891275405884, "loss": 25.9597, "nll_loss": 1.6283820867538452, "rewards/accuracies": 0.4906249940395355, "rewards/chosen": -0.07111133635044098, "rewards/margins": 0.0010431179543957114, "rewards/rejected": -0.0721544548869133, "step": 150 }, { "epoch": 1.9754552652414885, "grad_norm": 56.858306884765625, "learning_rate": 3.0014735394581824e-06, "log_odds_chosen": 0.11269225925207138, "log_odds_ratio": -0.7712882161140442, "logps/chosen": -1.349379062652588, "logps/rejected": -1.4386845827102661, "loss": 25.4824, "nll_loss": 1.6006284952163696, "rewards/accuracies": 0.53125, "rewards/chosen": -0.06746895611286163, "rewards/margins": 0.004465264733880758, "rewards/rejected": -0.07193422317504883, "step": 155 }, { "epoch": 2.0506730007917655, "grad_norm": 51.41761779785156, "learning_rate": 2.863357534474782e-06, "log_odds_chosen": 0.21707920730113983, "log_odds_ratio": -0.7254990339279175, "logps/chosen": -1.2790353298187256, "logps/rejected": -1.455739974975586, "loss": 27.9438, "nll_loss": 1.563090205192566, "rewards/accuracies": 0.5444444417953491, "rewards/chosen": -0.0639517679810524, "rewards/margins": 0.008835244923830032, "rewards/rejected": -0.07278700917959213, "step": 160 }, { "epoch": 2.1140142517814726, "grad_norm": 56.75939178466797, "learning_rate": 2.724098272258584e-06, "log_odds_chosen": 0.22648778557777405, "log_odds_ratio": -0.7485660314559937, "logps/chosen": -1.2268383502960205, "logps/rejected": -1.4281115531921387, "loss": 24.2974, "nll_loss": 1.5330586433410645, "rewards/accuracies": 0.5218750238418579, "rewards/chosen": -0.061341919004917145, "rewards/margins": 0.010063661262392998, "rewards/rejected": -0.07140558212995529, "step": 165 }, { "epoch": 2.1773555027711797, "grad_norm": 45.81589889526367, "learning_rate": 2.5841339139694856e-06, "log_odds_chosen": 0.23791304230690002, "log_odds_ratio": -0.7226032018661499, "logps/chosen": -1.2346957921981812, "logps/rejected": -1.4143835306167603, "loss": 24.1894, "nll_loss": 1.531683087348938, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.061734795570373535, "rewards/margins": 0.008984383195638657, "rewards/rejected": -0.0707191675901413, "step": 170 }, { "epoch": 2.240696753760887, "grad_norm": 61.37849044799805, "learning_rate": 2.443904839260488e-06, "log_odds_chosen": 0.13805775344371796, "log_odds_ratio": -0.7800249457359314, "logps/chosen": -1.316514015197754, "logps/rejected": -1.434812307357788, "loss": 24.873, "nll_loss": 1.582626461982727, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.0658257007598877, "rewards/margins": 0.005914908833801746, "rewards/rejected": -0.07174061238765717, "step": 175 }, { "epoch": 2.304038004750594, "grad_norm": 54.45339584350586, "learning_rate": 2.3038522606803882e-06, "log_odds_chosen": 0.295113205909729, "log_odds_ratio": -0.7289345264434814, "logps/chosen": -1.1970077753067017, "logps/rejected": -1.4469192028045654, "loss": 23.3061, "nll_loss": 1.4928674697875977, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.059850387275218964, "rewards/margins": 0.012495574541389942, "rewards/rejected": -0.07234595715999603, "step": 180 }, { "epoch": 2.367379255740301, "grad_norm": 49.3673210144043, "learning_rate": 2.1644168354558623e-06, "log_odds_chosen": 0.24153447151184082, "log_odds_ratio": -0.7290275692939758, "logps/chosen": -1.1816279888153076, "logps/rejected": -1.363236665725708, "loss": 22.865, "nll_loss": 1.4779325723648071, "rewards/accuracies": 0.59375, "rewards/chosen": -0.05908140540122986, "rewards/margins": 0.009080426767468452, "rewards/rejected": -0.06816183030605316, "step": 185 }, { "epoch": 2.4307205067300077, "grad_norm": 52.562843322753906, "learning_rate": 2.026037279020804e-06, "log_odds_chosen": 0.2805042862892151, "log_odds_ratio": -0.714387834072113, "logps/chosen": -1.2529584169387817, "logps/rejected": -1.496994972229004, "loss": 23.8218, "nll_loss": 1.5591580867767334, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.06264792382717133, "rewards/margins": 0.012201832607388496, "rewards/rejected": -0.07484976202249527, "step": 190 }, { "epoch": 2.494061757719715, "grad_norm": 43.61114501953125, "learning_rate": 1.8891489846552645e-06, "log_odds_chosen": 0.29797351360321045, "log_odds_ratio": -0.6934127807617188, "logps/chosen": -1.1956589221954346, "logps/rejected": -1.4286657571792603, "loss": 22.5176, "nll_loss": 1.4987757205963135, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.05978294461965561, "rewards/margins": 0.01165033970028162, "rewards/rejected": -0.07143328338861465, "step": 195 }, { "epoch": 2.557403008709422, "grad_norm": 41.446319580078125, "learning_rate": 1.75418265357711e-06, "log_odds_chosen": 0.33347612619400024, "log_odds_ratio": -0.6794474720954895, "logps/chosen": -1.2364885807037354, "logps/rejected": -1.4904797077178955, "loss": 22.3802, "nll_loss": 1.5241345167160034, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.061824433505535126, "rewards/margins": 0.01269955188035965, "rewards/rejected": -0.07452399283647537, "step": 200 }, { "epoch": 2.620744259699129, "grad_norm": 45.948020935058594, "learning_rate": 1.6215629397966432e-06, "log_odds_chosen": 0.19341301918029785, "log_odds_ratio": -0.7365654706954956, "logps/chosen": -1.2314597368240356, "logps/rejected": -1.3811973333358765, "loss": 21.58, "nll_loss": 1.5147475004196167, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.06157298758625984, "rewards/margins": 0.007486879825592041, "rewards/rejected": -0.06905986368656158, "step": 205 }, { "epoch": 2.684085510688836, "grad_norm": 70.68650817871094, "learning_rate": 1.4917071139979877e-06, "log_odds_chosen": 0.28118547797203064, "log_odds_ratio": -0.6911450624465942, "logps/chosen": -1.1725728511810303, "logps/rejected": -1.3876063823699951, "loss": 20.3847, "nll_loss": 1.48942232131958, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.05862864851951599, "rewards/margins": 0.010751673951745033, "rewards/rejected": -0.06938032060861588, "step": 210 }, { "epoch": 2.7474267616785433, "grad_norm": 43.78276443481445, "learning_rate": 1.3650237506511333e-06, "log_odds_chosen": 0.26468196511268616, "log_odds_ratio": -0.703626275062561, "logps/chosen": -1.1834118366241455, "logps/rejected": -1.3953739404678345, "loss": 19.2108, "nll_loss": 1.4794647693634033, "rewards/accuracies": 0.596875011920929, "rewards/chosen": -0.059170592576265335, "rewards/margins": 0.010598111897706985, "rewards/rejected": -0.06976870447397232, "step": 215 }, { "epoch": 2.81076801266825, "grad_norm": 37.63428497314453, "learning_rate": 1.2419114424855e-06, "log_odds_chosen": 0.22841043770313263, "log_odds_ratio": -0.718733549118042, "logps/chosen": -1.152614951133728, "logps/rejected": -1.3169124126434326, "loss": 18.0619, "nll_loss": 1.4889500141143799, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.057630755007267, "rewards/margins": 0.008214866742491722, "rewards/rejected": -0.06584562361240387, "step": 220 }, { "epoch": 2.8741092636579575, "grad_norm": 33.8170280456543, "learning_rate": 1.122757546369744e-06, "log_odds_chosen": 0.19230252504348755, "log_odds_ratio": -0.7320453524589539, "logps/chosen": -1.1981362104415894, "logps/rejected": -1.353736162185669, "loss": 16.7321, "nll_loss": 1.5025402307510376, "rewards/accuracies": 0.578125, "rewards/chosen": -0.05990681052207947, "rewards/margins": 0.007779995445162058, "rewards/rejected": -0.06768681108951569, "step": 225 }, { "epoch": 2.937450514647664, "grad_norm": 26.931453704833984, "learning_rate": 1.0079369645437411e-06, "log_odds_chosen": 0.2564031183719635, "log_odds_ratio": -0.7086187601089478, "logps/chosen": -1.181774377822876, "logps/rejected": -1.377415418624878, "loss": 14.6404, "nll_loss": 1.4596824645996094, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.05908871814608574, "rewards/margins": 0.009782059118151665, "rewards/rejected": -0.06887076795101166, "step": 230 }, { "epoch": 3.0126682501979416, "grad_norm": 32.48230743408203, "learning_rate": 8.978109650374398e-07, "log_odds_chosen": 0.19491888582706451, "log_odds_ratio": -0.7259272933006287, "logps/chosen": -1.1522880792617798, "logps/rejected": -1.3237059116363525, "loss": 14.4248, "nll_loss": 1.4603744745254517, "rewards/accuracies": 0.5666666626930237, "rewards/chosen": -0.05761440098285675, "rewards/margins": 0.008570893667638302, "rewards/rejected": -0.06618530303239822, "step": 235 }, { "epoch": 3.0760095011876483, "grad_norm": 18.48870277404785, "learning_rate": 7.927260449879828e-07, "log_odds_chosen": 0.38648003339767456, "log_odds_ratio": -0.6636689901351929, "logps/chosen": -1.115918517112732, "logps/rejected": -1.420893907546997, "loss": 10.7067, "nll_loss": 1.409071683883667, "rewards/accuracies": 0.59375, "rewards/chosen": -0.055795930325984955, "rewards/margins": 0.015248763374984264, "rewards/rejected": -0.07104469835758209, "step": 240 }, { "epoch": 3.1393507521773554, "grad_norm": 20.157772064208984, "learning_rate": 6.930128404315214e-07, "log_odds_chosen": 0.3515693247318268, "log_odds_ratio": -0.6867735981941223, "logps/chosen": -1.1619575023651123, "logps/rejected": -1.4377037286758423, "loss": 9.4715, "nll_loss": 1.4781396389007568, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.058097876608371735, "rewards/margins": 0.01378730870783329, "rewards/rejected": -0.07188518345355988, "step": 245 }, { "epoch": 3.2026920031670625, "grad_norm": 16.846302032470703, "learning_rate": 5.989850859999227e-07, "log_odds_chosen": 0.2286454439163208, "log_odds_ratio": -0.7409519553184509, "logps/chosen": -1.2201191186904907, "logps/rejected": -1.3998501300811768, "loss": 7.952, "nll_loss": 1.4928877353668213, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.061005957424640656, "rewards/margins": 0.008986548520624638, "rewards/rejected": -0.06999249756336212, "step": 250 }, { "epoch": 3.2660332541567696, "grad_norm": 15.757951736450195, "learning_rate": 5.109386277955477e-07, "log_odds_chosen": 0.3435971736907959, "log_odds_ratio": -0.6829465627670288, "logps/chosen": -1.1322981119155884, "logps/rejected": -1.391939640045166, "loss": 6.2963, "nll_loss": 1.4439783096313477, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05661489814519882, "rewards/margins": 0.01298207975924015, "rewards/rejected": -0.06959697604179382, "step": 255 }, { "epoch": 3.3293745051464767, "grad_norm": 9.453774452209473, "learning_rate": 4.29150492549959e-07, "log_odds_chosen": 0.31623855233192444, "log_odds_ratio": -0.674622654914856, "logps/chosen": -1.0845999717712402, "logps/rejected": -1.3284567594528198, "loss": 4.9207, "nll_loss": 1.3823580741882324, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.05422999709844589, "rewards/margins": 0.012192841619253159, "rewards/rejected": -0.06642283499240875, "step": 260 }, { "epoch": 3.392715756136184, "grad_norm": 13.926580429077148, "learning_rate": 3.538780159953348e-07, "log_odds_chosen": 0.33605459332466125, "log_odds_ratio": -0.6477686762809753, "logps/chosen": -1.1137669086456299, "logps/rejected": -1.3602466583251953, "loss": 4.1082, "nll_loss": 1.437723994255066, "rewards/accuracies": 0.6156250238418579, "rewards/chosen": -0.055688343942165375, "rewards/margins": 0.01232399232685566, "rewards/rejected": -0.06801234185695648, "step": 265 }, { "epoch": 3.456057007125891, "grad_norm": 6.114862442016602, "learning_rate": 2.8535803319105047e-07, "log_odds_chosen": 0.28404486179351807, "log_odds_ratio": -0.7073220610618591, "logps/chosen": -1.134637475013733, "logps/rejected": -1.340289831161499, "loss": 3.3425, "nll_loss": 1.4215402603149414, "rewards/accuracies": 0.59375, "rewards/chosen": -0.05673186853528023, "rewards/margins": 0.010282614268362522, "rewards/rejected": -0.06701448559761047, "step": 270 }, { "epoch": 3.519398258115598, "grad_norm": 7.700738430023193, "learning_rate": 2.2380613335296037e-07, "log_odds_chosen": 0.40184488892555237, "log_odds_ratio": -0.629717230796814, "logps/chosen": -1.0624561309814453, "logps/rejected": -1.356720209121704, "loss": 2.6924, "nll_loss": 1.4157472848892212, "rewards/accuracies": 0.6468750238418579, "rewards/chosen": -0.05312279984354973, "rewards/margins": 0.014713202603161335, "rewards/rejected": -0.06783600151538849, "step": 275 }, { "epoch": 3.5827395091053047, "grad_norm": 6.366260528564453, "learning_rate": 1.6941598152996453e-07, "log_odds_chosen": 0.31379514932632446, "log_odds_ratio": -0.6721755266189575, "logps/chosen": -1.153221845626831, "logps/rejected": -1.3958936929702759, "loss": 2.3316, "nll_loss": 1.4696441888809204, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.05766110494732857, "rewards/margins": 0.012133581563830376, "rewards/rejected": -0.0697946771979332, "step": 280 }, { "epoch": 3.646080760095012, "grad_norm": 8.542328834533691, "learning_rate": 1.223587092621162e-07, "log_odds_chosen": 0.4368967115879059, "log_odds_ratio": -0.6306554079055786, "logps/chosen": -1.1088006496429443, "logps/rejected": -1.4423551559448242, "loss": 1.9015, "nll_loss": 1.4303786754608154, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.055440038442611694, "rewards/margins": 0.016677727922797203, "rewards/rejected": -0.07211776077747345, "step": 285 }, { "epoch": 3.709422011084719, "grad_norm": 5.483981609344482, "learning_rate": 8.278237613748408e-08, "log_odds_chosen": 0.25653380155563354, "log_odds_ratio": -0.6835523843765259, "logps/chosen": -1.115917444229126, "logps/rejected": -1.3009469509124756, "loss": 1.7019, "nll_loss": 1.435164451599121, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.05579587072134018, "rewards/margins": 0.009251468814909458, "rewards/rejected": -0.06504733860492706, "step": 290 }, { "epoch": 3.772763262074426, "grad_norm": 5.989284992218018, "learning_rate": 5.0811503941911314e-08, "log_odds_chosen": 0.36020272970199585, "log_odds_ratio": -0.6700825691223145, "logps/chosen": -1.1353535652160645, "logps/rejected": -1.425724744796753, "loss": 1.4849, "nll_loss": 1.438443899154663, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05676767975091934, "rewards/margins": 0.014518563635647297, "rewards/rejected": -0.07128624618053436, "step": 295 }, { "epoch": 3.836104513064133, "grad_norm": 5.665014743804932, "learning_rate": 2.6546684867408412e-08, "log_odds_chosen": 0.2839321196079254, "log_odds_ratio": -0.693397045135498, "logps/chosen": -1.183774471282959, "logps/rejected": -1.4149149656295776, "loss": 1.3752, "nll_loss": 1.4539821147918701, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.05918872356414795, "rewards/margins": 0.011557026766240597, "rewards/rejected": -0.07074575871229172, "step": 300 }, { "epoch": 3.89944576405384, "grad_norm": 6.5417280197143555, "learning_rate": 1.006426501190233e-08, "log_odds_chosen": 0.2721025347709656, "log_odds_ratio": -0.6981784105300903, "logps/chosen": -1.153509259223938, "logps/rejected": -1.3679428100585938, "loss": 1.2736, "nll_loss": 1.4551602602005005, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.05767546221613884, "rewards/margins": 0.010721677914261818, "rewards/rejected": -0.06839713454246521, "step": 305 }, { "epoch": 3.9627870150435474, "grad_norm": 6.849119186401367, "learning_rate": 1.4161041661667208e-09, "log_odds_chosen": 0.20419207215309143, "log_odds_ratio": -0.7056440114974976, "logps/chosen": -1.1733808517456055, "logps/rejected": -1.3275495767593384, "loss": 1.2192, "nll_loss": 1.4886562824249268, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.058669041842222214, "rewards/margins": 0.00770843680948019, "rewards/rejected": -0.06637748330831528, "step": 310 }, { "epoch": 3.9881235154394297, "step": 312, "total_flos": 0.0, "train_loss": 41.90841192007065, "train_runtime": 3951.2607, "train_samples_per_second": 5.112, "train_steps_per_second": 0.079 } ], "logging_steps": 5, "max_steps": 312, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }