| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.9881235154394297, | |
| "eval_steps": 500, | |
| "global_step": 312, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.06334125098970704, | |
| "grad_norm": 1566.8653564453125, | |
| "learning_rate": 6.25e-07, | |
| "log_odds_chosen": -2.225217342376709, | |
| "log_odds_ratio": -13.344230651855469, | |
| "logps/chosen": -26.207233428955078, | |
| "logps/rejected": -23.98147964477539, | |
| "loss": 166.3142, | |
| "nll_loss": 10.39463996887207, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -1.3103615045547485, | |
| "rewards/margins": -0.11128749698400497, | |
| "rewards/rejected": -1.1990740299224854, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.12668250197941408, | |
| "grad_norm": 954.8382568359375, | |
| "learning_rate": 1.40625e-06, | |
| "log_odds_chosen": -0.6191844344139099, | |
| "log_odds_ratio": -14.636589050292969, | |
| "logps/chosen": -27.205514907836914, | |
| "logps/rejected": -26.585662841796875, | |
| "loss": 169.2441, | |
| "nll_loss": 10.57776165008545, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": -1.3602757453918457, | |
| "rewards/margins": -0.030992573127150536, | |
| "rewards/rejected": -1.3292831182479858, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.19002375296912113, | |
| "grad_norm": 355.5755615234375, | |
| "learning_rate": 2.1875000000000002e-06, | |
| "log_odds_chosen": 2.383756160736084, | |
| "log_odds_ratio": -11.047012329101562, | |
| "logps/chosen": -24.0307559967041, | |
| "logps/rejected": -26.413951873779297, | |
| "loss": 171.9778, | |
| "nll_loss": 10.748617172241211, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -1.201537847518921, | |
| "rewards/margins": 0.11915971338748932, | |
| "rewards/rejected": -1.320697546005249, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.25336500395882816, | |
| "grad_norm": 281.12158203125, | |
| "learning_rate": 2.96875e-06, | |
| "log_odds_chosen": -5.130737781524658, | |
| "log_odds_ratio": -15.810519218444824, | |
| "logps/chosen": -29.556884765625, | |
| "logps/rejected": -24.425743103027344, | |
| "loss": 175.1297, | |
| "nll_loss": 10.945618629455566, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -1.477844476699829, | |
| "rewards/margins": -0.2565571963787079, | |
| "rewards/rejected": -1.2212872505187988, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.3167062549485352, | |
| "grad_norm": 199.64466857910156, | |
| "learning_rate": 3.7500000000000005e-06, | |
| "log_odds_chosen": 0.16405829787254333, | |
| "log_odds_ratio": -13.288398742675781, | |
| "logps/chosen": -26.04940414428711, | |
| "logps/rejected": -26.214153289794922, | |
| "loss": 163.7713, | |
| "nll_loss": 10.235721588134766, | |
| "rewards/accuracies": 0.4906249940395355, | |
| "rewards/chosen": -1.302470326423645, | |
| "rewards/margins": 0.008237527683377266, | |
| "rewards/rejected": -1.310707688331604, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.38004750593824227, | |
| "grad_norm": 195.967529296875, | |
| "learning_rate": 4.53125e-06, | |
| "log_odds_chosen": -4.0349016189575195, | |
| "log_odds_ratio": -13.333663940429688, | |
| "logps/chosen": -25.42357635498047, | |
| "logps/rejected": -21.388246536254883, | |
| "loss": 162.8051, | |
| "nll_loss": 10.175333976745605, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -1.271178960800171, | |
| "rewards/margins": -0.20176656544208527, | |
| "rewards/rejected": -1.069412350654602, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.4433887569279493, | |
| "grad_norm": 149.69757080078125, | |
| "learning_rate": 4.999370587356267e-06, | |
| "log_odds_chosen": 1.761802315711975, | |
| "log_odds_ratio": -10.454744338989258, | |
| "logps/chosen": -21.783058166503906, | |
| "logps/rejected": -23.544517517089844, | |
| "loss": 149.4591, | |
| "nll_loss": 9.341216087341309, | |
| "rewards/accuracies": 0.4906249940395355, | |
| "rewards/chosen": -1.0891529321670532, | |
| "rewards/margins": 0.08807289600372314, | |
| "rewards/rejected": -1.1772258281707764, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.5067300079176563, | |
| "grad_norm": 347.0418395996094, | |
| "learning_rate": 4.992293334332821e-06, | |
| "log_odds_chosen": 0.6469250321388245, | |
| "log_odds_ratio": -9.636307716369629, | |
| "logps/chosen": -20.772506713867188, | |
| "logps/rejected": -21.41815185546875, | |
| "loss": 147.193, | |
| "nll_loss": 9.199592590332031, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -1.0386254787445068, | |
| "rewards/margins": 0.032282136380672455, | |
| "rewards/rejected": -1.0709075927734375, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.5700712589073634, | |
| "grad_norm": 450.3840026855469, | |
| "learning_rate": 4.977374404419838e-06, | |
| "log_odds_chosen": -0.41757726669311523, | |
| "log_odds_ratio": -10.531739234924316, | |
| "logps/chosen": -21.214900970458984, | |
| "logps/rejected": -20.79790496826172, | |
| "loss": 130.2091, | |
| "nll_loss": 8.138101577758789, | |
| "rewards/accuracies": 0.46562498807907104, | |
| "rewards/chosen": -1.060745120048523, | |
| "rewards/margins": -0.02084996923804283, | |
| "rewards/rejected": -1.0398951768875122, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.6334125098970704, | |
| "grad_norm": 678.9149169921875, | |
| "learning_rate": 4.954660738090297e-06, | |
| "log_odds_chosen": -0.34605133533477783, | |
| "log_odds_ratio": -6.101162910461426, | |
| "logps/chosen": -13.776224136352539, | |
| "logps/rejected": -13.429880142211914, | |
| "loss": 99.1414, | |
| "nll_loss": 6.196375846862793, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": -0.688811182975769, | |
| "rewards/margins": -0.01731729507446289, | |
| "rewards/rejected": -0.6714939475059509, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.6967537608867775, | |
| "grad_norm": 641.2720947265625, | |
| "learning_rate": 4.924223800941718e-06, | |
| "log_odds_chosen": -2.388538360595703, | |
| "log_odds_ratio": -5.602341651916504, | |
| "logps/chosen": -10.671758651733398, | |
| "logps/rejected": -8.287276268005371, | |
| "loss": 73.5918, | |
| "nll_loss": 4.599525451660156, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.5335879325866699, | |
| "rewards/margins": -0.11922411620616913, | |
| "rewards/rejected": -0.4143638014793396, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.7600950118764845, | |
| "grad_norm": 189.5673370361328, | |
| "learning_rate": 4.886159358838952e-06, | |
| "log_odds_chosen": -0.5304662585258484, | |
| "log_odds_ratio": -2.2777798175811768, | |
| "logps/chosen": -5.11728572845459, | |
| "logps/rejected": -4.571669578552246, | |
| "loss": 47.0867, | |
| "nll_loss": 2.9429495334625244, | |
| "rewards/accuracies": 0.484375, | |
| "rewards/chosen": -0.2558642327785492, | |
| "rewards/margins": -0.027280762791633606, | |
| "rewards/rejected": -0.22858352959156036, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.8234362628661916, | |
| "grad_norm": 120.43927764892578, | |
| "learning_rate": 4.8405871765993435e-06, | |
| "log_odds_chosen": -0.2079111635684967, | |
| "log_odds_ratio": -1.2568554878234863, | |
| "logps/chosen": -2.487165927886963, | |
| "logps/rejected": -2.300987482070923, | |
| "loss": 37.4116, | |
| "nll_loss": 2.3382601737976074, | |
| "rewards/accuracies": 0.45625001192092896, | |
| "rewards/chosen": -0.12435829639434814, | |
| "rewards/margins": -0.009308922104537487, | |
| "rewards/rejected": -0.11504938453435898, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.8867775138558986, | |
| "grad_norm": 85.35801696777344, | |
| "learning_rate": 4.7876506411683e-06, | |
| "log_odds_chosen": 0.16362255811691284, | |
| "log_odds_ratio": -0.9617290496826172, | |
| "logps/chosen": -1.9240213632583618, | |
| "logps/rejected": -2.0835094451904297, | |
| "loss": 34.9569, | |
| "nll_loss": 2.1848533153533936, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.09620106220245361, | |
| "rewards/margins": 0.007974403910338879, | |
| "rewards/rejected": -0.10417548567056656, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.9501187648456056, | |
| "grad_norm": 67.66626739501953, | |
| "learning_rate": 4.72751631047092e-06, | |
| "log_odds_chosen": 0.08022845536470413, | |
| "log_odds_ratio": -0.8937705755233765, | |
| "logps/chosen": -1.7418140172958374, | |
| "logps/rejected": -1.8140445947647095, | |
| "loss": 32.5467, | |
| "nll_loss": 2.0342295169830322, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -0.08709069341421127, | |
| "rewards/margins": 0.003611528780311346, | |
| "rewards/rejected": -0.09070222824811935, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 1.0253365003958828, | |
| "grad_norm": 67.96096801757812, | |
| "learning_rate": 4.660373389359137e-06, | |
| "log_odds_chosen": 0.220803365111351, | |
| "log_odds_ratio": -0.8697079420089722, | |
| "logps/chosen": -1.7520455121994019, | |
| "logps/rejected": -1.9549840688705444, | |
| "loss": 36.1895, | |
| "nll_loss": 2.0106112957000732, | |
| "rewards/accuracies": 0.5472221970558167, | |
| "rewards/chosen": -0.08760227262973785, | |
| "rewards/margins": 0.01014692522585392, | |
| "rewards/rejected": -0.09774920344352722, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.0886777513855899, | |
| "grad_norm": 55.48482894897461, | |
| "learning_rate": 4.586433134303257e-06, | |
| "log_odds_chosen": 0.2403596192598343, | |
| "log_odds_ratio": -0.8039296865463257, | |
| "logps/chosen": -1.6341924667358398, | |
| "logps/rejected": -1.8617744445800781, | |
| "loss": 29.8033, | |
| "nll_loss": 1.862810730934143, | |
| "rewards/accuracies": 0.5406249761581421, | |
| "rewards/chosen": -0.08170963078737259, | |
| "rewards/margins": 0.011379102244973183, | |
| "rewards/rejected": -0.09308873116970062, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 1.152019002375297, | |
| "grad_norm": 67.8202896118164, | |
| "learning_rate": 4.505928188700946e-06, | |
| "log_odds_chosen": 0.4101799428462982, | |
| "log_odds_ratio": -0.825219452381134, | |
| "logps/chosen": -1.640615463256836, | |
| "logps/rejected": -2.0214312076568604, | |
| "loss": 29.6306, | |
| "nll_loss": 1.8520597219467163, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.0820307806134224, | |
| "rewards/margins": 0.019040774554014206, | |
| "rewards/rejected": -0.1010715514421463, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.2153602533650039, | |
| "grad_norm": 62.16216278076172, | |
| "learning_rate": 4.4191118508950286e-06, | |
| "log_odds_chosen": -0.0038254274986684322, | |
| "log_odds_ratio": -0.9625605344772339, | |
| "logps/chosen": -1.7339099645614624, | |
| "logps/rejected": -1.7298612594604492, | |
| "loss": 30.1161, | |
| "nll_loss": 1.8824611902236938, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.08669550716876984, | |
| "rewards/margins": -0.0002024378627538681, | |
| "rewards/rejected": -0.08649305999279022, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 1.278701504354711, | |
| "grad_norm": 61.8267936706543, | |
| "learning_rate": 4.326257277203194e-06, | |
| "log_odds_chosen": 0.19552679359912872, | |
| "log_odds_ratio": -0.8283188939094543, | |
| "logps/chosen": -1.5923553705215454, | |
| "logps/rejected": -1.7873952388763428, | |
| "loss": 28.8905, | |
| "nll_loss": 1.8059203624725342, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.07961776852607727, | |
| "rewards/margins": 0.00975199043750763, | |
| "rewards/rejected": -0.0893697664141655, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.342042755344418, | |
| "grad_norm": 51.51774215698242, | |
| "learning_rate": 4.227656622467162e-06, | |
| "log_odds_chosen": 0.08534004539251328, | |
| "log_odds_ratio": -0.8807274103164673, | |
| "logps/chosen": -1.6110286712646484, | |
| "logps/rejected": -1.6765788793563843, | |
| "loss": 28.3032, | |
| "nll_loss": 1.769310712814331, | |
| "rewards/accuracies": 0.5218750238418579, | |
| "rewards/chosen": -0.08055143058300018, | |
| "rewards/margins": 0.0032775108702480793, | |
| "rewards/rejected": -0.08382894098758698, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 1.405384006334125, | |
| "grad_norm": 63.78523635864258, | |
| "learning_rate": 4.123620120825459e-06, | |
| "log_odds_chosen": 0.01861184649169445, | |
| "log_odds_ratio": -0.907670795917511, | |
| "logps/chosen": -1.5369993448257446, | |
| "logps/rejected": -1.5605800151824951, | |
| "loss": 27.58, | |
| "nll_loss": 1.7242324352264404, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": -0.07684997469186783, | |
| "rewards/margins": 0.0011790260905399919, | |
| "rewards/rejected": -0.07802899926900864, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.468725257323832, | |
| "grad_norm": 46.49420928955078, | |
| "learning_rate": 4.01447510960205e-06, | |
| "log_odds_chosen": 0.17250235378742218, | |
| "log_odds_ratio": -0.7509892582893372, | |
| "logps/chosen": -1.469199776649475, | |
| "logps/rejected": -1.6311143636703491, | |
| "loss": 28.0838, | |
| "nll_loss": 1.7559159994125366, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.07345998287200928, | |
| "rewards/margins": 0.008095743134617805, | |
| "rewards/rejected": -0.08155572414398193, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 1.5320665083135392, | |
| "grad_norm": 116.36309814453125, | |
| "learning_rate": 3.900564999382007e-06, | |
| "log_odds_chosen": 0.18579277396202087, | |
| "log_odds_ratio": -0.859032928943634, | |
| "logps/chosen": -1.5738681554794312, | |
| "logps/rejected": -1.7462905645370483, | |
| "loss": 28.7531, | |
| "nll_loss": 1.7980244159698486, | |
| "rewards/accuracies": 0.515625, | |
| "rewards/chosen": -0.07869341224431992, | |
| "rewards/margins": 0.008621118031442165, | |
| "rewards/rejected": -0.08731453120708466, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.5954077593032463, | |
| "grad_norm": 49.367523193359375, | |
| "learning_rate": 3.782248193514766e-06, | |
| "log_odds_chosen": 0.03143889456987381, | |
| "log_odds_ratio": -0.9138350486755371, | |
| "logps/chosen": -1.5310590267181396, | |
| "logps/rejected": -1.5749614238739014, | |
| "loss": 27.4813, | |
| "nll_loss": 1.7188360691070557, | |
| "rewards/accuracies": 0.4906249940395355, | |
| "rewards/chosen": -0.07655295729637146, | |
| "rewards/margins": 0.002195121254771948, | |
| "rewards/rejected": -0.07874806970357895, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 1.6587490102929534, | |
| "grad_norm": 82.95511627197266, | |
| "learning_rate": 3.6598969604445854e-06, | |
| "log_odds_chosen": 0.23761017620563507, | |
| "log_odds_ratio": -0.773469090461731, | |
| "logps/chosen": -1.401659369468689, | |
| "logps/rejected": -1.619177222251892, | |
| "loss": 26.8694, | |
| "nll_loss": 1.6810429096221924, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.07008296996355057, | |
| "rewards/margins": 0.010875897482037544, | |
| "rewards/rejected": -0.08095885813236237, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.7220902612826603, | |
| "grad_norm": 40.690589904785156, | |
| "learning_rate": 3.533896262416302e-06, | |
| "log_odds_chosen": 0.13715331256389618, | |
| "log_odds_ratio": -0.7931355237960815, | |
| "logps/chosen": -1.3749868869781494, | |
| "logps/rejected": -1.4867315292358398, | |
| "loss": 26.6296, | |
| "nll_loss": 1.666666030883789, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.06874934583902359, | |
| "rewards/margins": 0.005587225314229727, | |
| "rewards/rejected": -0.07433657348155975, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 1.7854315122723674, | |
| "grad_norm": 50.21955108642578, | |
| "learning_rate": 3.4046425442416807e-06, | |
| "log_odds_chosen": 0.1383642852306366, | |
| "log_odds_ratio": -0.8326429128646851, | |
| "logps/chosen": -1.381520390510559, | |
| "logps/rejected": -1.5053998231887817, | |
| "loss": 25.8257, | |
| "nll_loss": 1.617205262184143, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": -0.06907602399587631, | |
| "rewards/margins": 0.0061939675360918045, | |
| "rewards/rejected": -0.07526998966932297, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.8487727632620743, | |
| "grad_norm": 66.92253875732422, | |
| "learning_rate": 3.272542485937369e-06, | |
| "log_odds_chosen": 0.1765662282705307, | |
| "log_odds_ratio": -0.7538725137710571, | |
| "logps/chosen": -1.3644089698791504, | |
| "logps/rejected": -1.5174672603607178, | |
| "loss": 26.2355, | |
| "nll_loss": 1.644059419631958, | |
| "rewards/accuracies": 0.5406249761581421, | |
| "rewards/chosen": -0.06822045147418976, | |
| "rewards/margins": 0.00765291228890419, | |
| "rewards/rejected": -0.07587336748838425, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 1.9121140142517814, | |
| "grad_norm": 76.11998748779297, | |
| "learning_rate": 3.138011723159107e-06, | |
| "log_odds_chosen": 0.030925732105970383, | |
| "log_odds_ratio": -0.8653675317764282, | |
| "logps/chosen": -1.4222267866134644, | |
| "logps/rejected": -1.4430891275405884, | |
| "loss": 25.9597, | |
| "nll_loss": 1.6283820867538452, | |
| "rewards/accuracies": 0.4906249940395355, | |
| "rewards/chosen": -0.07111133635044098, | |
| "rewards/margins": 0.0010431179543957114, | |
| "rewards/rejected": -0.0721544548869133, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.9754552652414885, | |
| "grad_norm": 56.858306884765625, | |
| "learning_rate": 3.0014735394581824e-06, | |
| "log_odds_chosen": 0.11269225925207138, | |
| "log_odds_ratio": -0.7712882161140442, | |
| "logps/chosen": -1.349379062652588, | |
| "logps/rejected": -1.4386845827102661, | |
| "loss": 25.4824, | |
| "nll_loss": 1.6006284952163696, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.06746895611286163, | |
| "rewards/margins": 0.004465264733880758, | |
| "rewards/rejected": -0.07193422317504883, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 2.0506730007917655, | |
| "grad_norm": 51.41761779785156, | |
| "learning_rate": 2.863357534474782e-06, | |
| "log_odds_chosen": 0.21707920730113983, | |
| "log_odds_ratio": -0.7254990339279175, | |
| "logps/chosen": -1.2790353298187256, | |
| "logps/rejected": -1.455739974975586, | |
| "loss": 27.9438, | |
| "nll_loss": 1.563090205192566, | |
| "rewards/accuracies": 0.5444444417953491, | |
| "rewards/chosen": -0.0639517679810524, | |
| "rewards/margins": 0.008835244923830032, | |
| "rewards/rejected": -0.07278700917959213, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 2.1140142517814726, | |
| "grad_norm": 56.75939178466797, | |
| "learning_rate": 2.724098272258584e-06, | |
| "log_odds_chosen": 0.22648778557777405, | |
| "log_odds_ratio": -0.7485660314559937, | |
| "logps/chosen": -1.2268383502960205, | |
| "logps/rejected": -1.4281115531921387, | |
| "loss": 24.2974, | |
| "nll_loss": 1.5330586433410645, | |
| "rewards/accuracies": 0.5218750238418579, | |
| "rewards/chosen": -0.061341919004917145, | |
| "rewards/margins": 0.010063661262392998, | |
| "rewards/rejected": -0.07140558212995529, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 2.1773555027711797, | |
| "grad_norm": 45.81589889526367, | |
| "learning_rate": 2.5841339139694856e-06, | |
| "log_odds_chosen": 0.23791304230690002, | |
| "log_odds_ratio": -0.7226032018661499, | |
| "logps/chosen": -1.2346957921981812, | |
| "logps/rejected": -1.4143835306167603, | |
| "loss": 24.1894, | |
| "nll_loss": 1.531683087348938, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.061734795570373535, | |
| "rewards/margins": 0.008984383195638657, | |
| "rewards/rejected": -0.0707191675901413, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 2.240696753760887, | |
| "grad_norm": 61.37849044799805, | |
| "learning_rate": 2.443904839260488e-06, | |
| "log_odds_chosen": 0.13805775344371796, | |
| "log_odds_ratio": -0.7800249457359314, | |
| "logps/chosen": -1.316514015197754, | |
| "logps/rejected": -1.434812307357788, | |
| "loss": 24.873, | |
| "nll_loss": 1.582626461982727, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.0658257007598877, | |
| "rewards/margins": 0.005914908833801746, | |
| "rewards/rejected": -0.07174061238765717, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 2.304038004750594, | |
| "grad_norm": 54.45339584350586, | |
| "learning_rate": 2.3038522606803882e-06, | |
| "log_odds_chosen": 0.295113205909729, | |
| "log_odds_ratio": -0.7289345264434814, | |
| "logps/chosen": -1.1970077753067017, | |
| "logps/rejected": -1.4469192028045654, | |
| "loss": 23.3061, | |
| "nll_loss": 1.4928674697875977, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.059850387275218964, | |
| "rewards/margins": 0.012495574541389942, | |
| "rewards/rejected": -0.07234595715999603, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 2.367379255740301, | |
| "grad_norm": 49.3673210144043, | |
| "learning_rate": 2.1644168354558623e-06, | |
| "log_odds_chosen": 0.24153447151184082, | |
| "log_odds_ratio": -0.7290275692939758, | |
| "logps/chosen": -1.1816279888153076, | |
| "logps/rejected": -1.363236665725708, | |
| "loss": 22.865, | |
| "nll_loss": 1.4779325723648071, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.05908140540122986, | |
| "rewards/margins": 0.009080426767468452, | |
| "rewards/rejected": -0.06816183030605316, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 2.4307205067300077, | |
| "grad_norm": 52.562843322753906, | |
| "learning_rate": 2.026037279020804e-06, | |
| "log_odds_chosen": 0.2805042862892151, | |
| "log_odds_ratio": -0.714387834072113, | |
| "logps/chosen": -1.2529584169387817, | |
| "logps/rejected": -1.496994972229004, | |
| "loss": 23.8218, | |
| "nll_loss": 1.5591580867767334, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.06264792382717133, | |
| "rewards/margins": 0.012201832607388496, | |
| "rewards/rejected": -0.07484976202249527, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 2.494061757719715, | |
| "grad_norm": 43.61114501953125, | |
| "learning_rate": 1.8891489846552645e-06, | |
| "log_odds_chosen": 0.29797351360321045, | |
| "log_odds_ratio": -0.6934127807617188, | |
| "logps/chosen": -1.1956589221954346, | |
| "logps/rejected": -1.4286657571792603, | |
| "loss": 22.5176, | |
| "nll_loss": 1.4987757205963135, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.05978294461965561, | |
| "rewards/margins": 0.01165033970028162, | |
| "rewards/rejected": -0.07143328338861465, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 2.557403008709422, | |
| "grad_norm": 41.446319580078125, | |
| "learning_rate": 1.75418265357711e-06, | |
| "log_odds_chosen": 0.33347612619400024, | |
| "log_odds_ratio": -0.6794474720954895, | |
| "logps/chosen": -1.2364885807037354, | |
| "logps/rejected": -1.4904797077178955, | |
| "loss": 22.3802, | |
| "nll_loss": 1.5241345167160034, | |
| "rewards/accuracies": 0.5843750238418579, | |
| "rewards/chosen": -0.061824433505535126, | |
| "rewards/margins": 0.01269955188035965, | |
| "rewards/rejected": -0.07452399283647537, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.620744259699129, | |
| "grad_norm": 45.948020935058594, | |
| "learning_rate": 1.6215629397966432e-06, | |
| "log_odds_chosen": 0.19341301918029785, | |
| "log_odds_ratio": -0.7365654706954956, | |
| "logps/chosen": -1.2314597368240356, | |
| "logps/rejected": -1.3811973333358765, | |
| "loss": 21.58, | |
| "nll_loss": 1.5147475004196167, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.06157298758625984, | |
| "rewards/margins": 0.007486879825592041, | |
| "rewards/rejected": -0.06905986368656158, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 2.684085510688836, | |
| "grad_norm": 70.68650817871094, | |
| "learning_rate": 1.4917071139979877e-06, | |
| "log_odds_chosen": 0.28118547797203064, | |
| "log_odds_ratio": -0.6911450624465942, | |
| "logps/chosen": -1.1725728511810303, | |
| "logps/rejected": -1.3876063823699951, | |
| "loss": 20.3847, | |
| "nll_loss": 1.48942232131958, | |
| "rewards/accuracies": 0.590624988079071, | |
| "rewards/chosen": -0.05862864851951599, | |
| "rewards/margins": 0.010751673951745033, | |
| "rewards/rejected": -0.06938032060861588, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 2.7474267616785433, | |
| "grad_norm": 43.78276443481445, | |
| "learning_rate": 1.3650237506511333e-06, | |
| "log_odds_chosen": 0.26468196511268616, | |
| "log_odds_ratio": -0.703626275062561, | |
| "logps/chosen": -1.1834118366241455, | |
| "logps/rejected": -1.3953739404678345, | |
| "loss": 19.2108, | |
| "nll_loss": 1.4794647693634033, | |
| "rewards/accuracies": 0.596875011920929, | |
| "rewards/chosen": -0.059170592576265335, | |
| "rewards/margins": 0.010598111897706985, | |
| "rewards/rejected": -0.06976870447397232, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 2.81076801266825, | |
| "grad_norm": 37.63428497314453, | |
| "learning_rate": 1.2419114424855e-06, | |
| "log_odds_chosen": 0.22841043770313263, | |
| "log_odds_ratio": -0.718733549118042, | |
| "logps/chosen": -1.152614951133728, | |
| "logps/rejected": -1.3169124126434326, | |
| "loss": 18.0619, | |
| "nll_loss": 1.4889500141143799, | |
| "rewards/accuracies": 0.5531250238418579, | |
| "rewards/chosen": -0.057630755007267, | |
| "rewards/margins": 0.008214866742491722, | |
| "rewards/rejected": -0.06584562361240387, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 2.8741092636579575, | |
| "grad_norm": 33.8170280456543, | |
| "learning_rate": 1.122757546369744e-06, | |
| "log_odds_chosen": 0.19230252504348755, | |
| "log_odds_ratio": -0.7320453524589539, | |
| "logps/chosen": -1.1981362104415894, | |
| "logps/rejected": -1.353736162185669, | |
| "loss": 16.7321, | |
| "nll_loss": 1.5025402307510376, | |
| "rewards/accuracies": 0.578125, | |
| "rewards/chosen": -0.05990681052207947, | |
| "rewards/margins": 0.007779995445162058, | |
| "rewards/rejected": -0.06768681108951569, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 2.937450514647664, | |
| "grad_norm": 26.931453704833984, | |
| "learning_rate": 1.0079369645437411e-06, | |
| "log_odds_chosen": 0.2564031183719635, | |
| "log_odds_ratio": -0.7086187601089478, | |
| "logps/chosen": -1.181774377822876, | |
| "logps/rejected": -1.377415418624878, | |
| "loss": 14.6404, | |
| "nll_loss": 1.4596824645996094, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.05908871814608574, | |
| "rewards/margins": 0.009782059118151665, | |
| "rewards/rejected": -0.06887076795101166, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 3.0126682501979416, | |
| "grad_norm": 32.48230743408203, | |
| "learning_rate": 8.978109650374398e-07, | |
| "log_odds_chosen": 0.19491888582706451, | |
| "log_odds_ratio": -0.7259272933006287, | |
| "logps/chosen": -1.1522880792617798, | |
| "logps/rejected": -1.3237059116363525, | |
| "loss": 14.4248, | |
| "nll_loss": 1.4603744745254517, | |
| "rewards/accuracies": 0.5666666626930237, | |
| "rewards/chosen": -0.05761440098285675, | |
| "rewards/margins": 0.008570893667638302, | |
| "rewards/rejected": -0.06618530303239822, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 3.0760095011876483, | |
| "grad_norm": 18.48870277404785, | |
| "learning_rate": 7.927260449879828e-07, | |
| "log_odds_chosen": 0.38648003339767456, | |
| "log_odds_ratio": -0.6636689901351929, | |
| "logps/chosen": -1.115918517112732, | |
| "logps/rejected": -1.420893907546997, | |
| "loss": 10.7067, | |
| "nll_loss": 1.409071683883667, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.055795930325984955, | |
| "rewards/margins": 0.015248763374984264, | |
| "rewards/rejected": -0.07104469835758209, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 3.1393507521773554, | |
| "grad_norm": 20.157772064208984, | |
| "learning_rate": 6.930128404315214e-07, | |
| "log_odds_chosen": 0.3515693247318268, | |
| "log_odds_ratio": -0.6867735981941223, | |
| "logps/chosen": -1.1619575023651123, | |
| "logps/rejected": -1.4377037286758423, | |
| "loss": 9.4715, | |
| "nll_loss": 1.4781396389007568, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -0.058097876608371735, | |
| "rewards/margins": 0.01378730870783329, | |
| "rewards/rejected": -0.07188518345355988, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 3.2026920031670625, | |
| "grad_norm": 16.846302032470703, | |
| "learning_rate": 5.989850859999227e-07, | |
| "log_odds_chosen": 0.2286454439163208, | |
| "log_odds_ratio": -0.7409519553184509, | |
| "logps/chosen": -1.2201191186904907, | |
| "logps/rejected": -1.3998501300811768, | |
| "loss": 7.952, | |
| "nll_loss": 1.4928877353668213, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.061005957424640656, | |
| "rewards/margins": 0.008986548520624638, | |
| "rewards/rejected": -0.06999249756336212, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 3.2660332541567696, | |
| "grad_norm": 15.757951736450195, | |
| "learning_rate": 5.109386277955477e-07, | |
| "log_odds_chosen": 0.3435971736907959, | |
| "log_odds_ratio": -0.6829465627670288, | |
| "logps/chosen": -1.1322981119155884, | |
| "logps/rejected": -1.391939640045166, | |
| "loss": 6.2963, | |
| "nll_loss": 1.4439783096313477, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.05661489814519882, | |
| "rewards/margins": 0.01298207975924015, | |
| "rewards/rejected": -0.06959697604179382, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 3.3293745051464767, | |
| "grad_norm": 9.453774452209473, | |
| "learning_rate": 4.29150492549959e-07, | |
| "log_odds_chosen": 0.31623855233192444, | |
| "log_odds_ratio": -0.674622654914856, | |
| "logps/chosen": -1.0845999717712402, | |
| "logps/rejected": -1.3284567594528198, | |
| "loss": 4.9207, | |
| "nll_loss": 1.3823580741882324, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.05422999709844589, | |
| "rewards/margins": 0.012192841619253159, | |
| "rewards/rejected": -0.06642283499240875, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 3.392715756136184, | |
| "grad_norm": 13.926580429077148, | |
| "learning_rate": 3.538780159953348e-07, | |
| "log_odds_chosen": 0.33605459332466125, | |
| "log_odds_ratio": -0.6477686762809753, | |
| "logps/chosen": -1.1137669086456299, | |
| "logps/rejected": -1.3602466583251953, | |
| "loss": 4.1082, | |
| "nll_loss": 1.437723994255066, | |
| "rewards/accuracies": 0.6156250238418579, | |
| "rewards/chosen": -0.055688343942165375, | |
| "rewards/margins": 0.01232399232685566, | |
| "rewards/rejected": -0.06801234185695648, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 3.456057007125891, | |
| "grad_norm": 6.114862442016602, | |
| "learning_rate": 2.8535803319105047e-07, | |
| "log_odds_chosen": 0.28404486179351807, | |
| "log_odds_ratio": -0.7073220610618591, | |
| "logps/chosen": -1.134637475013733, | |
| "logps/rejected": -1.340289831161499, | |
| "loss": 3.3425, | |
| "nll_loss": 1.4215402603149414, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.05673186853528023, | |
| "rewards/margins": 0.010282614268362522, | |
| "rewards/rejected": -0.06701448559761047, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 3.519398258115598, | |
| "grad_norm": 7.700738430023193, | |
| "learning_rate": 2.2380613335296037e-07, | |
| "log_odds_chosen": 0.40184488892555237, | |
| "log_odds_ratio": -0.629717230796814, | |
| "logps/chosen": -1.0624561309814453, | |
| "logps/rejected": -1.356720209121704, | |
| "loss": 2.6924, | |
| "nll_loss": 1.4157472848892212, | |
| "rewards/accuracies": 0.6468750238418579, | |
| "rewards/chosen": -0.05312279984354973, | |
| "rewards/margins": 0.014713202603161335, | |
| "rewards/rejected": -0.06783600151538849, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 3.5827395091053047, | |
| "grad_norm": 6.366260528564453, | |
| "learning_rate": 1.6941598152996453e-07, | |
| "log_odds_chosen": 0.31379514932632446, | |
| "log_odds_ratio": -0.6721755266189575, | |
| "logps/chosen": -1.153221845626831, | |
| "logps/rejected": -1.3958936929702759, | |
| "loss": 2.3316, | |
| "nll_loss": 1.4696441888809204, | |
| "rewards/accuracies": 0.5843750238418579, | |
| "rewards/chosen": -0.05766110494732857, | |
| "rewards/margins": 0.012133581563830376, | |
| "rewards/rejected": -0.0697946771979332, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 3.646080760095012, | |
| "grad_norm": 8.542328834533691, | |
| "learning_rate": 1.223587092621162e-07, | |
| "log_odds_chosen": 0.4368967115879059, | |
| "log_odds_ratio": -0.6306554079055786, | |
| "logps/chosen": -1.1088006496429443, | |
| "logps/rejected": -1.4423551559448242, | |
| "loss": 1.9015, | |
| "nll_loss": 1.4303786754608154, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.055440038442611694, | |
| "rewards/margins": 0.016677727922797203, | |
| "rewards/rejected": -0.07211776077747345, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 3.709422011084719, | |
| "grad_norm": 5.483981609344482, | |
| "learning_rate": 8.278237613748408e-08, | |
| "log_odds_chosen": 0.25653380155563354, | |
| "log_odds_ratio": -0.6835523843765259, | |
| "logps/chosen": -1.115917444229126, | |
| "logps/rejected": -1.3009469509124756, | |
| "loss": 1.7019, | |
| "nll_loss": 1.435164451599121, | |
| "rewards/accuracies": 0.5843750238418579, | |
| "rewards/chosen": -0.05579587072134018, | |
| "rewards/margins": 0.009251468814909458, | |
| "rewards/rejected": -0.06504733860492706, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 3.772763262074426, | |
| "grad_norm": 5.989284992218018, | |
| "learning_rate": 5.0811503941911314e-08, | |
| "log_odds_chosen": 0.36020272970199585, | |
| "log_odds_ratio": -0.6700825691223145, | |
| "logps/chosen": -1.1353535652160645, | |
| "logps/rejected": -1.425724744796753, | |
| "loss": 1.4849, | |
| "nll_loss": 1.438443899154663, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.05676767975091934, | |
| "rewards/margins": 0.014518563635647297, | |
| "rewards/rejected": -0.07128624618053436, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 3.836104513064133, | |
| "grad_norm": 5.665014743804932, | |
| "learning_rate": 2.6546684867408412e-08, | |
| "log_odds_chosen": 0.2839321196079254, | |
| "log_odds_ratio": -0.693397045135498, | |
| "logps/chosen": -1.183774471282959, | |
| "logps/rejected": -1.4149149656295776, | |
| "loss": 1.3752, | |
| "nll_loss": 1.4539821147918701, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.05918872356414795, | |
| "rewards/margins": 0.011557026766240597, | |
| "rewards/rejected": -0.07074575871229172, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 3.89944576405384, | |
| "grad_norm": 6.5417280197143555, | |
| "learning_rate": 1.006426501190233e-08, | |
| "log_odds_chosen": 0.2721025347709656, | |
| "log_odds_ratio": -0.6981784105300903, | |
| "logps/chosen": -1.153509259223938, | |
| "logps/rejected": -1.3679428100585938, | |
| "loss": 1.2736, | |
| "nll_loss": 1.4551602602005005, | |
| "rewards/accuracies": 0.5843750238418579, | |
| "rewards/chosen": -0.05767546221613884, | |
| "rewards/margins": 0.010721677914261818, | |
| "rewards/rejected": -0.06839713454246521, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 3.9627870150435474, | |
| "grad_norm": 6.849119186401367, | |
| "learning_rate": 1.4161041661667208e-09, | |
| "log_odds_chosen": 0.20419207215309143, | |
| "log_odds_ratio": -0.7056440114974976, | |
| "logps/chosen": -1.1733808517456055, | |
| "logps/rejected": -1.3275495767593384, | |
| "loss": 1.2192, | |
| "nll_loss": 1.4886562824249268, | |
| "rewards/accuracies": 0.5718749761581421, | |
| "rewards/chosen": -0.058669041842222214, | |
| "rewards/margins": 0.00770843680948019, | |
| "rewards/rejected": -0.06637748330831528, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 3.9881235154394297, | |
| "step": 312, | |
| "total_flos": 0.0, | |
| "train_loss": 41.90841192007065, | |
| "train_runtime": 3951.2607, | |
| "train_samples_per_second": 5.112, | |
| "train_steps_per_second": 0.079 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 312, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |