{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9765925925925925, "eval_steps": 500, "global_step": 315, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.047407407407407405, "grad_norm": 1915.3714599609375, "learning_rate": 7.8125e-06, "log_odds_chosen": 1.65981125831604, "log_odds_ratio": -11.16843032836914, "logps/chosen": -22.020946502685547, "logps/rejected": -23.68042755126953, "loss": 320.1571, "nll_loss": 8.666691780090332, "rewards/accuracies": 0.534375011920929, "rewards/chosen": -11.010473251342773, "rewards/margins": 0.8297405242919922, "rewards/rejected": -11.840213775634766, "step": 5 }, { "epoch": 0.09481481481481481, "grad_norm": 2617.00927734375, "learning_rate": 1.5625e-05, "log_odds_chosen": 1.09341299533844, "log_odds_ratio": -8.355111122131348, "logps/chosen": -19.82636833190918, "logps/rejected": -20.919193267822266, "loss": 223.029, "nll_loss": 7.8865966796875, "rewards/accuracies": 0.528124988079071, "rewards/chosen": -9.91318416595459, "rewards/margins": 0.5464121699333191, "rewards/rejected": -10.459596633911133, "step": 10 }, { "epoch": 0.14222222222222222, "grad_norm": 1221.8326416015625, "learning_rate": 2.34375e-05, "log_odds_chosen": 4.4873456954956055, "log_odds_ratio": -6.951984405517578, "logps/chosen": -18.489765167236328, "logps/rejected": -22.975828170776367, "loss": 226.6759, "nll_loss": 8.182887077331543, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -9.244882583618164, "rewards/margins": 2.2430315017700195, "rewards/rejected": -11.487914085388184, "step": 15 }, { "epoch": 0.18962962962962962, "grad_norm": 2359.64111328125, "learning_rate": 3.125e-05, "log_odds_chosen": 0.6630983352661133, "log_odds_ratio": -8.002729415893555, "logps/chosen": -18.08315086364746, "logps/rejected": -18.74709129333496, "loss": 342.1809, "nll_loss": 8.18604564666748, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -9.04157543182373, "rewards/margins": 0.3319700062274933, "rewards/rejected": -9.37354564666748, "step": 20 }, { "epoch": 0.23703703703703705, "grad_norm": 2270.639892578125, "learning_rate": 3.90625e-05, "log_odds_chosen": 4.7319722175598145, "log_odds_ratio": -6.239144802093506, "logps/chosen": -15.107877731323242, "logps/rejected": -19.83966636657715, "loss": 59.8623, "nll_loss": 6.319466590881348, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -7.553938865661621, "rewards/margins": 2.3658950328826904, "rewards/rejected": -9.919833183288574, "step": 25 }, { "epoch": 0.28444444444444444, "grad_norm": 42054.76953125, "learning_rate": 4.6875e-05, "log_odds_chosen": 3.2522056102752686, "log_odds_ratio": -5.358423709869385, "logps/chosen": -19.751956939697266, "logps/rejected": -23.000202178955078, "loss": 210.8639, "nll_loss": 11.341104507446289, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -9.875978469848633, "rewards/margins": 1.6241226196289062, "rewards/rejected": -11.500101089477539, "step": 30 }, { "epoch": 0.33185185185185184, "grad_norm": 1386.9453125, "learning_rate": 4.998613757348784e-05, "log_odds_chosen": 2.529534339904785, "log_odds_ratio": -4.246035575866699, "logps/chosen": -15.55242919921875, "logps/rejected": -18.077491760253906, "loss": 259.2666, "nll_loss": 11.016453742980957, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -7.776214599609375, "rewards/margins": 1.2625317573547363, "rewards/rejected": -9.038745880126953, "step": 35 }, { "epoch": 0.37925925925925924, "grad_norm": 1015.165771484375, "learning_rate": 4.990147841143462e-05, "log_odds_chosen": 0.2810021936893463, "log_odds_ratio": -0.8303823471069336, "logps/chosen": -2.1894371509552, "logps/rejected": -2.4453959465026855, "loss": 75.6876, "nll_loss": 2.393183469772339, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -1.0947185754776, "rewards/margins": 0.12797939777374268, "rewards/rejected": -1.2226979732513428, "step": 40 }, { "epoch": 0.4266666666666667, "grad_norm": 1208.3837890625, "learning_rate": 4.97401218720448e-05, "log_odds_chosen": 0.16454455256462097, "log_odds_ratio": -1.0962440967559814, "logps/chosen": -2.4815239906311035, "logps/rejected": -2.6206681728363037, "loss": 61.1886, "nll_loss": 2.248060464859009, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -1.2407619953155518, "rewards/margins": 0.0695720762014389, "rewards/rejected": -1.3103340864181519, "step": 45 }, { "epoch": 0.4740740740740741, "grad_norm": 4378.82275390625, "learning_rate": 4.9502564938797946e-05, "log_odds_chosen": 0.6479941010475159, "log_odds_ratio": -1.2961242198944092, "logps/chosen": -3.2210915088653564, "logps/rejected": -3.855274200439453, "loss": 7.3654, "nll_loss": 2.6344618797302246, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -1.6105457544326782, "rewards/margins": 0.3170911371707916, "rewards/rejected": -1.9276371002197266, "step": 50 }, { "epoch": 0.5214814814814814, "grad_norm": 1226.081787109375, "learning_rate": 4.918953929490768e-05, "log_odds_chosen": 1.278847098350525, "log_odds_ratio": -2.351318836212158, "logps/chosen": -5.5019121170043945, "logps/rejected": -6.7642693519592285, "loss": 69.5193, "nll_loss": 3.3521831035614014, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -2.7509560585021973, "rewards/margins": 0.6311787366867065, "rewards/rejected": -3.3821346759796143, "step": 55 }, { "epoch": 0.5688888888888889, "grad_norm": 3815.735595703125, "learning_rate": 4.88020090697132e-05, "log_odds_chosen": 0.8591831922531128, "log_odds_ratio": -1.0965297222137451, "logps/chosen": -2.902388334274292, "logps/rejected": -3.7349720001220703, "loss": 24.2741, "nll_loss": 2.398529529571533, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -1.451194167137146, "rewards/margins": 0.41629156470298767, "rewards/rejected": -1.8674860000610352, "step": 60 }, { "epoch": 0.6162962962962963, "grad_norm": 1931.8660888671875, "learning_rate": 4.834116786912897e-05, "log_odds_chosen": 0.31718695163726807, "log_odds_ratio": -1.2613378763198853, "logps/chosen": -2.781818389892578, "logps/rejected": -3.0610077381134033, "loss": 81.3175, "nll_loss": 2.3013808727264404, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -1.390909194946289, "rewards/margins": 0.13959458470344543, "rewards/rejected": -1.5305038690567017, "step": 65 }, { "epoch": 0.6637037037037037, "grad_norm": 787.8285522460938, "learning_rate": 4.7808435099299045e-05, "log_odds_chosen": 0.5639177560806274, "log_odds_ratio": -0.8356220126152039, "logps/chosen": -2.251502752304077, "logps/rejected": -2.7904326915740967, "loss": 47.3395, "nll_loss": 2.168375015258789, "rewards/accuracies": 0.578125, "rewards/chosen": -1.1257513761520386, "rewards/margins": 0.2694648802280426, "rewards/rejected": -1.3952163457870483, "step": 70 }, { "epoch": 0.7111111111111111, "grad_norm": 2055.1884765625, "learning_rate": 4.720545159477922e-05, "log_odds_chosen": 1.0343811511993408, "log_odds_ratio": -1.943116545677185, "logps/chosen": -4.0394768714904785, "logps/rejected": -5.053744792938232, "loss": 15.8044, "nll_loss": 2.568474292755127, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.0197384357452393, "rewards/margins": 0.5071338415145874, "rewards/rejected": -2.526872396469116, "step": 75 }, { "epoch": 0.7585185185185185, "grad_norm": 3419.63525390625, "learning_rate": 4.653407456471222e-05, "log_odds_chosen": 1.538326621055603, "log_odds_ratio": -2.196194648742676, "logps/chosen": -5.185378074645996, "logps/rejected": -6.7065582275390625, "loss": 39.0928, "nll_loss": 2.9203972816467285, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -2.592689037322998, "rewards/margins": 0.7605901956558228, "rewards/rejected": -3.3532791137695312, "step": 80 }, { "epoch": 0.8059259259259259, "grad_norm": 3812.28271484375, "learning_rate": 4.579637187256222e-05, "log_odds_chosen": 2.206240177154541, "log_odds_ratio": -2.366931438446045, "logps/chosen": -6.021973609924316, "logps/rejected": -8.210701942443848, "loss": 57.2016, "nll_loss": 2.832935333251953, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -3.010986804962158, "rewards/margins": 1.0943641662597656, "rewards/rejected": -4.105350971221924, "step": 85 }, { "epoch": 0.8533333333333334, "grad_norm": 2624.8759765625, "learning_rate": 4.499461566702685e-05, "log_odds_chosen": 0.8822873830795288, "log_odds_ratio": -2.509356737136841, "logps/chosen": -4.698742389678955, "logps/rejected": -5.556033134460449, "loss": 103.7346, "nll_loss": 2.442578077316284, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.3493711948394775, "rewards/margins": 0.42864537239074707, "rewards/rejected": -2.7780165672302246, "step": 90 }, { "epoch": 0.9007407407407407, "grad_norm": 12379.837890625, "learning_rate": 4.413127538374411e-05, "log_odds_chosen": 0.29022759199142456, "log_odds_ratio": -1.0547641515731812, "logps/chosen": -2.508532762527466, "logps/rejected": -2.7733073234558105, "loss": 61.9094, "nll_loss": 2.3397486209869385, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -1.254266381263733, "rewards/margins": 0.1323871910572052, "rewards/rejected": -1.3866536617279053, "step": 95 }, { "epoch": 0.9481481481481482, "grad_norm": 2379.628173828125, "learning_rate": 4.320901013934887e-05, "log_odds_chosen": 0.09418745338916779, "log_odds_ratio": -1.3399364948272705, "logps/chosen": -2.7042553424835205, "logps/rejected": -2.785879373550415, "loss": 86.4669, "nll_loss": 2.2360591888427734, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -1.3521276712417603, "rewards/margins": 0.040812067687511444, "rewards/rejected": -1.3929396867752075, "step": 100 }, { "epoch": 0.9955555555555555, "grad_norm": 1130.39892578125, "learning_rate": 4.223066054130568e-05, "log_odds_chosen": 0.4360111653804779, "log_odds_ratio": -0.8215211629867554, "logps/chosen": -2.013885498046875, "logps/rejected": -2.4152350425720215, "loss": 57.9738, "nll_loss": 2.038440704345703, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -1.0069427490234375, "rewards/margins": 0.2006748616695404, "rewards/rejected": -1.2076175212860107, "step": 105 }, { "epoch": 1.037925925925926, "grad_norm": 3500.71533203125, "learning_rate": 4.1199239938743797e-05, "log_odds_chosen": 0.5743904709815979, "log_odds_ratio": -0.8129003047943115, "logps/chosen": -1.8548295497894287, "logps/rejected": -2.392976760864258, "loss": 30.7734, "nll_loss": 1.9196120500564575, "rewards/accuracies": 0.5769230723381042, "rewards/chosen": -0.9274147748947144, "rewards/margins": 0.2690735161304474, "rewards/rejected": -1.196488380432129, "step": 110 }, { "epoch": 1.0853333333333333, "grad_norm": 7732.44091796875, "learning_rate": 4.0117925141242174e-05, "log_odds_chosen": 0.8173832893371582, "log_odds_ratio": -1.355067253112793, "logps/chosen": -3.0429067611694336, "logps/rejected": -3.800440549850464, "loss": 32.8992, "nll_loss": 2.336174726486206, "rewards/accuracies": 0.653124988079071, "rewards/chosen": -1.5214533805847168, "rewards/margins": 0.3787666857242584, "rewards/rejected": -1.900220274925232, "step": 115 }, { "epoch": 1.1327407407407408, "grad_norm": 4230.470703125, "learning_rate": 3.899004663415084e-05, "log_odds_chosen": 1.6969165802001953, "log_odds_ratio": -1.324521541595459, "logps/chosen": -3.4614341259002686, "logps/rejected": -5.1248674392700195, "loss": -9.7343, "nll_loss": 2.7742135524749756, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -1.7307170629501343, "rewards/margins": 0.8317165374755859, "rewards/rejected": -2.5624337196350098, "step": 120 }, { "epoch": 1.1801481481481482, "grad_norm": 3288.0673828125, "learning_rate": 3.781907832058587e-05, "log_odds_chosen": 1.6559861898422241, "log_odds_ratio": -1.8177845478057861, "logps/chosen": -4.351998329162598, "logps/rejected": -5.973184108734131, "loss": 18.5195, "nll_loss": 2.8550593852996826, "rewards/accuracies": 0.6468750238418579, "rewards/chosen": -2.175999164581299, "rewards/margins": 0.8105929493904114, "rewards/rejected": -2.9865920543670654, "step": 125 }, { "epoch": 1.2275555555555555, "grad_norm": 30743.359375, "learning_rate": 3.660862682169282e-05, "log_odds_chosen": 0.7961785793304443, "log_odds_ratio": -2.717101573944092, "logps/chosen": -9.171316146850586, "logps/rejected": -9.938522338867188, "loss": 169.7847, "nll_loss": 5.80072546005249, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -4.585658073425293, "rewards/margins": 0.38360315561294556, "rewards/rejected": -4.969261169433594, "step": 130 }, { "epoch": 1.274962962962963, "grad_norm": 33299.71484375, "learning_rate": 3.5362420368134356e-05, "log_odds_chosen": 2.2118756771087646, "log_odds_ratio": -4.363597869873047, "logps/chosen": -21.064800262451172, "logps/rejected": -23.25614356994629, "loss": 276.0252, "nll_loss": 12.389029502868652, "rewards/accuracies": 0.6156250238418579, "rewards/chosen": -10.532400131225586, "rewards/margins": 1.0956722497940063, "rewards/rejected": -11.628071784973145, "step": 135 }, { "epoch": 1.3223703703703704, "grad_norm": 9129.3916015625, "learning_rate": 3.408429731701635e-05, "log_odds_chosen": -0.8955130577087402, "log_odds_ratio": -6.262269020080566, "logps/chosen": -13.717962265014648, "logps/rejected": -12.80627155303955, "loss": 237.298, "nll_loss": 6.492087364196777, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -6.858981132507324, "rewards/margins": -0.4558447003364563, "rewards/rejected": -6.403135776519775, "step": 140 }, { "epoch": 1.3697777777777778, "grad_norm": 4406.74853515625, "learning_rate": 3.2778194329621104e-05, "log_odds_chosen": 2.508333206176758, "log_odds_ratio": -1.729018211364746, "logps/chosen": -4.257506847381592, "logps/rejected": -6.73601770401001, "loss": -21.3999, "nll_loss": 2.9763360023498535, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -2.128753423690796, "rewards/margins": 1.2392549514770508, "rewards/rejected": -3.368008852005005, "step": 145 }, { "epoch": 1.417185185185185, "grad_norm": 3294.3505859375, "learning_rate": 3.144813424636031e-05, "log_odds_chosen": 0.41120272874832153, "log_odds_ratio": -2.209031581878662, "logps/chosen": -5.029679298400879, "logps/rejected": -5.439136981964111, "loss": 73.5667, "nll_loss": 3.1203856468200684, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -2.5148396492004395, "rewards/margins": 0.20472900569438934, "rewards/rejected": -2.7195684909820557, "step": 150 }, { "epoch": 1.4645925925925927, "grad_norm": 2727.369140625, "learning_rate": 3.0098213696293542e-05, "log_odds_chosen": 1.2927907705307007, "log_odds_ratio": -1.953330636024475, "logps/chosen": -5.769057273864746, "logps/rejected": -7.043553829193115, "loss": 66.51, "nll_loss": 3.7798728942871094, "rewards/accuracies": 0.59375, "rewards/chosen": -2.884528636932373, "rewards/margins": 0.6372483968734741, "rewards/rejected": -3.5217769145965576, "step": 155 }, { "epoch": 1.512, "grad_norm": 2540.416748046875, "learning_rate": 2.8732590479375165e-05, "log_odds_chosen": 1.328902006149292, "log_odds_ratio": -1.8827491998672485, "logps/chosen": -4.703896999359131, "logps/rejected": -6.023054599761963, "loss": 35.3034, "nll_loss": 3.127842664718628, "rewards/accuracies": 0.578125, "rewards/chosen": -2.3519484996795654, "rewards/margins": 0.6595786809921265, "rewards/rejected": -3.0115272998809814, "step": 160 }, { "epoch": 1.5594074074074074, "grad_norm": 2457.10205078125, "learning_rate": 2.7355470760292956e-05, "log_odds_chosen": 0.4289638102054596, "log_odds_ratio": -1.7100918292999268, "logps/chosen": -3.7828564643859863, "logps/rejected": -4.20039701461792, "loss": 72.3472, "nll_loss": 2.577268123626709, "rewards/accuracies": 0.546875, "rewards/chosen": -1.8914282321929932, "rewards/margins": 0.20877020061016083, "rewards/rejected": -2.10019850730896, "step": 165 }, { "epoch": 1.6068148148148147, "grad_norm": 2422.359130859375, "learning_rate": 2.597109611334169e-05, "log_odds_chosen": 0.3876183331012726, "log_odds_ratio": -1.1971830129623413, "logps/chosen": -2.719515800476074, "logps/rejected": -3.0870518684387207, "loss": 49.8125, "nll_loss": 2.2820823192596436, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -1.359757900238037, "rewards/margins": 0.1837681084871292, "rewards/rejected": -1.5435259342193604, "step": 170 }, { "epoch": 1.6542222222222223, "grad_norm": 601.5615234375, "learning_rate": 2.458373045823404e-05, "log_odds_chosen": 0.5851010680198669, "log_odds_ratio": -0.9167743921279907, "logps/chosen": -2.2165403366088867, "logps/rejected": -2.7729554176330566, "loss": 50.6215, "nll_loss": 2.1689133644104004, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -1.1082701683044434, "rewards/margins": 0.2782076299190521, "rewards/rejected": -1.3864777088165283, "step": 175 }, { "epoch": 1.7016296296296296, "grad_norm": 463.87890625, "learning_rate": 2.3197646927086697e-05, "log_odds_chosen": 0.9187017679214478, "log_odds_ratio": -1.155874490737915, "logps/chosen": -2.5220038890838623, "logps/rejected": -3.416661024093628, "loss": 9.5969, "nll_loss": 2.0762057304382324, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -1.2610019445419312, "rewards/margins": 0.4473283886909485, "rewards/rejected": -1.708330512046814, "step": 180 }, { "epoch": 1.749037037037037, "grad_norm": 3273.33935546875, "learning_rate": 2.1817114703032176e-05, "log_odds_chosen": 1.7500969171524048, "log_odds_ratio": -1.275059461593628, "logps/chosen": -3.396794557571411, "logps/rejected": -5.1389851570129395, "loss": -60.3306, "nll_loss": 2.238281726837158, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -1.6983972787857056, "rewards/margins": 0.8710952997207642, "rewards/rejected": -2.5694925785064697, "step": 185 }, { "epoch": 1.7964444444444445, "grad_norm": 1209.9384765625, "learning_rate": 2.0446385870993467e-05, "log_odds_chosen": 0.5481420755386353, "log_odds_ratio": -1.067756175994873, "logps/chosen": -2.3263049125671387, "logps/rejected": -2.851926803588867, "loss": 32.6693, "nll_loss": 2.037564754486084, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -1.1631524562835693, "rewards/margins": 0.26281076669692993, "rewards/rejected": -1.4259634017944336, "step": 190 }, { "epoch": 1.8438518518518519, "grad_norm": 2658.087646484375, "learning_rate": 1.9089682321121834e-05, "log_odds_chosen": 1.4211509227752686, "log_odds_ratio": -0.9490826725959778, "logps/chosen": -2.6055521965026855, "logps/rejected": -3.9891953468322754, "loss": -18.5069, "nll_loss": 2.2056031227111816, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -1.3027760982513428, "rewards/margins": 0.6918215155601501, "rewards/rejected": -1.9945976734161377, "step": 195 }, { "epoch": 1.8912592592592592, "grad_norm": 526.1814575195312, "learning_rate": 1.775118274523545e-05, "log_odds_chosen": 0.7827054858207703, "log_odds_ratio": -2.516727924346924, "logps/chosen": -4.788647651672363, "logps/rejected": -5.54547643661499, "loss": 63.2911, "nll_loss": 2.443591356277466, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -2.3943238258361816, "rewards/margins": 0.37841445207595825, "rewards/rejected": -2.772738218307495, "step": 200 }, { "epoch": 1.9386666666666668, "grad_norm": 1198.92138671875, "learning_rate": 1.643500976631037e-05, "log_odds_chosen": 0.6334174275398254, "log_odds_ratio": -1.686605453491211, "logps/chosen": -3.5008976459503174, "logps/rejected": -4.093779563903809, "loss": 57.0721, "nll_loss": 2.26170015335083, "rewards/accuracies": 0.565625011920929, "rewards/chosen": -1.7504488229751587, "rewards/margins": 0.29644104838371277, "rewards/rejected": -2.0468897819519043, "step": 205 }, { "epoch": 1.986074074074074, "grad_norm": 360.5643310546875, "learning_rate": 1.514521724066537e-05, "log_odds_chosen": 0.5420491099357605, "log_odds_ratio": -1.02804696559906, "logps/chosen": -2.0554840564727783, "logps/rejected": -2.5570180416107178, "loss": 26.4923, "nll_loss": 1.8558366298675537, "rewards/accuracies": 0.578125, "rewards/chosen": -1.0277420282363892, "rewards/margins": 0.25076690316200256, "rewards/rejected": -1.2785090208053589, "step": 210 }, { "epoch": 2.0284444444444443, "grad_norm": 10238.283203125, "learning_rate": 1.3885777771950348e-05, "log_odds_chosen": 0.5111017823219299, "log_odds_ratio": -0.8272331953048706, "logps/chosen": -1.7190930843353271, "logps/rejected": -2.203624963760376, "loss": 29.5791, "nll_loss": 1.8271044492721558, "rewards/accuracies": 0.5524475574493408, "rewards/chosen": -0.8595465421676636, "rewards/margins": 0.24226588010787964, "rewards/rejected": -1.101812481880188, "step": 215 }, { "epoch": 2.075851851851852, "grad_norm": 2326.157958984375, "learning_rate": 1.2660570475395683e-05, "log_odds_chosen": 0.5695599913597107, "log_odds_ratio": -0.9038535952568054, "logps/chosen": -1.8567044734954834, "logps/rejected": -2.3801803588867188, "loss": 42.0315, "nll_loss": 1.7851667404174805, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.9283522367477417, "rewards/margins": 0.26173779368400574, "rewards/rejected": -1.1900901794433594, "step": 220 }, { "epoch": 2.1232592592592594, "grad_norm": 1981.0318603515625, "learning_rate": 1.1473369030008974e-05, "log_odds_chosen": 0.4965842664241791, "log_odds_ratio": -0.9662116169929504, "logps/chosen": -1.9954092502593994, "logps/rejected": -2.455885648727417, "loss": 24.2707, "nll_loss": 1.8060328960418701, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.9977046251296997, "rewards/margins": 0.2302381992340088, "rewards/rejected": -1.2279428243637085, "step": 225 }, { "epoch": 2.1706666666666665, "grad_norm": 2371.164794921875, "learning_rate": 1.0327830055518842e-05, "log_odds_chosen": 0.8104255795478821, "log_odds_ratio": -0.9122349619865417, "logps/chosen": -1.928625464439392, "logps/rejected": -2.700075626373291, "loss": 0.4953, "nll_loss": 1.8198583126068115, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.964312732219696, "rewards/margins": 0.3857249617576599, "rewards/rejected": -1.3500378131866455, "step": 230 }, { "epoch": 2.218074074074074, "grad_norm": 510.7680969238281, "learning_rate": 9.227481849865235e-06, "log_odds_chosen": 0.9049872159957886, "log_odds_ratio": -0.8861944079399109, "logps/chosen": -2.1324591636657715, "logps/rejected": -2.9944934844970703, "loss": 4.731, "nll_loss": 1.871519684791565, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -1.0662295818328857, "rewards/margins": 0.4310172200202942, "rewards/rejected": -1.4972467422485352, "step": 235 }, { "epoch": 2.2654814814814817, "grad_norm": 2696.190185546875, "learning_rate": 8.175713521924978e-06, "log_odds_chosen": 0.4651837944984436, "log_odds_ratio": -1.2659015655517578, "logps/chosen": -2.480325937271118, "logps/rejected": -2.9123668670654297, "loss": 49.3961, "nll_loss": 1.8576265573501587, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -1.240162968635559, "rewards/margins": 0.21602031588554382, "rewards/rejected": -1.4561834335327148, "step": 240 }, { "epoch": 2.3128888888888888, "grad_norm": 1326.971435546875, "learning_rate": 7.1757645529443665e-06, "log_odds_chosen": 1.1274998188018799, "log_odds_ratio": -0.9266605377197266, "logps/chosen": -2.0514588356018066, "logps/rejected": -3.127443790435791, "loss": -21.5027, "nll_loss": 1.8539222478866577, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -1.0257294178009033, "rewards/margins": 0.5379923582077026, "rewards/rejected": -1.5637218952178955, "step": 245 }, { "epoch": 2.3602962962962963, "grad_norm": 4184.349609375, "learning_rate": 6.230714818829733e-06, "log_odds_chosen": 0.7846413850784302, "log_odds_ratio": -0.9509506225585938, "logps/chosen": -2.0468831062316895, "logps/rejected": -2.7701876163482666, "loss": 34.0259, "nll_loss": 1.8205235004425049, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -1.0234415531158447, "rewards/margins": 0.36165231466293335, "rewards/rejected": -1.3850938081741333, "step": 250 }, { "epoch": 2.407703703703704, "grad_norm": 72418.359375, "learning_rate": 5.343475104027743e-06, "log_odds_chosen": 0.7129810452461243, "log_odds_ratio": -1.4154326915740967, "logps/chosen": -2.65124249458313, "logps/rejected": -3.2982776165008545, "loss": 19.2975, "nll_loss": 1.873997688293457, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -1.325621247291565, "rewards/margins": 0.32351773977279663, "rewards/rejected": -1.6491388082504272, "step": 255 }, { "epoch": 2.455111111111111, "grad_norm": 6894.69677734375, "learning_rate": 4.516778136213037e-06, "log_odds_chosen": 1.0349304676055908, "log_odds_ratio": -1.0125768184661865, "logps/chosen": -2.1709659099578857, "logps/rejected": -3.1571593284606934, "loss": 5.3407, "nll_loss": 1.8703029155731201, "rewards/accuracies": 0.609375, "rewards/chosen": -1.0854829549789429, "rewards/margins": 0.4930966794490814, "rewards/rejected": -1.5785796642303467, "step": 260 }, { "epoch": 2.5025185185185186, "grad_norm": 864.9271850585938, "learning_rate": 3.7531701693965554e-06, "log_odds_chosen": 0.46679940819740295, "log_odds_ratio": -1.316929817199707, "logps/chosen": -2.535597562789917, "logps/rejected": -2.952941656112671, "loss": 71.3618, "nll_loss": 1.8557851314544678, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -1.2677987813949585, "rewards/margins": 0.20867201685905457, "rewards/rejected": -1.4764708280563354, "step": 265 }, { "epoch": 2.549925925925926, "grad_norm": 3560.4990234375, "learning_rate": 3.055003141378948e-06, "log_odds_chosen": 1.4798504114151, "log_odds_ratio": -1.3126966953277588, "logps/chosen": -2.62119722366333, "logps/rejected": -4.038577079772949, "loss": -37.0258, "nll_loss": 1.893930435180664, "rewards/accuracies": 0.621874988079071, "rewards/chosen": -1.310598611831665, "rewards/margins": 0.7086899876594543, "rewards/rejected": -2.0192885398864746, "step": 270 }, { "epoch": 2.5973333333333333, "grad_norm": 1490.2098388671875, "learning_rate": 2.424427429704365e-06, "log_odds_chosen": 0.9033193588256836, "log_odds_ratio": -1.1426560878753662, "logps/chosen": -2.5527491569519043, "logps/rejected": -3.397388458251953, "loss": 29.2458, "nll_loss": 1.9291893243789673, "rewards/accuracies": 0.6156250238418579, "rewards/chosen": -1.2763745784759521, "rewards/margins": 0.42231959104537964, "rewards/rejected": -1.6986942291259766, "step": 275 }, { "epoch": 2.644740740740741, "grad_norm": 10416.1259765625, "learning_rate": 1.8633852284264508e-06, "log_odds_chosen": 0.7765440940856934, "log_odds_ratio": -1.2765750885009766, "logps/chosen": -2.5416433811187744, "logps/rejected": -3.276179552078247, "loss": 16.2592, "nll_loss": 1.885671615600586, "rewards/accuracies": 0.578125, "rewards/chosen": -1.2708216905593872, "rewards/margins": 0.3672682046890259, "rewards/rejected": -1.6380897760391235, "step": 280 }, { "epoch": 2.6921481481481484, "grad_norm": 987.6349487304688, "learning_rate": 1.3736045660864034e-06, "log_odds_chosen": 1.4070631265640259, "log_odds_ratio": -0.9240388870239258, "logps/chosen": -2.1706430912017822, "logps/rejected": -3.544604778289795, "loss": -18.614, "nll_loss": 1.9531824588775635, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -1.0853215456008911, "rewards/margins": 0.6869809031486511, "rewards/rejected": -1.7723023891448975, "step": 285 }, { "epoch": 2.7395555555555555, "grad_norm": 1785.149658203125, "learning_rate": 9.565939833279192e-07, "log_odds_chosen": 1.1818147897720337, "log_odds_ratio": -1.5213770866394043, "logps/chosen": -2.8310532569885254, "logps/rejected": -3.944000244140625, "loss": -20.8544, "nll_loss": 1.9278194904327393, "rewards/accuracies": 0.621874988079071, "rewards/chosen": -1.4155266284942627, "rewards/margins": 0.5564736127853394, "rewards/rejected": -1.9720001220703125, "step": 290 }, { "epoch": 2.786962962962963, "grad_norm": 3843.544677734375, "learning_rate": 6.136378865420872e-07, "log_odds_chosen": 0.7062476277351379, "log_odds_ratio": -1.199745535850525, "logps/chosen": -2.419466495513916, "logps/rejected": -3.0821032524108887, "loss": 9.6821, "nll_loss": 1.8187296390533447, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -1.209733247756958, "rewards/margins": 0.3313182294368744, "rewards/rejected": -1.5410516262054443, "step": 295 }, { "epoch": 2.83437037037037, "grad_norm": 1985.0562744140625, "learning_rate": 3.45792591853214e-07, "log_odds_chosen": 1.1894285678863525, "log_odds_ratio": -1.35343337059021, "logps/chosen": -2.6638786792755127, "logps/rejected": -3.7862019538879395, "loss": 5.4621, "nll_loss": 1.938586950302124, "rewards/accuracies": 0.625, "rewards/chosen": -1.3319393396377563, "rewards/margins": 0.5611615777015686, "rewards/rejected": -1.8931009769439697, "step": 300 }, { "epoch": 2.8817777777777778, "grad_norm": 1326.342529296875, "learning_rate": 1.538830716302092e-07, "log_odds_chosen": 2.1142306327819824, "log_odds_ratio": -0.9648601412773132, "logps/chosen": -2.3394112586975098, "logps/rejected": -4.382277011871338, "loss": -80.5969, "nll_loss": 1.8355882167816162, "rewards/accuracies": 0.659375011920929, "rewards/chosen": -1.1697056293487549, "rewards/margins": 1.0214331150054932, "rewards/rejected": -2.191138505935669, "step": 305 }, { "epoch": 2.9291851851851853, "grad_norm": 3704.05322265625, "learning_rate": 3.8500413544415025e-08, "log_odds_chosen": 1.72466242313385, "log_odds_ratio": -1.052673578262329, "logps/chosen": -2.2251315116882324, "logps/rejected": -3.870131731033325, "loss": -35.5622, "nll_loss": 1.8819787502288818, "rewards/accuracies": 0.684374988079071, "rewards/chosen": -1.1125657558441162, "rewards/margins": 0.8225000500679016, "rewards/rejected": -1.9350658655166626, "step": 310 }, { "epoch": 2.9765925925925925, "grad_norm": 492.51934814453125, "learning_rate": 0.0, "log_odds_chosen": 1.213081955909729, "log_odds_ratio": -1.0762965679168701, "logps/chosen": -2.213444232940674, "logps/rejected": -3.3574657440185547, "loss": -9.4288, "nll_loss": 1.8365122079849243, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -1.106722116470337, "rewards/margins": 0.5720106363296509, "rewards/rejected": -1.6787328720092773, "step": 315 }, { "epoch": 2.9765925925925925, "step": 315, "total_flos": 0.0, "train_loss": 59.38279808892144, "train_runtime": 9551.9831, "train_samples_per_second": 2.12, "train_steps_per_second": 0.033 } ], "logging_steps": 5, "max_steps": 315, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }