{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.986666666666667, "eval_steps": 500, "global_step": 315, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.047407407407407405, "grad_norm": 270.0, "learning_rate": 9.375000000000001e-06, "log_odds_chosen": 0.41771596670150757, "log_odds_ratio": -0.7694265246391296, "logits/chosen": -2.967926502227783, "logits/rejected": -2.8778510093688965, "logps/chosen": -1.2910274267196655, "logps/rejected": -1.6328433752059937, "loss": 51.9175, "nll_loss": 1.511154294013977, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.06455137580633163, "rewards/margins": 0.017090797424316406, "rewards/rejected": -0.08164217323064804, "step": 5 }, { "epoch": 0.09481481481481481, "grad_norm": 61.75, "learning_rate": 1.8750000000000002e-05, "log_odds_chosen": 0.2715613842010498, "log_odds_ratio": -0.7063366174697876, "logits/chosen": -2.903649091720581, "logits/rejected": -2.737760066986084, "logps/chosen": -1.0549781322479248, "logps/rejected": -1.2600512504577637, "loss": 47.142, "nll_loss": 1.388285517692566, "rewards/accuracies": 0.59375, "rewards/chosen": -0.0527489073574543, "rewards/margins": 0.010253657586872578, "rewards/rejected": -0.0630025640130043, "step": 10 }, { "epoch": 0.14222222222222222, "grad_norm": 71.0, "learning_rate": 2.8125e-05, "log_odds_chosen": 0.26208820939064026, "log_odds_ratio": -0.6782652139663696, "logits/chosen": -2.5858330726623535, "logits/rejected": -2.4748170375823975, "logps/chosen": -0.921225368976593, "logps/rejected": -1.081853985786438, "loss": 46.5682, "nll_loss": 1.4536203145980835, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.04606126993894577, "rewards/margins": 0.008031422272324562, "rewards/rejected": -0.05409269407391548, "step": 15 }, { "epoch": 0.18962962962962962, "grad_norm": 92.0, "learning_rate": 3.7500000000000003e-05, "log_odds_chosen": 0.18918053805828094, "log_odds_ratio": -0.7011532783508301, "logits/chosen": -2.4796886444091797, "logits/rejected": -2.3676044940948486, "logps/chosen": -0.8980884552001953, "logps/rejected": -1.0366885662078857, "loss": 43.8254, "nll_loss": 1.3132244348526, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.044904422014951706, "rewards/margins": 0.00693000853061676, "rewards/rejected": -0.05183442682027817, "step": 20 }, { "epoch": 0.23703703703703705, "grad_norm": 39.75, "learning_rate": 4.6875e-05, "log_odds_chosen": 0.20047792792320251, "log_odds_ratio": -0.704607367515564, "logits/chosen": -2.486077070236206, "logits/rejected": -2.380354881286621, "logps/chosen": -0.9169360995292664, "logps/rejected": -1.0517938137054443, "loss": 41.7393, "nll_loss": 1.3125925064086914, "rewards/accuracies": 0.5625, "rewards/chosen": -0.04584680497646332, "rewards/margins": 0.006742885801941156, "rewards/rejected": -0.052589692175388336, "step": 25 }, { "epoch": 0.28444444444444444, "grad_norm": 48.25, "learning_rate": 5.625e-05, "log_odds_chosen": 0.13125675916671753, "log_odds_ratio": -0.714677631855011, "logits/chosen": -2.498034954071045, "logits/rejected": -2.1313350200653076, "logps/chosen": -0.8735687136650085, "logps/rejected": -0.9592132568359375, "loss": 41.493, "nll_loss": 1.2559503316879272, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.04367843270301819, "rewards/margins": 0.004282232839614153, "rewards/rejected": -0.047960661351680756, "step": 30 }, { "epoch": 0.33185185185185184, "grad_norm": 39.0, "learning_rate": 5.998336508818541e-05, "log_odds_chosen": 0.06586463749408722, "log_odds_ratio": -0.7540255188941956, "logits/chosen": -2.3031373023986816, "logits/rejected": -2.4485721588134766, "logps/chosen": -0.8969869613647461, "logps/rejected": -0.9382694363594055, "loss": 40.5282, "nll_loss": 1.2521088123321533, "rewards/accuracies": 0.53125, "rewards/chosen": -0.04484934359788895, "rewards/margins": 0.0020641214214265347, "rewards/rejected": -0.04691346734762192, "step": 35 }, { "epoch": 0.37925925925925924, "grad_norm": 35.25, "learning_rate": 5.988177409372154e-05, "log_odds_chosen": 0.21956849098205566, "log_odds_ratio": -0.6834356188774109, "logits/chosen": -2.2723240852355957, "logits/rejected": -2.065520763397217, "logps/chosen": -0.8550432324409485, "logps/rejected": -0.9936239123344421, "loss": 40.1999, "nll_loss": 1.2111032009124756, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.04275216534733772, "rewards/margins": 0.006929035298526287, "rewards/rejected": -0.049681201577186584, "step": 40 }, { "epoch": 0.4266666666666667, "grad_norm": 33.0, "learning_rate": 5.968814624645376e-05, "log_odds_chosen": 0.1635906845331192, "log_odds_ratio": -0.7335126996040344, "logits/chosen": -1.9793834686279297, "logits/rejected": -2.0152127742767334, "logps/chosen": -0.8692102432250977, "logps/rejected": -0.975513756275177, "loss": 40.1299, "nll_loss": 1.2306907176971436, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.04346051067113876, "rewards/margins": 0.005315178073942661, "rewards/rejected": -0.04877568781375885, "step": 45 }, { "epoch": 0.4740740740740741, "grad_norm": 39.75, "learning_rate": 5.9403077926557534e-05, "log_odds_chosen": 0.16285523772239685, "log_odds_ratio": -0.7225431203842163, "logits/chosen": -1.9700477123260498, "logits/rejected": -1.93939208984375, "logps/chosen": -0.9150403738021851, "logps/rejected": -1.005976676940918, "loss": 42.3638, "nll_loss": 1.3180006742477417, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.04575202241539955, "rewards/margins": 0.004546813666820526, "rewards/rejected": -0.05029883235692978, "step": 50 }, { "epoch": 0.5214814814814814, "grad_norm": 32.5, "learning_rate": 5.9027447153889215e-05, "log_odds_chosen": 0.074959896504879, "log_odds_ratio": -0.7347756624221802, "logits/chosen": -1.8091471195220947, "logits/rejected": -1.627111792564392, "logps/chosen": -0.8783036470413208, "logps/rejected": -0.9295312166213989, "loss": 39.4889, "nll_loss": 1.2200506925582886, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.04391518235206604, "rewards/margins": 0.002561377827078104, "rewards/rejected": -0.046476561576128006, "step": 55 }, { "epoch": 0.5688888888888889, "grad_norm": 31.875, "learning_rate": 5.856241088365584e-05, "log_odds_chosen": 0.21836993098258972, "log_odds_ratio": -0.6648741960525513, "logits/chosen": -2.2059853076934814, "logits/rejected": -1.8393570184707642, "logps/chosen": -0.8266533613204956, "logps/rejected": -0.944961428642273, "loss": 38.4828, "nll_loss": 1.1561448574066162, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0413326658308506, "rewards/margins": 0.005915405694395304, "rewards/rejected": -0.04724807292222977, "step": 60 }, { "epoch": 0.6162962962962963, "grad_norm": 31.75, "learning_rate": 5.800940144295476e-05, "log_odds_chosen": 0.14650335907936096, "log_odds_ratio": -0.7161463499069214, "logits/chosen": -1.9598195552825928, "logits/rejected": -1.8855581283569336, "logps/chosen": -0.890237033367157, "logps/rejected": -0.9888992309570312, "loss": 38.4474, "nll_loss": 1.1701605319976807, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.04451185464859009, "rewards/margins": 0.004933114163577557, "rewards/rejected": -0.04944496601819992, "step": 65 }, { "epoch": 0.6637037037037037, "grad_norm": 31.25, "learning_rate": 5.7370122119158855e-05, "log_odds_chosen": 0.2176527976989746, "log_odds_ratio": -0.6846314072608948, "logits/chosen": -2.401538610458374, "logits/rejected": -1.8277839422225952, "logps/chosen": -0.8448864817619324, "logps/rejected": -1.0054863691329956, "loss": 38.0337, "nll_loss": 1.1650002002716064, "rewards/accuracies": 0.59375, "rewards/chosen": -0.0422443225979805, "rewards/margins": 0.00802999921143055, "rewards/rejected": -0.0502743236720562, "step": 70 }, { "epoch": 0.7111111111111111, "grad_norm": 34.5, "learning_rate": 5.6646541913735056e-05, "log_odds_chosen": 0.34958410263061523, "log_odds_ratio": -0.6144381761550903, "logits/chosen": -1.9800045490264893, "logits/rejected": -2.1104648113250732, "logps/chosen": -0.7909008264541626, "logps/rejected": -1.0040924549102783, "loss": 38.1687, "nll_loss": 1.1795135736465454, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.03954503685235977, "rewards/margins": 0.01065958570688963, "rewards/rejected": -0.050204623490571976, "step": 75 }, { "epoch": 0.7585185185185185, "grad_norm": 30.75, "learning_rate": 5.5840889477654665e-05, "log_odds_chosen": 0.25651440024375916, "log_odds_ratio": -0.6959986686706543, "logits/chosen": -2.3197991847991943, "logits/rejected": -1.9464877843856812, "logps/chosen": -0.8813208341598511, "logps/rejected": -1.0397270917892456, "loss": 37.8718, "nll_loss": 1.1892088651657104, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.04406604543328285, "rewards/margins": 0.007920312695205212, "rewards/rejected": -0.05198635905981064, "step": 80 }, { "epoch": 0.8059259259259259, "grad_norm": 26.75, "learning_rate": 5.495564624707466e-05, "log_odds_chosen": 0.24601168930530548, "log_odds_ratio": -0.6635450720787048, "logits/chosen": -2.3646152019500732, "logits/rejected": -1.6053569316864014, "logps/chosen": -0.825157642364502, "logps/rejected": -0.9831274151802063, "loss": 37.7402, "nll_loss": 1.1433099508285522, "rewards/accuracies": 0.625, "rewards/chosen": -0.04125788062810898, "rewards/margins": 0.007898489013314247, "rewards/rejected": -0.04915637522935867, "step": 85 }, { "epoch": 0.8533333333333334, "grad_norm": 32.25, "learning_rate": 5.399353880043222e-05, "log_odds_chosen": 0.2743232548236847, "log_odds_ratio": -0.6547017097473145, "logits/chosen": -2.2998695373535156, "logits/rejected": -1.9147183895111084, "logps/chosen": -0.7987005710601807, "logps/rejected": -0.9570469856262207, "loss": 39.0598, "nll_loss": 1.165475606918335, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.03993503004312515, "rewards/margins": 0.00791732408106327, "rewards/rejected": -0.047852352261543274, "step": 90 }, { "epoch": 0.9007407407407407, "grad_norm": 27.875, "learning_rate": 5.295753046049293e-05, "log_odds_chosen": 0.3104208707809448, "log_odds_ratio": -0.6332544088363647, "logits/chosen": -2.3582139015197754, "logits/rejected": -1.8759187459945679, "logps/chosen": -0.7584289908409119, "logps/rejected": -0.9375408887863159, "loss": 38.0891, "nll_loss": 1.0971782207489014, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.037921447306871414, "rewards/margins": 0.00895559974014759, "rewards/rejected": -0.046877048909664154, "step": 95 }, { "epoch": 0.9481481481481482, "grad_norm": 28.0, "learning_rate": 5.1850812167218644e-05, "log_odds_chosen": 0.14483532309532166, "log_odds_ratio": -0.725937008857727, "logits/chosen": -2.2758800983428955, "logits/rejected": -1.807739019393921, "logps/chosen": -0.8715543746948242, "logps/rejected": -0.967276394367218, "loss": 38.3176, "nll_loss": 1.192031979560852, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.04357772320508957, "rewards/margins": 0.004786101635545492, "rewards/rejected": -0.0483638234436512, "step": 100 }, { "epoch": 0.9955555555555555, "grad_norm": 27.5, "learning_rate": 5.067679264956681e-05, "log_odds_chosen": 0.2537747621536255, "log_odds_ratio": -0.6505337953567505, "logits/chosen": -2.356247901916504, "logits/rejected": -1.834238052368164, "logps/chosen": -0.8050671815872192, "logps/rejected": -0.9783474206924438, "loss": 37.2872, "nll_loss": 1.1223804950714111, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.04025335982441902, "rewards/margins": 0.008664008229970932, "rewards/rejected": -0.04891737177968025, "step": 105 }, { "epoch": 1.0429629629629629, "grad_norm": 26.625, "learning_rate": 4.943908792649255e-05, "log_odds_chosen": 0.7013475298881531, "log_odds_ratio": -0.5124364495277405, "logits/chosen": -2.1879847049713135, "logits/rejected": -1.7397973537445068, "logps/chosen": -0.6286161541938782, "logps/rejected": -0.9900426864624023, "loss": 31.0738, "nll_loss": 0.9219255447387695, "rewards/accuracies": 0.75, "rewards/chosen": -0.03143080696463585, "rewards/margins": 0.018071329221129417, "rewards/rejected": -0.049502138048410416, "step": 110 }, { "epoch": 1.0903703703703704, "grad_norm": 30.25, "learning_rate": 4.814151016949061e-05, "log_odds_chosen": 0.9226773977279663, "log_odds_ratio": -0.42190057039260864, "logits/chosen": -2.087578058242798, "logits/rejected": -1.6346263885498047, "logps/chosen": -0.5833232998847961, "logps/rejected": -1.0628697872161865, "loss": 30.0304, "nll_loss": 0.8877116441726685, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.02916616201400757, "rewards/margins": 0.02397732436656952, "rewards/rejected": -0.053143490105867386, "step": 115 }, { "epoch": 1.1377777777777778, "grad_norm": 28.375, "learning_rate": 4.6788055960981e-05, "log_odds_chosen": 0.9922162294387817, "log_odds_ratio": -0.40503960847854614, "logits/chosen": -2.222867488861084, "logits/rejected": -1.8629436492919922, "logps/chosen": -0.5679124593734741, "logps/rejected": -1.079012155532837, "loss": 30.4758, "nll_loss": 0.9042154550552368, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.028395622968673706, "rewards/margins": 0.02555498108267784, "rewards/rejected": -0.053950607776641846, "step": 120 }, { "epoch": 1.1851851851851851, "grad_norm": 27.625, "learning_rate": 4.538289398470304e-05, "log_odds_chosen": 0.9345417022705078, "log_odds_ratio": -0.4391079545021057, "logits/chosen": -2.159498453140259, "logits/rejected": -1.9323101043701172, "logps/chosen": -0.6106966733932495, "logps/rejected": -1.0727020502090454, "loss": 28.3606, "nll_loss": 0.874626636505127, "rewards/accuracies": 0.8125, "rewards/chosen": -0.030534833669662476, "rewards/margins": 0.023100275546312332, "rewards/rejected": -0.05363510921597481, "step": 125 }, { "epoch": 1.2325925925925927, "grad_norm": 25.75, "learning_rate": 4.393035218603139e-05, "log_odds_chosen": 0.7891913652420044, "log_odds_ratio": -0.4749962389469147, "logits/chosen": -2.1488184928894043, "logits/rejected": -1.8161399364471436, "logps/chosen": -0.6154332160949707, "logps/rejected": -1.0038516521453857, "loss": 29.5153, "nll_loss": 0.9258828163146973, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.030771661549806595, "rewards/margins": 0.019420918077230453, "rewards/rejected": -0.05019258335232735, "step": 130 }, { "epoch": 1.28, "grad_norm": 26.125, "learning_rate": 4.243490444176123e-05, "log_odds_chosen": 0.912939190864563, "log_odds_ratio": -0.4465225338935852, "logits/chosen": -2.023993492126465, "logits/rejected": -1.8624738454818726, "logps/chosen": -0.5600326061248779, "logps/rejected": -1.0113087892532349, "loss": 29.4182, "nll_loss": 0.8695603609085083, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.028001630678772926, "rewards/margins": 0.022563805803656578, "rewards/rejected": -0.0505654402077198, "step": 135 }, { "epoch": 1.3274074074074074, "grad_norm": 24.875, "learning_rate": 4.090115678041962e-05, "log_odds_chosen": 0.8396116495132446, "log_odds_ratio": -0.47167715430259705, "logits/chosen": -1.9779258966445923, "logits/rejected": -1.8630996942520142, "logps/chosen": -0.6393855214118958, "logps/rejected": -1.073813557624817, "loss": 30.4644, "nll_loss": 0.9436683654785156, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.031969279050827026, "rewards/margins": 0.021721404045820236, "rewards/rejected": -0.053690679371356964, "step": 140 }, { "epoch": 1.374814814814815, "grad_norm": 27.0, "learning_rate": 3.9333833195545325e-05, "log_odds_chosen": 0.8570321798324585, "log_odds_ratio": -0.4445961117744446, "logits/chosen": -2.178088903427124, "logits/rejected": -1.742640495300293, "logps/chosen": -0.6361523270606995, "logps/rejected": -1.0977928638458252, "loss": 29.9436, "nll_loss": 0.9138515591621399, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.03180761635303497, "rewards/margins": 0.023082025349140167, "rewards/rejected": -0.05488964170217514, "step": 145 }, { "epoch": 1.4222222222222223, "grad_norm": 28.75, "learning_rate": 3.7737761095632374e-05, "log_odds_chosen": 0.8333398699760437, "log_odds_ratio": -0.4819715619087219, "logits/chosen": -2.0540931224823, "logits/rejected": -2.0431177616119385, "logps/chosen": -0.5949512124061584, "logps/rejected": -0.9885191917419434, "loss": 29.3897, "nll_loss": 0.8951548337936401, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.029747556895017624, "rewards/margins": 0.019678404554724693, "rewards/rejected": -0.049425967037677765, "step": 150 }, { "epoch": 1.4696296296296296, "grad_norm": 28.5, "learning_rate": 3.611785643555225e-05, "log_odds_chosen": 0.8982599377632141, "log_odds_ratio": -0.44926947355270386, "logits/chosen": -2.2695367336273193, "logits/rejected": -1.7630071640014648, "logps/chosen": -0.6012392044067383, "logps/rejected": -1.050581693649292, "loss": 29.8918, "nll_loss": 0.9108700752258301, "rewards/accuracies": 0.78125, "rewards/chosen": -0.030061960220336914, "rewards/margins": 0.022467125207185745, "rewards/rejected": -0.05252908915281296, "step": 155 }, { "epoch": 1.5170370370370372, "grad_norm": 30.625, "learning_rate": 3.44791085752502e-05, "log_odds_chosen": 0.8571161031723022, "log_odds_ratio": -0.4356165826320648, "logits/chosen": -2.093273639678955, "logits/rejected": -2.1345930099487305, "logps/chosen": -0.6451684236526489, "logps/rejected": -1.1003749370574951, "loss": 30.6067, "nll_loss": 0.9544156193733215, "rewards/accuracies": 0.84375, "rewards/chosen": -0.03225841745734215, "rewards/margins": 0.02276032790541649, "rewards/rejected": -0.055018745362758636, "step": 160 }, { "epoch": 1.5644444444444443, "grad_norm": 25.0, "learning_rate": 3.2826564912351544e-05, "log_odds_chosen": 0.9319137334823608, "log_odds_ratio": -0.43036922812461853, "logits/chosen": -1.9680538177490234, "logits/rejected": -2.1300835609436035, "logps/chosen": -0.6165143251419067, "logps/rejected": -1.106245994567871, "loss": 29.5351, "nll_loss": 0.8909838795661926, "rewards/accuracies": 0.8125, "rewards/chosen": -0.030825715512037277, "rewards/margins": 0.024486582726240158, "rewards/rejected": -0.055312298238277435, "step": 165 }, { "epoch": 1.6118518518518519, "grad_norm": 27.0, "learning_rate": 3.116531533601003e-05, "log_odds_chosen": 1.0328500270843506, "log_odds_ratio": -0.4052696228027344, "logits/chosen": -2.1744275093078613, "logits/rejected": -1.9558875560760498, "logps/chosen": -0.587931752204895, "logps/rejected": -1.127718448638916, "loss": 29.1635, "nll_loss": 0.9016565084457397, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.02939658798277378, "rewards/margins": 0.02698933705687523, "rewards/rejected": -0.05638592690229416, "step": 170 }, { "epoch": 1.6592592592592592, "grad_norm": 29.875, "learning_rate": 2.9500476549880848e-05, "log_odds_chosen": 0.8978468179702759, "log_odds_ratio": -0.45074257254600525, "logits/chosen": -1.9579814672470093, "logits/rejected": -1.6362476348876953, "logps/chosen": -0.5840794444084167, "logps/rejected": -1.0381691455841064, "loss": 28.8906, "nll_loss": 0.8974016308784485, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.029203975573182106, "rewards/margins": 0.022704491391777992, "rewards/rejected": -0.0519084632396698, "step": 175 }, { "epoch": 1.7066666666666666, "grad_norm": 31.75, "learning_rate": 2.7837176312504037e-05, "log_odds_chosen": 0.804090678691864, "log_odds_ratio": -0.46772366762161255, "logits/chosen": -1.7048003673553467, "logits/rejected": -1.6802536249160767, "logps/chosen": -0.6104881167411804, "logps/rejected": -1.00138521194458, "loss": 29.964, "nll_loss": 0.9319046139717102, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.03052440844476223, "rewards/margins": 0.019544851034879684, "rewards/rejected": -0.050069261342287064, "step": 180 }, { "epoch": 1.7540740740740741, "grad_norm": 24.25, "learning_rate": 2.618053764363861e-05, "log_odds_chosen": 0.9093812108039856, "log_odds_ratio": -0.4248902201652527, "logits/chosen": -2.1142642498016357, "logits/rejected": -1.8935844898223877, "logps/chosen": -0.5908008813858032, "logps/rejected": -1.051133632659912, "loss": 29.4169, "nll_loss": 0.8831952810287476, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.029540037736296654, "rewards/margins": 0.023016640916466713, "rewards/rejected": -0.052556682378053665, "step": 185 }, { "epoch": 1.8014814814814815, "grad_norm": 26.5, "learning_rate": 2.453566304519216e-05, "log_odds_chosen": 0.9450467228889465, "log_odds_ratio": -0.4345301687717438, "logits/chosen": -2.1107406616210938, "logits/rejected": -1.6903254985809326, "logps/chosen": -0.6346092820167542, "logps/rejected": -1.1103118658065796, "loss": 30.3828, "nll_loss": 0.9147791862487793, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.03173046559095383, "rewards/margins": 0.02378513291478157, "rewards/rejected": -0.0555155873298645, "step": 190 }, { "epoch": 1.8488888888888888, "grad_norm": 30.5, "learning_rate": 2.29076187853462e-05, "log_odds_chosen": 0.9741055369377136, "log_odds_ratio": -0.4274386465549469, "logits/chosen": -2.0201268196105957, "logits/rejected": -1.357716679573059, "logps/chosen": -0.5966172218322754, "logps/rejected": -1.1088060140609741, "loss": 29.2191, "nll_loss": 0.882198691368103, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.02983086369931698, "rewards/margins": 0.025609437376260757, "rewards/rejected": -0.05544029921293259, "step": 195 }, { "epoch": 1.8962962962962964, "grad_norm": 27.625, "learning_rate": 2.130141929428254e-05, "log_odds_chosen": 0.8030783534049988, "log_odds_ratio": -0.4836719036102295, "logits/chosen": -2.072028875350952, "logits/rejected": -1.7954127788543701, "logps/chosen": -0.6211769580841064, "logps/rejected": -1.0174511671066284, "loss": 31.2309, "nll_loss": 0.9159282445907593, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.031058847904205322, "rewards/margins": 0.01981370709836483, "rewards/rejected": -0.050872553139925, "step": 200 }, { "epoch": 1.9437037037037037, "grad_norm": 25.5, "learning_rate": 1.9722011719572444e-05, "log_odds_chosen": 0.8332887887954712, "log_odds_ratio": -0.46110400557518005, "logits/chosen": -2.209178924560547, "logits/rejected": -1.4569910764694214, "logps/chosen": -0.614986777305603, "logps/rejected": -1.0477509498596191, "loss": 28.0719, "nll_loss": 0.866260826587677, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03074934147298336, "rewards/margins": 0.021638209000229836, "rewards/rejected": -0.0523875467479229, "step": 205 }, { "epoch": 1.991111111111111, "grad_norm": 27.375, "learning_rate": 1.8174260688798445e-05, "log_odds_chosen": 0.7869575619697571, "log_odds_ratio": -0.4784061312675476, "logits/chosen": -1.8811867237091064, "logits/rejected": -2.0677828788757324, "logps/chosen": -0.5915592908859253, "logps/rejected": -0.9474382400512695, "loss": 28.0122, "nll_loss": 0.8690091967582703, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.029577964916825294, "rewards/margins": 0.017793944105505943, "rewards/rejected": -0.04737190902233124, "step": 210 }, { "epoch": 2.0385185185185186, "grad_norm": 23.75, "learning_rate": 1.666293332634042e-05, "log_odds_chosen": 1.401928186416626, "log_odds_ratio": -0.33335039019584656, "logits/chosen": -1.884316086769104, "logits/rejected": -1.4572067260742188, "logps/chosen": -0.4979814887046814, "logps/rejected": -1.1203409433364868, "loss": 24.9695, "nll_loss": 0.7534885406494141, "rewards/accuracies": 0.887499988079071, "rewards/chosen": -0.02489907667040825, "rewards/margins": 0.03111797571182251, "rewards/rejected": -0.05601705238223076, "step": 215 }, { "epoch": 2.0859259259259257, "grad_norm": 38.5, "learning_rate": 1.519268457047482e-05, "log_odds_chosen": 1.6839864253997803, "log_odds_ratio": -0.2904171049594879, "logits/chosen": -1.8016027212142944, "logits/rejected": -1.8718116283416748, "logps/chosen": -0.4482901096343994, "logps/rejected": -1.188301682472229, "loss": 23.232, "nll_loss": 0.7302739024162292, "rewards/accuracies": 0.887499988079071, "rewards/chosen": -0.02241450548171997, "rewards/margins": 0.03700058162212372, "rewards/rejected": -0.05941509082913399, "step": 220 }, { "epoch": 2.1333333333333333, "grad_norm": 27.5, "learning_rate": 1.3768042836010768e-05, "log_odds_chosen": 1.6373440027236938, "log_odds_ratio": -0.2984515130519867, "logits/chosen": -1.8258718252182007, "logits/rejected": -1.6223289966583252, "logps/chosen": -0.44031524658203125, "logps/rejected": -1.1581284999847412, "loss": 24.139, "nll_loss": 0.7237830758094788, "rewards/accuracies": 0.90625, "rewards/chosen": -0.022015761584043503, "rewards/margins": 0.035890672355890274, "rewards/rejected": -0.05790643021464348, "step": 225 }, { "epoch": 2.180740740740741, "grad_norm": 27.5, "learning_rate": 1.239339606662261e-05, "log_odds_chosen": 1.801990270614624, "log_odds_ratio": -0.25359493494033813, "logits/chosen": -1.93035089969635, "logits/rejected": -1.6016725301742554, "logps/chosen": -0.4278429448604584, "logps/rejected": -1.2159802913665771, "loss": 22.8267, "nll_loss": 0.7034687995910645, "rewards/accuracies": 0.9312499761581421, "rewards/chosen": -0.02139214798808098, "rewards/margins": 0.039406873285770416, "rewards/rejected": -0.060799021273851395, "step": 230 }, { "epoch": 2.228148148148148, "grad_norm": 34.5, "learning_rate": 1.1072978219838283e-05, "log_odds_chosen": 1.565932035446167, "log_odds_ratio": -0.3256310820579529, "logits/chosen": -1.9141308069229126, "logits/rejected": -1.976017951965332, "logps/chosen": -0.4726741313934326, "logps/rejected": -1.1302521228790283, "loss": 23.1599, "nll_loss": 0.7224346399307251, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.02363370731472969, "rewards/margins": 0.03287890553474426, "rewards/rejected": -0.05651261284947395, "step": 235 }, { "epoch": 2.2755555555555556, "grad_norm": 32.25, "learning_rate": 9.810856226309972e-06, "log_odds_chosen": 1.7595332860946655, "log_odds_ratio": -0.2692697048187256, "logits/chosen": -1.8822906017303467, "logits/rejected": -1.698127031326294, "logps/chosen": -0.430245578289032, "logps/rejected": -1.2083370685577393, "loss": 23.1595, "nll_loss": 0.7202972173690796, "rewards/accuracies": 0.9312499761581421, "rewards/chosen": -0.02151227928698063, "rewards/margins": 0.0389045774936676, "rewards/rejected": -0.06041685491800308, "step": 240 }, { "epoch": 2.322962962962963, "grad_norm": 26.5, "learning_rate": 8.61091746353324e-06, "log_odds_chosen": 1.702959418296814, "log_odds_ratio": -0.2750469446182251, "logits/chosen": -2.1500630378723145, "logits/rejected": -1.591073751449585, "logps/chosen": -0.4450320601463318, "logps/rejected": -1.1653035879135132, "loss": 23.0947, "nll_loss": 0.7247873544692993, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.022251605987548828, "rewards/margins": 0.03601358085870743, "rewards/rejected": -0.058265186846256256, "step": 245 }, { "epoch": 2.3703703703703702, "grad_norm": 29.125, "learning_rate": 7.47685778259568e-06, "log_odds_chosen": 1.729418158531189, "log_odds_ratio": -0.25526946783065796, "logits/chosen": -1.865269660949707, "logits/rejected": -1.8307578563690186, "logps/chosen": -0.43531733751296997, "logps/rejected": -1.197790503501892, "loss": 22.4449, "nll_loss": 0.6787145733833313, "rewards/accuracies": 0.9375, "rewards/chosen": -0.02176586538553238, "rewards/margins": 0.038123659789562225, "rewards/rejected": -0.059889525175094604, "step": 250 }, { "epoch": 2.417777777777778, "grad_norm": 27.125, "learning_rate": 6.4121701248332905e-06, "log_odds_chosen": 1.894997000694275, "log_odds_ratio": -0.2565176784992218, "logits/chosen": -1.9798578023910522, "logits/rejected": -1.3841679096221924, "logps/chosen": -0.3930845260620117, "logps/rejected": -1.2068579196929932, "loss": 22.3353, "nll_loss": 0.6793208122253418, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.019654225558042526, "rewards/margins": 0.04068866744637489, "rewards/rejected": -0.06034289672970772, "step": 255 }, { "epoch": 2.4651851851851854, "grad_norm": 29.625, "learning_rate": 5.420133763455645e-06, "log_odds_chosen": 1.909266710281372, "log_odds_ratio": -0.25379735231399536, "logits/chosen": -1.9765899181365967, "logits/rejected": -1.7865279912948608, "logps/chosen": -0.4143601059913635, "logps/rejected": -1.225185751914978, "loss": 22.3829, "nll_loss": 0.6900944709777832, "rewards/accuracies": 0.90625, "rewards/chosen": -0.020718006417155266, "rewards/margins": 0.04054127633571625, "rewards/rejected": -0.06125928834080696, "step": 260 }, { "epoch": 2.5125925925925925, "grad_norm": 32.25, "learning_rate": 4.503804203275866e-06, "log_odds_chosen": 1.7796869277954102, "log_odds_ratio": -0.30406466126441956, "logits/chosen": -1.8215770721435547, "logits/rejected": -1.862717866897583, "logps/chosen": -0.4358927607536316, "logps/rejected": -1.197788953781128, "loss": 22.2978, "nll_loss": 0.6913371086120605, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.0217946395277977, "rewards/margins": 0.03809480741620064, "rewards/rejected": -0.05988944694399834, "step": 265 }, { "epoch": 2.56, "grad_norm": 29.0, "learning_rate": 3.6660037696547376e-06, "log_odds_chosen": 1.7562096118927002, "log_odds_ratio": -0.25910764932632446, "logits/chosen": -2.1091978549957275, "logits/rejected": -1.8953710794448853, "logps/chosen": -0.4530642628669739, "logps/rejected": -1.2265712022781372, "loss": 23.2665, "nll_loss": 0.7341790199279785, "rewards/accuracies": 0.9375, "rewards/chosen": -0.022653216496109962, "rewards/margins": 0.03867534175515175, "rewards/rejected": -0.06132856011390686, "step": 270 }, { "epoch": 2.6074074074074076, "grad_norm": 30.875, "learning_rate": 2.909312915645238e-06, "log_odds_chosen": 1.7647335529327393, "log_odds_ratio": -0.28405773639678955, "logits/chosen": -2.033613681793213, "logits/rejected": -1.289603590965271, "logps/chosen": -0.4545009732246399, "logps/rejected": -1.2115771770477295, "loss": 23.1922, "nll_loss": 0.7150126695632935, "rewards/accuracies": 0.918749988079071, "rewards/chosen": -0.022725049406290054, "rewards/margins": 0.03785381466150284, "rewards/rejected": -0.060578860342502594, "step": 275 }, { "epoch": 2.6548148148148147, "grad_norm": 34.75, "learning_rate": 2.236062274111741e-06, "log_odds_chosen": 1.6408923864364624, "log_odds_ratio": -0.2776089906692505, "logits/chosen": -1.8170640468597412, "logits/rejected": -1.9727897644042969, "logps/chosen": -0.4261111319065094, "logps/rejected": -1.1319156885147095, "loss": 22.1786, "nll_loss": 0.6713584661483765, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.02130555734038353, "rewards/margins": 0.035290226340293884, "rewards/rejected": -0.056595779955387115, "step": 280 }, { "epoch": 2.7022222222222223, "grad_norm": 30.75, "learning_rate": 1.648325479303684e-06, "log_odds_chosen": 1.6113086938858032, "log_odds_ratio": -0.2935238778591156, "logits/chosen": -2.0707173347473145, "logits/rejected": -1.4977672100067139, "logps/chosen": -0.4346179962158203, "logps/rejected": -1.1482003927230835, "loss": 23.1607, "nll_loss": 0.6985403895378113, "rewards/accuracies": 0.918749988079071, "rewards/chosen": -0.021730897948145866, "rewards/margins": 0.03567912429571152, "rewards/rejected": -0.05741002410650253, "step": 285 }, { "epoch": 2.74962962962963, "grad_norm": 29.75, "learning_rate": 1.1479127799935029e-06, "log_odds_chosen": 1.8265297412872314, "log_odds_ratio": -0.2631281614303589, "logits/chosen": -1.841491937637329, "logits/rejected": -1.922586441040039, "logps/chosen": -0.4327624440193176, "logps/rejected": -1.2341258525848389, "loss": 22.9795, "nll_loss": 0.7204877734184265, "rewards/accuracies": 0.9375, "rewards/chosen": -0.021638119593262672, "rewards/margins": 0.04006817191839218, "rewards/rejected": -0.0617062933743, "step": 290 }, { "epoch": 2.797037037037037, "grad_norm": 31.625, "learning_rate": 7.363654638505046e-07, "log_odds_chosen": 1.7081098556518555, "log_odds_ratio": -0.29199516773223877, "logits/chosen": -1.7930389642715454, "logits/rejected": -1.7181438207626343, "logps/chosen": -0.449666827917099, "logps/rejected": -1.2206642627716064, "loss": 22.9709, "nll_loss": 0.7106753587722778, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.0224833432585001, "rewards/margins": 0.03854987770318985, "rewards/rejected": -0.0610332190990448, "step": 295 }, { "epoch": 2.8444444444444446, "grad_norm": 31.25, "learning_rate": 4.149511102238568e-07, "log_odds_chosen": 1.5754259824752808, "log_odds_ratio": -0.3034347891807556, "logits/chosen": -2.2559409141540527, "logits/rejected": -1.71217942237854, "logps/chosen": -0.46836423873901367, "logps/rejected": -1.2223981618881226, "loss": 22.8601, "nll_loss": 0.7257949113845825, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.023418214172124863, "rewards/margins": 0.037701696157455444, "rewards/rejected": -0.06111990660429001, "step": 300 }, { "epoch": 2.891851851851852, "grad_norm": 28.625, "learning_rate": 1.8465968595625105e-07, "log_odds_chosen": 1.6639511585235596, "log_odds_ratio": -0.2808656096458435, "logits/chosen": -2.1249499320983887, "logits/rejected": -1.6745857000350952, "logps/chosen": -0.475193589925766, "logps/rejected": -1.1988952159881592, "loss": 21.8942, "nll_loss": 0.6743995547294617, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.02375968173146248, "rewards/margins": 0.03618507459759712, "rewards/rejected": -0.0599447600543499, "step": 305 }, { "epoch": 2.9392592592592592, "grad_norm": 30.75, "learning_rate": 4.620049625329803e-08, "log_odds_chosen": 1.7966537475585938, "log_odds_ratio": -0.25377795100212097, "logits/chosen": -1.9389528036117554, "logits/rejected": -1.399864912033081, "logps/chosen": -0.4379648268222809, "logps/rejected": -1.1957590579986572, "loss": 22.7814, "nll_loss": 0.6846402883529663, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -0.021898243576288223, "rewards/margins": 0.03788971155881882, "rewards/rejected": -0.05978795886039734, "step": 310 }, { "epoch": 2.986666666666667, "grad_norm": 32.75, "learning_rate": 0.0, "log_odds_chosen": 1.8841956853866577, "log_odds_ratio": -0.24858447909355164, "logits/chosen": -1.8983337879180908, "logits/rejected": -1.5074989795684814, "logps/chosen": -0.40289902687072754, "logps/rejected": -1.209084391593933, "loss": 22.233, "nll_loss": 0.6980301737785339, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.020144950598478317, "rewards/margins": 0.040309272706508636, "rewards/rejected": -0.06045422703027725, "step": 315 }, { "epoch": 2.986666666666667, "step": 315, "total_flos": 0.0, "train_loss": 31.135096304757255, "train_runtime": 6745.6063, "train_samples_per_second": 3.002, "train_steps_per_second": 0.047 } ], "logging_steps": 5, "max_steps": 315, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }