|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.986666666666667, |
|
"eval_steps": 500, |
|
"global_step": 315, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.047407407407407405, |
|
"grad_norm": 270.0, |
|
"learning_rate": 9.375000000000001e-06, |
|
"log_odds_chosen": 0.41771596670150757, |
|
"log_odds_ratio": -0.7694265246391296, |
|
"logits/chosen": -2.967926502227783, |
|
"logits/rejected": -2.8778510093688965, |
|
"logps/chosen": -1.2910274267196655, |
|
"logps/rejected": -1.6328433752059937, |
|
"loss": 51.9175, |
|
"nll_loss": 1.511154294013977, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.06455137580633163, |
|
"rewards/margins": 0.017090797424316406, |
|
"rewards/rejected": -0.08164217323064804, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.09481481481481481, |
|
"grad_norm": 61.75, |
|
"learning_rate": 1.8750000000000002e-05, |
|
"log_odds_chosen": 0.2715613842010498, |
|
"log_odds_ratio": -0.7063366174697876, |
|
"logits/chosen": -2.903649091720581, |
|
"logits/rejected": -2.737760066986084, |
|
"logps/chosen": -1.0549781322479248, |
|
"logps/rejected": -1.2600512504577637, |
|
"loss": 47.142, |
|
"nll_loss": 1.388285517692566, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.0527489073574543, |
|
"rewards/margins": 0.010253657586872578, |
|
"rewards/rejected": -0.0630025640130043, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.14222222222222222, |
|
"grad_norm": 71.0, |
|
"learning_rate": 2.8125e-05, |
|
"log_odds_chosen": 0.26208820939064026, |
|
"log_odds_ratio": -0.6782652139663696, |
|
"logits/chosen": -2.5858330726623535, |
|
"logits/rejected": -2.4748170375823975, |
|
"logps/chosen": -0.921225368976593, |
|
"logps/rejected": -1.081853985786438, |
|
"loss": 46.5682, |
|
"nll_loss": 1.4536203145980835, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.04606126993894577, |
|
"rewards/margins": 0.008031422272324562, |
|
"rewards/rejected": -0.05409269407391548, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.18962962962962962, |
|
"grad_norm": 92.0, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"log_odds_chosen": 0.18918053805828094, |
|
"log_odds_ratio": -0.7011532783508301, |
|
"logits/chosen": -2.4796886444091797, |
|
"logits/rejected": -2.3676044940948486, |
|
"logps/chosen": -0.8980884552001953, |
|
"logps/rejected": -1.0366885662078857, |
|
"loss": 43.8254, |
|
"nll_loss": 1.3132244348526, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.044904422014951706, |
|
"rewards/margins": 0.00693000853061676, |
|
"rewards/rejected": -0.05183442682027817, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.23703703703703705, |
|
"grad_norm": 39.75, |
|
"learning_rate": 4.6875e-05, |
|
"log_odds_chosen": 0.20047792792320251, |
|
"log_odds_ratio": -0.704607367515564, |
|
"logits/chosen": -2.486077070236206, |
|
"logits/rejected": -2.380354881286621, |
|
"logps/chosen": -0.9169360995292664, |
|
"logps/rejected": -1.0517938137054443, |
|
"loss": 41.7393, |
|
"nll_loss": 1.3125925064086914, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.04584680497646332, |
|
"rewards/margins": 0.006742885801941156, |
|
"rewards/rejected": -0.052589692175388336, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.28444444444444444, |
|
"grad_norm": 48.25, |
|
"learning_rate": 5.625e-05, |
|
"log_odds_chosen": 0.13125675916671753, |
|
"log_odds_ratio": -0.714677631855011, |
|
"logits/chosen": -2.498034954071045, |
|
"logits/rejected": -2.1313350200653076, |
|
"logps/chosen": -0.8735687136650085, |
|
"logps/rejected": -0.9592132568359375, |
|
"loss": 41.493, |
|
"nll_loss": 1.2559503316879272, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.04367843270301819, |
|
"rewards/margins": 0.004282232839614153, |
|
"rewards/rejected": -0.047960661351680756, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.33185185185185184, |
|
"grad_norm": 39.0, |
|
"learning_rate": 5.998336508818541e-05, |
|
"log_odds_chosen": 0.06586463749408722, |
|
"log_odds_ratio": -0.7540255188941956, |
|
"logits/chosen": -2.3031373023986816, |
|
"logits/rejected": -2.4485721588134766, |
|
"logps/chosen": -0.8969869613647461, |
|
"logps/rejected": -0.9382694363594055, |
|
"loss": 40.5282, |
|
"nll_loss": 1.2521088123321533, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.04484934359788895, |
|
"rewards/margins": 0.0020641214214265347, |
|
"rewards/rejected": -0.04691346734762192, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.37925925925925924, |
|
"grad_norm": 35.25, |
|
"learning_rate": 5.988177409372154e-05, |
|
"log_odds_chosen": 0.21956849098205566, |
|
"log_odds_ratio": -0.6834356188774109, |
|
"logits/chosen": -2.2723240852355957, |
|
"logits/rejected": -2.065520763397217, |
|
"logps/chosen": -0.8550432324409485, |
|
"logps/rejected": -0.9936239123344421, |
|
"loss": 40.1999, |
|
"nll_loss": 1.2111032009124756, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.04275216534733772, |
|
"rewards/margins": 0.006929035298526287, |
|
"rewards/rejected": -0.049681201577186584, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.4266666666666667, |
|
"grad_norm": 33.0, |
|
"learning_rate": 5.968814624645376e-05, |
|
"log_odds_chosen": 0.1635906845331192, |
|
"log_odds_ratio": -0.7335126996040344, |
|
"logits/chosen": -1.9793834686279297, |
|
"logits/rejected": -2.0152127742767334, |
|
"logps/chosen": -0.8692102432250977, |
|
"logps/rejected": -0.975513756275177, |
|
"loss": 40.1299, |
|
"nll_loss": 1.2306907176971436, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.04346051067113876, |
|
"rewards/margins": 0.005315178073942661, |
|
"rewards/rejected": -0.04877568781375885, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.4740740740740741, |
|
"grad_norm": 39.75, |
|
"learning_rate": 5.9403077926557534e-05, |
|
"log_odds_chosen": 0.16285523772239685, |
|
"log_odds_ratio": -0.7225431203842163, |
|
"logits/chosen": -1.9700477123260498, |
|
"logits/rejected": -1.93939208984375, |
|
"logps/chosen": -0.9150403738021851, |
|
"logps/rejected": -1.005976676940918, |
|
"loss": 42.3638, |
|
"nll_loss": 1.3180006742477417, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.04575202241539955, |
|
"rewards/margins": 0.004546813666820526, |
|
"rewards/rejected": -0.05029883235692978, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.5214814814814814, |
|
"grad_norm": 32.5, |
|
"learning_rate": 5.9027447153889215e-05, |
|
"log_odds_chosen": 0.074959896504879, |
|
"log_odds_ratio": -0.7347756624221802, |
|
"logits/chosen": -1.8091471195220947, |
|
"logits/rejected": -1.627111792564392, |
|
"logps/chosen": -0.8783036470413208, |
|
"logps/rejected": -0.9295312166213989, |
|
"loss": 39.4889, |
|
"nll_loss": 1.2200506925582886, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.04391518235206604, |
|
"rewards/margins": 0.002561377827078104, |
|
"rewards/rejected": -0.046476561576128006, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.5688888888888889, |
|
"grad_norm": 31.875, |
|
"learning_rate": 5.856241088365584e-05, |
|
"log_odds_chosen": 0.21836993098258972, |
|
"log_odds_ratio": -0.6648741960525513, |
|
"logits/chosen": -2.2059853076934814, |
|
"logits/rejected": -1.8393570184707642, |
|
"logps/chosen": -0.8266533613204956, |
|
"logps/rejected": -0.944961428642273, |
|
"loss": 38.4828, |
|
"nll_loss": 1.1561448574066162, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0413326658308506, |
|
"rewards/margins": 0.005915405694395304, |
|
"rewards/rejected": -0.04724807292222977, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.6162962962962963, |
|
"grad_norm": 31.75, |
|
"learning_rate": 5.800940144295476e-05, |
|
"log_odds_chosen": 0.14650335907936096, |
|
"log_odds_ratio": -0.7161463499069214, |
|
"logits/chosen": -1.9598195552825928, |
|
"logits/rejected": -1.8855581283569336, |
|
"logps/chosen": -0.890237033367157, |
|
"logps/rejected": -0.9888992309570312, |
|
"loss": 38.4474, |
|
"nll_loss": 1.1701605319976807, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.04451185464859009, |
|
"rewards/margins": 0.004933114163577557, |
|
"rewards/rejected": -0.04944496601819992, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.6637037037037037, |
|
"grad_norm": 31.25, |
|
"learning_rate": 5.7370122119158855e-05, |
|
"log_odds_chosen": 0.2176527976989746, |
|
"log_odds_ratio": -0.6846314072608948, |
|
"logits/chosen": -2.401538610458374, |
|
"logits/rejected": -1.8277839422225952, |
|
"logps/chosen": -0.8448864817619324, |
|
"logps/rejected": -1.0054863691329956, |
|
"loss": 38.0337, |
|
"nll_loss": 1.1650002002716064, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.0422443225979805, |
|
"rewards/margins": 0.00802999921143055, |
|
"rewards/rejected": -0.0502743236720562, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.7111111111111111, |
|
"grad_norm": 34.5, |
|
"learning_rate": 5.6646541913735056e-05, |
|
"log_odds_chosen": 0.34958410263061523, |
|
"log_odds_ratio": -0.6144381761550903, |
|
"logits/chosen": -1.9800045490264893, |
|
"logits/rejected": -2.1104648113250732, |
|
"logps/chosen": -0.7909008264541626, |
|
"logps/rejected": -1.0040924549102783, |
|
"loss": 38.1687, |
|
"nll_loss": 1.1795135736465454, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.03954503685235977, |
|
"rewards/margins": 0.01065958570688963, |
|
"rewards/rejected": -0.050204623490571976, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.7585185185185185, |
|
"grad_norm": 30.75, |
|
"learning_rate": 5.5840889477654665e-05, |
|
"log_odds_chosen": 0.25651440024375916, |
|
"log_odds_ratio": -0.6959986686706543, |
|
"logits/chosen": -2.3197991847991943, |
|
"logits/rejected": -1.9464877843856812, |
|
"logps/chosen": -0.8813208341598511, |
|
"logps/rejected": -1.0397270917892456, |
|
"loss": 37.8718, |
|
"nll_loss": 1.1892088651657104, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.04406604543328285, |
|
"rewards/margins": 0.007920312695205212, |
|
"rewards/rejected": -0.05198635905981064, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.8059259259259259, |
|
"grad_norm": 26.75, |
|
"learning_rate": 5.495564624707466e-05, |
|
"log_odds_chosen": 0.24601168930530548, |
|
"log_odds_ratio": -0.6635450720787048, |
|
"logits/chosen": -2.3646152019500732, |
|
"logits/rejected": -1.6053569316864014, |
|
"logps/chosen": -0.825157642364502, |
|
"logps/rejected": -0.9831274151802063, |
|
"loss": 37.7402, |
|
"nll_loss": 1.1433099508285522, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.04125788062810898, |
|
"rewards/margins": 0.007898489013314247, |
|
"rewards/rejected": -0.04915637522935867, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.8533333333333334, |
|
"grad_norm": 32.25, |
|
"learning_rate": 5.399353880043222e-05, |
|
"log_odds_chosen": 0.2743232548236847, |
|
"log_odds_ratio": -0.6547017097473145, |
|
"logits/chosen": -2.2998695373535156, |
|
"logits/rejected": -1.9147183895111084, |
|
"logps/chosen": -0.7987005710601807, |
|
"logps/rejected": -0.9570469856262207, |
|
"loss": 39.0598, |
|
"nll_loss": 1.165475606918335, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.03993503004312515, |
|
"rewards/margins": 0.00791732408106327, |
|
"rewards/rejected": -0.047852352261543274, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.9007407407407407, |
|
"grad_norm": 27.875, |
|
"learning_rate": 5.295753046049293e-05, |
|
"log_odds_chosen": 0.3104208707809448, |
|
"log_odds_ratio": -0.6332544088363647, |
|
"logits/chosen": -2.3582139015197754, |
|
"logits/rejected": -1.8759187459945679, |
|
"logps/chosen": -0.7584289908409119, |
|
"logps/rejected": -0.9375408887863159, |
|
"loss": 38.0891, |
|
"nll_loss": 1.0971782207489014, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.037921447306871414, |
|
"rewards/margins": 0.00895559974014759, |
|
"rewards/rejected": -0.046877048909664154, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.9481481481481482, |
|
"grad_norm": 28.0, |
|
"learning_rate": 5.1850812167218644e-05, |
|
"log_odds_chosen": 0.14483532309532166, |
|
"log_odds_ratio": -0.725937008857727, |
|
"logits/chosen": -2.2758800983428955, |
|
"logits/rejected": -1.807739019393921, |
|
"logps/chosen": -0.8715543746948242, |
|
"logps/rejected": -0.967276394367218, |
|
"loss": 38.3176, |
|
"nll_loss": 1.192031979560852, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.04357772320508957, |
|
"rewards/margins": 0.004786101635545492, |
|
"rewards/rejected": -0.0483638234436512, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.9955555555555555, |
|
"grad_norm": 27.5, |
|
"learning_rate": 5.067679264956681e-05, |
|
"log_odds_chosen": 0.2537747621536255, |
|
"log_odds_ratio": -0.6505337953567505, |
|
"logits/chosen": -2.356247901916504, |
|
"logits/rejected": -1.834238052368164, |
|
"logps/chosen": -0.8050671815872192, |
|
"logps/rejected": -0.9783474206924438, |
|
"loss": 37.2872, |
|
"nll_loss": 1.1223804950714111, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.04025335982441902, |
|
"rewards/margins": 0.008664008229970932, |
|
"rewards/rejected": -0.04891737177968025, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.0429629629629629, |
|
"grad_norm": 26.625, |
|
"learning_rate": 4.943908792649255e-05, |
|
"log_odds_chosen": 0.7013475298881531, |
|
"log_odds_ratio": -0.5124364495277405, |
|
"logits/chosen": -2.1879847049713135, |
|
"logits/rejected": -1.7397973537445068, |
|
"logps/chosen": -0.6286161541938782, |
|
"logps/rejected": -0.9900426864624023, |
|
"loss": 31.0738, |
|
"nll_loss": 0.9219255447387695, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.03143080696463585, |
|
"rewards/margins": 0.018071329221129417, |
|
"rewards/rejected": -0.049502138048410416, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.0903703703703704, |
|
"grad_norm": 30.25, |
|
"learning_rate": 4.814151016949061e-05, |
|
"log_odds_chosen": 0.9226773977279663, |
|
"log_odds_ratio": -0.42190057039260864, |
|
"logits/chosen": -2.087578058242798, |
|
"logits/rejected": -1.6346263885498047, |
|
"logps/chosen": -0.5833232998847961, |
|
"logps/rejected": -1.0628697872161865, |
|
"loss": 30.0304, |
|
"nll_loss": 0.8877116441726685, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.02916616201400757, |
|
"rewards/margins": 0.02397732436656952, |
|
"rewards/rejected": -0.053143490105867386, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.1377777777777778, |
|
"grad_norm": 28.375, |
|
"learning_rate": 4.6788055960981e-05, |
|
"log_odds_chosen": 0.9922162294387817, |
|
"log_odds_ratio": -0.40503960847854614, |
|
"logits/chosen": -2.222867488861084, |
|
"logits/rejected": -1.8629436492919922, |
|
"logps/chosen": -0.5679124593734741, |
|
"logps/rejected": -1.079012155532837, |
|
"loss": 30.4758, |
|
"nll_loss": 0.9042154550552368, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.028395622968673706, |
|
"rewards/margins": 0.02555498108267784, |
|
"rewards/rejected": -0.053950607776641846, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.1851851851851851, |
|
"grad_norm": 27.625, |
|
"learning_rate": 4.538289398470304e-05, |
|
"log_odds_chosen": 0.9345417022705078, |
|
"log_odds_ratio": -0.4391079545021057, |
|
"logits/chosen": -2.159498453140259, |
|
"logits/rejected": -1.9323101043701172, |
|
"logps/chosen": -0.6106966733932495, |
|
"logps/rejected": -1.0727020502090454, |
|
"loss": 28.3606, |
|
"nll_loss": 0.874626636505127, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.030534833669662476, |
|
"rewards/margins": 0.023100275546312332, |
|
"rewards/rejected": -0.05363510921597481, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.2325925925925927, |
|
"grad_norm": 25.75, |
|
"learning_rate": 4.393035218603139e-05, |
|
"log_odds_chosen": 0.7891913652420044, |
|
"log_odds_ratio": -0.4749962389469147, |
|
"logits/chosen": -2.1488184928894043, |
|
"logits/rejected": -1.8161399364471436, |
|
"logps/chosen": -0.6154332160949707, |
|
"logps/rejected": -1.0038516521453857, |
|
"loss": 29.5153, |
|
"nll_loss": 0.9258828163146973, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.030771661549806595, |
|
"rewards/margins": 0.019420918077230453, |
|
"rewards/rejected": -0.05019258335232735, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 26.125, |
|
"learning_rate": 4.243490444176123e-05, |
|
"log_odds_chosen": 0.912939190864563, |
|
"log_odds_ratio": -0.4465225338935852, |
|
"logits/chosen": -2.023993492126465, |
|
"logits/rejected": -1.8624738454818726, |
|
"logps/chosen": -0.5600326061248779, |
|
"logps/rejected": -1.0113087892532349, |
|
"loss": 29.4182, |
|
"nll_loss": 0.8695603609085083, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.028001630678772926, |
|
"rewards/margins": 0.022563805803656578, |
|
"rewards/rejected": -0.0505654402077198, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.3274074074074074, |
|
"grad_norm": 24.875, |
|
"learning_rate": 4.090115678041962e-05, |
|
"log_odds_chosen": 0.8396116495132446, |
|
"log_odds_ratio": -0.47167715430259705, |
|
"logits/chosen": -1.9779258966445923, |
|
"logits/rejected": -1.8630996942520142, |
|
"logps/chosen": -0.6393855214118958, |
|
"logps/rejected": -1.073813557624817, |
|
"loss": 30.4644, |
|
"nll_loss": 0.9436683654785156, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.031969279050827026, |
|
"rewards/margins": 0.021721404045820236, |
|
"rewards/rejected": -0.053690679371356964, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.374814814814815, |
|
"grad_norm": 27.0, |
|
"learning_rate": 3.9333833195545325e-05, |
|
"log_odds_chosen": 0.8570321798324585, |
|
"log_odds_ratio": -0.4445961117744446, |
|
"logits/chosen": -2.178088903427124, |
|
"logits/rejected": -1.742640495300293, |
|
"logps/chosen": -0.6361523270606995, |
|
"logps/rejected": -1.0977928638458252, |
|
"loss": 29.9436, |
|
"nll_loss": 0.9138515591621399, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.03180761635303497, |
|
"rewards/margins": 0.023082025349140167, |
|
"rewards/rejected": -0.05488964170217514, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.4222222222222223, |
|
"grad_norm": 28.75, |
|
"learning_rate": 3.7737761095632374e-05, |
|
"log_odds_chosen": 0.8333398699760437, |
|
"log_odds_ratio": -0.4819715619087219, |
|
"logits/chosen": -2.0540931224823, |
|
"logits/rejected": -2.0431177616119385, |
|
"logps/chosen": -0.5949512124061584, |
|
"logps/rejected": -0.9885191917419434, |
|
"loss": 29.3897, |
|
"nll_loss": 0.8951548337936401, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.029747556895017624, |
|
"rewards/margins": 0.019678404554724693, |
|
"rewards/rejected": -0.049425967037677765, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.4696296296296296, |
|
"grad_norm": 28.5, |
|
"learning_rate": 3.611785643555225e-05, |
|
"log_odds_chosen": 0.8982599377632141, |
|
"log_odds_ratio": -0.44926947355270386, |
|
"logits/chosen": -2.2695367336273193, |
|
"logits/rejected": -1.7630071640014648, |
|
"logps/chosen": -0.6012392044067383, |
|
"logps/rejected": -1.050581693649292, |
|
"loss": 29.8918, |
|
"nll_loss": 0.9108700752258301, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.030061960220336914, |
|
"rewards/margins": 0.022467125207185745, |
|
"rewards/rejected": -0.05252908915281296, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.5170370370370372, |
|
"grad_norm": 30.625, |
|
"learning_rate": 3.44791085752502e-05, |
|
"log_odds_chosen": 0.8571161031723022, |
|
"log_odds_ratio": -0.4356165826320648, |
|
"logits/chosen": -2.093273639678955, |
|
"logits/rejected": -2.1345930099487305, |
|
"logps/chosen": -0.6451684236526489, |
|
"logps/rejected": -1.1003749370574951, |
|
"loss": 30.6067, |
|
"nll_loss": 0.9544156193733215, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.03225841745734215, |
|
"rewards/margins": 0.02276032790541649, |
|
"rewards/rejected": -0.055018745362758636, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.5644444444444443, |
|
"grad_norm": 25.0, |
|
"learning_rate": 3.2826564912351544e-05, |
|
"log_odds_chosen": 0.9319137334823608, |
|
"log_odds_ratio": -0.43036922812461853, |
|
"logits/chosen": -1.9680538177490234, |
|
"logits/rejected": -2.1300835609436035, |
|
"logps/chosen": -0.6165143251419067, |
|
"logps/rejected": -1.106245994567871, |
|
"loss": 29.5351, |
|
"nll_loss": 0.8909838795661926, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.030825715512037277, |
|
"rewards/margins": 0.024486582726240158, |
|
"rewards/rejected": -0.055312298238277435, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.6118518518518519, |
|
"grad_norm": 27.0, |
|
"learning_rate": 3.116531533601003e-05, |
|
"log_odds_chosen": 1.0328500270843506, |
|
"log_odds_ratio": -0.4052696228027344, |
|
"logits/chosen": -2.1744275093078613, |
|
"logits/rejected": -1.9558875560760498, |
|
"logps/chosen": -0.587931752204895, |
|
"logps/rejected": -1.127718448638916, |
|
"loss": 29.1635, |
|
"nll_loss": 0.9016565084457397, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.02939658798277378, |
|
"rewards/margins": 0.02698933705687523, |
|
"rewards/rejected": -0.05638592690229416, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.6592592592592592, |
|
"grad_norm": 29.875, |
|
"learning_rate": 2.9500476549880848e-05, |
|
"log_odds_chosen": 0.8978468179702759, |
|
"log_odds_ratio": -0.45074257254600525, |
|
"logits/chosen": -1.9579814672470093, |
|
"logits/rejected": -1.6362476348876953, |
|
"logps/chosen": -0.5840794444084167, |
|
"logps/rejected": -1.0381691455841064, |
|
"loss": 28.8906, |
|
"nll_loss": 0.8974016308784485, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.029203975573182106, |
|
"rewards/margins": 0.022704491391777992, |
|
"rewards/rejected": -0.0519084632396698, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.7066666666666666, |
|
"grad_norm": 31.75, |
|
"learning_rate": 2.7837176312504037e-05, |
|
"log_odds_chosen": 0.804090678691864, |
|
"log_odds_ratio": -0.46772366762161255, |
|
"logits/chosen": -1.7048003673553467, |
|
"logits/rejected": -1.6802536249160767, |
|
"logps/chosen": -0.6104881167411804, |
|
"logps/rejected": -1.00138521194458, |
|
"loss": 29.964, |
|
"nll_loss": 0.9319046139717102, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.03052440844476223, |
|
"rewards/margins": 0.019544851034879684, |
|
"rewards/rejected": -0.050069261342287064, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.7540740740740741, |
|
"grad_norm": 24.25, |
|
"learning_rate": 2.618053764363861e-05, |
|
"log_odds_chosen": 0.9093812108039856, |
|
"log_odds_ratio": -0.4248902201652527, |
|
"logits/chosen": -2.1142642498016357, |
|
"logits/rejected": -1.8935844898223877, |
|
"logps/chosen": -0.5908008813858032, |
|
"logps/rejected": -1.051133632659912, |
|
"loss": 29.4169, |
|
"nll_loss": 0.8831952810287476, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.029540037736296654, |
|
"rewards/margins": 0.023016640916466713, |
|
"rewards/rejected": -0.052556682378053665, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.8014814814814815, |
|
"grad_norm": 26.5, |
|
"learning_rate": 2.453566304519216e-05, |
|
"log_odds_chosen": 0.9450467228889465, |
|
"log_odds_ratio": -0.4345301687717438, |
|
"logits/chosen": -2.1107406616210938, |
|
"logits/rejected": -1.6903254985809326, |
|
"logps/chosen": -0.6346092820167542, |
|
"logps/rejected": -1.1103118658065796, |
|
"loss": 30.3828, |
|
"nll_loss": 0.9147791862487793, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.03173046559095383, |
|
"rewards/margins": 0.02378513291478157, |
|
"rewards/rejected": -0.0555155873298645, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.8488888888888888, |
|
"grad_norm": 30.5, |
|
"learning_rate": 2.29076187853462e-05, |
|
"log_odds_chosen": 0.9741055369377136, |
|
"log_odds_ratio": -0.4274386465549469, |
|
"logits/chosen": -2.0201268196105957, |
|
"logits/rejected": -1.357716679573059, |
|
"logps/chosen": -0.5966172218322754, |
|
"logps/rejected": -1.1088060140609741, |
|
"loss": 29.2191, |
|
"nll_loss": 0.882198691368103, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.02983086369931698, |
|
"rewards/margins": 0.025609437376260757, |
|
"rewards/rejected": -0.05544029921293259, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.8962962962962964, |
|
"grad_norm": 27.625, |
|
"learning_rate": 2.130141929428254e-05, |
|
"log_odds_chosen": 0.8030783534049988, |
|
"log_odds_ratio": -0.4836719036102295, |
|
"logits/chosen": -2.072028875350952, |
|
"logits/rejected": -1.7954127788543701, |
|
"logps/chosen": -0.6211769580841064, |
|
"logps/rejected": -1.0174511671066284, |
|
"loss": 31.2309, |
|
"nll_loss": 0.9159282445907593, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.031058847904205322, |
|
"rewards/margins": 0.01981370709836483, |
|
"rewards/rejected": -0.050872553139925, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.9437037037037037, |
|
"grad_norm": 25.5, |
|
"learning_rate": 1.9722011719572444e-05, |
|
"log_odds_chosen": 0.8332887887954712, |
|
"log_odds_ratio": -0.46110400557518005, |
|
"logits/chosen": -2.209178924560547, |
|
"logits/rejected": -1.4569910764694214, |
|
"logps/chosen": -0.614986777305603, |
|
"logps/rejected": -1.0477509498596191, |
|
"loss": 28.0719, |
|
"nll_loss": 0.866260826587677, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.03074934147298336, |
|
"rewards/margins": 0.021638209000229836, |
|
"rewards/rejected": -0.0523875467479229, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.991111111111111, |
|
"grad_norm": 27.375, |
|
"learning_rate": 1.8174260688798445e-05, |
|
"log_odds_chosen": 0.7869575619697571, |
|
"log_odds_ratio": -0.4784061312675476, |
|
"logits/chosen": -1.8811867237091064, |
|
"logits/rejected": -2.0677828788757324, |
|
"logps/chosen": -0.5915592908859253, |
|
"logps/rejected": -0.9474382400512695, |
|
"loss": 28.0122, |
|
"nll_loss": 0.8690091967582703, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.029577964916825294, |
|
"rewards/margins": 0.017793944105505943, |
|
"rewards/rejected": -0.04737190902233124, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.0385185185185186, |
|
"grad_norm": 23.75, |
|
"learning_rate": 1.666293332634042e-05, |
|
"log_odds_chosen": 1.401928186416626, |
|
"log_odds_ratio": -0.33335039019584656, |
|
"logits/chosen": -1.884316086769104, |
|
"logits/rejected": -1.4572067260742188, |
|
"logps/chosen": -0.4979814887046814, |
|
"logps/rejected": -1.1203409433364868, |
|
"loss": 24.9695, |
|
"nll_loss": 0.7534885406494141, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.02489907667040825, |
|
"rewards/margins": 0.03111797571182251, |
|
"rewards/rejected": -0.05601705238223076, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 2.0859259259259257, |
|
"grad_norm": 38.5, |
|
"learning_rate": 1.519268457047482e-05, |
|
"log_odds_chosen": 1.6839864253997803, |
|
"log_odds_ratio": -0.2904171049594879, |
|
"logits/chosen": -1.8016027212142944, |
|
"logits/rejected": -1.8718116283416748, |
|
"logps/chosen": -0.4482901096343994, |
|
"logps/rejected": -1.188301682472229, |
|
"loss": 23.232, |
|
"nll_loss": 0.7302739024162292, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.02241450548171997, |
|
"rewards/margins": 0.03700058162212372, |
|
"rewards/rejected": -0.05941509082913399, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.1333333333333333, |
|
"grad_norm": 27.5, |
|
"learning_rate": 1.3768042836010768e-05, |
|
"log_odds_chosen": 1.6373440027236938, |
|
"log_odds_ratio": -0.2984515130519867, |
|
"logits/chosen": -1.8258718252182007, |
|
"logits/rejected": -1.6223289966583252, |
|
"logps/chosen": -0.44031524658203125, |
|
"logps/rejected": -1.1581284999847412, |
|
"loss": 24.139, |
|
"nll_loss": 0.7237830758094788, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -0.022015761584043503, |
|
"rewards/margins": 0.035890672355890274, |
|
"rewards/rejected": -0.05790643021464348, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 2.180740740740741, |
|
"grad_norm": 27.5, |
|
"learning_rate": 1.239339606662261e-05, |
|
"log_odds_chosen": 1.801990270614624, |
|
"log_odds_ratio": -0.25359493494033813, |
|
"logits/chosen": -1.93035089969635, |
|
"logits/rejected": -1.6016725301742554, |
|
"logps/chosen": -0.4278429448604584, |
|
"logps/rejected": -1.2159802913665771, |
|
"loss": 22.8267, |
|
"nll_loss": 0.7034687995910645, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.02139214798808098, |
|
"rewards/margins": 0.039406873285770416, |
|
"rewards/rejected": -0.060799021273851395, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.228148148148148, |
|
"grad_norm": 34.5, |
|
"learning_rate": 1.1072978219838283e-05, |
|
"log_odds_chosen": 1.565932035446167, |
|
"log_odds_ratio": -0.3256310820579529, |
|
"logits/chosen": -1.9141308069229126, |
|
"logits/rejected": -1.976017951965332, |
|
"logps/chosen": -0.4726741313934326, |
|
"logps/rejected": -1.1302521228790283, |
|
"loss": 23.1599, |
|
"nll_loss": 0.7224346399307251, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.02363370731472969, |
|
"rewards/margins": 0.03287890553474426, |
|
"rewards/rejected": -0.05651261284947395, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 2.2755555555555556, |
|
"grad_norm": 32.25, |
|
"learning_rate": 9.810856226309972e-06, |
|
"log_odds_chosen": 1.7595332860946655, |
|
"log_odds_ratio": -0.2692697048187256, |
|
"logits/chosen": -1.8822906017303467, |
|
"logits/rejected": -1.698127031326294, |
|
"logps/chosen": -0.430245578289032, |
|
"logps/rejected": -1.2083370685577393, |
|
"loss": 23.1595, |
|
"nll_loss": 0.7202972173690796, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.02151227928698063, |
|
"rewards/margins": 0.0389045774936676, |
|
"rewards/rejected": -0.06041685491800308, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.322962962962963, |
|
"grad_norm": 26.5, |
|
"learning_rate": 8.61091746353324e-06, |
|
"log_odds_chosen": 1.702959418296814, |
|
"log_odds_ratio": -0.2750469446182251, |
|
"logits/chosen": -2.1500630378723145, |
|
"logits/rejected": -1.591073751449585, |
|
"logps/chosen": -0.4450320601463318, |
|
"logps/rejected": -1.1653035879135132, |
|
"loss": 23.0947, |
|
"nll_loss": 0.7247873544692993, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.022251605987548828, |
|
"rewards/margins": 0.03601358085870743, |
|
"rewards/rejected": -0.058265186846256256, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 2.3703703703703702, |
|
"grad_norm": 29.125, |
|
"learning_rate": 7.47685778259568e-06, |
|
"log_odds_chosen": 1.729418158531189, |
|
"log_odds_ratio": -0.25526946783065796, |
|
"logits/chosen": -1.865269660949707, |
|
"logits/rejected": -1.8307578563690186, |
|
"logps/chosen": -0.43531733751296997, |
|
"logps/rejected": -1.197790503501892, |
|
"loss": 22.4449, |
|
"nll_loss": 0.6787145733833313, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.02176586538553238, |
|
"rewards/margins": 0.038123659789562225, |
|
"rewards/rejected": -0.059889525175094604, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.417777777777778, |
|
"grad_norm": 27.125, |
|
"learning_rate": 6.4121701248332905e-06, |
|
"log_odds_chosen": 1.894997000694275, |
|
"log_odds_ratio": -0.2565176784992218, |
|
"logits/chosen": -1.9798578023910522, |
|
"logits/rejected": -1.3841679096221924, |
|
"logps/chosen": -0.3930845260620117, |
|
"logps/rejected": -1.2068579196929932, |
|
"loss": 22.3353, |
|
"nll_loss": 0.6793208122253418, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.019654225558042526, |
|
"rewards/margins": 0.04068866744637489, |
|
"rewards/rejected": -0.06034289672970772, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 2.4651851851851854, |
|
"grad_norm": 29.625, |
|
"learning_rate": 5.420133763455645e-06, |
|
"log_odds_chosen": 1.909266710281372, |
|
"log_odds_ratio": -0.25379735231399536, |
|
"logits/chosen": -1.9765899181365967, |
|
"logits/rejected": -1.7865279912948608, |
|
"logps/chosen": -0.4143601059913635, |
|
"logps/rejected": -1.225185751914978, |
|
"loss": 22.3829, |
|
"nll_loss": 0.6900944709777832, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -0.020718006417155266, |
|
"rewards/margins": 0.04054127633571625, |
|
"rewards/rejected": -0.06125928834080696, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.5125925925925925, |
|
"grad_norm": 32.25, |
|
"learning_rate": 4.503804203275866e-06, |
|
"log_odds_chosen": 1.7796869277954102, |
|
"log_odds_ratio": -0.30406466126441956, |
|
"logits/chosen": -1.8215770721435547, |
|
"logits/rejected": -1.862717866897583, |
|
"logps/chosen": -0.4358927607536316, |
|
"logps/rejected": -1.197788953781128, |
|
"loss": 22.2978, |
|
"nll_loss": 0.6913371086120605, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.0217946395277977, |
|
"rewards/margins": 0.03809480741620064, |
|
"rewards/rejected": -0.05988944694399834, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 29.0, |
|
"learning_rate": 3.6660037696547376e-06, |
|
"log_odds_chosen": 1.7562096118927002, |
|
"log_odds_ratio": -0.25910764932632446, |
|
"logits/chosen": -2.1091978549957275, |
|
"logits/rejected": -1.8953710794448853, |
|
"logps/chosen": -0.4530642628669739, |
|
"logps/rejected": -1.2265712022781372, |
|
"loss": 23.2665, |
|
"nll_loss": 0.7341790199279785, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.022653216496109962, |
|
"rewards/margins": 0.03867534175515175, |
|
"rewards/rejected": -0.06132856011390686, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.6074074074074076, |
|
"grad_norm": 30.875, |
|
"learning_rate": 2.909312915645238e-06, |
|
"log_odds_chosen": 1.7647335529327393, |
|
"log_odds_ratio": -0.28405773639678955, |
|
"logits/chosen": -2.033613681793213, |
|
"logits/rejected": -1.289603590965271, |
|
"logps/chosen": -0.4545009732246399, |
|
"logps/rejected": -1.2115771770477295, |
|
"loss": 23.1922, |
|
"nll_loss": 0.7150126695632935, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -0.022725049406290054, |
|
"rewards/margins": 0.03785381466150284, |
|
"rewards/rejected": -0.060578860342502594, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 2.6548148148148147, |
|
"grad_norm": 34.75, |
|
"learning_rate": 2.236062274111741e-06, |
|
"log_odds_chosen": 1.6408923864364624, |
|
"log_odds_ratio": -0.2776089906692505, |
|
"logits/chosen": -1.8170640468597412, |
|
"logits/rejected": -1.9727897644042969, |
|
"logps/chosen": -0.4261111319065094, |
|
"logps/rejected": -1.1319156885147095, |
|
"loss": 22.1786, |
|
"nll_loss": 0.6713584661483765, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.02130555734038353, |
|
"rewards/margins": 0.035290226340293884, |
|
"rewards/rejected": -0.056595779955387115, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.7022222222222223, |
|
"grad_norm": 30.75, |
|
"learning_rate": 1.648325479303684e-06, |
|
"log_odds_chosen": 1.6113086938858032, |
|
"log_odds_ratio": -0.2935238778591156, |
|
"logits/chosen": -2.0707173347473145, |
|
"logits/rejected": -1.4977672100067139, |
|
"logps/chosen": -0.4346179962158203, |
|
"logps/rejected": -1.1482003927230835, |
|
"loss": 23.1607, |
|
"nll_loss": 0.6985403895378113, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -0.021730897948145866, |
|
"rewards/margins": 0.03567912429571152, |
|
"rewards/rejected": -0.05741002410650253, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 2.74962962962963, |
|
"grad_norm": 29.75, |
|
"learning_rate": 1.1479127799935029e-06, |
|
"log_odds_chosen": 1.8265297412872314, |
|
"log_odds_ratio": -0.2631281614303589, |
|
"logits/chosen": -1.841491937637329, |
|
"logits/rejected": -1.922586441040039, |
|
"logps/chosen": -0.4327624440193176, |
|
"logps/rejected": -1.2341258525848389, |
|
"loss": 22.9795, |
|
"nll_loss": 0.7204877734184265, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.021638119593262672, |
|
"rewards/margins": 0.04006817191839218, |
|
"rewards/rejected": -0.0617062933743, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.797037037037037, |
|
"grad_norm": 31.625, |
|
"learning_rate": 7.363654638505046e-07, |
|
"log_odds_chosen": 1.7081098556518555, |
|
"log_odds_ratio": -0.29199516773223877, |
|
"logits/chosen": -1.7930389642715454, |
|
"logits/rejected": -1.7181438207626343, |
|
"logps/chosen": -0.449666827917099, |
|
"logps/rejected": -1.2206642627716064, |
|
"loss": 22.9709, |
|
"nll_loss": 0.7106753587722778, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.0224833432585001, |
|
"rewards/margins": 0.03854987770318985, |
|
"rewards/rejected": -0.0610332190990448, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 2.8444444444444446, |
|
"grad_norm": 31.25, |
|
"learning_rate": 4.149511102238568e-07, |
|
"log_odds_chosen": 1.5754259824752808, |
|
"log_odds_ratio": -0.3034347891807556, |
|
"logits/chosen": -2.2559409141540527, |
|
"logits/rejected": -1.71217942237854, |
|
"logps/chosen": -0.46836423873901367, |
|
"logps/rejected": -1.2223981618881226, |
|
"loss": 22.8601, |
|
"nll_loss": 0.7257949113845825, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.023418214172124863, |
|
"rewards/margins": 0.037701696157455444, |
|
"rewards/rejected": -0.06111990660429001, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.891851851851852, |
|
"grad_norm": 28.625, |
|
"learning_rate": 1.8465968595625105e-07, |
|
"log_odds_chosen": 1.6639511585235596, |
|
"log_odds_ratio": -0.2808656096458435, |
|
"logits/chosen": -2.1249499320983887, |
|
"logits/rejected": -1.6745857000350952, |
|
"logps/chosen": -0.475193589925766, |
|
"logps/rejected": -1.1988952159881592, |
|
"loss": 21.8942, |
|
"nll_loss": 0.6743995547294617, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.02375968173146248, |
|
"rewards/margins": 0.03618507459759712, |
|
"rewards/rejected": -0.0599447600543499, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 2.9392592592592592, |
|
"grad_norm": 30.75, |
|
"learning_rate": 4.620049625329803e-08, |
|
"log_odds_chosen": 1.7966537475585938, |
|
"log_odds_ratio": -0.25377795100212097, |
|
"logits/chosen": -1.9389528036117554, |
|
"logits/rejected": -1.399864912033081, |
|
"logps/chosen": -0.4379648268222809, |
|
"logps/rejected": -1.1957590579986572, |
|
"loss": 22.7814, |
|
"nll_loss": 0.6846402883529663, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.021898243576288223, |
|
"rewards/margins": 0.03788971155881882, |
|
"rewards/rejected": -0.05978795886039734, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.986666666666667, |
|
"grad_norm": 32.75, |
|
"learning_rate": 0.0, |
|
"log_odds_chosen": 1.8841956853866577, |
|
"log_odds_ratio": -0.24858447909355164, |
|
"logits/chosen": -1.8983337879180908, |
|
"logits/rejected": -1.5074989795684814, |
|
"logps/chosen": -0.40289902687072754, |
|
"logps/rejected": -1.209084391593933, |
|
"loss": 22.233, |
|
"nll_loss": 0.6980301737785339, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.020144950598478317, |
|
"rewards/margins": 0.040309272706508636, |
|
"rewards/rejected": -0.06045422703027725, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 2.986666666666667, |
|
"step": 315, |
|
"total_flos": 0.0, |
|
"train_loss": 31.135096304757255, |
|
"train_runtime": 6745.6063, |
|
"train_samples_per_second": 3.002, |
|
"train_steps_per_second": 0.047 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 315, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|